Shiny07---用Shiny完成分箱调参工作

网友投稿 593 2022-09-02

Shiny07---用Shiny完成分箱调参工作

Shiny07---用Shiny完成分箱调参工作

业务提需求,希望可以自动寻找阈值,完成分箱工作,继而找到合适的区间,区别好坏用户。采用R软件的smbinning包,提供自动最优分箱和手动切分两种方式,便于业务同事自动化的切分区间和观察结果。 提供ui和sever脚本,供大家参考。

UI脚本

# 参数调整 --------------------------------------------------------------------tabPanel("参数调整", sidebarPanel( #侧边栏的控制键 width = 3, #设置侧边栏的宽度 #时间控件 #时间控件 dateRangeInput( inputId = "para_date", label = h2(strong(span("应还日期", style = "color:blue"))), start = Sys.Date() - 7, #开始日期设置 end = Sys.Date() - 1, language = "zh-CN", width = "90%", separator = "至" ), textInput(inputId="seq_bin",h2(strong(span("输入分组间隔", style = "color:blue"))), "Sequence"), ## 主维度选择 radioButtons( #单选框 inputId = "para_feature_choose", label = h2(strong(span("维度选择", style = "color:blue"))), choices = list( "魔蝎运营商" = "para_mx_operator", "其他" = "para_others" ), selected = "para_mx_operator"#默认T+1数据 ), ## 子维度选择 radioButtons( #单选框 inputId = "para_branch_dimension", label = h2(strong(span("维度选择", style = "color:blue"))), choices = list( "最近一个月通话时长" = "mx_last1month_call_time", "最近一个月通话次数" = "mx_last1month_call_nums", "最近一个月主叫次数" = "mx_last1month_dial_nums", "最近一个月被叫次数" = "mx_last1month_dialed_nums", "最近一个月通话号码数目" = "mx_last1month_call_number", "最近一个月主叫号码数" = "mx_last1month_dial_number", "最近一个月被叫号码数" = "mx_last1month_dialed_number", "最近一个月通话归属地数" = "mx_last1month_call_city" ), selected = "mx_last1month_call_time" ), ## 提交按钮 actionButton(inputId = "para_update", label = "提 交",icon=icon('play-circle')#按钮图案 )#按钮,触发机制一样,第一次看到 ), # Show a summary of the dataset and an HTML table with the # requested number of observations mainPanel(tabsetPanel(type = "pills", #tabs标准外观,"pills"对三个标签字段着色 tabPanel("Summary", DT::dataTableOutput("para_Summary") ), tabPanel("Detail", DT::dataTableOutput("para_bin_Summary") ) ) ))

Server脚本

# 参数调整 -------------------------------------------------------------------- datasetInput_para <- eventReactive( eventExpr = input$para_update,## 提交按钮 valueExpr = { # 加上进度条显示 progress <- Progress$new(session, min = 1, max = 10 # ,style = "old" ) on.exit(progress$close()) progress$set(message = '程序执行中') for (i in 1:5) { progress$set(value = i) Sys.sleep(0.5) } ## 判断不同的维度,分别运行相关的函数 if(input$para_feature_choose == "para_mx_operator"){ para_data <- para_mx_operator_f(input$para_date[1],input$para_date[2]) }else if(input$para_feature_choose == "para_others"){ para_data <- head(mtcars) } for (i in 6:10) { progress$set(value = i) Sys.sleep(0.5) } para_data }, ignoreNULL = T ) observe({ ## 分析主维度 x <- input$para_feature_choose # Can use character(0) to remove all choices if (is.null(x)){ x <- character(0) } ## 子维度的动态展示 if (x == "para_mx_operator") { choiceNames <- c( "最近一个月通话时长", "最近一个月通话次数", "最近一个月主叫次数", "最近一个月被叫次数", "最近一个月通话号码数目", "最近一个月主叫号码数", "最近一个月被叫号码数", "最近一个月通话归属地数" ) choiceValues <- c( "mx_last1month_call_time", # 通话时长 "mx_last1month_call_nums", # 通话次数 "mx_last1month_dial_nums", # 主叫次数 "mx_last1month_dialed_nums", # 被叫次数 "mx_last1month_call_number", # 通话号码数 "mx_last1month_dial_number", # 主叫号码数 "mx_last1month_dialed_number", # 被叫号码数 "mx_last1month_call_city"# 通话归属地 ) } else if (x == "para_others") { choiceNames <- c("没有备选") choiceValues <- c("no_choice") } # Can also set the label and select items updateRadioButtons(session, inputId = "para_branch_dimension", choiceNames = choiceNames, choiceValues = choiceValues ) ## Summary展示 output$para_Summary <- DT::renderDataTable(DT::datatable({ ## 最近一个月通话时长 if(input$para_branch_dimension == "mx_last1month_call_time"){ analyze_sub_f(data = datasetInput_para(),x = "total_time",y = "user_label") }else if(input$para_branch_dimension == "mx_last1month_call_nums"){ ## 最近一个月通话次数 analyze_sub_f(data = datasetInput_para(),x = "total_num",y = "user_label") }else if(input$para_branch_dimension == "mx_last1month_dial_nums"){ ## 最近一个月主叫次数 analyze_sub_f(data = datasetInput_para(),x = "total_dial_num",y = "user_label") }else if(input$para_branch_dimension == "mx_last1month_dialed_nums"){ ## 最近一个月被叫次数 analyze_sub_f(data = datasetInput_para(),x = "total_dialed_num",y = "user_label") }else if(input$para_branch_dimension == "mx_last1month_call_number"){ ## 最近一个月通话号码数目 analyze_sub_f(data = datasetInput_para(),x = "total_peer_num",y = "user_label") }else if(input$para_branch_dimension == "mx_last1month_dial_number"){ ## 最近一个月主叫号码数 analyze_sub_f(data = datasetInput_para(),x = "total_dial_peer_num",y = "user_label") }else if(input$para_branch_dimension == "mx_last1month_dialed_number"){ ## 最近一个月被叫号码数 analyze_sub_f(data = datasetInput_para(),x = "total_dialed_peer_num",y = "user_label") }else if(input$para_branch_dimension == "mx_last1month_call_city"){ ## 最近一个月被叫号码数 analyze_sub_f(data = datasetInput_para(),x = "total_city_num",y = "user_label") } }, rownames = FALSE, options = list(autoWidth = TRUE))) ## Detail展示 output$para_bin_Summary <- DT::renderDataTable(DT::datatable({ if(input$para_branch_dimension == "mx_last1month_call_time"){ bin_sub_f(data = datasetInput_para(),x = "total_time",y = "user_label",seq_bin = input$seq_bin) }else if(input$para_branch_dimension == "mx_last1month_call_nums"){ ## 最近一个月通话次数 bin_sub_f(data = datasetInput_para(),x = "total_num",y = "user_label",seq_bin = input$seq_bin) }else if(input$para_branch_dimension == "mx_last1month_dial_nums"){ ## 最近一个月主叫次数 bin_sub_f(data = datasetInput_para(),x = "total_dial_num",y = "user_label",seq_bin = input$seq_bin) }else if(input$para_branch_dimension == "mx_last1month_dialed_nums"){ ## 最近一个月被叫次数 bin_sub_f(data = datasetInput_para(),x = "total_dialed_num",y = "user_label",seq_bin = input$seq_bin) }else if(input$para_branch_dimension == "mx_last1month_call_number"){ ## 最近一个月通话号码数目 bin_sub_f(data = datasetInput_para(),x = "total_peer_num",y = "user_label",seq_bin = input$seq_bin) }else if(input$para_branch_dimension == "mx_last1month_dial_number"){ ## 最近一个月主叫号码数 bin_sub_f(data = datasetInput_para(),x = "total_dial_peer_num",y = "user_label",seq_bin = input$seq_bin) }else if(input$para_branch_dimension == "mx_last1month_dialed_number"){ ## 最近一个月被叫号码数 bin_sub_f(data = datasetInput_para(),x = "total_dialed_peer_num",y = "user_label",seq_bin = input$seq_bin) }else if(input$para_branch_dimension == "mx_last1month_call_city"){ ## 最近一个月被叫号码数 bin_sub_f(data = datasetInput_para(),x = "total_city_num",y = "user_label",seq_bin = input$seq_bin) } }, rownames = FALSE, options = list(autoWidth = TRUE))) })

Function脚本

一些计算的逻辑放在这个脚本下,以供Server脚本调用

# 运营商数据 -----------------------------------------------------------------para_mx_operator_f <- function(start_date = Sys.Date() - 7, end_date = Sys.Date() - 1) { library(RODBC)#使用配置文件,连接Mysql channel <- odbcConnect("MySQL", uid = "user0001", pwd = "******") sql <- paste0("SELECT t3.*, t4.total_time, t4.total_num, t4.total_dial_num, t4.total_dialed_num, t4.total_peer_num, t4.total_dial_peer_num, t4.total_dialed_peer_num, t4.total_city_num FROM (SELECT t1.id AS order_id, t2.user_id, t2.id, t1.user_label FROM (SELECT id, user_id, gmt_create, CASE WHEN status=8 THEN 1 WHEN status=9 THEN 0 END AS user_label FROM t_order_info WHERE status IN (5,6,7,8,9,10,11) AND date(reim_date)>='",start_date,"' AND date(reim_date)<='",end_date,"') t1 LEFT JOIN (SELECT id, user_id, gmt_create FROM t_mx_operator_basic_info) t2 ON t1.user_id=t2.user_id WHERE date(t1.gmt_create)>=date(t2.gmt_create) GROUP BY t1.id ORDER BY t2.gmt_create DESC) t3 LEFT JOIN (SELECT * FROM t_mx_operator_call_analysis WHERE mouth_type=3) t4 ON t3.id=t4.basic_info_id ") mx_operator_data <- sqlQuery(channel, sql, stringsAsFactors = FALSE) return(mx_operator_data) }# 分箱函数 --------------------------------------------------------------------## 子维度分析函数## 参数:数据框、自变量、因变量、分组间隔序列bin_sub_f <- function(data,x,y,seq_bin){ ## 如果不采取人工划分区间 if(seq_bin == "Sequence"){ library(smbinning) result_total_num <- smbinning(df = data, y = y, x = x, p = 0.05) ## 自动分组可能存在一些异常 if (length(result_total_num) > 1) { data_total_num <- data.frame(result_total_num$ivtable[, c("Cutpoint", "CntGood", "CntBad", "CntRec", "BadRate", "IV")]) } else{ data_total_num <- data.frame(Information = "No significant splits") } }else{ library(smbinning) ## 手动输入间隔 cuts <- as.numeric(unlist(stringr::str_split(seq_bin,","))) result_total_num <- smbinning.custom(df = data, y = y, x = x, cuts = cuts) ## 自动分组可能存在一些异常 if (length(result_total_num) > 1) { data_total_num <- data.frame(result_total_num$ivtable[, c("Cutpoint", "CntGood", "CntBad", "CntRec", "BadRate", "IV")]) } else{ data_total_num <- data.frame(Information = "No significant splits") } } return(data_total_num)}# 子维度数据分析 -----------------------------------------------------------------analyze_sub_f <- function(data,x,y){ # 百分比转化函数 ----------------------------------------------------------------- turn_percentage <- function(var){ return(paste0(round(var*100,2),"%")) } ## 正常还款、逾期已还、逾期未还 ## 频数统计 freq_userlabel <- data.frame(table(data[,y])) colnames(freq_userlabel) <- c( "Name","Value") ## 频率统计 prop_userlabel <- data.frame(prop.table(table(data[,y]))) colnames(prop_userlabel) <- c( "Name","Value") prop_userlabel$Value <- turn_percentage(prop_userlabel$Value) ## 月通话次数的统计 stat_calls_of_lastmonth <- data.frame(Name=paste0(names(summary(data[,x]))),Value=as.vector(summary(data[,x]))) summary_table <- rbind(freq_userlabel,prop_userlabel,stat_calls_of_lastmonth) return(summary_table)}

结果展示

Summary

脚本的主要部分都在,可以参看。shiny的中文资料比较少,可以去RStudio的官网看下官方文档,可能方便理解点。

2018-04-05 于杭州

版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:经常用到的PHP时间类完整实例,可直接用
下一篇:Boosting之Adaboost简单实现
相关文章

 发表评论

暂时没有评论,来抢沙发吧~