library(tidyverse)
library(data.table)

合并函数到一个文档方便查看。

list.files('180901_靳晓松_函数',full.names = T) %>% 
    as_tibble() %>% 
    mutate(fun = map(value,read_file)) %>% 
    unnest() %>% 
    .$fun %>% 
    paste0(.,paste0(rep('\n',3),collapse = '')) %>% 
    str_flatten() %>% 
    write_file('function_sum.txt')

导入测试数据。

freq_sort

freq_nosort <- function(data, var, fill = "#1f77b4", title='变量频数查询') {
  data %>%
  dplyr::count(eval(parse(text = var))) %>%
  ggplot(aes(x = `eval(parse(text = var))`, y = n)) +
  geom_bar(stat = "identity", fill = fill) +
  geom_text(aes(label = n), vjust = -0.5, size = 3, family = "MYH") +
  labs(x = "", y = "", title = title)
  # eval(parse(text = mytheme_b))
}
data <- test_data
var <- 'bad_good'
fill <- "#1f77b4"
title <- '变量频数查询'
data %>%
  dplyr::count(eval(parse(text = var))) %>%
  ggplot(aes(x = `eval(parse(text = var))`, y = n)) +
  geom_bar(stat = "identity", fill = fill) +
  geom_text(aes(label = n), vjust = -0.5, size = 3, family = "MYH") +
  labs(x = "", y = "", title = title)

  # eval(parse(text = mytheme_b))
  1. 每个bar给数据值非常好,但是我建议给有效数字,或者用,分开,见修改后函数。
  2. 建议titlethemedefault值,不然影响函数调用。比如,每个人都必须要输入titletheme才能调用函数,否则报错,但是这是个trivial的问题。
  3. xy可以不需要删除,建议给default值方便用户设计个性化。
  4. 其他大部分函数可以按照这个来修改。
function()
data <- test_data
function()
data <- test_data
var <- 'bad_good'
fill <- "#1f77b4"
title <- '变量频数查询'
data %>%
  dplyr::count(eval(parse(text = var))) %>%
  ggplot(aes(x = `eval(parse(text = var))`, y = n)) +
  geom_bar(stat = "identity", fill = fill) +
  geom_text(aes(label = n), vjust = -0.5, size = 3, family = "MYH") +
  labs(x = "", y = "", title = title)

  # eval(parse(text = mytheme_b))
freq_nosort <- function(data, var, fill = "#1f77b4", title='变量频数查询', theme = 'theme_bw()', x = var, y = '频数') {
library(tidyverse)  
library(formattable)
data %>%
    count(eval(parse(text = var))) %>%
    ggplot(aes(x = `eval(parse(text = var))`, y = n)) +
    geom_bar(stat = "identity", fill = fill) +
    geom_text(aes(label = accounting(n,digits = 0)), vjust = -0.5, size = 3, family = "MYH") +
    labs(x = x, y = y, title = title) +
    eval(parse(text = theme))
}
freq_nosort(test_data,'bad_good')

xvar_pass_cumsum_line

xvar_pass_cumsum_line <- function(data, xvar, yvar, fill = "#1f77b4", title) {
  data %>% 
  dplyr::count(eval(parse(text = xvar)), eval(parse(text = yvar))) %>%
  spread(`eval(parse(text = yvar))`, n) %>%
  mutate(bad = ifelse(bad %in% NA, 0, bad),
         badcumsum  = cumsum(bad),
         goodcumsum = cumsum(good),
         percumsum  = round(badcumsum / (badcumsum + goodcumsum) * 100, 1)) %>%
  ggplot(aes(x = `eval(parse(text = xvar))`, y = percumsum, color = "red")) +
  geom_line(group = 1, size = 1) +
  geom_point(shape = 19, size = 2) +
  geom_text(aes(label = paste0(percumsum, "%")), 
            vjust = -0.5, size = 3, family = "MYH", color = "black") +
  scale_color_manual(values = fill) +
  labs(x = "", y = "", title = title) +
  eval(parse(text = mytheme_b)) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1), legend.position = "none")
}
data <- test_data
var <- 'bad_good'
fill <- "#1f77b4"
data %>% 
    count(type1,bad_good) %>% 
    spread(bad_good,n) %>% 
    mutate(
        bad = ifelse(bad %in% NA, 0, bad)
        ,badcumsum  = cumsum(bad)
        ,goodcumsum = cumsum(good)
        ,percumsum  = round(badcumsum / (badcumsum + goodcumsum) * 100, 1)) %>%
  ggplot(aes(x = type1, y = percumsum, color = "red")) +
  geom_line(group = 1, size = 1) +
  geom_point(shape = 19, size = 2) +
  geom_text(aes(label = paste0(percumsum, "%")), 
            vjust = -0.5, size = 3, family = "MYH", color = "black") +
  scale_color_manual(values = fill)

这种滚动逾期率的思路我可以借鉴下,这个函数看起来没什么问题。 但是可以简化很多,代码如下。

  1. cumsum没有na.rm参数,因此需要对goodbad都执行替换,这里最简单的方式是加一条代码mutate_if(is.double,~ifelse(is.na(.),0,.)) %>%
  2. is.character替换成as.factor
data <- test_data
var <- 'bad_good'
fill <- "#1f77b4"
data %>% 
    mutate_if(is.character,as.factor) %>% 
    mutate_if(is.double,~ifelse(is.na(.),0,.)) %>% 
    count(type1,bad_good) %>% 
    spread(bad_good,n) %>% 
    mutate(
        badcumsum  = cumsum(bad)
        ,goodcumsum = cumsum(good)
        ,percumsum  = round(badcumsum / (badcumsum + goodcumsum) * 100, 1)) %>%
    ggplot(aes(x = type1, y = percumsum)) +
    geom_line(group = 1, size = 1) +
    geom_point(shape = 19, size = 2) +
    geom_text(aes(label = paste0(percumsum, "%")), 
            vjust = -0.5, size = 3, family = "MYH", color = "black") +
    scale_color_manual(values = fill)

LS0tDQp0aXRsZTogIkZ1bmN0aW9uIFJldmlldyINCmF1dGhvcjogIkppYXhpYW5nIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShkYXRhLnRhYmxlKQ0KYGBgDQoNCuWQiOW5tuWHveaVsOWIsOS4gOS4quaWh+aho+aWueS+v+afpeeci+OAgg0KDQpgYGB7ciBldmFsPUZ9DQpsaXN0LmZpbGVzKCcxODA5MDFf6Z2z5pmT5p2+X+WHveaVsCcsZnVsbC5uYW1lcyA9IFQpICU+JSANCiAgICBhc190aWJibGUoKSAlPiUgDQogICAgbXV0YXRlKGZ1biA9IG1hcCh2YWx1ZSxyZWFkX2ZpbGUpKSAlPiUgDQogICAgdW5uZXN0KCkgJT4lIA0KICAgIC4kZnVuICU+JSANCiAgICBwYXN0ZTAoLixwYXN0ZTAocmVwKCdcbicsMyksY29sbGFwc2UgPSAnJykpICU+JSANCiAgICBzdHJfZmxhdHRlbigpICU+JSANCiAgICB3cml0ZV9maWxlKCdmdW5jdGlvbl9zdW0udHh0JykNCmBgYA0KDQrlr7zlhaXmtYvor5XmlbDmja7jgIINCg0KYGBge3J9DQp0ZXN0X2RhdGEgPC0gZnJlYWQoJzE4MDkwMV/pnbPmmZPmnb5f5rWL6K+V5pWw5o2uLmNzdicpDQpgYGANCg0KIyBgZnJlcV9zb3J0YA0KDQpgYGB7cn0NCmZyZXFfbm9zb3J0IDwtIGZ1bmN0aW9uKGRhdGEsIHZhciwgZmlsbCA9ICIjMWY3N2I0IiwgdGl0bGU9J+WPmOmHj+mikeaVsOafpeivoicpIHsNCiAgZGF0YSAlPiUNCiAgZHBseXI6OmNvdW50KGV2YWwocGFyc2UodGV4dCA9IHZhcikpKSAlPiUNCiAgZ2dwbG90KGFlcyh4ID0gYGV2YWwocGFyc2UodGV4dCA9IHZhcikpYCwgeSA9IG4pKSArDQogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBmaWxsID0gZmlsbCkgKw0KICBnZW9tX3RleHQoYWVzKGxhYmVsID0gbiksIHZqdXN0ID0gLTAuNSwgc2l6ZSA9IDMsIGZhbWlseSA9ICJNWUgiKSArDQogIGxhYnMoeCA9ICIiLCB5ID0gIiIsIHRpdGxlID0gdGl0bGUpDQogICMgZXZhbChwYXJzZSh0ZXh0ID0gbXl0aGVtZV9iKSkNCn0NCmBgYA0KDQpgYGB7cn0NCmRhdGEgPC0gdGVzdF9kYXRhDQp2YXIgPC0gJ2JhZF9nb29kJw0KZmlsbCA8LSAiIzFmNzdiNCINCnRpdGxlIDwtICflj5jph4/popHmlbDmn6Xor6InDQpkYXRhICU+JQ0KICBkcGx5cjo6Y291bnQoZXZhbChwYXJzZSh0ZXh0ID0gdmFyKSkpICU+JQ0KICBnZ3Bsb3QoYWVzKHggPSBgZXZhbChwYXJzZSh0ZXh0ID0gdmFyKSlgLCB5ID0gbikpICsNCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIGZpbGwgPSBmaWxsKSArDQogIGdlb21fdGV4dChhZXMobGFiZWwgPSBuKSwgdmp1c3QgPSAtMC41LCBzaXplID0gMywgZmFtaWx5ID0gIk1ZSCIpICsNCiAgbGFicyh4ID0gIiIsIHkgPSAiIiwgdGl0bGUgPSB0aXRsZSkNCiAgIyBldmFsKHBhcnNlKHRleHQgPSBteXRoZW1lX2IpKQ0KYGBgDQoNCjEuIOavj+S4qmJhcue7meaVsOaNruWAvOmdnuW4uOWlve+8jOS9huaYr+aIkeW7uuiurue7meacieaViOaVsOWtl++8jOaIluiAheeUqGAsYOWIhuW8gO+8jOingeS/ruaUueWQjuWHveaVsOOAgg0KMS4g5bu66K6uYHRpdGxlYOWSjGB0aGVtZWDnu5lgZGVmYXVsdGDlgLzvvIzkuI3nhLblvbHlk43lh73mlbDosIPnlKjjgILmr5TlpoLvvIzmr4/kuKrkurrpg73lv4XpobvopoHovpPlhaVgdGl0bGVg5ZKMYHRoZW1lYOaJjeiDveiwg+eUqOWHveaVsO+8jOWQpuWImeaKpemUme+8jOS9huaYr+i/meaYr+S4qnRyaXZpYWznmoTpl67popjjgIINCjwhLS0gMS4gYGV2YWwocGFyc2UodGV4dCA9IHZhcikpYOWPr+S7pemHjee9ru+8jOi/meagt+WBh+iuvumHjeWkjeS7o+eggeOAgiAtLT4NCjEuIGB4YOWSjGB5YOWPr+S7peS4jemcgOimgeWIoOmZpO+8jOW7uuiurue7mWRlZmF1bHTlgLzmlrnkvr/nlKjmiLforr7orqHkuKrmgKfljJbjgIINCjEuIOWFtuS7luWkp+mDqOWIhuWHveaVsOWPr+S7peaMieeFp+i/meS4quadpeS/ruaUueOAgg0KDQpgYGB7cn0NCmRhdGEgPC0gdGVzdF9kYXRhDQp2YXIgPC0gJ2JhZF9nb29kJw0KZmlsbCA8LSAiIzFmNzdiNCINCnRpdGxlIDwtICflj5jph4/popHmlbDmn6Xor6InDQpkYXRhICU+JQ0KICBkcGx5cjo6Y291bnQoZXZhbChwYXJzZSh0ZXh0ID0gdmFyKSkpICU+JQ0KICBnZ3Bsb3QoYWVzKHggPSBgZXZhbChwYXJzZSh0ZXh0ID0gdmFyKSlgLCB5ID0gbikpICsNCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIGZpbGwgPSBmaWxsKSArDQogIGdlb21fdGV4dChhZXMobGFiZWwgPSBuKSwgdmp1c3QgPSAtMC41LCBzaXplID0gMywgZmFtaWx5ID0gIk1ZSCIpICsNCiAgbGFicyh4ID0gIiIsIHkgPSAiIiwgdGl0bGUgPSB0aXRsZSkNCiAgIyBldmFsKHBhcnNlKHRleHQgPSBteXRoZW1lX2IpKQ0KYGBgDQoNCmBgYHtyfQ0KZnJlcV9ub3NvcnQgPC0gZnVuY3Rpb24oZGF0YSwgdmFyLCBmaWxsID0gIiMxZjc3YjQiLCB0aXRsZT0n5Y+Y6YeP6aKR5pWw5p+l6K+iJywgdGhlbWUgPSAndGhlbWVfYncoKScsIHggPSB2YXIsIHkgPSAn6aKR5pWwJykgew0KbGlicmFyeSh0aWR5dmVyc2UpICANCmxpYnJhcnkoZm9ybWF0dGFibGUpDQpkYXRhICU+JQ0KICAgIGNvdW50KGV2YWwocGFyc2UodGV4dCA9IHZhcikpKSAlPiUNCiAgICBnZ3Bsb3QoYWVzKHggPSBgZXZhbChwYXJzZSh0ZXh0ID0gdmFyKSlgLCB5ID0gbikpICsNCiAgICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IiwgZmlsbCA9IGZpbGwpICsNCiAgICBnZW9tX3RleHQoYWVzKGxhYmVsID0gYWNjb3VudGluZyhuLGRpZ2l0cyA9IDApKSwgdmp1c3QgPSAtMC41LCBzaXplID0gMywgZmFtaWx5ID0gIk1ZSCIpICsNCiAgICBsYWJzKHggPSB4LCB5ID0geSwgdGl0bGUgPSB0aXRsZSkgKw0KICAgIGV2YWwocGFyc2UodGV4dCA9IHRoZW1lKSkNCn0NCmBgYA0KDQpgYGB7cn0NCmZyZXFfbm9zb3J0KHRlc3RfZGF0YSwnYmFkX2dvb2QnKQ0KYGBgDQoNCg0KIyBgeHZhcl9wYXNzX2N1bXN1bV9saW5lYA0KDQpgYGB7cn0NCnh2YXJfcGFzc19jdW1zdW1fbGluZSA8LSBmdW5jdGlvbihkYXRhLCB4dmFyLCB5dmFyLCBmaWxsID0gIiMxZjc3YjQiLCB0aXRsZSkgew0KICBkYXRhICU+JSANCiAgZHBseXI6OmNvdW50KGV2YWwocGFyc2UodGV4dCA9IHh2YXIpKSwgZXZhbChwYXJzZSh0ZXh0ID0geXZhcikpKSAlPiUNCiAgc3ByZWFkKGBldmFsKHBhcnNlKHRleHQgPSB5dmFyKSlgLCBuKSAlPiUNCiAgbXV0YXRlKGJhZCA9IGlmZWxzZShiYWQgJWluJSBOQSwgMCwgYmFkKSwNCiAgICAgICAgIGJhZGN1bXN1bSAgPSBjdW1zdW0oYmFkKSwNCiAgICAgICAgIGdvb2RjdW1zdW0gPSBjdW1zdW0oZ29vZCksDQogICAgICAgICBwZXJjdW1zdW0gID0gcm91bmQoYmFkY3Vtc3VtIC8gKGJhZGN1bXN1bSArIGdvb2RjdW1zdW0pICogMTAwLCAxKSkgJT4lDQogIGdncGxvdChhZXMoeCA9IGBldmFsKHBhcnNlKHRleHQgPSB4dmFyKSlgLCB5ID0gcGVyY3Vtc3VtLCBjb2xvciA9ICJyZWQiKSkgKw0KICBnZW9tX2xpbmUoZ3JvdXAgPSAxLCBzaXplID0gMSkgKw0KICBnZW9tX3BvaW50KHNoYXBlID0gMTksIHNpemUgPSAyKSArDQogIGdlb21fdGV4dChhZXMobGFiZWwgPSBwYXN0ZTAocGVyY3Vtc3VtLCAiJSIpKSwgDQogICAgICAgICAgICB2anVzdCA9IC0wLjUsIHNpemUgPSAzLCBmYW1pbHkgPSAiTVlIIiwgY29sb3IgPSAiYmxhY2siKSArDQogIHNjYWxlX2NvbG9yX21hbnVhbCh2YWx1ZXMgPSBmaWxsKSArDQogIGxhYnMoeCA9ICIiLCB5ID0gIiIsIHRpdGxlID0gdGl0bGUpICsNCiAgZXZhbChwYXJzZSh0ZXh0ID0gbXl0aGVtZV9iKSkgKw0KICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDYwLCBoanVzdCA9IDEpLCBsZWdlbmQucG9zaXRpb24gPSAibm9uZSIpDQp9DQpgYGANCg0KYGBge3J9DQpkYXRhIDwtIHRlc3RfZGF0YQ0KdmFyIDwtICdiYWRfZ29vZCcNCmZpbGwgPC0gIiMxZjc3YjQiDQpkYXRhICU+JSANCiAgICBjb3VudCh0eXBlMSxiYWRfZ29vZCkgJT4lIA0KICAgIHNwcmVhZChiYWRfZ29vZCxuKSAlPiUgDQogICAgbXV0YXRlKA0KICAgICAgICBiYWQgPSBpZmVsc2UoYmFkICVpbiUgTkEsIDAsIGJhZCkNCiAgICAgICAgLGJhZGN1bXN1bSAgPSBjdW1zdW0oYmFkKQ0KICAgICAgICAsZ29vZGN1bXN1bSA9IGN1bXN1bShnb29kKQ0KICAgICAgICAscGVyY3Vtc3VtICA9IHJvdW5kKGJhZGN1bXN1bSAvIChiYWRjdW1zdW0gKyBnb29kY3Vtc3VtKSAqIDEwMCwgMSkpICU+JQ0KICBnZ3Bsb3QoYWVzKHggPSB0eXBlMSwgeSA9IHBlcmN1bXN1bSwgY29sb3IgPSAicmVkIikpICsNCiAgZ2VvbV9saW5lKGdyb3VwID0gMSwgc2l6ZSA9IDEpICsNCiAgZ2VvbV9wb2ludChzaGFwZSA9IDE5LCBzaXplID0gMikgKw0KICBnZW9tX3RleHQoYWVzKGxhYmVsID0gcGFzdGUwKHBlcmN1bXN1bSwgIiUiKSksIA0KICAgICAgICAgICAgdmp1c3QgPSAtMC41LCBzaXplID0gMywgZmFtaWx5ID0gIk1ZSCIsIGNvbG9yID0gImJsYWNrIikgKw0KICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzID0gZmlsbCkNCmBgYA0KDQrov5nnp43mu5rliqjpgL7mnJ/njofnmoTmgJ3ot6/miJHlj6/ku6XlgJ/pibTkuIvvvIzov5nkuKrlh73mlbDnnIvotbfmnaXmsqHku4DkuYjpl67popjjgIINCuS9huaYr+WPr+S7peeugOWMluW+iOWkmu+8jOS7o+eggeWmguS4i+OAgg0KDQoxLiBgY3Vtc3VtYOayoeaciWBuYS5ybWDlj4LmlbDvvIzlm6DmraTpnIDopoHlr7lgZ29vZGDlkoxgYmFkYOmDveaJp+ihjOabv+aNou+8jOi/memHjOacgOeugOWNleeahOaWueW8j+aYr+WKoOS4gOadoeS7o+eggWBtdXRhdGVfaWYoaXMuZG91YmxlLH5pZmVsc2UoaXMubmEoLiksMCwuKSkgJT4lIGANCjEuIGBpcy5jaGFyYWN0ZXJg5pu/5o2i5oiQYGFzLmZhY3RvcmANCg0KYGBge3J9DQpkYXRhIDwtIHRlc3RfZGF0YQ0KdmFyIDwtICdiYWRfZ29vZCcNCmZpbGwgPC0gIiMxZjc3YjQiDQpkYXRhICU+JSANCiAgICBtdXRhdGVfaWYoaXMuY2hhcmFjdGVyLGFzLmZhY3RvcikgJT4lIA0KICAgIG11dGF0ZV9pZihpcy5kb3VibGUsfmlmZWxzZShpcy5uYSguKSwwLC4pKSAlPiUgDQogICAgY291bnQodHlwZTEsYmFkX2dvb2QpICU+JSANCiAgICBzcHJlYWQoYmFkX2dvb2QsbikgJT4lIA0KICAgIG11dGF0ZSgNCiAgICAgICAgYmFkY3Vtc3VtICA9IGN1bXN1bShiYWQpDQogICAgICAgICxnb29kY3Vtc3VtID0gY3Vtc3VtKGdvb2QpDQogICAgICAgICxwZXJjdW1zdW0gID0gcm91bmQoYmFkY3Vtc3VtIC8gKGJhZGN1bXN1bSArIGdvb2RjdW1zdW0pICogMTAwLCAxKSkgJT4lDQogICAgZ2dwbG90KGFlcyh4ID0gdHlwZTEsIHkgPSBwZXJjdW1zdW0pKSArDQogICAgZ2VvbV9saW5lKGdyb3VwID0gMSwgc2l6ZSA9IDEpICsNCiAgICBnZW9tX3BvaW50KHNoYXBlID0gMTksIHNpemUgPSAyKSArDQogICAgZ2VvbV90ZXh0KGFlcyhsYWJlbCA9IHBhc3RlMChwZXJjdW1zdW0sICIlIikpLCANCiAgICAgICAgICAgIHZqdXN0ID0gLTAuNSwgc2l6ZSA9IDMsIGZhbWlseSA9ICJNWUgiLCBjb2xvciA9ICJibGFjayIpICsNCiAgICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzID0gZmlsbCkNCmBgYA0KDQo=