tutoring2

正则表达式

李家翔 2019-03-16

## 需求: 从V1中按照标识字符去提取某些内容,衍生两列
# 返回AY1后的第3-4位字符,列名为A
# 返回MW1后的第1位字符,  列名为B
#### 注意:若AY1前的第一个字符是字母则返回NA
        #  若MW1前的第一个字符是字母则返回NA
## 比如其中某段字符是"AA222AY10001"则返回"01";若该段字符是"AAAY10001"则返回NA;
suppressMessages(library(tidyverse))
# Sample record used as the input of the extraction demo; it contains one
# "AY1" segment and one "MW1" segment.
s <- "0141084950999991978010100004+36150-005350SY-MT+0005LXGB V0200901N01131030001CN0090001N1+01501+01301101811ADDAA199000091AY121999GA1031+004209059GA2061+030009039GF106991031051004501071999MA1101801999999MD1210051+9999MW1051REMSYN017700// 83614 86360"
# Extract the 3rd-4th characters that follow the "AY1" marker.
#
# A marker is only accepted when it is not directly preceded by an ASCII
# letter, e.g. "AA222AY10001" gives "01" while "AAAY10001" gives NA. A marker
# at the very start of the string has no preceding character and is accepted.
# When several markers are present, the first accepted one is used.
#
# s: character vector of records (a single string works as well).
# Returns a character vector the same length as `s`; NA_character_ where no
# accepted marker is followed by two arbitrary characters and then two digits,
# or where `s` itself is NA.
get_ay1 <- function(s){
    # (?<![A-Za-z]) rejects a marker preceded by a letter, .{2} skips the 1st
    # and 2nd characters after the marker, and the capture group holds the
    # 3rd-4th characters, which must both be digits.
    hit <- str_match(s, "(?<![A-Za-z])AY1.{2}(\\d{2})")
    # Column 1 is the full match; column 2 is the capture group.
    hit[, 2]
}
# Extract the first character that follows the "MW1" marker.
#
# A marker is only accepted when it is not directly preceded by an ASCII
# letter. A marker at the very start of the string has no preceding character
# and is accepted. When several markers are present, the first accepted one
# is used.
#
# s: character vector of records (a single string works as well).
# Returns a character vector the same length as `s`; NA_character_ where no
# accepted marker is followed by a digit, or where `s` itself is NA.
get_mw1 <- function(s){
    # (?<![A-Za-z]) rejects a marker preceded by a letter; the capture group
    # holds the single digit right after the marker.
    hit <- str_match(s, "(?<![A-Za-z])MW1(\\d)")
    # Column 1 is the full match; column 2 is the capture group.
    hit[, 2]
}
# Build a one-row data frame from the sample record and derive one column per
# marker. stringsAsFactors = FALSE keeps `s` a character column on R < 4.0 too.
data.frame(s = s, stringsAsFactors = FALSE) %>%
    mutate(
        # map() applies each extractor record by record and yields list-columns.
        ay1 = map(s, get_ay1),
        mw1 = map(s, get_mw1)
    ) %>%
    # Name the list-columns explicitly: calling unnest() without `cols` is
    # deprecated and emits a warning.
    unnest(cols = c(ay1, mw1))
##                                                                                                                                                                                                                                                        s
## 1 0141084950999991978010100004+36150-005350SY-MT+0005LXGB V0200901N01131030001CN0090001N1+01501+01301101811ADDAA199000091AY121999GA1031+004209059GA2061+030009039GF106991031051004501071999MA1101801999999MD1210051+9999MW1051REMSYN017700// 83614 86360
##   ay1 mw1
## 1  NA  NA