-- Return 100 rows of `content`, ordered by a seeded rand(123) so the sample
-- can be reproduced.
-- NOTE(review): assumes the engine's seeded rand() yields the same order on
-- every run — confirm for the target engine.
-- The table is schema-qualified (xyjl.) to match the other queries in this
-- file and to avoid depending on the session's default database.
select
    content
from xyjl.t181115_subsetmainsms_ljx
order by rand(123)
limit 100
# Regex building blocks composed with rebus; each `##` line is the echoed result.
library(rebus)

# Alternation: matches either 'http' or 'cn'.
or('http','cn')
## <regex> (?:http|cn)

# Text enclosed in 【 】 anchored at the start of the string.
# With the default char_class = NA, one_or_more() wrapped ANY_CHAR in a
# character class and produced `[.]+`, which matches literal dots only.
# char_class = FALSE keeps `.` as the any-character wildcard.
START %R%
    '【' %R%
    one_or_more(ANY_CHAR, char_class = FALSE) %R%
    '】'
## <regex> ^【.+】

# Plain literal keyword.
'额度'
## [1] "额度"

# Four or more consecutive digits (the backslash is doubled for the R string).
'\\d{4,}'
## [1] "\\d{4,}"
(?:http|cn)
-- List the `content` of rows that satisfy the current rule set, in seeded
-- random order:
--   * matches '(?:http|cn)'  (contains 'http' or 'cn')
--   * contains '额度'
--   * matches '\\d{4,}'      (presumably a run of 4+ digits, assuming the
--                             engine unescapes the doubled backslash — confirm)
--   * does NOT contain '审核'
-- NOTE(review): there is no limit clause, so every matching row is returned —
-- confirm that is intended.
select
    content
from xyjl.t181115_subsetmainsms_ljx
where regexp_like(content, '(?:http|cn)')
    and regexp_like(content, '额度')
    and regexp_like(content, '\\d{4,}')
    and not regexp_like(content, '审核')
order by rand(123)
-- Average, over all rows of the table, of the boolean "row matches all three
-- patterns" — i.e. the share of rows hit by the rule.
-- NOTE(review): relies on the engine accepting a boolean expression inside
-- avg() — confirm for the target engine.
select
    avg(
        regexp_like(content, '(?:http|cn)')
        and regexp_like(content, '额度')
        and regexp_like(content, '\\d{4,}')
        -- and !regexp_like(content,'审核')
        -- The '审核' exclusion is left out here: it causes very little interference.
    )
from xyjl.t181115_subsetmainsms_ljx
之后处理短信的规则
1. avg(A and B and C and !D) > 5%
2. rand(123) 抽取 100 个,查看准确率
3. avg(A and B and C and !D and E) > 5%