There are three kind of variables

  1. factor variable
  2. continous variable
  3. binary variable
    data type is dbl in data.frame but there are only two distinct value (like 1,0).
library(tidyverse)
#> ─ Attaching packages ────────────────────────────────────── tidyverse 1.2.1 ─
#> ✔ ggplot2 3.0.0     ✔ purrr   0.2.5
#> ✔ tibble  1.4.2     ✔ dplyr   0.7.6
#> ✔ tidyr   0.8.1     ✔ stringr 1.3.1
#> ✔ readr   1.1.1     ✔ forcats 0.3.0
#> ─ Conflicts ──────────────────────────────────────── tidyverse_conflicts() ─
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag()    masks stats::lag()
set.seed(123)
sample_data <- 
cbind(
    matrix(
        c(
            1:100
        )
        ,nrow = 10
        ,byrow = F
    )
    ,matrix(
        c(
            letters[1:20]
        )
        ,nrow = 10
        ,byrow = T
    )
    ,matrix(
        c(
            rep(0:1,5)
        )
        ,nrow = 10
        ,byrow = T
    )
) %>% 
    `colnames<-`(paste0('v',1:13)) %>% 
    `rownames<-`(1:10) %>% 
    as.data.frame() %>% 
    mutate_at(vars(1:10,13),as.integer) %>% 
    mutate_at(vars(1:10),~cut_number(.,3))
sample_data
#>        v1     v2     v3     v4     v5     v6     v7     v8     v9    v10
#> 1   [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]
#> 2   [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]
#> 3   [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]
#> 4   (4,7]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  [1,4]  (4,7]
#> 5   (4,7]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7]
#> 6   (4,7]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7]
#> 7  (7,10]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7]  (4,7] (7,10]
#> 8  (7,10] (7,10] (7,10] (7,10] (7,10] (7,10] (7,10] (7,10] (7,10] (7,10]
#> 9  (7,10] (7,10] (7,10] (7,10] (7,10] (7,10] (7,10] (7,10] (7,10] (7,10]
#> 10  [1,4] (7,10] (7,10] (7,10] (7,10] (7,10] (7,10] (7,10] (7,10]  [1,4]
#>    v11 v12 v13
#> 1    a   b   1
#> 2    c   d   2
#> 3    e   f   1
#> 4    g   h   2
#> 5    i   j   1
#> 6    k   l   2
#> 7    m   n   1
#> 8    o   p   2
#> 9    q   r   1
#> 10   s   t   2
library(caret)
#> 载入需要的程辑包:lattice
#> 
#> 载入程辑包:'caret'
#> The following object is masked from 'package:purrr':
#> 
#>     lift
sample_data_after_onehot <- 
    sample_data %>% 
    predict(dummyVars(~.,data=.),newdata =.) %>%
    as.data.frame()
sample_data_after_onehot
#>    v1.[1,4] v1.(4,7] v1.(7,10] v2.[1,4] v2.(4,7] v2.(7,10] v3.[1,4]
#> 1         1        0         0        1        0         0        1
#> 2         1        0         0        1        0         0        1
#> 3         1        0         0        1        0         0        1
#> 4         0        1         0        1        0         0        1
#> 5         0        1         0        0        1         0        0
#> 6         0        1         0        0        1         0        0
#> 7         0        0         1        0        1         0        0
#> 8         0        0         1        0        0         1        0
#> 9         0        0         1        0        0         1        0
#> 10        1        0         0        0        0         1        0
#>    v3.(4,7] v3.(7,10] v4.[1,4] v4.(4,7] v4.(7,10] v5.[1,4] v5.(4,7]
#> 1         0         0        1        0         0        1        0
#> 2         0         0        1        0         0        1        0
#> 3         0         0        1        0         0        1        0
#> 4         0         0        1        0         0        1        0
#> 5         1         0        0        1         0        0        1
#> 6         1         0        0        1         0        0        1
#> 7         1         0        0        1         0        0        1
#> 8         0         1        0        0         1        0        0
#> 9         0         1        0        0         1        0        0
#> 10        0         1        0        0         1        0        0
#>    v5.(7,10] v6.[1,4] v6.(4,7] v6.(7,10] v7.[1,4] v7.(4,7] v7.(7,10]
#> 1          0        1        0         0        1        0         0
#> 2          0        1        0         0        1        0         0
#> 3          0        1        0         0        1        0         0
#> 4          0        1        0         0        1        0         0
#> 5          0        0        1         0        0        1         0
#> 6          0        0        1         0        0        1         0
#> 7          0        0        1         0        0        1         0
#> 8          1        0        0         1        0        0         1
#> 9          1        0        0         1        0        0         1
#> 10         1        0        0         1        0        0         1
#>    v8.[1,4] v8.(4,7] v8.(7,10] v9.[1,4] v9.(4,7] v9.(7,10] v10.[1,4]
#> 1         1        0         0        1        0         0         1
#> 2         1        0         0        1        0         0         1
#> 3         1        0         0        1        0         0         1
#> 4         1        0         0        1        0         0         0
#> 5         0        1         0        0        1         0         0
#> 6         0        1         0        0        1         0         0
#> 7         0        1         0        0        1         0         0
#> 8         0        0         1        0        0         1         0
#> 9         0        0         1        0        0         1         0
#> 10        0        0         1        0        0         1         1
#>    v10.(4,7] v10.(7,10] v11.a v11.c v11.e v11.g v11.i v11.k v11.m v11.o
#> 1          0          0     1     0     0     0     0     0     0     0
#> 2          0          0     0     1     0     0     0     0     0     0
#> 3          0          0     0     0     1     0     0     0     0     0
#> 4          1          0     0     0     0     1     0     0     0     0
#> 5          1          0     0     0     0     0     1     0     0     0
#> 6          1          0     0     0     0     0     0     1     0     0
#> 7          0          1     0     0     0     0     0     0     1     0
#> 8          0          1     0     0     0     0     0     0     0     1
#> 9          0          1     0     0     0     0     0     0     0     0
#> 10         0          0     0     0     0     0     0     0     0     0
#>    v11.q v11.s v12.b v12.d v12.f v12.h v12.j v12.l v12.n v12.p v12.r v12.t
#> 1      0     0     1     0     0     0     0     0     0     0     0     0
#> 2      0     0     0     1     0     0     0     0     0     0     0     0
#> 3      0     0     0     0     1     0     0     0     0     0     0     0
#> 4      0     0     0     0     0     1     0     0     0     0     0     0
#> 5      0     0     0     0     0     0     1     0     0     0     0     0
#> 6      0     0     0     0     0     0     0     1     0     0     0     0
#> 7      0     0     0     0     0     0     0     0     1     0     0     0
#> 8      0     0     0     0     0     0     0     0     0     1     0     0
#> 9      1     0     0     0     0     0     0     0     0     0     1     0
#> 10     0     1     0     0     0     0     0     0     0     0     0     1
#>    v13
#> 1    1
#> 2    2
#> 3    1
#> 4    2
#> 5    1
#> 6    2
#> 7    1
#> 8    2
#> 9    1
#> 10   2
library(irlba)
#> 载入需要的程辑包:Matrix
#> 
#> 载入程辑包:'Matrix'
#> The following object is masked from 'package:tidyr':
#> 
#>     expand
pca_score <- prcomp_irlba(sample_data_after_onehot,n=2,center = T,scale. = F)
  1. sample_data is discretized for all continuous variable by cut_number.
  2. Here I use prcomp_irlba instead of prcomp partly because prcomp is hard to train big data.
  3. dummyVars function I use is to one-hot encoding the category variables.
r_output <- 
    pca_score$rotation %>% 
    `rownames<-`(sample_data_after_onehot %>% names) %>% 
    as.data.frame() %>% 
    rownames_to_column('variable')
r_output
#>      variable          PC1           PC2
#> 1    v1.[1,4]  0.181584194 -0.0767965148
#> 2    v1.(4,7] -0.036772084  0.1760748168
#> 3   v1.(7,10] -0.144812109 -0.0992783020
#> 4    v2.[1,4]  0.262610278  0.0202091466
#> 5    v2.(4,7] -0.145356978  0.2172683960
#> 6   v2.(7,10] -0.117253300 -0.2374775426
#> 7    v3.[1,4]  0.262610278  0.0202091466
#> 8    v3.(4,7] -0.145356978  0.2172683960
#> 9   v3.(7,10] -0.117253300 -0.2374775426
#> 10   v4.[1,4]  0.262610278  0.0202091466
#> 11   v4.(4,7] -0.145356978  0.2172683960
#> 12  v4.(7,10] -0.117253300 -0.2374775426
#> 13   v5.[1,4]  0.262610278  0.0202091466
#> 14   v5.(4,7] -0.145356978  0.2172683960
#> 15  v5.(7,10] -0.117253300 -0.2374775426
#> 16   v6.[1,4]  0.262610278  0.0202091466
#> 17   v6.(4,7] -0.145356978  0.2172683960
#> 18  v6.(7,10] -0.117253300 -0.2374775426
#> 19   v7.[1,4]  0.262610278  0.0202091466
#> 20   v7.(4,7] -0.145356978  0.2172683960
#> 21  v7.(7,10] -0.117253300 -0.2374775426
#> 22   v8.[1,4]  0.262610278  0.0202091466
#> 23   v8.(4,7] -0.145356978  0.2172683960
#> 24  v8.(7,10] -0.117253300 -0.2374775426
#> 25   v9.[1,4]  0.262610278  0.0202091466
#> 26   v9.(4,7] -0.145356978  0.2172683960
#> 27  v9.(7,10] -0.117253300 -0.2374775426
#> 28  v10.[1,4]  0.181584194 -0.0767965148
#> 29  v10.(4,7] -0.036772084  0.1760748168
#> 30 v10.(7,10] -0.144812109 -0.0992783020
#> 31      v11.a  0.068936220  0.0014917292
#> 32      v11.c  0.069168005 -0.0009421594
#> 33      v11.e  0.068936220  0.0014917292
#> 34      v11.g  0.055569833  0.0181678477
#> 35      v11.i -0.046286851  0.0801704289
#> 36      v11.k -0.046055067  0.0777365403
#> 37      v11.m -0.053015061  0.0593614268
#> 38      v11.o -0.045782632 -0.0805368087
#> 39      v11.q -0.046014417 -0.0781029201
#> 40      v11.s -0.025456251 -0.0788378138
#> 41      v12.b  0.068936220  0.0014917292
#> 42      v12.d  0.069168005 -0.0009421594
#> 43      v12.f  0.068936220  0.0014917292
#> 44      v12.h  0.055569833  0.0181678477
#> 45      v12.j -0.046286851  0.0801704289
#> 46      v12.l -0.046055067  0.0777365403
#> 47      v12.n -0.053015061  0.0593614268
#> 48      v12.p -0.045782632 -0.0805368087
#> 49      v12.r -0.046014417 -0.0781029201
#> 50      v12.t -0.025456251 -0.0788378138
#> 51        v13  0.007443888 -0.0644123940

r_output is what I get when I finish the pca model.

Now, we recode it in SQL style.

library(add2deployment)
trans_inSQL(input=r_output)
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [51].
#> 
#> 载入程辑包:'glue'
#> The following object is masked from 'package:dplyr':
#> 
#>     collapse
#> Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
#> their attributes
#> Warning in bind_rows_(x, .id): Vectorizing 'glue' elements may not preserve
#> their attributes
#> select user_id
#> 
#> 
#> ,
#> 
#> 
#> ifnull(cast((v1>[1 and v1<=4) as double),0)*0.181584193848469+
#> ifnull(cast((v1>4 and v1<=7) as double),0)*-0.0367720844560349+
#> ifnull(cast((v1>7 and v1<=10) as double),0)*-0.144812109392434+
#> ifnull(cast((v2>[1 and v2<=4) as double),0)*0.262610278076872+
#> ifnull(cast((v2>4 and v2<=7) as double),0)*-0.145356978354161+
#> ifnull(cast((v2>7 and v2<=10) as double),0)*-0.117253299722711+
#> ifnull(cast((v3>[1 and v3<=4) as double),0)*0.262610278076872+
#> ifnull(cast((v3>4 and v3<=7) as double),0)*-0.145356978354161+
#> ifnull(cast((v3>7 and v3<=10) as double),0)*-0.117253299722711+
#> ifnull(cast((v4>[1 and v4<=4) as double),0)*0.262610278076872+
#> ifnull(cast((v4>4 and v4<=7) as double),0)*-0.145356978354161+
#> ifnull(cast((v4>7 and v4<=10) as double),0)*-0.117253299722711+
#> ifnull(cast((v5>[1 and v5<=4) as double),0)*0.262610278076872+
#> ifnull(cast((v5>4 and v5<=7) as double),0)*-0.145356978354161+
#> ifnull(cast((v5>7 and v5<=10) as double),0)*-0.117253299722711+
#> ifnull(cast((v6>[1 and v6<=4) as double),0)*0.262610278076872+
#> ifnull(cast((v6>4 and v6<=7) as double),0)*-0.145356978354161+
#> ifnull(cast((v6>7 and v6<=10) as double),0)*-0.117253299722711+
#> ifnull(cast((v7>[1 and v7<=4) as double),0)*0.262610278076872+
#> ifnull(cast((v7>4 and v7<=7) as double),0)*-0.145356978354161+
#> ifnull(cast((v7>7 and v7<=10) as double),0)*-0.117253299722711+
#> ifnull(cast((v8>[1 and v8<=4) as double),0)*0.262610278076872+
#> ifnull(cast((v8>4 and v8<=7) as double),0)*-0.145356978354161+
#> ifnull(cast((v8>7 and v8<=10) as double),0)*-0.117253299722711+
#> ifnull(cast((v9>[1 and v9<=4) as double),0)*0.262610278076872+
#> ifnull(cast((v9>4 and v9<=7) as double),0)*-0.145356978354161+
#> ifnull(cast((v9>7 and v9<=10) as double),0)*-0.117253299722711+
#> ifnull(cast((v10>[1 and v10<=4) as double),0)*0.181584193848469+
#> ifnull(cast((v10>4 and v10<=7) as double),0)*-0.0367720844560348+
#> ifnull(cast((v10>7 and v10<=10) as double),0)*-0.144812109392434+
#> ifnull(cast(v11='a' as double),0)*0.0689362200661296+
#> ifnull(cast(v11='c' as double),0)*0.0691680047254186+
#> ifnull(cast(v11='e' as double),0)*0.0689362200661296+
#> ifnull(cast(v11='g' as double),0)*0.0555698332191943+
#> ifnull(cast(v11='i' as double),0)*-0.0462868511672591+
#> ifnull(cast(v11='k' as double),0)*-0.0460550665079701+
#> ifnull(cast(v11='m' as double),0)*-0.0530150606789322+
#> ifnull(cast(v11='o' as double),0)*-0.0457826320271065+
#> ifnull(cast(v11='q' as double),0)*-0.0460144166863955+
#> ifnull(cast(v11='s' as double),0)*-0.025456251009209+
#> ifnull(cast(v12='b' as double),0)*0.0689362200661297+
#> ifnull(cast(v12='d' as double),0)*0.0691680047254186+
#> ifnull(cast(v12='f' as double),0)*0.0689362200661297+
#> ifnull(cast(v12='h' as double),0)*0.0555698332191943+
#> ifnull(cast(v12='j' as double),0)*-0.0462868511672591+
#> ifnull(cast(v12='l' as double),0)*-0.0460550665079701+
#> ifnull(cast(v12='n' as double),0)*-0.0530150606789322+
#> ifnull(cast(v12='p' as double),0)*-0.0457826320271065+
#> ifnull(cast(v12='r' as double),0)*-0.0460144166863955+
#> ifnull(cast(v12='t' as double),0)*-0.025456251009209 as pc1,
#> 
#> 
#> ifnull(cast((v1>[1 and v1<=4) as double),0)*-0.0767965148209343+
#> ifnull(cast((v1>4 and v1<=7) as double),0)*0.176074816813784+
#> ifnull(cast((v1>7 and v1<=10) as double),0)*-0.0992783019928502+
#> ifnull(cast((v2>[1 and v2<=4) as double),0)*0.0202091466372069+
#> ifnull(cast((v2>4 and v2<=7) as double),0)*0.217268395959826+
#> ifnull(cast((v2>7 and v2<=10) as double),0)*-0.237477542597033+
#> ifnull(cast((v3>[1 and v3<=4) as double),0)*0.0202091466372069+
#> ifnull(cast((v3>4 and v3<=7) as double),0)*0.217268395959826+
#> ifnull(cast((v3>7 and v3<=10) as double),0)*-0.237477542597033+
#> ifnull(cast((v4>[1 and v4<=4) as double),0)*0.0202091466372069+
#> ifnull(cast((v4>4 and v4<=7) as double),0)*0.217268395959826+
#> ifnull(cast((v4>7 and v4<=10) as double),0)*-0.237477542597033+
#> ifnull(cast((v5>[1 and v5<=4) as double),0)*0.0202091466372069+
#> ifnull(cast((v5>4 and v5<=7) as double),0)*0.217268395959826+
#> ifnull(cast((v5>7 and v5<=10) as double),0)*-0.237477542597033+
#> ifnull(cast((v6>[1 and v6<=4) as double),0)*0.0202091466372069+
#> ifnull(cast((v6>4 and v6<=7) as double),0)*0.217268395959826+
#> ifnull(cast((v6>7 and v6<=10) as double),0)*-0.237477542597033+
#> ifnull(cast((v7>[1 and v7<=4) as double),0)*0.0202091466372069+
#> ifnull(cast((v7>4 and v7<=7) as double),0)*0.217268395959826+
#> ifnull(cast((v7>7 and v7<=10) as double),0)*-0.237477542597033+
#> ifnull(cast((v8>[1 and v8<=4) as double),0)*0.0202091466372069+
#> ifnull(cast((v8>4 and v8<=7) as double),0)*0.217268395959826+
#> ifnull(cast((v8>7 and v8<=10) as double),0)*-0.237477542597033+
#> ifnull(cast((v9>[1 and v9<=4) as double),0)*0.0202091466372069+
#> ifnull(cast((v9>4 and v9<=7) as double),0)*0.217268395959826+
#> ifnull(cast((v9>7 and v9<=10) as double),0)*-0.237477542597033+
#> ifnull(cast((v10>[1 and v10<=4) as double),0)*-0.0767965148209343+
#> ifnull(cast((v10>4 and v10<=7) as double),0)*0.176074816813785+
#> ifnull(cast((v10>7 and v10<=10) as double),0)*-0.0992783019928501+
#> ifnull(cast(v11='a' as double),0)*0.00149172918719605+
#> ifnull(cast(v11='c' as double),0)*-0.000942159419657452+
#> ifnull(cast(v11='e' as double),0)*0.00149172918719604+
#> ifnull(cast(v11='g' as double),0)*0.0181678476824723+
#> ifnull(cast(v11='i' as double),0)*0.0801704288690829+
#> ifnull(cast(v11='k' as double),0)*0.0777365402622294+
#> ifnull(cast(v11='m' as double),0)*0.0593614268285136+
#> ifnull(cast(v11='o' as double),0)*-0.0805368087141086+
#> ifnull(cast(v11='q' as double),0)*-0.0781029201072551+
#> ifnull(cast(v11='s' as double),0)*-0.078837813775669+
#> ifnull(cast(v12='b' as double),0)*0.00149172918719604+
#> ifnull(cast(v12='d' as double),0)*-0.000942159419657456+
#> ifnull(cast(v12='f' as double),0)*0.00149172918719604+
#> ifnull(cast(v12='h' as double),0)*0.0181678476824723+
#> ifnull(cast(v12='j' as double),0)*0.0801704288690829+
#> ifnull(cast(v12='l' as double),0)*0.0777365402622293+
#> ifnull(cast(v12='n' as double),0)*0.0593614268285135+
#> ifnull(cast(v12='p' as double),0)*-0.0805368087141086+
#> ifnull(cast(v12='r' as double),0)*-0.0781029201072551+
#> ifnull(cast(v12='t' as double),0)*-0.078837813775669 as pc2
#> 
#> 
#> from yourdataset