library(add2xgb)
library(xgboost)
library(tidyverse)
library(whisker)
# Dump the fitted booster as JSON (without split statistics) and parse it into
# nested R lists; simplifyDataFrame = FALSE keeps the parsed trees as lists
# instead of collapsing them into data frames.
model_trees <- xgb_model %>%
  xgb.dump(with_stats = FALSE, dump_format = "json") %>%
  jsonlite::fromJSON(simplifyDataFrame = FALSE)

# Feature names stored on the model, as a list (passed to the SQL generator
# further down).
feature_dict <- as.list(xgb_model$feature_names)

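# Template fragment used for each split:
#   WHEN {{{split_long}}} < {{{split_condition}}} THEN {{{yes_sql}}}
# Note: check the xgboost dump documentation -- the split comparison here is a
# strict less-than sign (`<`).
# NOTE(review): the rest of the original note is truncated; it appears to say
# that the template is written so that each nested level is separated by one
# "\n" -- confirm against the template source.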

library(whisker)
# Build the SQL scoring query from the parsed trees, the table name "mtcars",
# and the feature-name list.
queries <- xgb_sql_score_query(model_trees, "mtcars", feature_dict)

# Auto-print the generated query text for inspection.
queries
#> [1] "\n    SELECT\n      id,\n      1/(1+exp(-1*( \n      CASE\n        WHEN mpg IS NULL THEN -0.1\n        WHEN mpg < 20.1000004 THEN -0.1\n        ELSE 0.02222\n      END\n     + \n      CASE\n        WHEN mpg IS NULL THEN -0.10095\n        WHEN mpg < 19.4500008 THEN -0.10095\n        ELSE 0.11127\n      END\n     + \n      CASE\n        WHEN mpg IS NULL THEN -0.10054\n        WHEN mpg < 20.1000004 THEN -0.10054\n        ELSE 0.12482\n      END\n     + \n      CASE\n        WHEN mpg IS NULL THEN -0.10979\n        WHEN mpg < 19.2000008 THEN -0.10979\n        ELSE 0.02829\n      END\n     + \n      CASE\n        WHEN disp IS NULL THEN 0.05118\n        WHEN disp < 145.850006 THEN 0.05118\n        ELSE -0.10762\n      END\n     + \n      CASE\n        WHEN drat IS NULL THEN -0.08599\n        WHEN drat < 3.38499999 THEN -0.08599\n        ELSE \n      CASE\n        WHEN mpg IS NULL THEN 0.0703\n        WHEN mpg < 22.0999985 THEN 0.0703\n        ELSE 0.00544\n      END\n    \n      END\n     + \n      CASE\n        WHEN disp IS NULL THEN 0.10946\n        WHEN disp < 145.850006 THEN 0.10946\n        ELSE -0.11944\n      END\n     + \n      CASE\n        WHEN drat IS NULL THEN -0.08555\n        WHEN drat < 3.38499999 THEN -0.08555\n        ELSE \n      CASE\n        WHEN drat IS NULL THEN 0.10667\n        WHEN drat < 3.91000009 THEN 0.10667\n        ELSE 0.01969\n      END\n    \n      END\n     + \n      CASE\n        WHEN drat IS NULL THEN \n      CASE\n        WHEN mpg IS NULL THEN -0.02959\n        WHEN mpg < 16.25 THEN -0.02959\n        ELSE -0.10502\n      END\n    \n        WHEN drat < 3.7750001 THEN \n      CASE\n        WHEN mpg IS NULL THEN -0.02959\n        WHEN mpg < 16.25 THEN -0.02959\n        ELSE -0.10502\n      END\n    \n        ELSE 0.11573\n      END\n     + \n      CASE\n        WHEN mpg IS NULL THEN -0.10285\n        WHEN mpg < 20.1000004 THEN -0.10285\n        ELSE 0.04192\n      END\n     + \n      CASE\n        WHEN mpg IS NULL THEN -0.09612\n  
      WHEN mpg < 19.4500008 THEN -0.09612\n        ELSE 0.04053\n      END\n     + \n      CASE\n        WHEN drat IS NULL THEN -0.09284\n        WHEN drat < 3.73000002 THEN -0.09284\n        ELSE 0.08885\n      END\n     + \n      CASE\n        WHEN drat IS NULL THEN -0.07739\n        WHEN drat < 3.7750001 THEN -0.07739\n        ELSE 0.04401\n      END\n     + \n      CASE\n        WHEN drat IS NULL THEN -0.04462\n        WHEN drat < 3.75 THEN -0.04462\n        ELSE 0.05049\n      END\n     + \n      CASE\n        WHEN drat IS NULL THEN -0.06459\n        WHEN drat < 3.78999996 THEN -0.06459\n        ELSE 0.02993\n      END\n     + \n      CASE\n        WHEN drat IS NULL THEN \n      CASE\n        WHEN disp IS NULL THEN -0.00148\n        WHEN disp < 163.800003 THEN -0.00148\n        ELSE -0.08919\n      END\n    \n        WHEN drat < 4 THEN \n      CASE\n        WHEN disp IS NULL THEN -0.00148\n        WHEN disp < 163.800003 THEN -0.00148\n        ELSE -0.08919\n      END\n    \n        ELSE 0.08535\n      END\n     ))) AS score\n    -- FROM `mtcars`\n    FROM mtcars\n  "
# Save the generated query text to a .sql file in the working directory.
write_file(queries, "mtcars_model_code.sql")