14. Train LGBM
In [1]:
Copied!
import featurebyte as fb
import numpy as np
import pandas as pd
from itertools import product
from sklearn.metrics import roc_auc_score
from modeling_script import LightGBMPipeline, Objective, Metric
from typing import Optional
import featurebyte as fb
import numpy as np
import pandas as pd
from itertools import product
from sklearn.metrics import roc_auc_score
from modeling_script import LightGBMPipeline, Objective, Metric
from typing import Optional
14:05:16 | INFO | SDK version: 3.0.1.dev45 INFO :featurebyte:SDK version: 3.0.1.dev45 14:05:16 | INFO | No catalog activated. INFO :featurebyte:No catalog activated.
Activate Catalog
In [2]:
Copied!
# Select the "tutorial" SDK profile before touching any catalog.
fb.use_profile("tutorial")

# Activate the tutorial catalog by name; `catalog` is the handle the later cells call into.
catalog_name = "Loan Applications Dataset SDK Tutorial"
catalog = fb.Catalog.activate(catalog_name)
14:05:34 | INFO | Using profile: tutorial INFO :featurebyte:Using profile: tutorial 14:05:34 | INFO | Using configuration file at: /Users/gxav/.featurebyte/config.yaml INFO :featurebyte:Using configuration file at: /Users/gxav/.featurebyte/config.yaml 14:05:34 | INFO | Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) INFO :featurebyte:Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) 14:05:34 | INFO | SDK version: 3.0.1.dev45 INFO :featurebyte:SDK version: 3.0.1.dev45 14:05:34 | INFO | No catalog activated. INFO :featurebyte:No catalog activated. 14:05:34 | INFO | Catalog activated: Loan Applications Dataset SDK Tutorial INFO :featurebyte.api.catalog:Catalog activated: Loan Applications Dataset SDK Tutorial
Get Training and Holdout data
In [3]:
Copied!
# List the historical feature tables available in the active catalog.
# The bare trailing expression renders the listing as the cell output.
historical_feature_tables = catalog.list_historical_feature_tables()
historical_feature_tables
Out[3]:
| id | name | feature_store_name | observation_table_name | shape | created_at | |
|---|---|---|---|---|---|---|
| 0 | 683d3c98954f0aa89942addd | 40 features for Loan Applications - TRAIN | playground | Applications up to Sept 2024 with Loan Defaults | [303270, 43] | 2025-06-02T05:56:31.688000 |
In [4]:
Copied!
# Fetch the historical feature table by the name shown in the listing above.
training_table_name = "40 features for Loan Applications - TRAIN"
training_data_table = catalog.get_historical_feature_table(training_table_name)
In [5]:
Copied!
# Pull the historical feature table into local memory as a pandas DataFrame.
feature_data = training_data_table.to_pandas()
Downloading table |████████████████████████████████████████| 303270/303270 [100%
In [6]:
Copied!
# Observation windows are half-open [from, to): the validation window starts
# exactly where the training window ends, so no row can land in both splits.
training_from, training_to = "2018-11-01 00:00", "2024-04-01 00:00"
validation_from, validation_to = "2024-04-01 00:00", "2024-10-01 00:00"

# Parse POINT_IN_TIME so it can be compared against the window bounds above.
feature_data["POINT_IN_TIME"] = pd.to_datetime(feature_data["POINT_IN_TIME"])


def _rows_between(frame, start, end):
    """Return the rows of `frame` whose POINT_IN_TIME lies in [start, end), re-indexed from 0."""
    point_in_time = frame["POINT_IN_TIME"]
    in_window = (point_in_time >= start) & (point_in_time < end)
    return frame.loc[in_window].reset_index(drop=True)


training_data = _rows_between(feature_data, training_from, training_to)
validation_data = _rows_between(feature_data, validation_from, validation_to)

# Displayed as the cell output: (rows, columns) of the validation split.
validation_data.shape
Out[6]:
(25669, 43)
Categorize per feature type
In [7]:
Copied!
target_column = "Loan_Default"
entity_columns = ["SK_ID_CURR"]

# Columns that must not be fed to the model: the target, plus the timestamp,
# row-index, sample-weight and entity-key columns (going by their names).
excluded_columns = {
    target_column,
    "POINT_IN_TIME",
    "__FB_TABLE_ROW_INDEX",
    "NEG_SAMPLE_WEIGHT",
    *entity_columns,
}

# Every remaining column is a feature; the DataFrame's column order is preserved.
feature_columns = [name for name in training_data.columns if name not in excluded_columns]
In [8]:
Copied!
# Map each feature column name to the `feature_type` of the catalog feature with that name.
feature_types = {
    feature_name: catalog.get_feature(feature_name).feature_type
    for feature_name in feature_columns
}
Specify Training Parameters
In [9]:
Copied!
# Learning task and evaluation metric handed to the LightGBM pipeline.
objective = Objective.BINARY
eval_metric = Metric.AUC

# Upper bound on boosting rounds, and the early-stopping patience passed to the pipeline.
num_boost_round = 10000
early_stopping_rounds = 50

# NOTE(review): not referenced by any other cell shown in this notebook — confirm it is still needed.
small_count_threshold = 5

# Hyper-parameter grid: each key maps to the list of candidate values to try.
# Every list currently holds a single value, so the grid is exactly one combination.
param_grid = dict(
    learning_rate=[0.01],
    max_depth=[7],
    num_leaves=[48],
    subsample=[0.8],
    colsample_bytree=[0.5],
    min_split_gain=[0.025],
    reg_alpha=[0.5],
    reg_lambda=[0.5],
)
Run LightGBM
In [10]:
Copied!
# Split the training / validation frames into model inputs and labels.
target_train = training_data[target_column]
features_train = training_data[feature_columns]
target_test = validation_data[target_column]
features_test = validation_data[feature_columns]

# Prepare cartesian product of parameters
keys = list(param_grid.keys())
values = list(param_grid.values())

# Best candidate seen so far. `lgbm_pipeline` is assigned explicitly: a bare
# annotation (`name: Type`) binds nothing, so without the `= None` the name
# would be undefined after the loop whenever no candidate is selected.
best_auc = 0
best_params = None
lgbm_pipeline: Optional[LightGBMPipeline] = None

for combo in product(*values):
    params = dict(zip(keys, combo))

    pipeline = LightGBMPipeline(
        objective=objective,
        eval_metric=eval_metric,
        num_boost_round=num_boost_round,
        early_stopping_rounds=early_stopping_rounds,
        **params,
    )
    pipeline.train(
        df_train=features_train,
        df_test=features_test,
        y_train=target_train,
        y_test=target_test,
        feature_types=feature_types,
    )

    # validate on test data
    # NOTE(review): the frame scored here is the same one passed to train() as
    # df_test. If train() uses it for early stopping (presumably — confirm),
    # this AUC is an optimistic estimate rather than a true hold-out score.
    predictions = pipeline.predict(features_test)
    auc = roc_auc_score(target_test, predictions)
    print(f"params: {params}, AUC: {auc:.4f}")

    # Keep the pipeline with the strictly highest AUC.
    if auc > best_auc:
        best_auc = auc
        best_params = params
        lgbm_pipeline = pipeline

# Fail here with a clear message instead of letting a later cell trip over a
# missing model (e.g. every AUC was 0 or NaN, so the comparison never passed).
if lgbm_pipeline is None:
    raise RuntimeError(
        "No parameter combination produced an AUC above 0; no model was selected."
    )
Preprocessing done
[LightGBM] [Info] Number of positive: 22442, number of negative: 255159
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.018461 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12358
[LightGBM] [Info] Number of data points in the train set: 277601, number of used features: 58
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080843 -> initscore=-2.430953
[LightGBM] [Info] Start training from score -2.430953
Training until validation scores don't improve for 50 rounds
Training until validation scores don't improve for 50 rounds
[10] valid_0's auc: 0.746828
[20] valid_0's auc: 0.754206
[30] valid_0's auc: 0.754575
[40] valid_0's auc: 0.759532
[50] valid_0's auc: 0.758646
[60] valid_0's auc: 0.761317
[70] valid_0's auc: 0.762011
[80] valid_0's auc: 0.763291
[90] valid_0's auc: 0.764279
[100] valid_0's auc: 0.765429
[110] valid_0's auc: 0.766071
[120] valid_0's auc: 0.767068
[130] valid_0's auc: 0.76777
[140] valid_0's auc: 0.768531
[150] valid_0's auc: 0.769357
[160] valid_0's auc: 0.770097
[170] valid_0's auc: 0.77083
[180] valid_0's auc: 0.771574
[190] valid_0's auc: 0.772369
[200] valid_0's auc: 0.772975
[210] valid_0's auc: 0.773546
[220] valid_0's auc: 0.774383
[230] valid_0's auc: 0.775004
[240] valid_0's auc: 0.775282
[250] valid_0's auc: 0.775757
[260] valid_0's auc: 0.776153
[270] valid_0's auc: 0.776642
[280] valid_0's auc: 0.777156
[290] valid_0's auc: 0.77757
[300] valid_0's auc: 0.778065
[310] valid_0's auc: 0.778329
[320] valid_0's auc: 0.778752
[330] valid_0's auc: 0.779165
[340] valid_0's auc: 0.779528
[350] valid_0's auc: 0.779795
[360] valid_0's auc: 0.780121
[370] valid_0's auc: 0.780461
[380] valid_0's auc: 0.780747
[390] valid_0's auc: 0.781042
[400] valid_0's auc: 0.781438
[410] valid_0's auc: 0.781705
[420] valid_0's auc: 0.781887
[430] valid_0's auc: 0.782286
[440] valid_0's auc: 0.782599
[450] valid_0's auc: 0.782759
[460] valid_0's auc: 0.782968
[470] valid_0's auc: 0.783172
[480] valid_0's auc: 0.783378
[490] valid_0's auc: 0.783655
[500] valid_0's auc: 0.783939
[510] valid_0's auc: 0.784292
[520] valid_0's auc: 0.784525
[530] valid_0's auc: 0.784802
[540] valid_0's auc: 0.78503
[550] valid_0's auc: 0.785248
[560] valid_0's auc: 0.785407
[570] valid_0's auc: 0.785552
[580] valid_0's auc: 0.785833
[590] valid_0's auc: 0.786028
[600] valid_0's auc: 0.786205
[610] valid_0's auc: 0.786363
[620] valid_0's auc: 0.786561
[630] valid_0's auc: 0.786695
[640] valid_0's auc: 0.786866
[650] valid_0's auc: 0.787015
[660] valid_0's auc: 0.787197
[670] valid_0's auc: 0.787361
[680] valid_0's auc: 0.787503
[690] valid_0's auc: 0.787636
[700] valid_0's auc: 0.787734
[710] valid_0's auc: 0.787866
[720] valid_0's auc: 0.788041
[730] valid_0's auc: 0.788204
[740] valid_0's auc: 0.788324
[750] valid_0's auc: 0.788524
[760] valid_0's auc: 0.78865
[770] valid_0's auc: 0.788748
[780] valid_0's auc: 0.788885
[790] valid_0's auc: 0.789014
[800] valid_0's auc: 0.789153
[810] valid_0's auc: 0.78926
[820] valid_0's auc: 0.789371
[830] valid_0's auc: 0.789455
[840] valid_0's auc: 0.789565
[850] valid_0's auc: 0.789688
[860] valid_0's auc: 0.789811
[870] valid_0's auc: 0.789858
[880] valid_0's auc: 0.789946
[890] valid_0's auc: 0.790024
[900] valid_0's auc: 0.790121
[910] valid_0's auc: 0.790301
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[920] valid_0's auc: 0.790316
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[930] valid_0's auc: 0.790443
[940] valid_0's auc: 0.790533
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[950] valid_0's auc: 0.790585
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[960] valid_0's auc: 0.790622
[970] valid_0's auc: 0.79068
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[980] valid_0's auc: 0.790809
[990] valid_0's auc: 0.790909
[1000] valid_0's auc: 0.790979
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1010] valid_0's auc: 0.79103
[1020] valid_0's auc: 0.791086
[1030] valid_0's auc: 0.79123
[1040] valid_0's auc: 0.791328
[1050] valid_0's auc: 0.791416
[1060] valid_0's auc: 0.791452
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1070] valid_0's auc: 0.79151
[1080] valid_0's auc: 0.791582
[1090] valid_0's auc: 0.791668
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1100] valid_0's auc: 0.79171
[1110] valid_0's auc: 0.791784
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1120] valid_0's auc: 0.791861
[1130] valid_0's auc: 0.79193
[1140] valid_0's auc: 0.791977
[1150] valid_0's auc: 0.79208
[1160] valid_0's auc: 0.792104
[1170] valid_0's auc: 0.792138
[1180] valid_0's auc: 0.792192
[1190] valid_0's auc: 0.792259
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1200] valid_0's auc: 0.792273
[1210] valid_0's auc: 0.79233
[1220] valid_0's auc: 0.792361
[1230] valid_0's auc: 0.79242
[1240] valid_0's auc: 0.79246
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1250] valid_0's auc: 0.79251
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1260] valid_0's auc: 0.792525
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1270] valid_0's auc: 0.792582
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1280] valid_0's auc: 0.792619
[1290] valid_0's auc: 0.792657
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1300] valid_0's auc: 0.792667
[1310] valid_0's auc: 0.79268
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1320] valid_0's auc: 0.792711
[1330] valid_0's auc: 0.792735
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1340] valid_0's auc: 0.792781
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1350] valid_0's auc: 0.792821
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1360] valid_0's auc: 0.792859
[1370] valid_0's auc: 0.792854
[1380] valid_0's auc: 0.792856
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1390] valid_0's auc: 0.792885
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1400] valid_0's auc: 0.792904
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1410] valid_0's auc: 0.792905
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1420] valid_0's auc: 0.79292
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1430] valid_0's auc: 0.792927
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1440] valid_0's auc: 0.792981
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1450] valid_0's auc: 0.792994
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1460] valid_0's auc: 0.79303
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1470] valid_0's auc: 0.793046
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1480] valid_0's auc: 0.793061
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1490] valid_0's auc: 0.793082
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1500] valid_0's auc: 0.793104
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1510] valid_0's auc: 0.793113
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1520] valid_0's auc: 0.793163
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1530] valid_0's auc: 0.793162
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1540] valid_0's auc: 0.793185
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1550] valid_0's auc: 0.793219
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1560] valid_0's auc: 0.79323
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1570] valid_0's auc: 0.79325
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1580] valid_0's auc: 0.793264
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1590] valid_0's auc: 0.793254
[1600] valid_0's auc: 0.793245
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1610] valid_0's auc: 0.793278
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1620] valid_0's auc: 0.793317
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1630] valid_0's auc: 0.793376
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1640] valid_0's auc: 0.793411
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1650] valid_0's auc: 0.793422
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1660] valid_0's auc: 0.793448
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1670] valid_0's auc: 0.793489
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1680] valid_0's auc: 0.793504
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1690] valid_0's auc: 0.793493
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1700] valid_0's auc: 0.793482
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1710] valid_0's auc: 0.793462
[1720] valid_0's auc: 0.793497
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1730] valid_0's auc: 0.79352
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1740] valid_0's auc: 0.793524
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1750] valid_0's auc: 0.793549
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1760] valid_0's auc: 0.793558
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1770] valid_0's auc: 0.793575
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1780] valid_0's auc: 0.793592
[1790] valid_0's auc: 0.793567
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1800] valid_0's auc: 0.793597
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1810] valid_0's auc: 0.793606
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1820] valid_0's auc: 0.793602
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1830] valid_0's auc: 0.793599
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1840] valid_0's auc: 0.793602
[1850] valid_0's auc: 0.793632
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1860] valid_0's auc: 0.793679
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1870] valid_0's auc: 0.793698
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1880] valid_0's auc: 0.793731
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1890] valid_0's auc: 0.793752
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1900] valid_0's auc: 0.793779
[1910] valid_0's auc: 0.793787
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1920] valid_0's auc: 0.793824
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1930] valid_0's auc: 0.793855
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1940] valid_0's auc: 0.793882
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1950] valid_0's auc: 0.793897
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1960] valid_0's auc: 0.793902
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1970] valid_0's auc: 0.793904
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1980] valid_0's auc: 0.793911
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1990] valid_0's auc: 0.793908
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2000] valid_0's auc: 0.793923
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2010] valid_0's auc: 0.79393
[2020] valid_0's auc: 0.793901
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2030] valid_0's auc: 0.793906
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2040] valid_0's auc: 0.793921
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2050] valid_0's auc: 0.793904
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
Early stopping, best iteration is:
[2005] valid_0's auc: 0.793945
params: {'learning_rate': 0.01, 'max_depth': 7, 'num_leaves': 48, 'subsample': 0.8, 'colsample_bytree': 0.5, 'min_split_gain': 0.025, 'reg_alpha': 0.5, 'reg_lambda': 0.5}, AUC: 0.7939
In [11]:
Copied!
# Report the best AUC found by the grid search and the parameters that produced it.
# A single print with a newline separator emits the same two lines as two prints.
print(
    f"\nBest AUC: {best_auc:.4f}",
    f"Best Parameters: {best_params}",
    sep="\n",
)
Best AUC: 0.7939
Best Parameters: {'learning_rate': 0.01, 'max_depth': 7, 'num_leaves': 48, 'subsample': 0.8, 'colsample_bytree': 0.5, 'min_split_gain': 0.025, 'reg_alpha': 0.5, 'reg_lambda': 0.5}
SHAP Importance
In [12]:
Copied!
# Display settings for the tables rendered below.
display_options = {
    'display.max_rows': None,     # Show all rows
    'display.max_colwidth': 100,  # Show up to 100 characters of each cell
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)
In [13]:
Copied!
# Explain the selected model on a reproducible random subset of the training rows.
# The requested size is clamped to the number of available rows because
# DataFrame.sample(n=...) raises ValueError when asked for more rows than exist
# (sampling is without replacement by default).
shap_sample_size = min(10000, len(features_train))
shap_sample = features_train.sample(n=shap_sample_size, random_state=88)

# compute_shap returns a pair; only the second element (the importance table) is used here.
_, feature_importance = lgbm_pipeline.compute_shap(shap_sample)

# Displayed as the cell output: one row per feature with its cumulative importance.
feature_importance[["feature", "cumulative_importance_percent"]]
Out[13]:
| feature | cumulative_importance_percent | |
|---|---|---|
| 0 | NEW_APPLICATION_EXT_SOURCE_2 | 0.101066 |
| 1 | NEW_APPLICATION_EXT_SOURCE_3 | 0.188525 |
| 2 | NEW_APPLICATION_EXT_SOURCE_1 | 0.236480 |
| 3 | NEW_APPLICATION_AMT_ANNUITY_To_AMT_CREDIT | 0.276352 |
| 4 | NEW_APPLICATION_DAYS_EMPLOYED | 0.316175 |
| 5 | NEW_APPLICATION_Credit-Goods_Difference | 0.355660 |
| 6 | CLIENT_Max_of_Active_Cr_active_BureauReportedCredits_AMT_CREDIT_SUM_DEBT_To_AMT_CREDIT_SUMs_104w | 0.391181 |
| 7 | CLIENT_GENDER | 0.426466 |
| 8 | CLIENT_Max_of_PriorApplications_CNT_PAYMENTs_104w | 0.456833 |
| 9 | CLIENT_EDUCATION_TYPE | 0.484940 |
| 10 | NEW_APPLICATION_AMT_ANNUITY | 0.512791 |
| 11 | CLIENT_Installments_AMT_PAYMENTs_by_PriorApplication_CLIENT_TYPE_24cMo | 0.537398 |
| 12 | CLIENT_FAMILY_STATUS | 0.560329 |
| 13 | CLIENT_Avg_of_Consumer_credit_Cr_type_BureauReportedCredits_End_to_Update_Gaps_104w | 0.582440 |
| 14 | CLIENT_Min_of_Credit_card_monthly_balance_records_Available_Credits_6cMo | 0.603981 |
| 15 | CLIENT_Min_of_Installments_AMT_PAYMENTs_24cMo | 0.624278 |
| 16 | CLIENT_Age | 0.644416 |
| 17 | CLIENT_Avg_of_BureauReportedCredits_Available_Credits_104w | 0.664070 |
| 18 | CLIENT_Installments_AMT_PAYMENTs_by_PriorApplication_YIELD_GROUP_24cMo | 0.683458 |
| 19 | CLIENT_ORGANIZATION_TYPE | 0.702360 |
| 20 | CLIENT_PriorApplications_AMT_CREDITs_by_PriorApplication_YIELD_GROUP_104w | 0.720933 |
| 21 | CLIENT_Max_of_Loan_terminations_Loan_PriorApplication_AMT_APPLICATION_To_AMT_CREDITs_104w | 0.739492 |
| 22 | NEW_APPLICATION_AMT_GOODS_PRICE | 0.758032 |
| 23 | NEW_APPLICATION_DAYS_ID_PUBLISH | 0.775764 |
| 24 | NEW_APPLICATION_REGION_POPULATION_RELATIVE | 0.792866 |
| 25 | CLIENT_Installments_AMT_PAYMENTs_by_INSTALLMENT_STATUS_12cMo | 0.809920 |
| 26 | CLIENT_Avg_of_BureauReportedCredits_Available_Credits_26w | 0.826850 |
| 27 | CLIENT_Avg_of_Consumer_credit_Cr_type_BureauReportedCredits_AMT_CREDIT_SUMs_104w | 0.842489 |
| 28 | CLIENT_Time_To_Latest_Approved_Contract_status_PriorApplication_last_due_1st_version_timestamp_104w | 0.857940 |
| 29 | CLIENT_Max_of_Installments_PriorApplication_AMT_ANNUITY_To_AMT_CREDITs_6cMo | 0.873355 |
| 30 | CLIENT_Installments_AMT_PAYMENTs_by_PriorApplication_PAYMENT_TYPE_24cMo | 0.887748 |
| 31 | CLIENT_Max_of_Installments_Days_Difference_Actual_vs_Scheduleds_24cMo | 0.901785 |
| 32 | CLIENT_Std_of_Credit_card_monthly_balance_records_CNT_DRAWINGS_ATM_CURRENTs_24cMo | 0.915600 |
| 33 | NEW_APPLICATION_FLAG_DOCUMENT_3 | 0.929260 |
| 34 | CLIENT_Installments_AMT_PAYMENTs_by_INSTALLMENT_STATUS_6cMo | 0.942796 |
| 35 | CLIENT_PriorApplications_AMT_CREDITs_by_PriorApplication_NFLAG_INSURED_ON_APPROVAL_52w | 0.955856 |
| 36 | NEW_APPLICATION_FLOORSMAX_MODE | 0.968745 |
| 37 | CLIENT_Entropy_of_count_of_Installments_by_INSTALLMENT_STATUS_24cMo | 0.980715 |
| 38 | CLIENT_Std_of_BureauReportedCredits_Available_Credits_26w | 0.992054 |
| 39 | CLIENT_Max_of_Consumer_credit_Cr_type_BureauReportedCredits_AMT_CREDIT_SUM_DEBT_To_AMT_CREDIT_SU... | 1.000000 |
Iterate
Check out the Credit Default UI Tutorials to get more ideas.
In [ ]:
Copied!