14. Train LGBM
In [1]:
Copied!
import featurebyte as fb
import numpy as np
import pandas as pd
from itertools import product
from sklearn.metrics import roc_auc_score
from modeling_script import LightGBMPipeline, Objective, Metric
from typing import Optional
import featurebyte as fb
import numpy as np
import pandas as pd
from itertools import product
from sklearn.metrics import roc_auc_score
from modeling_script import LightGBMPipeline, Objective, Metric
from typing import Optional
14:05:16 | INFO | SDK version: 3.0.1.dev45 INFO :featurebyte:SDK version: 3.0.1.dev45 14:05:16 | INFO | No catalog activated. INFO :featurebyte:No catalog activated.
Activate Catalog¶
In [2]:
Copied!
# Name of the FeatureByte catalog this notebook works against.
catalog_name = "Loan Applications Dataset SDK Tutorial"

# Select the "tutorial" profile before any catalog call is made.
fb.use_profile("tutorial")

# Activate the catalog; `catalog` is the handle the following cells query.
catalog = fb.Catalog.activate(catalog_name)
# Name of the FeatureByte catalog this notebook works against.
catalog_name = "Loan Applications Dataset SDK Tutorial"

# Select the "tutorial" profile before any catalog call is made.
fb.use_profile("tutorial")

# Activate the catalog; `catalog` is the handle the following cells query.
catalog = fb.Catalog.activate(catalog_name)
14:05:34 | INFO | Using profile: tutorial INFO :featurebyte:Using profile: tutorial 14:05:34 | INFO | Using configuration file at: /Users/gxav/.featurebyte/config.yaml INFO :featurebyte:Using configuration file at: /Users/gxav/.featurebyte/config.yaml 14:05:34 | INFO | Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) INFO :featurebyte:Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) 14:05:34 | INFO | SDK version: 3.0.1.dev45 INFO :featurebyte:SDK version: 3.0.1.dev45 14:05:34 | INFO | No catalog activated. INFO :featurebyte:No catalog activated. 14:05:34 | INFO | Catalog activated: Loan Applications Dataset SDK Tutorial INFO :featurebyte.api.catalog:Catalog activated: Loan Applications Dataset SDK Tutorial
Get Training and Holdout data¶
In [3]:
Copied!
# Show the historical feature tables stored in the active catalog so the
# training table can be picked by name.
catalog.list_historical_feature_tables()
# Show the historical feature tables stored in the active catalog so the
# training table can be picked by name.
catalog.list_historical_feature_tables()
Out[3]:
id | name | feature_store_name | observation_table_name | shape | created_at | |
---|---|---|---|---|---|---|
0 | 683d3c98954f0aa89942addd | 40 features for Loan Applications - TRAIN | playground | Applications up to Sept 2024 with Loan Defaults | [303270, 43] | 2025-06-02T05:56:31.688000 |
In [4]:
Copied!
# Look up the historical feature table by name; this only fetches the table
# handle, the rows are downloaded in a later step.
training_data_table = catalog.get_historical_feature_table("40 features for Loan Applications - TRAIN")
# Look up the historical feature table by name; this only fetches the table
# handle, the rows are downloaded in a later step.
training_data_table = catalog.get_historical_feature_table("40 features for Loan Applications - TRAIN")
In [5]:
Copied!
# Download the full historical feature table into a local pandas DataFrame.
# All later splitting and training runs on this in-memory copy.
feature_data = training_data_table.to_pandas()
# Download the full historical feature table into a local pandas DataFrame.
# All later splitting and training runs on this in-memory copy.
feature_data = training_data_table.to_pandas()
Downloading table |████████████████████████████████████████| 303270/303270 [100%
In [6]:
Copied!
# Half-open [from, to) windows: training ends exactly where validation starts,
# so a row can never fall into both sets.
training_from, training_to = "2018-11-01 00:00", "2024-04-01 00:00"
validation_from, validation_to = "2024-04-01 00:00", "2024-10-01 00:00"

# Make sure the observation timestamps are real datetimes before comparing
# them with the window bounds.
feature_data["POINT_IN_TIME"] = pd.to_datetime(feature_data["POINT_IN_TIME"])
point_in_time = feature_data["POINT_IN_TIME"]

# Training rows: observations inside the training window.
in_training_window = (point_in_time >= training_from) & (point_in_time < training_to)
training_data = feature_data[in_training_window].reset_index(drop=True)

# Validation rows: observations inside the later, non-overlapping window.
in_validation_window = (point_in_time >= validation_from) & (point_in_time < validation_to)
validation_data = feature_data[in_validation_window].reset_index(drop=True)

# Display the size of the validation set as the cell output.
validation_data.shape
# Half-open [from, to) windows: training ends exactly where validation starts,
# so a row can never fall into both sets.
training_from, training_to = "2018-11-01 00:00", "2024-04-01 00:00"
validation_from, validation_to = "2024-04-01 00:00", "2024-10-01 00:00"

# Make sure the observation timestamps are real datetimes before comparing
# them with the window bounds.
feature_data["POINT_IN_TIME"] = pd.to_datetime(feature_data["POINT_IN_TIME"])
point_in_time = feature_data["POINT_IN_TIME"]

# Training rows: observations inside the training window.
in_training_window = (point_in_time >= training_from) & (point_in_time < training_to)
training_data = feature_data[in_training_window].reset_index(drop=True)

# Validation rows: observations inside the later, non-overlapping window.
in_validation_window = (point_in_time >= validation_from) & (point_in_time < validation_to)
validation_data = feature_data[in_validation_window].reset_index(drop=True)

# Display the size of the validation set as the cell output.
validation_data.shape
Out[6]:
(25669, 43)
Categorize per feature type¶
In [7]:
Copied!
# Label to predict and the entity key(s) identifying each observation.
target_column = "Loan_Default"
entity_columns = ["SK_ID_CURR"]

# Columns that must not be fed to the model: the label, the observation
# timestamp, bookkeeping columns and the entity key(s).
excluded_columns = {
    target_column,
    "POINT_IN_TIME",
    "__FB_TABLE_ROW_INDEX",
    "NEG_SAMPLE_WEIGHT",
    *entity_columns,
}

# Everything else is a model feature; the DataFrame's column order is kept.
feature_columns = [name for name in training_data.columns if name not in excluded_columns]
# Label to predict and the entity key(s) identifying each observation.
target_column = "Loan_Default"
entity_columns = ["SK_ID_CURR"]

# Columns that must not be fed to the model: the label, the observation
# timestamp, bookkeeping columns and the entity key(s).
excluded_columns = {
    target_column,
    "POINT_IN_TIME",
    "__FB_TABLE_ROW_INDEX",
    "NEG_SAMPLE_WEIGHT",
    *entity_columns,
}

# Everything else is a model feature; the DataFrame's column order is kept.
feature_columns = [name for name in training_data.columns if name not in excluded_columns]
In [8]:
Copied!
# Map each feature column to the feature type recorded in the catalog.
# One catalog lookup is made per feature name.
feature_types = {
    name: catalog.get_feature(name).feature_type
    for name in feature_columns
}
# Map each feature column to the feature type recorded in the catalog.
# One catalog lookup is made per feature name.
feature_types = {
    name: catalog.get_feature(name).feature_type
    for name in feature_columns
}
Specify Training Parameters¶
In [9]:
Copied!
# Binary classification task, scored with AUC.
objective, eval_metric = Objective.BINARY, Metric.AUC

# Boosting budget and early-stopping patience handed to the pipeline.
num_boost_round = 10000
early_stopping_rounds = 50

# NOTE(review): not referenced by any other cell visible in this notebook.
small_count_threshold = 5

# Hyper-parameter grid: every key maps to a list of candidate values and the
# training cell tries each combination (a single combination as configured).
param_grid = dict(
    learning_rate=[0.01],
    max_depth=[7],
    num_leaves=[48],
    subsample=[0.8],
    colsample_bytree=[0.5],
    min_split_gain=[0.025],
    reg_alpha=[0.5],
    reg_lambda=[0.5],
)
# Binary classification task, scored with AUC.
objective, eval_metric = Objective.BINARY, Metric.AUC

# Boosting budget and early-stopping patience handed to the pipeline.
num_boost_round = 10000
early_stopping_rounds = 50

# NOTE(review): not referenced by any other cell visible in this notebook.
small_count_threshold = 5

# Hyper-parameter grid: every key maps to a list of candidate values and the
# training cell tries each combination (a single combination as configured).
param_grid = dict(
    learning_rate=[0.01],
    max_depth=[7],
    num_leaves=[48],
    subsample=[0.8],
    colsample_bytree=[0.5],
    min_split_gain=[0.025],
    reg_alpha=[0.5],
    reg_lambda=[0.5],
)
Run LightGBM¶
In [10]:
Copied!
# Split each time window into model inputs and the target label.
target_train = training_data[target_column]
features_train = training_data[feature_columns]
target_test = validation_data[target_column]
features_test = validation_data[feature_columns]

# Prepare cartesian product of parameters
keys = list(param_grid.keys())
values = list(param_grid.values())

# Start below any achievable score so the first candidate with a valid AUC is
# always recorded (a floor of 0 is an arbitrary threshold).
best_auc = float("-inf")
best_params = None
# Bind the name explicitly: a bare annotation does not create the variable, so
# an empty grid (or an AUC that never improves) would otherwise leave
# `lgbm_pipeline` undefined for the cells that use it afterwards.
lgbm_pipeline: Optional[LightGBMPipeline] = None

for combo in product(*values):
    # One candidate configuration: parameter name -> chosen value.
    params = dict(zip(keys, combo))

    pipeline = LightGBMPipeline(
        objective=objective,
        eval_metric=eval_metric,
        num_boost_round=num_boost_round,
        early_stopping_rounds=early_stopping_rounds,
        **params,
    )
    pipeline.train(
        df_train=features_train,
        df_test=features_test,
        y_train=target_train,
        y_test=target_test,
        feature_types=feature_types,
    )

    # validate on test data
    # NOTE(review): the same validation frame is also passed to `train` as
    # `df_test`; if it drives early stopping there, this AUC is optimistic.
    # Confirm against a separate holdout before reporting it.
    predictions = pipeline.predict(features_test)
    auc = roc_auc_score(target_test, predictions)
    print(f"params: {params}, AUC: {auc:.4f}")

    # Keep the best-scoring configuration and its fitted pipeline.
    if auc > best_auc:
        best_auc = auc
        best_params = params
        lgbm_pipeline = pipeline
# Split each time window into model inputs and the target label.
target_train = training_data[target_column]
features_train = training_data[feature_columns]
target_test = validation_data[target_column]
features_test = validation_data[feature_columns]

# Prepare cartesian product of parameters
keys = list(param_grid.keys())
values = list(param_grid.values())

# Start below any achievable score so the first candidate with a valid AUC is
# always recorded (a floor of 0 is an arbitrary threshold).
best_auc = float("-inf")
best_params = None
# Bind the name explicitly: a bare annotation does not create the variable, so
# an empty grid (or an AUC that never improves) would otherwise leave
# `lgbm_pipeline` undefined for the cells that use it afterwards.
lgbm_pipeline: Optional[LightGBMPipeline] = None

for combo in product(*values):
    # One candidate configuration: parameter name -> chosen value.
    params = dict(zip(keys, combo))

    pipeline = LightGBMPipeline(
        objective=objective,
        eval_metric=eval_metric,
        num_boost_round=num_boost_round,
        early_stopping_rounds=early_stopping_rounds,
        **params,
    )
    pipeline.train(
        df_train=features_train,
        df_test=features_test,
        y_train=target_train,
        y_test=target_test,
        feature_types=feature_types,
    )

    # validate on test data
    # NOTE(review): the same validation frame is also passed to `train` as
    # `df_test`; if it drives early stopping there, this AUC is optimistic.
    # Confirm against a separate holdout before reporting it.
    predictions = pipeline.predict(features_test)
    auc = roc_auc_score(target_test, predictions)
    print(f"params: {params}, AUC: {auc:.4f}")

    # Keep the best-scoring configuration and its fitted pipeline.
    if auc > best_auc:
        best_auc = auc
        best_params = params
        lgbm_pipeline = pipeline
Preprocessing done [LightGBM] [Info] Number of positive: 22442, number of negative: 255159 [LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.018461 seconds. You can set `force_row_wise=true` to remove the overhead. And if memory is not enough, you can set `force_col_wise=true`. [LightGBM] [Info] Total Bins 12358 [LightGBM] [Info] Number of data points in the train set: 277601, number of used features: 58 [LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080843 -> initscore=-2.430953 [LightGBM] [Info] Start training from score -2.430953 Training until validation scores don't improve for 50 rounds Training until validation scores don't improve for 50 rounds [10] valid_0's auc: 0.746828 [20] valid_0's auc: 0.754206 [30] valid_0's auc: 0.754575 [40] valid_0's auc: 0.759532 [50] valid_0's auc: 0.758646 [60] valid_0's auc: 0.761317 [70] valid_0's auc: 0.762011 [80] valid_0's auc: 0.763291 [90] valid_0's auc: 0.764279 [100] valid_0's auc: 0.765429 [110] valid_0's auc: 0.766071 [120] valid_0's auc: 0.767068 [130] valid_0's auc: 0.76777 [140] valid_0's auc: 0.768531 [150] valid_0's auc: 0.769357 [160] valid_0's auc: 0.770097 [170] valid_0's auc: 0.77083 [180] valid_0's auc: 0.771574 [190] valid_0's auc: 0.772369 [200] valid_0's auc: 0.772975 [210] valid_0's auc: 0.773546 [220] valid_0's auc: 0.774383 [230] valid_0's auc: 0.775004 [240] valid_0's auc: 0.775282 [250] valid_0's auc: 0.775757 [260] valid_0's auc: 0.776153 [270] valid_0's auc: 0.776642 [280] valid_0's auc: 0.777156 [290] valid_0's auc: 0.77757 [300] valid_0's auc: 0.778065 [310] valid_0's auc: 0.778329 [320] valid_0's auc: 0.778752 [330] valid_0's auc: 0.779165 [340] valid_0's auc: 0.779528 [350] valid_0's auc: 0.779795 [360] valid_0's auc: 0.780121 [370] valid_0's auc: 0.780461 [380] valid_0's auc: 0.780747 [390] valid_0's auc: 0.781042 [400] valid_0's auc: 0.781438 [410] valid_0's auc: 0.781705 [420] valid_0's auc: 0.781887 [430] valid_0's auc: 0.782286 [440] valid_0's 
auc: 0.782599 [450] valid_0's auc: 0.782759 [460] valid_0's auc: 0.782968 [470] valid_0's auc: 0.783172 [480] valid_0's auc: 0.783378 [490] valid_0's auc: 0.783655 [500] valid_0's auc: 0.783939 [510] valid_0's auc: 0.784292 [520] valid_0's auc: 0.784525 [530] valid_0's auc: 0.784802 [540] valid_0's auc: 0.78503 [550] valid_0's auc: 0.785248 [560] valid_0's auc: 0.785407 [570] valid_0's auc: 0.785552 [580] valid_0's auc: 0.785833 [590] valid_0's auc: 0.786028 [600] valid_0's auc: 0.786205 [610] valid_0's auc: 0.786363 [620] valid_0's auc: 0.786561 [630] valid_0's auc: 0.786695 [640] valid_0's auc: 0.786866 [650] valid_0's auc: 0.787015 [660] valid_0's auc: 0.787197 [670] valid_0's auc: 0.787361 [680] valid_0's auc: 0.787503 [690] valid_0's auc: 0.787636 [700] valid_0's auc: 0.787734 [710] valid_0's auc: 0.787866 [720] valid_0's auc: 0.788041 [730] valid_0's auc: 0.788204 [740] valid_0's auc: 0.788324 [750] valid_0's auc: 0.788524 [760] valid_0's auc: 0.78865 [770] valid_0's auc: 0.788748 [780] valid_0's auc: 0.788885 [790] valid_0's auc: 0.789014 [800] valid_0's auc: 0.789153 [810] valid_0's auc: 0.78926 [820] valid_0's auc: 0.789371 [830] valid_0's auc: 0.789455 [840] valid_0's auc: 0.789565 [850] valid_0's auc: 0.789688 [860] valid_0's auc: 0.789811 [870] valid_0's auc: 0.789858 [880] valid_0's auc: 0.789946 [890] valid_0's auc: 0.790024 [900] valid_0's auc: 0.790121 [910] valid_0's auc: 0.790301 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [920] valid_0's auc: 0.790316 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [930] valid_0's auc: 0.790443 [940] valid_0's auc: 0.790533 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [950] valid_0's auc: 0.790585 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [960] valid_0's auc: 0.790622 [970] valid_0's auc: 0.79068 [LightGBM] [Warning] No 
further splits with positive gain, best gain: -inf [980] valid_0's auc: 0.790809 [990] valid_0's auc: 0.790909 [1000] valid_0's auc: 0.790979 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1010] valid_0's auc: 0.79103 [1020] valid_0's auc: 0.791086 [1030] valid_0's auc: 0.79123 [1040] valid_0's auc: 0.791328 [1050] valid_0's auc: 0.791416 [1060] valid_0's auc: 0.791452 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1070] valid_0's auc: 0.79151 [1080] valid_0's auc: 0.791582 [1090] valid_0's auc: 0.791668 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1100] valid_0's auc: 0.79171 [1110] valid_0's auc: 0.791784 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1120] valid_0's auc: 0.791861 [1130] valid_0's auc: 0.79193 [1140] valid_0's auc: 0.791977 [1150] valid_0's auc: 0.79208 [1160] valid_0's auc: 0.792104 [1170] valid_0's auc: 0.792138 [1180] valid_0's auc: 0.792192 [1190] valid_0's auc: 0.792259 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1200] valid_0's auc: 0.792273 [1210] valid_0's auc: 0.79233 [1220] valid_0's auc: 0.792361 [1230] valid_0's auc: 0.79242 [1240] valid_0's auc: 0.79246 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1250] valid_0's auc: 0.79251 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1260] valid_0's auc: 0.792525 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1270] valid_0's auc: 0.792582 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1280] valid_0's auc: 0.792619 [1290] valid_0's auc: 0.792657 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1300] valid_0's auc: 0.792667 [1310] valid_0's auc: 0.79268 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1320] valid_0's auc: 0.792711 [1330] valid_0's auc: 0.792735 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1340] valid_0's auc: 0.792781 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1350] valid_0's auc: 0.792821 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1360] valid_0's auc: 0.792859 [1370] valid_0's auc: 0.792854 [1380] valid_0's auc: 0.792856 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1390] valid_0's auc: 0.792885 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1400] valid_0's auc: 0.792904 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1410] valid_0's auc: 0.792905 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1420] valid_0's auc: 0.79292 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1430] valid_0's auc: 0.792927 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1440] valid_0's auc: 0.792981 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1450] valid_0's auc: 0.792994 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1460] valid_0's auc: 0.79303 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further 
splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1470] valid_0's auc: 0.793046 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1480] valid_0's auc: 0.793061 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1490] valid_0's auc: 0.793082 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1500] valid_0's auc: 0.793104 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1510] valid_0's auc: 0.793113 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1520] valid_0's auc: 0.793163 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1530] valid_0's auc: 0.793162 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1540] valid_0's auc: 0.793185 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1550] valid_0's auc: 0.793219 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1560] valid_0's auc: 0.79323 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1570] valid_0's auc: 0.79325 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1580] valid_0's auc: 0.793264 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1590] 
valid_0's auc: 0.793254 [1600] valid_0's auc: 0.793245 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1610] valid_0's auc: 0.793278 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1620] valid_0's auc: 0.793317 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1630] valid_0's auc: 0.793376 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1640] valid_0's auc: 0.793411 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1650] valid_0's auc: 0.793422 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1660] valid_0's auc: 0.793448 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1670] valid_0's auc: 0.793489 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1680] valid_0's auc: 0.793504 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1690] valid_0's auc: 0.793493 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1700] valid_0's auc: 0.793482 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1710] valid_0's auc: 0.793462 [1720] valid_0's auc: 0.793497 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: 
-inf [1730] valid_0's auc: 0.79352 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1740] valid_0's auc: 0.793524 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1750] valid_0's auc: 0.793549 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1760] valid_0's auc: 0.793558 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1770] valid_0's auc: 0.793575 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1780] valid_0's auc: 0.793592 [1790] valid_0's auc: 0.793567 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1800] valid_0's auc: 0.793597 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1810] valid_0's auc: 0.793606 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1820] valid_0's auc: 0.793602 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1830] valid_0's auc: 0.793599 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1840] valid_0's auc: 0.793602 [1850] valid_0's auc: 0.793632 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1860] valid_0's auc: 0.793679 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1870] valid_0's auc: 0.793698 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1880] valid_0's auc: 0.793731 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1890] valid_0's auc: 0.793752 [LightGBM] [Warning] No further splits with 
positive gain, best gain: -inf [1900] valid_0's auc: 0.793779 [1910] valid_0's auc: 0.793787 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1920] valid_0's auc: 0.793824 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1930] valid_0's auc: 0.793855 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1940] valid_0's auc: 0.793882 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1950] valid_0's auc: 0.793897 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1960] valid_0's auc: 0.793902 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1970] valid_0's auc: 0.793904 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1980] valid_0's auc: 0.793911 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [1990] valid_0's auc: 0.793908 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [2000] valid_0's auc: 0.793923 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [2010] valid_0's auc: 0.79393 [2020] valid_0's auc: 0.793901 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [2030] valid_0's auc: 0.793906 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [2040] valid_0's auc: 0.793921 [LightGBM] [Warning] No further splits with positive gain, best gain: -inf [2050] valid_0's auc: 0.793904 [LightGBM] [Warning] No further splits with 
positive gain, best gain: -inf Early stopping, best iteration is: [2005] valid_0's auc: 0.793945 params: {'learning_rate': 0.01, 'max_depth': 7, 'num_leaves': 48, 'subsample': 0.8, 'colsample_bytree': 0.5, 'min_split_gain': 0.025, 'reg_alpha': 0.5, 'reg_lambda': 0.5}, AUC: 0.7939
In [11]:
Copied!
# Report the winning score and its hyper-parameters, one per line.
print(
    f"\nBest AUC: {best_auc:.4f}",
    f"Best Parameters: {best_params}",
    sep="\n",
)
# Report the winning score and its hyper-parameters, one per line.
print(
    f"\nBest AUC: {best_auc:.4f}",
    f"Best Parameters: {best_params}",
    sep="\n",
)
Best AUC: 0.7939 Best Parameters: {'learning_rate': 0.01, 'max_depth': 7, 'num_leaves': 48, 'subsample': 0.8, 'colsample_bytree': 0.5, 'min_split_gain': 0.025, 'reg_alpha': 0.5, 'reg_lambda': 0.5}
SHAP Importance¶
In [12]:
Copied!
# Display settings for the importance table shown next:
# no cap on the number of rendered rows ...
pd.options.display.max_rows = None
# ... and up to 100 characters per cell so long feature names stay readable.
pd.options.display.max_colwidth = 100
# Display settings for the importance table shown next:
# no cap on the number of rendered rows ...
pd.options.display.max_rows = None
# ... and up to 100 characters per cell so long feature names stay readable.
pd.options.display.max_colwidth = 100
In [13]:
Copied!
# Compute SHAP importance for the selected model on a reproducible random
# subset of the training rows (fixed seed).
# The sample size is capped at the number of available rows because
# `DataFrame.sample` raises a ValueError when asked for more rows than exist
# (sampling without replacement), e.g. after narrowing the training window.
shap_sample_size = min(10000, len(features_train))
shap_sample = features_train.sample(n=shap_sample_size, random_state=88)

# `compute_shap` returns a pair; only the second element (the per-feature
# importance table) is used here.
_, feature_importance = lgbm_pipeline.compute_shap(shap_sample)

# Display each feature with its cumulative importance value.
feature_importance[["feature", "cumulative_importance_percent"]]
# Compute SHAP importance for the selected model on a reproducible random
# subset of the training rows (fixed seed).
# The sample size is capped at the number of available rows because
# `DataFrame.sample` raises a ValueError when asked for more rows than exist
# (sampling without replacement), e.g. after narrowing the training window.
shap_sample_size = min(10000, len(features_train))
shap_sample = features_train.sample(n=shap_sample_size, random_state=88)

# `compute_shap` returns a pair; only the second element (the per-feature
# importance table) is used here.
_, feature_importance = lgbm_pipeline.compute_shap(shap_sample)

# Display each feature with its cumulative importance value.
feature_importance[["feature", "cumulative_importance_percent"]]
Out[13]:
feature | cumulative_importance_percent | |
---|---|---|
0 | NEW_APPLICATION_EXT_SOURCE_2 | 0.101066 |
1 | NEW_APPLICATION_EXT_SOURCE_3 | 0.188525 |
2 | NEW_APPLICATION_EXT_SOURCE_1 | 0.236480 |
3 | NEW_APPLICATION_AMT_ANNUITY_To_AMT_CREDIT | 0.276352 |
4 | NEW_APPLICATION_DAYS_EMPLOYED | 0.316175 |
5 | NEW_APPLICATION_Credit-Goods_Difference | 0.355660 |
6 | CLIENT_Max_of_Active_Cr_active_BureauReportedCredits_AMT_CREDIT_SUM_DEBT_To_AMT_CREDIT_SUMs_104w | 0.391181 |
7 | CLIENT_GENDER | 0.426466 |
8 | CLIENT_Max_of_PriorApplications_CNT_PAYMENTs_104w | 0.456833 |
9 | CLIENT_EDUCATION_TYPE | 0.484940 |
10 | NEW_APPLICATION_AMT_ANNUITY | 0.512791 |
11 | CLIENT_Installments_AMT_PAYMENTs_by_PriorApplication_CLIENT_TYPE_24cMo | 0.537398 |
12 | CLIENT_FAMILY_STATUS | 0.560329 |
13 | CLIENT_Avg_of_Consumer_credit_Cr_type_BureauReportedCredits_End_to_Update_Gaps_104w | 0.582440 |
14 | CLIENT_Min_of_Credit_card_monthly_balance_records_Available_Credits_6cMo | 0.603981 |
15 | CLIENT_Min_of_Installments_AMT_PAYMENTs_24cMo | 0.624278 |
16 | CLIENT_Age | 0.644416 |
17 | CLIENT_Avg_of_BureauReportedCredits_Available_Credits_104w | 0.664070 |
18 | CLIENT_Installments_AMT_PAYMENTs_by_PriorApplication_YIELD_GROUP_24cMo | 0.683458 |
19 | CLIENT_ORGANIZATION_TYPE | 0.702360 |
20 | CLIENT_PriorApplications_AMT_CREDITs_by_PriorApplication_YIELD_GROUP_104w | 0.720933 |
21 | CLIENT_Max_of_Loan_terminations_Loan_PriorApplication_AMT_APPLICATION_To_AMT_CREDITs_104w | 0.739492 |
22 | NEW_APPLICATION_AMT_GOODS_PRICE | 0.758032 |
23 | NEW_APPLICATION_DAYS_ID_PUBLISH | 0.775764 |
24 | NEW_APPLICATION_REGION_POPULATION_RELATIVE | 0.792866 |
25 | CLIENT_Installments_AMT_PAYMENTs_by_INSTALLMENT_STATUS_12cMo | 0.809920 |
26 | CLIENT_Avg_of_BureauReportedCredits_Available_Credits_26w | 0.826850 |
27 | CLIENT_Avg_of_Consumer_credit_Cr_type_BureauReportedCredits_AMT_CREDIT_SUMs_104w | 0.842489 |
28 | CLIENT_Time_To_Latest_Approved_Contract_status_PriorApplication_last_due_1st_version_timestamp_104w | 0.857940 |
29 | CLIENT_Max_of_Installments_PriorApplication_AMT_ANNUITY_To_AMT_CREDITs_6cMo | 0.873355 |
30 | CLIENT_Installments_AMT_PAYMENTs_by_PriorApplication_PAYMENT_TYPE_24cMo | 0.887748 |
31 | CLIENT_Max_of_Installments_Days_Difference_Actual_vs_Scheduleds_24cMo | 0.901785 |
32 | CLIENT_Std_of_Credit_card_monthly_balance_records_CNT_DRAWINGS_ATM_CURRENTs_24cMo | 0.915600 |
33 | NEW_APPLICATION_FLAG_DOCUMENT_3 | 0.929260 |
34 | CLIENT_Installments_AMT_PAYMENTs_by_INSTALLMENT_STATUS_6cMo | 0.942796 |
35 | CLIENT_PriorApplications_AMT_CREDITs_by_PriorApplication_NFLAG_INSURED_ON_APPROVAL_52w | 0.955856 |
36 | NEW_APPLICATION_FLOORSMAX_MODE | 0.968745 |
37 | CLIENT_Entropy_of_count_of_Installments_by_INSTALLMENT_STATUS_24cMo | 0.980715 |
38 | CLIENT_Std_of_BureauReportedCredits_Available_Credits_26w | 0.992054 |
39 | CLIENT_Max_of_Consumer_credit_Cr_type_BureauReportedCredits_AMT_CREDIT_SUM_DEBT_To_AMT_CREDIT_SU... | 1.000000 |
Iterate¶
Check out the Credit Default UI Tutorials for more ideas.
In [ ]:
Copied!