14. Train LGBM
In [1]:
Copied!
import featurebyte as fb
import pandas as pd
from sklearn.metrics import roc_auc_score
from modeling_script import LightGBMPipeline, Objective, Metric
import featurebyte as fb
import pandas as pd
from sklearn.metrics import roc_auc_score
from modeling_script import LightGBMPipeline, Objective, Metric
16:50:49 | WARNING | Service endpoint is inaccessible: http://featurebyte-server:8088/
Activate Catalog¶
In [2]:
Copied!
# Select the "tutorial" SDK profile before touching any catalog.
fb.use_profile("tutorial")

# Activate the tutorial catalog once; the following cells look up the
# historical feature tables through `catalog`.
catalog_name = "Credit Default Dataset SDK Tutorial"
catalog = fb.Catalog.activate(catalog_name)
16:51:05 | INFO | Using profile: tutorial 16:51:05 | INFO | Using configuration file at: /Users/gxav/.featurebyte/config.yaml 16:51:05 | INFO | Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) 16:51:05 | INFO | SDK version: 2.1.0.dev113 16:51:05 | INFO | No catalog activated. 16:51:05 | INFO | Catalog activated: Credit Default Dataset SDK Tutorial
Get Training and Holdout data¶
In [3]:
Copied!
# Show the historical feature tables available in the active catalog; the
# names listed here are the ones used to fetch the training/holdout tables.
catalog.list_historical_feature_tables()
Out[3]:
id | name | feature_store_name | observation_table_name | shape | created_at | |
---|---|---|---|---|---|---|
0 | 67c2c9a34e08d83e21381725 | 51 features for Credit Default - HOLDOUT_2024_1H | playground | CREDIT_DEFAULT_HOLDOUT_2024_1H | [25666, 54] | 2025-03-01T08:49:21.154000 |
1 | 67c2c8fe3df413286793fb5a | 51 features for Credit Default - TRAIN_2019_2023 | playground | CREDIT_DEFAULT_TRAIN_2019_2023 | [256273, 54] | 2025-03-01T08:47:25.134000 |
In [4]:
Copied!
# Look up the two historical feature tables by name: one for training
# (TRAIN_2019_2023) and one for holdout evaluation (HOLDOUT_2024_1H).
training_data_table = catalog.get_historical_feature_table(
    "51 features for Credit Default - TRAIN_2019_2023"
)
holdout_data_table = catalog.get_historical_feature_table(
    "51 features for Credit Default - HOLDOUT_2024_1H"
)
In [5]:
Copied!
# Download each historical feature table once as a pandas DataFrame.
# The training table is large (256273 rows per the listing above), so the
# download is intentionally not repeated.
training_data = training_data_table.to_pandas()
holdout_data = holdout_data_table.to_pandas()
Downloading table |████████████████████████████████████████| 256273/256273 [100% Downloading table |████████████████████████████████████████| 25666/25666 [100%]
Categorize per feature type¶
In [6]:
Copied!
# Maps every feature column to the type label passed to
# `LightGBMPipeline.train(feature_types=...)`: "numeric", "categorical" or
# "dictionary". Defined once; insertion order is numeric -> categorical ->
# dictionary.
feature_types = {
    # --- numeric: attributes of the new application ---
    "NEW_APPLICATION_AMT_ANNUITY": "numeric",
    "NEW_APPLICATION_AMT_ANNUITY_To_AMT_CREDIT": "numeric",
    "NEW_APPLICATION_AMT_ANNUITY_To_AMT_GOODS_VALUE": "numeric",
    "NEW_APPLICATION_AMT_CREDIT": "numeric",
    "NEW_APPLICATION_AMT_GOODS_VALUE_To_AMT_CREDIT": "numeric",
    "NEW_APPLICATION_AMT_REQ_CREDIT_BUREAU_QRT": "numeric",
    "NEW_APPLICATION_Credit-Goods_Gap": "numeric",
    "NEW_APPLICATION_DAYS_EMPLOYED": "numeric",
    "NEW_APPLICATION_DAYS_LAST_PHONE_CHANGE": "numeric",
    "NEW_APPLICATION_DAYS_REGISTRATION": "numeric",
    "NEW_APPLICATION_FLAG_DOCUMENT_3": "numeric",
    "NEW_APPLICATION_FLOORSMAX_MEDI": "numeric",
    "NEW_APPLICATION_REGION_POPULATION_RELATIVE": "numeric",
    "NEW_APPLICATION_Time_To_BIRTHDATE": "numeric",
    # --- numeric: client-level aggregates ---
    "CLIENT_Max_of_Consumer_Loan_terminations_Consumer_Loan_AMT_ANNUITYs_104w": "numeric",
    "CLIENT_Avg_of_Consumer_installments_records_AMT_PAYMENTs_24cMo": "numeric",
    "CLIENT_Avg_of_Late_Status_Consumer_installments_records_AMT_PAYMENTs_24cMo": "numeric",
    "CLIENT_Min_of_Consumer_installments_records_AMT_PAYMENTs_12cMo": "numeric",
    "CLIENT_Min_of_Consumer_installments_records_Consumer_Loan_AMT_CREDIT_To_AMT_APPLICATIONs_24cMo": "numeric",
    "CLIENT_Max_of_Consumer_installments_records_Consumer_Loan_AMT_APPLICATIONs_24cMo": "numeric",
    "CLIENT_Max_of_Consumer_installments_records_Payment_Delays_24cMo": "numeric",
    "CLIENT_Temporal_Mean_of_Sums_of_Consumer_installments_records_Consumer_Loan_AMT_ANNUITYs_24cMo": "numeric",
    "CLIENT_Avg_of_Approved_Status_Prior_Applications_AMT_ANNUITYs_104w": "numeric",
    "CLIENT_Avg_of_Approved_Status_Prior_Applications_CNT_PAYMENTs_104w": "numeric",
    "CLIENT_Avg_of_Prior_Applications_CNT_PAYMENTs_104w": "numeric",
    "CLIENT_Max_of_Time_between_2_Approved_Status_Prior_Applications_for_the_Client_104w": "numeric",
    "CLIENT_Max_of_Time_between_2_Prior_Applications_for_the_Client_104w": "numeric",
    "CLIENT_Latest_Revolving_loans_Contract_type_Prior_Application_AMT_APPLICATION_104w": "numeric",
    "CLIENT_Min_of_Approved_Status_Cash_loans_Contract_type_Prior_Applications_AMT_ANNUITY_To_AMT_CREDITs_104w": "numeric",
    "CLIENT_Max_of_Approved_Status_Prior_Applications_AMT_ANNUITYs_104w": "numeric",
    "CLIENT_Min_of_Cash_loans_Contract_type_Prior_Applications_CNT_PAYMENTs_104w": "numeric",
    "CLIENT_Max_of_Prior_Applications_AMT_CREDIT_To_AMT_APPLICATIONs_104w": "numeric",
    "CLIENT_Max_of_Prior_Applications_Application-Credit_Gaps_104w": "numeric",
    "CLIENT_Max_of_Prior_Applications_CNT_PAYMENTs_104w": "numeric",
    "CLIENT_Pct_of_Prior_Applications_is_Refused_Status_52w": "numeric",
    "CLIENT_Pct_of_Prior_Applications_is_Refused_Status_Cash_loans_Contract_type_104w": "numeric",
    "CLIENT_Sum_of_Approved_Status_Prior_Applications_AMT_ANNUITYs_To_Sum_of_Approved_Status_Prior_Applications_AMT_CREDITs_104w": "numeric",
    "CLIENT_Time_Since_Latest_Prior_Application_DECISION_DATE_104w": "numeric",
    "CLIENT_vs_OVERALL_Count_of_Approved_Status_Cash_loans_Contract_type_Prior_Applications_by_Prior_Application_PRODUCT_COMBINATION_104w": "numeric",
    # --- categorical ---
    "NEW_APPLICATION_CODE_GENDER": "categorical",
    "NEW_APPLICATION_EDUCATION_TYPE": "categorical",
    "NEW_APPLICATION_INCOME_TYPE": "categorical",
    "NEW_APPLICATION_OCCUPATION_TYPE": "categorical",
    "NEW_APPLICATION_ORGANIZATION_TYPE": "categorical",
    "CLIENT_Consumer_Loan_GOODS_CATEGORY_with_Lowest_sum_of_active_Consumer_Loans_AMT_CREDITs": "categorical",
    "CLIENT_Latest_Refused_Status_Cash_loans_Contract_type_Prior_Application_YIELD_GROUP_104w": "categorical",
    # --- dictionary ---
    "CLIENT_Consumer_installments_records_AMT_PAYMENTs_by_Consumer_installments_record_INSTALLMENT_STATUS_6cMo": "dictionary",
    "CLIENT_Count_of_Consumer_installments_records_by_Consumer_Loan_YIELD_GROUP_24cMo": "dictionary",
    "CLIENT_Approved_Status_Prior_Applications_AMT_CREDITs_by_Prior_Application_YIELD_GROUP_104w": "dictionary",
    "CLIENT_Count_of_Cash_loans_Contract_type_Prior_Applications_by_Prior_Application_YIELD_GROUP_104w": "dictionary",
    "CLIENT_Count_of_Prior_Applications_by_Prior_Application_YIELD_GROUP_104w": "dictionary",
}
Run LightGBM¶
In [7]:
Copied!
# `Loan_Default` is the target; it and the two other listed columns are
# dropped so that only the feature columns are passed to the model.
TARGET_COLUMN = "Loan_Default"
NON_FEATURE_COLUMNS = ["POINT_IN_TIME", "NEW_APPLICATION_ID", TARGET_COLUMN]

# Split each downloaded table into target and features, using the same
# column list for training and holdout so the two frames stay aligned.
target_train = training_data[TARGET_COLUMN]
features_train = training_data.drop(columns=NON_FEATURE_COLUMNS)
target_test = holdout_data[TARGET_COLUMN]
features_test = holdout_data.drop(columns=NON_FEATURE_COLUMNS)

# Train a single binary-objective LightGBM pipeline with AUC as eval metric.
# NOTE(review): the holdout set is passed as `df_test` here and is also the
# set scored for the reported AUC below. If `LightGBMPipeline.train` uses
# `df_test` for early stopping or tuning, the reported AUC is optimistic --
# confirm in `modeling_script`.
lgbm_pipeline = LightGBMPipeline(objective=Objective.BINARY, eval_metric=Metric.AUC)
lgbm_pipeline.train(
    df_train=features_train,
    df_test=features_test,
    y_train=target_train,
    y_test=target_test,
    feature_types=feature_types,
)

# Score the holdout features and report ROC AUC against the holdout target.
predictions = lgbm_pipeline.predict(features_test)
print(f"AUC: {roc_auc_score(target_test, predictions)}")
AUC: 0.7405204792668864
SHAP Importance¶
In [8]:
Copied!
# Display settings for the feature-importance table shown below.
pd.set_option('display.max_rows', None)  # Show all rows
# None removes the column-width limit. The previous value of 100 truncated
# long feature names with "..." despite the intent to show them in full.
pd.set_option('display.max_colwidth', None)  # Show full column content
In [9]:
Copied!
# SHAP is computed on a seeded random sample of the training features
# (presumably to keep the computation fast -- the full frame is not used).
SHAP_SAMPLE_SIZE = 10000
SHAP_RANDOM_STATE = 88

# Clamp the sample size so `sample` does not raise when the training frame
# has fewer rows than SHAP_SAMPLE_SIZE.
shap_sample = features_train.sample(
    n=min(SHAP_SAMPLE_SIZE, len(features_train)),
    random_state=SHAP_RANDOM_STATE,
)

# `compute_shap` returns a pair; only the second element (the feature
# importance table) is used here.
_, feature_importance = lgbm_pipeline.compute_shap(shap_sample)
feature_importance[["feature", "cumulative_importance_percent"]]
Out[9]:
feature | cumulative_importance_percent | |
---|---|---|
0 | NEW_APPLICATION_DAYS_EMPLOYED | 0.067910 |
1 | NEW_APPLICATION_AMT_ANNUITY_To_AMT_CREDIT | 0.120227 |
2 | NEW_APPLICATION_CODE_GENDER | 0.171455 |
3 | NEW_APPLICATION_DAYS_LAST_PHONE_CHANGE | 0.220847 |
4 | NEW_APPLICATION_EDUCATION_TYPE | 0.265415 |
5 | NEW_APPLICATION_Time_To_BIRTHDATE | 0.307860 |
6 | NEW_APPLICATION_REGION_POPULATION_RELATIVE | 0.346151 |
7 | NEW_APPLICATION_AMT_GOODS_VALUE_To_AMT_CREDIT | 0.380739 |
8 | NEW_APPLICATION_Credit-Goods_Gap | 0.407075 |
9 | NEW_APPLICATION_AMT_ANNUITY_To_AMT_GOODS_VALUE | 0.433297 |
10 | NEW_APPLICATION_FLOORSMAX_MEDI | 0.459445 |
11 | CLIENT_Max_of_Prior_Applications_AMT_CREDIT_To_AMT_APPLICATIONs_104w | 0.484454 |
12 | NEW_APPLICATION_AMT_ANNUITY | 0.508202 |
13 | CLIENT_Count_of_Consumer_installments_records_by_Consumer_Loan_YIELD_GROUP_24cMo | 0.528123 |
14 | CLIENT_Consumer_installments_records_AMT_PAYMENTs_by_Consumer_installments_record_INSTALLMENT_ST... | 0.547329 |
15 | CLIENT_Latest_Revolving_loans_Contract_type_Prior_Application_AMT_APPLICATION_104w | 0.566244 |
16 | CLIENT_Max_of_Consumer_installments_records_Payment_Delays_24cMo | 0.584885 |
17 | CLIENT_Max_of_Consumer_Loan_terminations_Consumer_Loan_AMT_ANNUITYs_104w | 0.602186 |
18 | CLIENT_Count_of_Prior_Applications_by_Prior_Application_YIELD_GROUP_104w | 0.619358 |
19 | CLIENT_Max_of_Prior_Applications_CNT_PAYMENTs_104w | 0.636254 |
20 | NEW_APPLICATION_FLAG_DOCUMENT_3 | 0.653004 |
21 | NEW_APPLICATION_ORGANIZATION_TYPE | 0.669505 |
22 | CLIENT_Time_Since_Latest_Prior_Application_DECISION_DATE_104w | 0.685927 |
23 | CLIENT_Max_of_Time_between_2_Prior_Applications_for_the_Client_104w | 0.701935 |
24 | NEW_APPLICATION_AMT_REQ_CREDIT_BUREAU_QRT | 0.717486 |
25 | CLIENT_Min_of_Consumer_installments_records_Consumer_Loan_AMT_CREDIT_To_AMT_APPLICATIONs_24cMo | 0.732970 |
26 | CLIENT_Avg_of_Consumer_installments_records_AMT_PAYMENTs_24cMo | 0.748362 |
27 | CLIENT_Avg_of_Late_Status_Consumer_installments_records_AMT_PAYMENTs_24cMo | 0.763642 |
28 | CLIENT_Count_of_Cash_loans_Contract_type_Prior_Applications_by_Prior_Application_YIELD_GROUP_104w | 0.777910 |
29 | CLIENT_vs_OVERALL_Count_of_Approved_Status_Cash_loans_Contract_type_Prior_Applications_by_Prior_... | 0.791514 |
30 | NEW_APPLICATION_INCOME_TYPE | 0.805046 |
31 | CLIENT_Approved_Status_Prior_Applications_AMT_CREDITs_by_Prior_Application_YIELD_GROUP_104w | 0.818565 |
32 | CLIENT_Avg_of_Approved_Status_Prior_Applications_CNT_PAYMENTs_104w | 0.831819 |
33 | NEW_APPLICATION_OCCUPATION_TYPE | 0.844805 |
34 | CLIENT_Max_of_Consumer_installments_records_Consumer_Loan_AMT_APPLICATIONs_24cMo | 0.856838 |
35 | CLIENT_Sum_of_Approved_Status_Prior_Applications_AMT_ANNUITYs_To_Sum_of_Approved_Status_Prior_Ap... | 0.868432 |
36 | NEW_APPLICATION_AMT_CREDIT | 0.879934 |
37 | CLIENT_Pct_of_Prior_Applications_is_Refused_Status_52w | 0.891300 |
38 | CLIENT_Pct_of_Prior_Applications_is_Refused_Status_Cash_loans_Contract_type_104w | 0.901749 |
39 | CLIENT_Min_of_Consumer_installments_records_AMT_PAYMENTs_12cMo | 0.912137 |
40 | CLIENT_Max_of_Approved_Status_Prior_Applications_AMT_ANNUITYs_104w | 0.921494 |
41 | CLIENT_Max_of_Time_between_2_Approved_Status_Prior_Applications_for_the_Client_104w | 0.930813 |
42 | CLIENT_Avg_of_Approved_Status_Prior_Applications_AMT_ANNUITYs_104w | 0.940113 |
43 | CLIENT_Latest_Refused_Status_Cash_loans_Contract_type_Prior_Application_YIELD_GROUP_104w | 0.949050 |
44 | CLIENT_Min_of_Approved_Status_Cash_loans_Contract_type_Prior_Applications_AMT_ANNUITY_To_AMT_CRE... | 0.957822 |
45 | CLIENT_Temporal_Mean_of_Sums_of_Consumer_installments_records_Consumer_Loan_AMT_ANNUITYs_24cMo | 0.966514 |
46 | NEW_APPLICATION_DAYS_REGISTRATION | 0.975172 |
47 | CLIENT_Max_of_Prior_Applications_Application-Credit_Gaps_104w | 0.983529 |
48 | CLIENT_Avg_of_Prior_Applications_CNT_PAYMENTs_104w | 0.991302 |
49 | CLIENT_Min_of_Cash_loans_Contract_type_Prior_Applications_CNT_PAYMENTs_104w | 0.998942 |
50 | CLIENT_Consumer_Loan_GOODS_CATEGORY_with_Lowest_sum_of_active_Consumer_Loans_AMT_CREDITs | 1.000000 |
Iterate¶
Check out the Credit Default UI Tutorials for more ideas.
Try building new features from the two source tables below, which we left for you to explore.
Table | Description |
---|---|
CASH_LOAN_STATUS | Tracks the status of cash loans. |
CASH_INSTALLMENTS | Logs monthly installments for cash loans at the time of payment. |
In [ ]:
Copied!