8. Create Lookup Features
Create Lookup features¶
Now, let's dive into basic feature engineering.
The most straightforward features we can craft with FeatureByte are known as lookup features. These are either direct columns taken from the source table or simple transforms that don't require any aggregations.
We will declare 19 features from the NEW_APPLICATION table: 18 lookup features, plus one feature derived from the BIRTHDATE lookup.
Activate catalog¶
In [1]:
Copied!
import featurebyte as fb
# Set your profile to the tutorial environment
fb.use_profile("tutorial")
# Activate the tutorial catalog; later cells fetch views through the returned `catalog` handle.
catalog_name = "Credit Default Dataset SDK Tutorial"
catalog = fb.Catalog.activate(catalog_name)
import featurebyte as fb
# Set your profile to the tutorial environment
fb.use_profile("tutorial")
# Activate the tutorial catalog; later cells fetch views through the returned `catalog` handle.
catalog_name = "Credit Default Dataset SDK Tutorial"
catalog = fb.Catalog.activate(catalog_name)
16:41:17 | WARNING | Service endpoint is inaccessible: http://featurebyte-server:8088/ 16:41:17 | INFO | Using profile: tutorial 16:41:17 | INFO | Using configuration file at: /Users/gxav/.featurebyte/config.yaml 16:41:17 | INFO | Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) 16:41:17 | INFO | SDK version: 2.1.0.dev113 16:41:17 | INFO | No catalog activated. 16:41:17 | INFO | Catalog activated: Credit Default Dataset SDK Tutorial
Get view from table¶
In [2]:
Copied!
# Get view from NEW_APPLICATION dimension table.
new_application_view = catalog.get_view("NEW_APPLICATION")
# Get view from NEW_APPLICATION dimension table.
new_application_view = catalog.get_view("NEW_APPLICATION")
Create ratio and difference columns¶
In [3]:
Copied!
# Add a derived view column: AMT_ANNUITY divided by AMT_CREDIT.
# The column is turned into a lookup feature in a later cell.
new_application_view["AMT_ANNUITY To AMT_CREDIT"] = (
new_application_view["AMT_ANNUITY"] / new_application_view["AMT_CREDIT"]
)
# Add a derived view column: AMT_ANNUITY divided by AMT_CREDIT.
# The column is turned into a lookup feature in a later cell.
new_application_view["AMT_ANNUITY To AMT_CREDIT"] = (
new_application_view["AMT_ANNUITY"] / new_application_view["AMT_CREDIT"]
)
In [4]:
Copied!
# Add a derived view column: AMT_GOODS_VALUE divided by AMT_CREDIT.
# The column is turned into a lookup feature in a later cell.
new_application_view["AMT_GOODS_VALUE To AMT_CREDIT"] = (
new_application_view["AMT_GOODS_VALUE"] / new_application_view["AMT_CREDIT"]
)
# Add a derived view column: AMT_GOODS_VALUE divided by AMT_CREDIT.
# The column is turned into a lookup feature in a later cell.
new_application_view["AMT_GOODS_VALUE To AMT_CREDIT"] = (
new_application_view["AMT_GOODS_VALUE"] / new_application_view["AMT_CREDIT"]
)
In [5]:
Copied!
# Add a derived view column: AMT_ANNUITY divided by AMT_GOODS_VALUE.
# The column is turned into a lookup feature in a later cell.
new_application_view["AMT_ANNUITY To AMT_GOODS_VALUE"] = (
new_application_view["AMT_ANNUITY"] / new_application_view["AMT_GOODS_VALUE"]
)
# Add a derived view column: AMT_ANNUITY divided by AMT_GOODS_VALUE.
# The column is turned into a lookup feature in a later cell.
new_application_view["AMT_ANNUITY To AMT_GOODS_VALUE"] = (
new_application_view["AMT_ANNUITY"] / new_application_view["AMT_GOODS_VALUE"]
)
In [6]:
Copied!
# Add a derived view column: AMT_CREDIT minus AMT_GOODS_VALUE.
# The column is turned into a lookup feature in a later cell.
new_application_view["Credit-Goods Gap"] = (
new_application_view["AMT_CREDIT"] - new_application_view["AMT_GOODS_VALUE"]
)
# Add a derived view column: AMT_CREDIT minus AMT_GOODS_VALUE.
# The column is turned into a lookup feature in a later cell.
new_application_view["Credit-Goods Gap"] = (
new_application_view["AMT_CREDIT"] - new_application_view["AMT_GOODS_VALUE"]
)
In [7]:
Copied!
# Create lookup feature from BIRTHDATE column for New Application entity.
# This feature is an intermediate: it is only used below to derive the
# NEW_APPLICATION_Time_To_BIRTHDATE feature and is not saved on its own.
new_application_birthdate = new_application_view["BIRTHDATE"].as_feature(
"NEW_APPLICATION_BIRTHDATE"
)
# Create lookup feature from BIRTHDATE column for New Application entity.
# This feature is an intermediate: it is only used below to derive the
# NEW_APPLICATION_Time_To_BIRTHDATE feature and is not saved on its own.
new_application_birthdate = new_application_view["BIRTHDATE"].as_feature(
"NEW_APPLICATION_BIRTHDATE"
)
In [8]:
Copied!
# Create lookup feature from CODE_GENDER column for New Application entity.
new_application_code_gender = new_application_view["CODE_GENDER"].as_feature(
"NEW_APPLICATION_CODE_GENDER"
)
# Create lookup feature from CODE_GENDER column for New Application entity.
new_application_code_gender = new_application_view["CODE_GENDER"].as_feature(
"NEW_APPLICATION_CODE_GENDER"
)
In [9]:
Copied!
# Create lookup feature from INCOME_TYPE column for New Application entity.
new_application_income_type = new_application_view["INCOME_TYPE"].as_feature(
"NEW_APPLICATION_INCOME_TYPE"
)
# Create lookup feature from INCOME_TYPE column for New Application entity.
new_application_income_type = new_application_view["INCOME_TYPE"].as_feature(
"NEW_APPLICATION_INCOME_TYPE"
)
In [10]:
Copied!
# Create lookup feature from EDUCATION_TYPE column for New Application entity.
new_application_education_type = new_application_view["EDUCATION_TYPE"].as_feature(
"NEW_APPLICATION_EDUCATION_TYPE"
)
# Create lookup feature from EDUCATION_TYPE column for New Application entity.
new_application_education_type = new_application_view["EDUCATION_TYPE"].as_feature(
"NEW_APPLICATION_EDUCATION_TYPE"
)
In [11]:
Copied!
# Create lookup feature from OCCUPATION_TYPE column for New Application entity.
new_application_occupation_type = new_application_view["OCCUPATION_TYPE"].as_feature(
"NEW_APPLICATION_OCCUPATION_TYPE"
)
# Create lookup feature from OCCUPATION_TYPE column for New Application entity.
new_application_occupation_type = new_application_view["OCCUPATION_TYPE"].as_feature(
"NEW_APPLICATION_OCCUPATION_TYPE"
)
In [12]:
Copied!
# Create lookup feature from ORGANIZATION_TYPE column for New Application entity.
new_application_organization_type = new_application_view[
"ORGANIZATION_TYPE"
].as_feature("NEW_APPLICATION_ORGANIZATION_TYPE")
# Create lookup feature from ORGANIZATION_TYPE column for New Application entity.
new_application_organization_type = new_application_view[
"ORGANIZATION_TYPE"
].as_feature("NEW_APPLICATION_ORGANIZATION_TYPE")
In [13]:
Copied!
# Create lookup feature from AMT_CREDIT column for New Application entity.
new_application_amt_credit = new_application_view["AMT_CREDIT"].as_feature(
"NEW_APPLICATION_AMT_CREDIT"
)
# Create lookup feature from AMT_CREDIT column for New Application entity.
new_application_amt_credit = new_application_view["AMT_CREDIT"].as_feature(
"NEW_APPLICATION_AMT_CREDIT"
)
In [14]:
Copied!
# Create lookup feature from AMT_ANNUITY column for New Application entity.
new_application_amt_annuity = new_application_view["AMT_ANNUITY"].as_feature(
"NEW_APPLICATION_AMT_ANNUITY"
)
# Create lookup feature from AMT_ANNUITY column for New Application entity.
new_application_amt_annuity = new_application_view["AMT_ANNUITY"].as_feature(
"NEW_APPLICATION_AMT_ANNUITY"
)
In [15]:
Copied!
# Create lookup feature from REGION_POPULATION_RELATIVE column for New Application entity.
new_application_region_population_relative = new_application_view[
"REGION_POPULATION_RELATIVE"
].as_feature("NEW_APPLICATION_REGION_POPULATION_RELATIVE")
# Create lookup feature from REGION_POPULATION_RELATIVE column for New Application entity.
new_application_region_population_relative = new_application_view[
"REGION_POPULATION_RELATIVE"
].as_feature("NEW_APPLICATION_REGION_POPULATION_RELATIVE")
In [16]:
Copied!
# Create lookup feature from DAYS_EMPLOYED column for New Application entity.
new_application_days_employed = new_application_view["DAYS_EMPLOYED"].as_feature(
"NEW_APPLICATION_DAYS_EMPLOYED"
)
# Create lookup feature from DAYS_EMPLOYED column for New Application entity.
new_application_days_employed = new_application_view["DAYS_EMPLOYED"].as_feature(
"NEW_APPLICATION_DAYS_EMPLOYED"
)
In [17]:
Copied!
# Create lookup feature from DAYS_REGISTRATION column for New Application entity.
new_application_days_registration = new_application_view[
"DAYS_REGISTRATION"
].as_feature("NEW_APPLICATION_DAYS_REGISTRATION")
# Create lookup feature from DAYS_REGISTRATION column for New Application entity.
new_application_days_registration = new_application_view[
"DAYS_REGISTRATION"
].as_feature("NEW_APPLICATION_DAYS_REGISTRATION")
In [18]:
Copied!
# Create lookup feature from DAYS_LAST_PHONE_CHANGE column for New Application entity.
new_application_days_last_phone_change = new_application_view[
"DAYS_LAST_PHONE_CHANGE"
].as_feature("NEW_APPLICATION_DAYS_LAST_PHONE_CHANGE")
# Create lookup feature from DAYS_LAST_PHONE_CHANGE column for New Application entity.
new_application_days_last_phone_change = new_application_view[
"DAYS_LAST_PHONE_CHANGE"
].as_feature("NEW_APPLICATION_DAYS_LAST_PHONE_CHANGE")
In [19]:
Copied!
# Create lookup feature from FLOORSMAX_MEDI column for New Application entity.
new_application_floorsmax_medi = new_application_view["FLOORSMAX_MEDI"].as_feature(
"NEW_APPLICATION_FLOORSMAX_MEDI"
)
# Create lookup feature from FLOORSMAX_MEDI column for New Application entity.
new_application_floorsmax_medi = new_application_view["FLOORSMAX_MEDI"].as_feature(
"NEW_APPLICATION_FLOORSMAX_MEDI"
)
In [20]:
Copied!
# Create lookup feature from FLAG_DOCUMENT_3 column for New Application entity.
new_application_flag_document_3 = new_application_view["FLAG_DOCUMENT_3"].as_feature(
"NEW_APPLICATION_FLAG_DOCUMENT_3"
)
# Create lookup feature from FLAG_DOCUMENT_3 column for New Application entity.
new_application_flag_document_3 = new_application_view["FLAG_DOCUMENT_3"].as_feature(
"NEW_APPLICATION_FLAG_DOCUMENT_3"
)
In [21]:
Copied!
# Create lookup feature from AMT_REQ_CREDIT_BUREAU_QRT column for New Application entity.
new_application_amt_req_credit_bureau_qrt = new_application_view[
"AMT_REQ_CREDIT_BUREAU_QRT"
].as_feature("NEW_APPLICATION_AMT_REQ_CREDIT_BUREAU_QRT")
# Create lookup feature from AMT_REQ_CREDIT_BUREAU_QRT column for New Application entity.
new_application_amt_req_credit_bureau_qrt = new_application_view[
"AMT_REQ_CREDIT_BUREAU_QRT"
].as_feature("NEW_APPLICATION_AMT_REQ_CREDIT_BUREAU_QRT")
In [22]:
Copied!
# Create lookup feature from AMT_ANNUITY To AMT_CREDIT column for New Application entity.
new_application_amt_annuity_to_amt_credit = new_application_view[
"AMT_ANNUITY To AMT_CREDIT"
].as_feature("NEW_APPLICATION_AMT_ANNUITY_To_AMT_CREDIT")
# Create lookup feature from AMT_ANNUITY To AMT_CREDIT column for New Application entity.
new_application_amt_annuity_to_amt_credit = new_application_view[
"AMT_ANNUITY To AMT_CREDIT"
].as_feature("NEW_APPLICATION_AMT_ANNUITY_To_AMT_CREDIT")
In [23]:
Copied!
# Create lookup feature from AMT_GOODS_VALUE To AMT_CREDIT column for New Application entity.
new_application_amt_goods_value_to_amt_credit = new_application_view[
"AMT_GOODS_VALUE To AMT_CREDIT"
].as_feature("NEW_APPLICATION_AMT_GOODS_VALUE_To_AMT_CREDIT")
# Create lookup feature from AMT_GOODS_VALUE To AMT_CREDIT column for New Application entity.
new_application_amt_goods_value_to_amt_credit = new_application_view[
"AMT_GOODS_VALUE To AMT_CREDIT"
].as_feature("NEW_APPLICATION_AMT_GOODS_VALUE_To_AMT_CREDIT")
In [24]:
Copied!
# Create lookup feature from AMT_ANNUITY To AMT_GOODS_VALUE column for New Application entity.
new_application_amt_annuity_to_amt_goods_value = new_application_view[
"AMT_ANNUITY To AMT_GOODS_VALUE"
].as_feature("NEW_APPLICATION_AMT_ANNUITY_To_AMT_GOODS_VALUE")
# Create lookup feature from AMT_ANNUITY To AMT_GOODS_VALUE column for New Application entity.
new_application_amt_annuity_to_amt_goods_value = new_application_view[
"AMT_ANNUITY To AMT_GOODS_VALUE"
].as_feature("NEW_APPLICATION_AMT_ANNUITY_To_AMT_GOODS_VALUE")
In [25]:
Copied!
# Create lookup feature from Credit-Goods Gap column for New Application entity.
new_application_credit_goods_gap = new_application_view["Credit-Goods Gap"].as_feature(
"NEW_APPLICATION_Credit-Goods_Gap"
)
# Create lookup feature from Credit-Goods Gap column for New Application entity.
new_application_credit_goods_gap = new_application_view["Credit-Goods Gap"].as_feature(
"NEW_APPLICATION_Credit-Goods_Gap"
)
Derive a time-since feature from a Lookup feature¶
In [26]:
Copied!
# Derive the time between the request's point-in-time and the New Application's
# BIRTHDATE, in days (computed as point-in-time minus BIRTHDATE).
new_application_time_to_birthdate = (
fb.RequestColumn.point_in_time() - new_application_birthdate
).dt.day
# Name feature
new_application_time_to_birthdate.name = "NEW_APPLICATION_Time_To_BIRTHDATE"
# Derive the time between the request's point-in-time and the New Application's
# BIRTHDATE, in days (computed as point-in-time minus BIRTHDATE).
new_application_time_to_birthdate = (
fb.RequestColumn.point_in_time() - new_application_birthdate
).dt.day
# Name feature
new_application_time_to_birthdate.name = "NEW_APPLICATION_Time_To_BIRTHDATE"
In [27]:
Copied!
# Save the 19 features in one call by grouping them in a FeatureGroup.
# The raw BIRTHDATE lookup is intentionally absent: only the time feature
# derived from it is included.
fb.FeatureGroup(
[
new_application_time_to_birthdate,
new_application_region_population_relative,
new_application_organization_type,
new_application_occupation_type,
new_application_income_type,
new_application_floorsmax_medi,
new_application_flag_document_3,
new_application_education_type,
new_application_days_registration,
new_application_days_last_phone_change,
new_application_days_employed,
new_application_credit_goods_gap,
new_application_code_gender,
new_application_amt_req_credit_bureau_qrt,
new_application_amt_goods_value_to_amt_credit,
new_application_amt_credit,
new_application_amt_annuity_to_amt_goods_value,
new_application_amt_annuity_to_amt_credit,
new_application_amt_annuity,
]
).save()
# Save the 19 features in one call by grouping them in a FeatureGroup.
# The raw BIRTHDATE lookup is intentionally absent: only the time feature
# derived from it is included.
fb.FeatureGroup(
[
new_application_time_to_birthdate,
new_application_region_population_relative,
new_application_organization_type,
new_application_occupation_type,
new_application_income_type,
new_application_floorsmax_medi,
new_application_flag_document_3,
new_application_education_type,
new_application_days_registration,
new_application_days_last_phone_change,
new_application_days_employed,
new_application_credit_goods_gap,
new_application_code_gender,
new_application_amt_req_credit_bureau_qrt,
new_application_amt_goods_value_to_amt_credit,
new_application_amt_credit,
new_application_amt_annuity_to_amt_goods_value,
new_application_amt_annuity_to_amt_credit,
new_application_amt_annuity,
]
).save()
Done! |████████████████████████████████████████| 100% in 12.2s (0.08%/s) Done! |████████████████████████████████████████| 100% in 6.1s (0.17%/s) Loading Feature(s) |████████████████████████████████████████| 19/19 [100%] in 0.
Add description¶
In [28]:
Copied!
# Attach a human-readable description to each of the 19 features in the
# FeatureGroup built above, in the same order as they are listed there.
new_application_time_to_birthdate.update_description(
"Time to the New Application's BIRTHDATE in days."
)
new_application_region_population_relative.update_description(
"REGION_POPULATION_RELATIVE of the New Application"
)
new_application_organization_type.update_description(
"ORGANIZATION_TYPE of the New Application"
)
new_application_occupation_type.update_description(
"OCCUPATION_TYPE of the New Application"
)
new_application_income_type.update_description("INCOME_TYPE of the New Application")
new_application_floorsmax_medi.update_description(
"FLOORSMAX_MEDI of the New Application"
)
new_application_flag_document_3.update_description(
"FLAG_DOCUMENT_3 of the New Application"
)
new_application_education_type.update_description(
"EDUCATION_TYPE of the New Application"
)
new_application_days_registration.update_description(
"DAYS_REGISTRATION of the New Application"
)
new_application_days_last_phone_change.update_description(
"DAYS_LAST_PHONE_CHANGE of the New Application"
)
new_application_days_employed.update_description("DAYS_EMPLOYED of the New Application")
new_application_credit_goods_gap.update_description(
"Credit-Goods Gap of the New Application"
)
new_application_code_gender.update_description("CODE_GENDER of the New Application")
new_application_amt_req_credit_bureau_qrt.update_description(
"AMT_REQ_CREDIT_BUREAU_QRT of the New Application"
)
new_application_amt_goods_value_to_amt_credit.update_description(
"AMT_GOODS_VALUE To AMT_CREDIT of the New Application"
)
new_application_amt_credit.update_description("AMT_CREDIT of the New Application")
new_application_amt_annuity_to_amt_goods_value.update_description(
"AMT_ANNUITY To AMT_GOODS_VALUE of the New Application"
)
new_application_amt_annuity_to_amt_credit.update_description(
"AMT_ANNUITY To AMT_CREDIT of the New Application"
)
new_application_amt_annuity.update_description("AMT_ANNUITY of the New Application")
# Attach a human-readable description to each of the 19 features in the
# FeatureGroup built above, in the same order as they are listed there.
new_application_time_to_birthdate.update_description(
"Time to the New Application's BIRTHDATE in days."
)
new_application_region_population_relative.update_description(
"REGION_POPULATION_RELATIVE of the New Application"
)
new_application_organization_type.update_description(
"ORGANIZATION_TYPE of the New Application"
)
new_application_occupation_type.update_description(
"OCCUPATION_TYPE of the New Application"
)
new_application_income_type.update_description("INCOME_TYPE of the New Application")
new_application_floorsmax_medi.update_description(
"FLOORSMAX_MEDI of the New Application"
)
new_application_flag_document_3.update_description(
"FLAG_DOCUMENT_3 of the New Application"
)
new_application_education_type.update_description(
"EDUCATION_TYPE of the New Application"
)
new_application_days_registration.update_description(
"DAYS_REGISTRATION of the New Application"
)
new_application_days_last_phone_change.update_description(
"DAYS_LAST_PHONE_CHANGE of the New Application"
)
new_application_days_employed.update_description("DAYS_EMPLOYED of the New Application")
new_application_credit_goods_gap.update_description(
"Credit-Goods Gap of the New Application"
)
new_application_code_gender.update_description("CODE_GENDER of the New Application")
new_application_amt_req_credit_bureau_qrt.update_description(
"AMT_REQ_CREDIT_BUREAU_QRT of the New Application"
)
new_application_amt_goods_value_to_amt_credit.update_description(
"AMT_GOODS_VALUE To AMT_CREDIT of the New Application"
)
new_application_amt_credit.update_description("AMT_CREDIT of the New Application")
new_application_amt_annuity_to_amt_goods_value.update_description(
"AMT_ANNUITY To AMT_GOODS_VALUE of the New Application"
)
new_application_amt_annuity_to_amt_credit.update_description(
"AMT_ANNUITY To AMT_CREDIT of the New Application"
)
new_application_amt_annuity.update_description("AMT_ANNUITY of the New Application")
See feature definition file¶
In [29]:
Copied!
# Display the SDK-generated definition code of the derived time feature.
new_application_time_to_birthdate.definition
# Display the SDK-generated definition code of the derived time feature.
new_application_time_to_birthdate.definition
Out[29]:
# Generated by SDK version: 2.1.0.post7
from bson import ObjectId
from featurebyte import AddTimestampSchema
from featurebyte import ColumnCleaningOperation
from featurebyte import DimensionTable
from featurebyte import TimestampSchema
from featurebyte.api.request_column import RequestColumn
# dimension_table name: "NEW_APPLICATION"
dimension_table = DimensionTable.get_by_id(ObjectId("67c2c74c924afe7a79ec6f24"))
dimension_view = dimension_table.get_view(
view_mode="manual",
drop_column_names=["available_at"],
column_cleaning_operations=[
ColumnCleaningOperation(
column_name="BIRTHDATE",
cleaning_operations=[
AddTimestampSchema(
timestamp_schema=TimestampSchema(
format_string="YYYY-MM-DD",
is_utc_time=False,
timezone="America/Los_Angeles",
)
)
],
)
],
)
grouped = dimension_view.as_features(
column_names=["BIRTHDATE"],
feature_names=["NEW_APPLICATION_BIRTHDATE"],
offset=None,
)
feat = grouped["NEW_APPLICATION_BIRTHDATE"]
request_col = RequestColumn.point_in_time()
feat_1 = (request_col - feat).dt.day
feat_1.name = "NEW_APPLICATION_Time_To_BIRTHDATE"
output = feat_1
output.save(_id=ObjectId("67c2c82e3bb5b209f32ea36a"))
In [ ]:
Copied!