10. Derive features from other features
Derive features from other features¶
Features can also be created based on other features.
In this tutorial we will create a feature that measures the Z-Score of a customer's latest invoice amount relative to that customer's invoices over a 28d period, i.e. (latest invoice amount - 28d average) / 28d standard deviation. This feature is derived from three features we created previously: CUSTOMER_Latest_invoice_Amount, CUSTOMER_Avg_of_invoice_Amount_28d and CUSTOMER_Std_of_invoice_Amount_28d.
In [1]:
Copied!
import featurebyte as fb
# Set your profile to the tutorial environment
fb.use_profile("tutorial")
catalog_name = "Grocery Dataset Tutorial"
catalog = fb.Catalog.activate(catalog_name)
import featurebyte as fb
# Set your profile to the tutorial environment
fb.use_profile("tutorial")
catalog_name = "Grocery Dataset Tutorial"
catalog = fb.Catalog.activate(catalog_name)
16:43:20 | INFO | Using configuration file at: /Users/viktor/.featurebyte/config.yaml 16:43:20 | INFO | Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) 16:43:20 | INFO | SDK version: 0.6.0.dev121 16:43:20 | INFO | No catalog activated. 16:43:20 | INFO | 10 feature lists, 59 features deployed 16:43:20 | INFO | Using profile: tutorial 16:43:21 | INFO | Using configuration file at: /Users/viktor/.featurebyte/config.yaml 16:43:21 | INFO | Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) 16:43:21 | INFO | SDK version: 0.6.0.dev121 16:43:21 | INFO | No catalog activated. 16:43:21 | INFO | 10 feature lists, 59 features deployed 16:43:21 | INFO | Catalog activated: Grocery Dataset Tutorial
Get features we previously created and saved¶
In [2]:
Copied!
customer_latest_invoice_amount = catalog.get_feature("CUSTOMER_Latest_invoice_Amount")
customer_avg_of_invoice_amount_28d = catalog.get_feature("CUSTOMER_Avg_of_invoice_Amount_28d")
customer_std_of_invoice_amount_28d = catalog.get_feature("CUSTOMER_Std_of_invoice_Amount_28d")
customer_latest_invoice_amount = catalog.get_feature("CUSTOMER_Latest_invoice_Amount")
customer_avg_of_invoice_amount_28d = catalog.get_feature("CUSTOMER_Avg_of_invoice_Amount_28d")
customer_std_of_invoice_amount_28d = catalog.get_feature("CUSTOMER_Std_of_invoice_Amount_28d")
Derive a z-score feature from saved features¶
In [3]:
Copied!
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d = (
customer_latest_invoice_amount
- customer_avg_of_invoice_amount_28d
) / customer_std_of_invoice_amount_28d
# Give a name to the new feature
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d.name = \
"CUSTOMER_Latest_invoice_Amount_Z_Score_to_invoice_Amount_28d"
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d = (
customer_latest_invoice_amount
- customer_avg_of_invoice_amount_28d
) / customer_std_of_invoice_amount_28d
# Give a name to the new feature
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d.name = \
"CUSTOMER_Latest_invoice_Amount_Z_Score_to_invoice_Amount_28d"
Preview feature¶
In [4]:
Copied!
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d.primary_entity
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d.primary_entity
Out[4]:
[<featurebyte.api.entity.Entity at 0x149b56c80> { 'name': 'customer', 'created_at': '2023-11-27T15:39:09.477000', 'updated_at': '2023-11-27T15:39:19.968000', 'description': None, 'serving_names': [ 'GROCERYCUSTOMERGUID' ], 'catalog_name': 'Grocery Dataset Tutorial' }]
In [5]:
Copied!
# Get observation table: 'Preview Table with 10 Customers'
preview_table = catalog.get_observation_table("Preview Table with 10 Customers").to_pandas()
# Get observation table: 'Preview Table with 10 Customers'
preview_table = catalog.get_observation_table("Preview Table with 10 Customers").to_pandas()
Downloading table |████████████████████████████████████████| 10/10 [100%] in 0.1
In [6]:
Copied!
# Preview CUSTOMER_Latest_invoice_Amount_Z_Score_to_invoice_Amount_28d
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d.preview(preview_table)
# Preview CUSTOMER_Latest_invoice_Amount_Z_Score_to_invoice_Amount_28d
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d.preview(preview_table)
Out[6]:
| | POINT_IN_TIME | GROCERYCUSTOMERGUID | CUSTOMER_Latest_invoice_Amount_Z_Score_to_invoice_Amount_28d |
---|---|---|---|
0 | 2022-11-28 11:36:31 | d4559f7d-eb28-42c6-b47d-847de24952c2 | NaN |
1 | 2022-10-09 15:47:55 | 3f8c7c4c-f2c2-408e-a08e-622de3d3a0b9 | NaN |
2 | 2022-09-14 15:42:42 | 35390325-8443-43c1-a934-18db923d9a47 | -0.838057 |
3 | 2022-12-26 18:39:46 | 4eb4ee84-ee13-4eec-9c26-61b6eb4ba35b | 0.992653 |
4 | 2022-12-06 08:47:43 | e42fa5f3-7737-4c6a-9ef4-856f113e60bd | 1.530644 |
5 | 2022-11-09 12:14:40 | 8440debb-6abc-4adc-8c6c-749928141fd0 | NaN |
6 | 2022-10-12 17:32:15 | 8a54e527-e9a4-47a9-a28f-8b3c6ecc02db | -0.049459 |
7 | 2023-01-01 11:51:28 | cea213d4-36e4-48c3-ae8d-c7a25911e11c | -0.530470 |
8 | 2023-02-05 15:48:23 | 3b4f2821-b761-40e9-a32a-5f09685cc597 | 0.024855 |
9 | 2023-03-10 16:15:46 | 91a64566-e212-4e36-8f23-c1f1f324a301 | -0.743774 |
Save feature¶
In [7]:
Copied!
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d.save()
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d.save()
Done! |████████████████████████████████████████| 100% in 6.5s (0.16%/s)
Add description and view definition file¶
In [8]:
Copied!
# Add description
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d.update_description(
"Z-Score of the Latest invoice Amount for the customer compared to customer invoices over a 28d period"
)
# See feature definition file
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d.definition
# Add description
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d.update_description(
"Z-Score of the Latest invoice Amount for the customer compared to customer invoices over a 28d period"
)
# See feature definition file
customer_latest_invoice_amount_Z_score_to_invoice_amount_28d.definition
Out[8]:
# Generated by SDK version: 0.6.0.dev121
from bson import ObjectId
from featurebyte import ColumnCleaningOperation
from featurebyte import DisguisedValueImputation
from featurebyte import EventTable
from featurebyte import FeatureJobSetting
from featurebyte import ValueBeyondEndpointImputation
# event_table name: "GROCERYINVOICE"
event_table = EventTable.get_by_id(ObjectId("6564b7ebbeba6c193e0fe3bc"))
event_view = event_table.get_view(
view_mode="manual",
drop_column_names=["record_available_at"],
column_cleaning_operations=[
ColumnCleaningOperation(
column_name="Amount",
cleaning_operations=[
DisguisedValueImputation(
imputed_value=None, disguised_values=[-99.0, -98.0]
),
ValueBeyondEndpointImputation(
type="less_than", end_point=0.0, imputed_value=0.0
),
ValueBeyondEndpointImputation(
type="greater_than", end_point=2000.0, imputed_value=2000.0
),
],
)
],
)
grouped = event_view.groupby(
by_keys=["GroceryCustomerGuid"], category=None
).aggregate_over(
value_column="Amount",
method="latest",
windows=[None],
feature_names=["CUSTOMER_Latest_invoice_Amount"],
feature_job_setting=FeatureJobSetting(
blind_spot="120s", frequency="3600s", time_modulo_frequency="120s"
),
skip_fill_na=True,
)
feat = grouped["CUSTOMER_Latest_invoice_Amount"]
grouped_1 = event_view.groupby(
by_keys=["GroceryCustomerGuid"], category=None
).aggregate_over(
value_column="Amount",
method="avg",
windows=["28d"],
feature_names=["CUSTOMER_Avg_of_invoice_Amount_28d"],
feature_job_setting=FeatureJobSetting(
blind_spot="120s", frequency="3600s", time_modulo_frequency="120s"
),
skip_fill_na=True,
)
feat_1 = grouped_1["CUSTOMER_Avg_of_invoice_Amount_28d"]
grouped_2 = event_view.groupby(
by_keys=["GroceryCustomerGuid"], category=None
).aggregate_over(
value_column="Amount",
method="std",
windows=["28d"],
feature_names=["CUSTOMER_Std_of_invoice_Amount_28d"],
feature_job_setting=FeatureJobSetting(
blind_spot="120s", frequency="3600s", time_modulo_frequency="120s"
),
skip_fill_na=True,
)
feat_2 = grouped_2["CUSTOMER_Std_of_invoice_Amount_28d"]
feat_3 = (feat - feat_1) / feat_2
feat_3.name = "CUSTOMER_Latest_invoice_Amount_Z_Score_to_invoice_Amount_28d"
output = feat_3
output.save(_id=ObjectId("6564b91c69ee318ce739930c"))