9. Create window aggregate features
Create window aggregate features¶
The next feature type we will consider is the window aggregate feature. These are features generated by aggregating data within a specific time frame.
In [1]:
Copied!
import featurebyte as fb

# Set your profile to the tutorial environment.
fb.use_profile("tutorial")

# Activate the catalog used throughout the tutorial; `catalog` is the handle
# every later cell uses to fetch views and observation tables.
catalog_name = "Grocery Dataset Tutorial"
catalog = fb.Catalog.activate(catalog_name)
15:30:30 | INFO | SDK version: 1.0.2.dev46 15:30:30 | INFO | No catalog activated. 15:30:30 | INFO | Using profile: tutorial 15:30:30 | INFO | Using configuration file at: /Users/gxav/.featurebyte/config.yaml 15:30:30 | INFO | Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) 15:30:30 | INFO | SDK version: 1.0.2.dev46 15:30:30 | INFO | No catalog activated. 15:30:31 | INFO | Catalog activated: Grocery Dataset Tutorial
In [2]:
Copied!
# Window sizes shared by every windowed aggregation in this notebook.
# Each entry is also appended to the feature names (e.g. "..._14d").
windows = ['14d', '28d']
Do window aggregation from INVOICEITEMS¶
Let's start with some aggregations from the items view and create features for the interaction between Customer and Product Group.
In [3]:
Copied!
# Get view from GROCERYPRODUCT dimension table.
groceryproduct_view = catalog.get_view("GROCERYPRODUCT")

# Get view from INVOICEITEMS item table and join the GROCERYPRODUCT view to it.
# Fetching and joining in a single expression binds `invoiceitems_view` once,
# directly to the joined view. rsuffix="" adds no suffix to the joined columns.
invoiceitems_view = catalog.get_view("INVOICEITEMS").join(
    groceryproduct_view, rsuffix=""
)
In [4]:
Copied!
# Group the joined INVOICEITEMS view by customer entity (GroceryCustomerGuid)
# and productgroup entity (ProductGroup).
invoiceitems_view_by_customer_x_productgroup = invoiceitems_view.groupby(
    ['GroceryCustomerGuid', 'ProductGroup']
)
In [5]:
Copied!
# Get Sum of TotalCost for the customer x productgroup over time.
# One feature is produced per entry in `windows`; the window label is appended
# to the feature name. fill_value=0 replaces a missing sum with 0.
customer_productgroup_sum_of_totalcost_14d_28d = (
    invoiceitems_view_by_customer_x_productgroup.aggregate_over(
        "TotalCost",
        method="sum",
        feature_names=[
            "CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost" + "_" + w
            for w in windows
        ],
        fill_value=0,
        windows=windows,
    )
)
In [6]:
Copied!
# Get Latest Interaction between Customer and ProductGroup.
# windows=[None] is used with the "latest" method, so no window length is set.
# aggregate_over returns a group; indexing by name extracts the single feature.
customer_x_productgroup_latest_timestamp = (
    invoiceitems_view_by_customer_x_productgroup.aggregate_over(
        "Timestamp",
        method="latest",
        feature_names=["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"],
        windows=[None],
    )["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"]
)
In [7]:
Copied!
# Create recency feature: Time Since Latest Interaction between Customer and
# Product Group. The difference between the request's point in time and the
# latest interaction timestamp is expressed in hours via `.dt.hour`.
customer_x_productgroup_time_since_latest_timestamp = (
    fb.RequestColumn.point_in_time()
    - customer_x_productgroup_latest_timestamp
).dt.hour

# A derived feature has to be named explicitly before it can be saved.
customer_x_productgroup_time_since_latest_timestamp.name = (
    "CUSTOMER_x_PRODUCTGROUP_Time_Since_Latest_Timestamp"
)
Do window aggregation from GROCERYINVOICE¶
Now, let's do some aggregations on the invoices view for the Customer entity.
In [8]:
Copied!
# Get view from GROCERYINVOICE event table.
groceryinvoice_view = catalog.get_view("GROCERYINVOICE")
In [9]:
Copied!
# Group GROCERYINVOICE view by customer entity (GroceryCustomerGuid).
# This grouped view is reused by all customer-level aggregations below.
groceryinvoice_view_by_customer = groceryinvoice_view.groupby(
    ['GroceryCustomerGuid']
)
In [10]:
Copied!
# Get Latest invoice Amount for the customer.
# windows=[None] is used with the "latest" method, so no window length is set.
# Indexing by name extracts the single feature from the returned group.
customer_latest_invoice_amount = groceryinvoice_view_by_customer.aggregate_over(
    "Amount",
    method="latest",
    feature_names=["CUSTOMER_Latest_invoice_Amount"],
    windows=[None],
)["CUSTOMER_Latest_invoice_Amount"]
In [11]:
Copied!
# Get Count of invoices for the customer, one feature per entry in `windows`.
# "count" counts rows, so no value column is passed.
customer_count_of_invoice_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    method="count",
    feature_names=[
        "CUSTOMER_Count_of_invoice" + "_" + w
        for w in windows
    ],
    windows=windows,
)
In [12]:
Copied!
# Get Avg of Amount for the customer over time, one feature per entry in
# `windows`. No fill value is set here.
customer_avg_of_invoice_amount_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    "Amount",
    method="avg",
    feature_names=[
        "CUSTOMER_Avg_of_invoice_Amount" + "_" + w
        for w in windows
    ],
    windows=windows,
)
In [13]:
Copied!
# Get Std of Amount for the customer over time, one feature per entry in
# `windows`. No fill value is set here.
customer_std_of_invoice_amount_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    "Amount",
    method="std",
    feature_names=[
        "CUSTOMER_Std_of_invoice_Amount" + "_" + w
        for w in windows
    ],
    windows=windows,
)
Preview a feature group¶
For convenience, we can create a feature group to preview/save all features we just created.
In [14]:
Copied!
# Bundle the features created above so they can be previewed and saved
# together. Single features and multi-window groups can be mixed in the list.
# The intermediate latest-timestamp feature is left out; only the recency
# feature derived from it is included.
feature_group = fb.FeatureGroup([
    customer_x_productgroup_time_since_latest_timestamp,
    customer_productgroup_sum_of_totalcost_14d_28d,
    customer_latest_invoice_amount,
    customer_count_of_invoice_14d_28d,
    customer_avg_of_invoice_amount_14d_28d,
    customer_std_of_invoice_amount_14d_28d,
])
In [15]:
Copied!
# Check the primary entity of the feature group.
# It should be the interaction Customer x ProductGroup.
feature_group.primary_entity
Out[15]:
[<featurebyte.api.entity.Entity at 0x107a508c0> { 'name': 'customer', 'created_at': '2024-04-26T07:28:15.128000', 'updated_at': '2024-04-26T07:28:24.021000', 'description': None, 'serving_names': [ 'GROCERYCUSTOMERGUID' ], 'catalog_name': 'Grocery Dataset Tutorial' }, <featurebyte.api.entity.Entity at 0x14a543240> { 'name': 'productgroup', 'created_at': '2024-04-26T07:28:17.313000', 'updated_at': '2024-04-26T07:28:27.128000', 'description': None, 'serving_names': [ 'PRODUCTGROUP' ], 'catalog_name': 'Grocery Dataset Tutorial' }]
In [16]:
Copied!
# Get observation table: 'Preview Table with 10 items'.
# It must already exist in the active catalog.
preview_table = catalog.get_observation_table("Preview Table with 10 items")
In [17]:
Copied!
# Preview feature_group: compute every feature in the group for the rows of
# the observation table and display the result.
feature_group.preview(preview_table)
Out[17]:
POINT_IN_TIME | GROCERYINVOICEITEMGUID | CUSTOMER_x_PRODUCTGROUP_Time_Since_Latest_Timestamp | CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_14d | CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d | CUSTOMER_Latest_invoice_Amount | CUSTOMER_Count_of_invoice_14d | CUSTOMER_Count_of_invoice_28d | CUSTOMER_Avg_of_invoice_Amount_14d | CUSTOMER_Avg_of_invoice_Amount_28d | CUSTOMER_Std_of_invoice_Amount_14d | CUSTOMER_Std_of_invoice_Amount_28d | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2022-11-03 14:39:00 | 9faf5936-d4bb-4709-a530-c1624ec003a5 | 6314.067778 | 0.0 | 0.00 | 63.65 | 2 | 9 | 46.9650 | 23.980000 | 16.685000 | 19.449867 |
1 | 2022-08-16 15:37:21 | 6781acc6-652d-4867-a138-2d8adb278886 | NaN | 0.0 | 0.00 | 1.50 | 1 | 3 | 1.5000 | 10.873333 | 0.000000 | 11.031305 |
2 | 2022-10-05 14:30:03 | fc4769cf-41d6-4fb0-9cfc-fe30502cfa18 | 360.176111 | 0.0 | 5.63 | 14.57 | 2 | 5 | 7.9150 | 16.126000 | 6.655000 | 10.164890 |
3 | 2023-03-19 15:22:18 | dcdb7c98-dd62-4287-b432-bfe3a2317ebc | 163.990000 | 9.0 | 9.00 | 85.74 | 2 | 3 | 60.1600 | 64.773333 | 25.580000 | 21.881270 |
4 | 2023-03-13 21:11:43 | cfc620a2-0054-4b4b-99d5-040cf87cfe2d | 414.501389 | 0.0 | 3.09 | 4.29 | 2 | 9 | 3.1400 | 13.368889 | 1.150000 | 11.359725 |
5 | 2022-11-18 10:07:49 | ae2ccf38-4e5d-4c76-b1e5-04f12307e45b | NaN | 0.0 | 0.00 | 9.24 | 3 | 6 | 28.6700 | 24.143333 | 33.102335 | 25.427108 |
6 | 2022-11-09 18:07:20 | 37122b82-478b-4d9e-b236-69629b592c0b | 4448.626944 | 0.0 | 0.00 | 69.93 | 1 | 2 | 69.9300 | 80.980000 | 0.000000 | 11.050000 |
7 | 2023-02-15 21:10:22 | 5daf8edb-8625-4653-aaa0-9ac03df92017 | 1705.355000 | 0.0 | 0.00 | 24.04 | 2 | 3 | 35.8850 | 35.370000 | 11.845000 | 9.698787 |
8 | 2023-02-27 13:27:47 | b807e05c-ff1c-4fb3-a760-e0e8ce29c859 | 316.402222 | 1.0 | 1.00 | 4.80 | 4 | 8 | 10.8925 | 8.286250 | 7.927192 | 6.802988 |
9 | 2023-04-19 15:25:05 | 8a60f455-aff1-4b4e-8c63-9ab89df2715d | 503.857500 | 0.0 | 2.89 | 3.39 | 1 | 3 | 3.3900 | 10.223333 | 0.000000 | 5.149319 |
Save features into catalog¶
With a feature group, we can save all of its features in one call.
In [18]:
Copied!
# Save every feature in the group to the catalog in a single call.
feature_group.save()
Done! |████████████████████████████████████████| 100% in 15.6s (0.06%/s) Done! |████████████████████████████████████████| 100% in 6.2s (0.16%/s) Loading Feature(s) |████████████████████████████████████████| 10/10 [100%] in 0.
Add description and see feature definition files¶
In [19]:
Copied!
# Add description to the recency feature (its value is expressed in hours).
customer_x_productgroup_time_since_latest_timestamp.update_description(
    "Time Since Latest interaction between the customer and the product group"
)

# See feature definition file.
customer_x_productgroup_time_since_latest_timestamp.definition
Out[19]:
# Generated by SDK version: 1.0.2.dev46
from bson import ObjectId
from featurebyte import DimensionTable
from featurebyte import FeatureJobSetting
from featurebyte import ItemTable
from featurebyte.api.request_column import RequestColumn
# item_table name: "INVOICEITEMS", event_table name: "GROCERYINVOICE"
item_table = ItemTable.get_by_id(ObjectId("662b5778aa13c89fa14554e2"))
item_view = item_table.get_view(
event_suffix=None,
view_mode="manual",
drop_column_names=["record_available_at"],
column_cleaning_operations=[],
event_drop_column_names=["record_available_at"],
event_column_cleaning_operations=[],
event_join_column_names=[
"Timestamp",
"GroceryInvoiceGuid",
"GroceryCustomerGuid",
"tz_offset",
],
)
# dimension_table name: "GROCERYPRODUCT"
dimension_table = DimensionTable.get_by_id(ObjectId("662b577aaa13c89fa14554e3"))
dimension_view = dimension_table.get_view(
view_mode="manual", drop_column_names=[], column_cleaning_operations=[]
)
joined_view = item_view.join(
dimension_view, on="GroceryProductGuid", how="left", rsuffix="", rprefix=""
)
grouped = joined_view.groupby(
by_keys=["GroceryCustomerGuid", "ProductGroup"], category=None
).aggregate_over(
value_column="Timestamp",
method="latest",
windows=[None],
feature_names=["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"],
feature_job_setting=FeatureJobSetting(
blind_spot="120s", frequency="3600s", time_modulo_frequency="120s"
),
skip_fill_na=True,
)
feat = grouped["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"]
request_col = RequestColumn.point_in_time()
feat_1 = (request_col - feat).dt.hour
feat_1.name = "CUSTOMER_x_PRODUCTGROUP_Time_Since_Latest_Timestamp"
output = feat_1
output.save(_id=ObjectId("662b5819da3f1a887399f98a"))
In [20]:
Copied!
# Add description: pick each window's feature out of the group by name,
# then describe it.
customer_productgroup_sum_of_totalcost_14d = (
    customer_productgroup_sum_of_totalcost_14d_28d[
        "CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_14d"
    ]
)
customer_productgroup_sum_of_totalcost_14d.update_description(
    "Total spent by the customer on the product group over a 14d period."
)

customer_productgroup_sum_of_totalcost_28d = (
    customer_productgroup_sum_of_totalcost_14d_28d[
        "CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d"
    ]
)
customer_productgroup_sum_of_totalcost_28d.update_description(
    "Total spent by the customer on the product group over a 28d period."
)

# See feature definition file (shown for the 28d feature only).
customer_productgroup_sum_of_totalcost_28d.definition
Out[20]:
# Generated by SDK version: 1.0.2.dev46
from bson import ObjectId
from featurebyte import DimensionTable
from featurebyte import FeatureJobSetting
from featurebyte import ItemTable
# item_table name: "INVOICEITEMS", event_table name: "GROCERYINVOICE"
item_table = ItemTable.get_by_id(ObjectId("662b5778aa13c89fa14554e2"))
item_view = item_table.get_view(
event_suffix=None,
view_mode="manual",
drop_column_names=["record_available_at"],
column_cleaning_operations=[],
event_drop_column_names=["record_available_at"],
event_column_cleaning_operations=[],
event_join_column_names=[
"Timestamp",
"GroceryInvoiceGuid",
"GroceryCustomerGuid",
"tz_offset",
],
)
# dimension_table name: "GROCERYPRODUCT"
dimension_table = DimensionTable.get_by_id(ObjectId("662b577aaa13c89fa14554e3"))
dimension_view = dimension_table.get_view(
view_mode="manual", drop_column_names=[], column_cleaning_operations=[]
)
joined_view = item_view.join(
dimension_view, on="GroceryProductGuid", how="left", rsuffix="", rprefix=""
)
grouped = joined_view.groupby(
by_keys=["GroceryCustomerGuid", "ProductGroup"], category=None
).aggregate_over(
value_column="TotalCost",
method="sum",
windows=["28d"],
feature_names=["CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d"],
feature_job_setting=FeatureJobSetting(
blind_spot="120s", frequency="3600s", time_modulo_frequency="120s"
),
skip_fill_na=True,
)
feat = grouped["CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d"]
feat_1 = feat.copy()
feat_1[feat.isnull()] = 0
feat_1.name = "CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d"
output = feat_1
output.save(_id=ObjectId("662b5819da3f1a887399f986"))
In [21]:
Copied!
# Add description.
customer_latest_invoice_amount.update_description(
    "Latest invoice Amount for the customer"
)

# See feature definition file.
customer_latest_invoice_amount.definition
Out[21]:
# Generated by SDK version: 1.0.2.dev46
from bson import ObjectId
from featurebyte import ColumnCleaningOperation
from featurebyte import DisguisedValueImputation
from featurebyte import EventTable
from featurebyte import FeatureJobSetting
from featurebyte import ValueBeyondEndpointImputation
# event_table name: "GROCERYINVOICE"
event_table = EventTable.get_by_id(ObjectId("662b5775aa13c89fa14554e1"))
event_view = event_table.get_view(
view_mode="manual",
drop_column_names=["record_available_at"],
column_cleaning_operations=[
ColumnCleaningOperation(
column_name="Amount",
cleaning_operations=[
DisguisedValueImputation(
imputed_value=None, disguised_values=[-99.0, -98.0]
),
ValueBeyondEndpointImputation(
type="less_than", end_point=0.0, imputed_value=0.0
),
ValueBeyondEndpointImputation(
type="greater_than", end_point=2000.0, imputed_value=2000.0
),
],
)
],
)
grouped = event_view.groupby(
by_keys=["GroceryCustomerGuid"], category=None
).aggregate_over(
value_column="Amount",
method="latest",
windows=[None],
feature_names=["CUSTOMER_Latest_invoice_Amount"],
feature_job_setting=FeatureJobSetting(
blind_spot="120s", frequency="3600s", time_modulo_frequency="120s"
),
skip_fill_na=True,
)
feat = grouped["CUSTOMER_Latest_invoice_Amount"]
output = feat
output.save(_id=ObjectId("662b581ada3f1a887399f98b"))
In [22]:
Copied!
# Add description: pick each window's feature out of the group by name,
# then describe it.
customer_count_of_invoice_14d = customer_count_of_invoice_14d_28d[
    "CUSTOMER_Count_of_invoice_14d"
]
customer_count_of_invoice_14d.update_description(
    "Count of invoice for the customer over a 14d period."
)

customer_count_of_invoice_28d = customer_count_of_invoice_14d_28d[
    "CUSTOMER_Count_of_invoice_28d"
]
customer_count_of_invoice_28d.update_description(
    "Count of invoice for the customer over a 28d period."
)

# See feature definition file (shown for the 28d feature only).
customer_count_of_invoice_28d.definition
Out[22]:
# Generated by SDK version: 1.0.2.dev46
from bson import ObjectId
from featurebyte import EventTable
from featurebyte import FeatureJobSetting
# event_table name: "GROCERYINVOICE"
event_table = EventTable.get_by_id(ObjectId("662b5775aa13c89fa14554e1"))
event_view = event_table.get_view(
view_mode="manual",
drop_column_names=["record_available_at"],
column_cleaning_operations=[],
)
grouped = event_view.groupby(
by_keys=["GroceryCustomerGuid"], category=None
).aggregate_over(
value_column=None,
method="count",
windows=["28d"],
feature_names=["CUSTOMER_Count_of_invoice_28d"],
feature_job_setting=FeatureJobSetting(
blind_spot="120s", frequency="3600s", time_modulo_frequency="120s"
),
skip_fill_na=True,
)
feat = grouped["CUSTOMER_Count_of_invoice_28d"]
feat_1 = feat.copy()
feat_1[feat.isnull()] = 0
feat_1.name = "CUSTOMER_Count_of_invoice_28d"
output = feat_1
output.save(_id=ObjectId("662b581ada3f1a887399f98e"))
In [23]:
Copied!
# Add description: pick each window's feature out of the group by name,
# then describe it.
customer_avg_of_invoice_amount_14d = customer_avg_of_invoice_amount_14d_28d[
    "CUSTOMER_Avg_of_invoice_Amount_14d"
]
customer_avg_of_invoice_amount_14d.update_description(
    "Avg of invoice Amount for the customer over a 14d period."
)

customer_avg_of_invoice_amount_28d = customer_avg_of_invoice_amount_14d_28d[
    "CUSTOMER_Avg_of_invoice_Amount_28d"
]
customer_avg_of_invoice_amount_28d.update_description(
    "Avg of invoice Amount for the customer over a 28d period."
)

# See feature definition file (shown for the 28d feature only).
customer_avg_of_invoice_amount_28d.definition
Out[23]:
# Generated by SDK version: 1.0.2.dev46
from bson import ObjectId
from featurebyte import ColumnCleaningOperation
from featurebyte import DisguisedValueImputation
from featurebyte import EventTable
from featurebyte import FeatureJobSetting
from featurebyte import ValueBeyondEndpointImputation
# event_table name: "GROCERYINVOICE"
event_table = EventTable.get_by_id(ObjectId("662b5775aa13c89fa14554e1"))
event_view = event_table.get_view(
view_mode="manual",
drop_column_names=["record_available_at"],
column_cleaning_operations=[
ColumnCleaningOperation(
column_name="Amount",
cleaning_operations=[
DisguisedValueImputation(
imputed_value=None, disguised_values=[-99.0, -98.0]
),
ValueBeyondEndpointImputation(
type="less_than", end_point=0.0, imputed_value=0.0
),
ValueBeyondEndpointImputation(
type="greater_than", end_point=2000.0, imputed_value=2000.0
),
],
)
],
)
grouped = event_view.groupby(
by_keys=["GroceryCustomerGuid"], category=None
).aggregate_over(
value_column="Amount",
method="avg",
windows=["28d"],
feature_names=["CUSTOMER_Avg_of_invoice_Amount_28d"],
feature_job_setting=FeatureJobSetting(
blind_spot="120s", frequency="3600s", time_modulo_frequency="120s"
),
skip_fill_na=True,
)
feat = grouped["CUSTOMER_Avg_of_invoice_Amount_28d"]
output = feat
output.save(_id=ObjectId("662b581ada3f1a887399f991"))
In [24]:
Copied!
# Add description: pick each window's feature out of the group by name,
# then describe it.
customer_std_of_invoice_amount_14d = customer_std_of_invoice_amount_14d_28d[
    "CUSTOMER_Std_of_invoice_Amount_14d"
]
customer_std_of_invoice_amount_14d.update_description(
    "Std of invoice Amount for the customer over a 14d period."
)

customer_std_of_invoice_amount_28d = customer_std_of_invoice_amount_14d_28d[
    "CUSTOMER_Std_of_invoice_Amount_28d"
]
customer_std_of_invoice_amount_28d.update_description(
    "Std of invoice Amount for the customer over a 28d period."
)

# See feature definition file (shown for the 28d feature only).
customer_std_of_invoice_amount_28d.definition
Out[24]:
# Generated by SDK version: 1.0.2.dev46
from bson import ObjectId
from featurebyte import ColumnCleaningOperation
from featurebyte import DisguisedValueImputation
from featurebyte import EventTable
from featurebyte import FeatureJobSetting
from featurebyte import ValueBeyondEndpointImputation
# event_table name: "GROCERYINVOICE"
event_table = EventTable.get_by_id(ObjectId("662b5775aa13c89fa14554e1"))
event_view = event_table.get_view(
view_mode="manual",
drop_column_names=["record_available_at"],
column_cleaning_operations=[
ColumnCleaningOperation(
column_name="Amount",
cleaning_operations=[
DisguisedValueImputation(
imputed_value=None, disguised_values=[-99.0, -98.0]
),
ValueBeyondEndpointImputation(
type="less_than", end_point=0.0, imputed_value=0.0
),
ValueBeyondEndpointImputation(
type="greater_than", end_point=2000.0, imputed_value=2000.0
),
],
)
],
)
grouped = event_view.groupby(
by_keys=["GroceryCustomerGuid"], category=None
).aggregate_over(
value_column="Amount",
method="std",
windows=["28d"],
feature_names=["CUSTOMER_Std_of_invoice_Amount_28d"],
feature_job_setting=FeatureJobSetting(
blind_spot="120s", frequency="3600s", time_modulo_frequency="120s"
),
skip_fill_na=True,
)
feat = grouped["CUSTOMER_Std_of_invoice_Amount_28d"]
output = feat
output.save(_id=ObjectId("662b581ada3f1a887399f993"))
In [ ]:
Copied!
In [ ]:
Copied!