9. Create Window Aggregate Features
Create window aggregate features¶
The next feature type we will consider is the window aggregate feature. These are features generated by aggregating data within a specific time frame.
In [1]:
Copied!
import featurebyte as fb
# Select the SDK profile named "tutorial"; this is done before a catalog is activated.
fb.use_profile("tutorial")
# Activate the tutorial catalog by name; `catalog` is the handle the following cells use.
catalog_name = "Grocery Dataset SDK Tutorial"
catalog = fb.Catalog.activate(catalog_name)
import featurebyte as fb
# Select the SDK profile named "tutorial"; this is done before a catalog is activated.
fb.use_profile("tutorial")
# Activate the tutorial catalog by name; `catalog` is the handle the following cells use.
catalog_name = "Grocery Dataset SDK Tutorial"
catalog = fb.Catalog.activate(catalog_name)
17:46:34 | INFO | SDK version: 3.3.1 17:46:34 | INFO | No catalog activated. 17:46:34 | INFO | Using profile: tutorial 17:46:34 | INFO | Using configuration file at: /Users/gxav/.featurebyte/config.yaml 17:46:34 | INFO | Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) 17:46:34 | INFO | SDK version: 3.3.1 17:46:34 | INFO | No catalog activated. 17:46:34 | INFO | Catalog activated: Grocery Dataset SDK Tutorial 16:07:43 | INFO | Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) 16:07:43 | WARNING | Remote SDK version (1.1.0.dev7) is different from local (1.1.0.dev1). Update local SDK to avoid unexpected behavior. 16:07:43 | INFO | No catalog activated. 16:07:43 | INFO | Catalog activated: Grocery Dataset Tutorial
In [2]:
Copied!
# Window sizes reused by every windowed aggregation in this notebook
windows = ["14d", "28d"]
# Window sizes reused by every windowed aggregation in this notebook
windows = ["14d", "28d"]
Do window aggregation from INVOICEITEMS¶
Let's start with some aggregations from the items view and create features for the interaction between Customer and Product Group.
In [3]:
Copied!
# Load the GROCERYPRODUCT dimension table as a view.
groceryproduct_view = catalog.get_view("GROCERYPRODUCT")
# Load the INVOICEITEMS item table as a view and join GROCERYPRODUCT onto it.
# NOTE(review): rsuffix="" presumably leaves the joined column names unsuffixed — confirm.
invoiceitems_view = catalog.get_view("INVOICEITEMS").join(
    groceryproduct_view, rsuffix=""
)
# Load the GROCERYPRODUCT dimension table as a view.
groceryproduct_view = catalog.get_view("GROCERYPRODUCT")
# Load the INVOICEITEMS item table as a view and join GROCERYPRODUCT onto it.
# NOTE(review): rsuffix="" presumably leaves the joined column names unsuffixed — confirm.
invoiceitems_view = catalog.get_view("INVOICEITEMS").join(
    groceryproduct_view, rsuffix=""
)
In [4]:
Copied!
# Group the joined INVOICEITEMS view (not GROCERYINVOICE) by customer entity
# (GroceryCustomerGuid) and productgroup entity (ProductGroup).
invoiceitems_view_by_customer_x_productgroup = invoiceitems_view.groupby(
    ["GroceryCustomerGuid", "ProductGroup"]
)
# Group the joined INVOICEITEMS view (not GROCERYINVOICE) by customer entity
# (GroceryCustomerGuid) and productgroup entity (ProductGroup).
invoiceitems_view_by_customer_x_productgroup = invoiceitems_view.groupby(
    ["GroceryCustomerGuid", "ProductGroup"]
)
In [5]:
Copied!
# Sum of item TotalCost per customer x productgroup: one feature per entry in `windows`.
customer_productgroup_sum_of_totalcost_14d_28d = (
    invoiceitems_view_by_customer_x_productgroup.aggregate_over(
        value_column="TotalCost",
        method="sum",
        feature_names=[
            f"CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_{window}"
            for window in windows
        ],
        windows=windows,
    )
)
# Sum of item TotalCost per customer x productgroup: one feature per entry in `windows`.
customer_productgroup_sum_of_totalcost_14d_28d = (
    invoiceitems_view_by_customer_x_productgroup.aggregate_over(
        value_column="TotalCost",
        method="sum",
        feature_names=[
            f"CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_{window}"
            for window in windows
        ],
        windows=windows,
    )
)
In [6]:
Copied!
# Timestamp of the latest interaction between the customer and the product group.
# NOTE(review): windows=[None] presumably means "no window bound" — confirm.
# The call is indexed by feature name so the variable holds the single feature.
customer_x_productgroup_latest_timestamp = (
    invoiceitems_view_by_customer_x_productgroup.aggregate_over(
        value_column="Timestamp",
        method="latest",
        feature_names=["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"],
        windows=[None],
    )["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"]
)
# Timestamp of the latest interaction between the customer and the product group.
# NOTE(review): windows=[None] presumably means "no window bound" — confirm.
# The call is indexed by feature name so the variable holds the single feature.
customer_x_productgroup_latest_timestamp = (
    invoiceitems_view_by_customer_x_productgroup.aggregate_over(
        value_column="Timestamp",
        method="latest",
        feature_names=["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"],
        windows=[None],
    )["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"]
)
In [7]:
Copied!
# Recency feature: request point-in-time minus the latest customer x productgroup
# interaction timestamp.
# NOTE(review): previewed values such as 286.953333 suggest `.dt.hour` yields the
# elapsed time in fractional hours — confirm against the featurebyte docs.
customer_x_productgroup_time_since_latest_timestamp = (
    fb.RequestColumn.point_in_time() - customer_x_productgroup_latest_timestamp
).dt.hour
customer_x_productgroup_time_since_latest_timestamp.name = (
    "CUSTOMER_x_PRODUCTGROUP_Time_Since_Latest_Timestamp"
)
# Recency feature: request point-in-time minus the latest customer x productgroup
# interaction timestamp.
# NOTE(review): previewed values such as 286.953333 suggest `.dt.hour` yields the
# elapsed time in fractional hours — confirm against the featurebyte docs.
customer_x_productgroup_time_since_latest_timestamp = (
    fb.RequestColumn.point_in_time() - customer_x_productgroup_latest_timestamp
).dt.hour
customer_x_productgroup_time_since_latest_timestamp.name = (
    "CUSTOMER_x_PRODUCTGROUP_Time_Since_Latest_Timestamp"
)
Do window aggregation from GROCERYINVOICE¶
Now, let's do some aggregations on the invoices view for the Customer entity.
In [8]:
Copied!
# Load the GROCERYINVOICE event table as a view; the customer-level aggregations use it.
groceryinvoice_view = catalog.get_view("GROCERYINVOICE")
# Load the GROCERYINVOICE event table as a view; the customer-level aggregations use it.
groceryinvoice_view = catalog.get_view("GROCERYINVOICE")
In [9]:
Copied!
# Key the GROCERYINVOICE view on the customer entity (GroceryCustomerGuid).
groceryinvoice_view_by_customer = groceryinvoice_view.groupby(
    ["GroceryCustomerGuid"]
)
# Key the GROCERYINVOICE view on the customer entity (GroceryCustomerGuid).
groceryinvoice_view_by_customer = groceryinvoice_view.groupby(
    ["GroceryCustomerGuid"]
)
In [10]:
Copied!
# Amount of the customer's latest invoice.
# The call is indexed by feature name so the variable holds the single feature.
customer_latest_invoice_amount = groceryinvoice_view_by_customer.aggregate_over(
    value_column="Amount",
    method="latest",
    feature_names=["CUSTOMER_Latest_invoice_Amount"],
    windows=[None],
)["CUSTOMER_Latest_invoice_Amount"]
# Amount of the customer's latest invoice.
# The call is indexed by feature name so the variable holds the single feature.
customer_latest_invoice_amount = groceryinvoice_view_by_customer.aggregate_over(
    value_column="Amount",
    method="latest",
    feature_names=["CUSTOMER_Latest_invoice_Amount"],
    windows=[None],
)["CUSTOMER_Latest_invoice_Amount"]
In [11]:
Copied!
# Number of invoices per customer: one feature per entry in `windows`.
# No value column is passed for a count (value_column=None).
customer_count_of_invoice_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    value_column=None,
    method="count",
    feature_names=[f"CUSTOMER_Count_of_invoice_{window}" for window in windows],
    windows=windows,
)
# Number of invoices per customer: one feature per entry in `windows`.
# No value column is passed for a count (value_column=None).
customer_count_of_invoice_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    value_column=None,
    method="count",
    feature_names=[f"CUSTOMER_Count_of_invoice_{window}" for window in windows],
    windows=windows,
)
In [12]:
Copied!
# Average invoice Amount per customer: one feature per entry in `windows`.
customer_avg_of_invoice_amount_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    value_column="Amount",
    method="avg",
    feature_names=[
        f"CUSTOMER_Avg_of_invoice_Amount_{window}" for window in windows
    ],
    windows=windows,
)
# Average invoice Amount per customer: one feature per entry in `windows`.
customer_avg_of_invoice_amount_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    value_column="Amount",
    method="avg",
    feature_names=[
        f"CUSTOMER_Avg_of_invoice_Amount_{window}" for window in windows
    ],
    windows=windows,
)
In [13]:
Copied!
# Standard deviation of invoice Amount per customer: one feature per entry in `windows`.
customer_std_of_invoice_amount_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    value_column="Amount",
    method="std",
    feature_names=[
        f"CUSTOMER_Std_of_invoice_Amount_{window}" for window in windows
    ],
    windows=windows,
)
# Standard deviation of invoice Amount per customer: one feature per entry in `windows`.
customer_std_of_invoice_amount_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    value_column="Amount",
    method="std",
    feature_names=[
        f"CUSTOMER_Std_of_invoice_Amount_{window}" for window in windows
    ],
    windows=windows,
)
Preview a feature group¶
For convenience, we can create a feature group to preview/save all features we just created.
In [14]:
Copied!
# Bundle the features created above so they can be previewed and saved together.
# Entries are a mix of single features and the per-window (_14d_28d) aggregation results.
# The intermediate latest-timestamp feature is not listed; only the recency feature derived from it is.
feature_group = fb.FeatureGroup([
customer_x_productgroup_time_since_latest_timestamp,
customer_productgroup_sum_of_totalcost_14d_28d,
customer_latest_invoice_amount,
customer_count_of_invoice_14d_28d,
customer_avg_of_invoice_amount_14d_28d,
customer_std_of_invoice_amount_14d_28d,
])
# Bundle the features created above so they can be previewed and saved together.
# Entries are a mix of single features and the per-window (_14d_28d) aggregation results.
# The intermediate latest-timestamp feature is not listed; only the recency feature derived from it is.
feature_group = fb.FeatureGroup([
customer_x_productgroup_time_since_latest_timestamp,
customer_productgroup_sum_of_totalcost_14d_28d,
customer_latest_invoice_amount,
customer_count_of_invoice_14d_28d,
customer_avg_of_invoice_amount_14d_28d,
customer_std_of_invoice_amount_14d_28d,
])
In [15]:
Copied!
# Inspect the primary entity of the feature group. Because the group includes
# customer x productgroup features, it should be the interaction Customer x ProductGroup.
feature_group.primary_entity
# Inspect the primary entity of the feature group. Because the group includes
# customer x productgroup features, it should be the interaction Customer x ProductGroup.
feature_group.primary_entity
Out[15]:
[<featurebyte.api.entity.Entity at 0x3317f4720>
{
'name': 'customer',
'created_at': '2025-12-04T09:42:26.412000',
'updated_at': '2025-12-04T09:42:31.346000',
'description': None,
'serving_names': [
'GROCERYCUSTOMERGUID'
],
'catalog_name': 'Grocery Dataset SDK Tutorial'
},
<featurebyte.api.entity.Entity at 0x33167e8e0>
{
'name': 'productgroup',
'created_at': '2025-12-04T09:42:27.759000',
'updated_at': '2025-12-04T09:42:32.773000',
'description': None,
'serving_names': [
'PRODUCTGROUP'
],
'catalog_name': 'Grocery Dataset SDK Tutorial'
}]
In [16]:
Copied!
# Fetch the observation table named 'Preview Table with 10 items' from the catalog;
# it is the input for the feature preview.
preview_table = catalog.get_observation_table("Preview Table with 10 items")
# Fetch the observation table named 'Preview Table with 10 items' from the catalog;
# it is the input for the feature preview.
preview_table = catalog.get_observation_table("Preview Table with 10 items")
In [17]:
Copied!
# Preview the feature group on the observation table; the result is displayed as the cell output.
feature_group.preview(preview_table)
# Preview the feature group on the observation table; the result is displayed as the cell output.
feature_group.preview(preview_table)
Out[17]:
| POINT_IN_TIME | GROCERYINVOICEITEMGUID | CUSTOMER_x_PRODUCTGROUP_Time_Since_Latest_Timestamp | CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_14d | CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d | CUSTOMER_Latest_invoice_Amount | CUSTOMER_Count_of_invoice_14d | CUSTOMER_Count_of_invoice_28d | CUSTOMER_Avg_of_invoice_Amount_14d | CUSTOMER_Avg_of_invoice_Amount_28d | CUSTOMER_Std_of_invoice_Amount_14d | CUSTOMER_Std_of_invoice_Amount_28d | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2023-03-27 14:11:18 | f0f67ecc-38a0-4ddf-99a4-a3d531b00aa9 | 286.953333 | 10.00 | 28.97 | 7.15 | 2.0 | 5.0 | 14.805000 | 18.260000 | 7.655000 | 6.174263 |
| 1 | 2023-06-26 15:05:25 | a3ac8a5e-8d9b-40b4-a835-38ccce024110 | 677.198056 | NaN | NaN | 57.87 | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 2023-06-14 08:04:37 | 7b7b5ae1-4a84-449b-88f0-37bd54bc5c2c | 767.908333 | NaN | NaN | 40.54 | 2.0 | 7.0 | 26.695000 | 20.620000 | 13.845000 | 16.888431 |
| 3 | 2022-10-03 20:03:17 | 20941a03-d666-4718-851c-5016640f2c87 | NaN | NaN | NaN | 1.00 | 1.0 | 2.0 | 1.000000 | 58.490000 | 0.000000 | 57.490000 |
| 4 | 2023-02-14 11:48:38 | 889a2767-dbee-4ac0-a030-f2b921f0867c | 498.531111 | NaN | 5.04 | 4.97 | 1.0 | 3.0 | 4.970000 | 4.670000 | 0.000000 | 1.042433 |
| 5 | 2022-07-26 09:12:59 | 75c6652a-fb95-4d36-aad1-e9a38ad2e4b0 | 360.957222 | NaN | 1.79 | 20.72 | 4.0 | 9.0 | 18.685000 | 20.600000 | 10.511343 | 12.880202 |
| 6 | 2023-03-24 09:58:13 | 9c7bb568-3237-4115-8e4b-ad1e2855299a | 125.022500 | 2.99 | 3.99 | 22.72 | 4.0 | 10.0 | 11.340000 | 10.425000 | 8.430590 | 10.298780 |
| 7 | 2022-12-05 17:43:17 | c2dc525f-9f21-4b98-87e8-8ded5f0bdd37 | 553.113611 | NaN | 2.00 | 19.96 | 3.0 | 8.0 | 31.283333 | 26.490000 | 26.294727 | 19.828913 |
| 8 | 2022-09-30 17:39:57 | a4a2b3f5-69fb-403b-8868-bc360b500add | 2111.837778 | NaN | NaN | 2.99 | 5.0 | 6.0 | 7.066000 | 6.945000 | 3.461009 | 3.171019 |
| 9 | 2022-09-11 11:55:15 | 0a9b41ff-baec-4e30-b4da-b7ade2a14176 | 97.736667 | 2.69 | 8.07 | 48.84 | 9.0 | 19.0 | 24.005556 | 22.886842 | 16.348748 | 19.781989 |
Save features into catalog¶
With a feature group, we can save all of its features in one call.
In [18]:
Copied!
# Save the features of the group into the catalog with a single call.
feature_group.save()
# Save the features of the group into the catalog with a single call.
feature_group.save()
Done! |████████████████████████████████████████| 100% in 6.3s (0.16%/s) Done! |████████████████████████████████████████| 100% in 6.2s (0.16%/s) Loading Feature(s) |████████████████████████████████████████| 10/10 [100%] in 0. Done! |████████████████████████████████████████| 100% in 9.1s (0.11%/s) Done! |████████████████████████████████████████| 100% in 6.1s (0.17%/s) Loading Feature(s) |████████████████████████████████████████| 10/10 [100%] in 0.
Add description¶
In [19]:
Copied!
# Attach a human-readable description to the recency feature.
customer_x_productgroup_time_since_latest_timestamp.update_description(
"Time Since Latest interaction between the customer and the product group"
)
# Attach a human-readable description to the recency feature.
customer_x_productgroup_time_since_latest_timestamp.update_description(
"Time Since Latest interaction between the customer and the product group"
)
In [20]:
Copied!
# Pick each windowed sum-of-TotalCost feature out of its group by feature name.
customer_productgroup_sum_of_totalcost_14d = (
    customer_productgroup_sum_of_totalcost_14d_28d[
        "CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_14d"
    ]
)
customer_productgroup_sum_of_totalcost_28d = (
    customer_productgroup_sum_of_totalcost_14d_28d[
        "CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d"
    ]
)
# Attach a description to each of the two features.
customer_productgroup_sum_of_totalcost_14d.update_description(
    "Total spent by the customer on the product group over a 14d period."
)
customer_productgroup_sum_of_totalcost_28d.update_description(
    "Total spent by the customer on the product group over a 28d period."
)
# Pick each windowed sum-of-TotalCost feature out of its group by feature name.
customer_productgroup_sum_of_totalcost_14d = (
    customer_productgroup_sum_of_totalcost_14d_28d[
        "CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_14d"
    ]
)
customer_productgroup_sum_of_totalcost_28d = (
    customer_productgroup_sum_of_totalcost_14d_28d[
        "CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d"
    ]
)
# Attach a description to each of the two features.
customer_productgroup_sum_of_totalcost_14d.update_description(
    "Total spent by the customer on the product group over a 14d period."
)
customer_productgroup_sum_of_totalcost_28d.update_description(
    "Total spent by the customer on the product group over a 28d period."
)
In [21]:
Copied!
# Attach a human-readable description to the latest-invoice-amount feature.
customer_latest_invoice_amount.update_description("Latest invoice Amount for the customer")
# Attach a human-readable description to the latest-invoice-amount feature.
customer_latest_invoice_amount.update_description("Latest invoice Amount for the customer")
In [22]:
Copied!
# Pick each windowed invoice-count feature out of its group by feature name.
customer_count_of_invoice_14d = customer_count_of_invoice_14d_28d[
    "CUSTOMER_Count_of_invoice_14d"
]
customer_count_of_invoice_28d = customer_count_of_invoice_14d_28d[
    "CUSTOMER_Count_of_invoice_28d"
]
# Attach a description to each of the two features.
customer_count_of_invoice_14d.update_description(
    "Count of invoice for the customer over a 14d period."
)
customer_count_of_invoice_28d.update_description(
    "Count of invoice for the customer over a 28d period."
)
# Pick each windowed invoice-count feature out of its group by feature name.
customer_count_of_invoice_14d = customer_count_of_invoice_14d_28d[
    "CUSTOMER_Count_of_invoice_14d"
]
customer_count_of_invoice_28d = customer_count_of_invoice_14d_28d[
    "CUSTOMER_Count_of_invoice_28d"
]
# Attach a description to each of the two features.
customer_count_of_invoice_14d.update_description(
    "Count of invoice for the customer over a 14d period."
)
customer_count_of_invoice_28d.update_description(
    "Count of invoice for the customer over a 28d period."
)
In [23]:
Copied!
# Pick each windowed average-amount feature out of its group by feature name.
customer_avg_of_invoice_amount_14d = customer_avg_of_invoice_amount_14d_28d[
    "CUSTOMER_Avg_of_invoice_Amount_14d"
]
customer_avg_of_invoice_amount_28d = customer_avg_of_invoice_amount_14d_28d[
    "CUSTOMER_Avg_of_invoice_Amount_28d"
]
# Attach a description to each of the two features.
customer_avg_of_invoice_amount_14d.update_description(
    "Avg of invoice Amount for the customer over a 14d period."
)
customer_avg_of_invoice_amount_28d.update_description(
    "Avg of invoice Amount for the customer over a 28d period."
)
# Pick each windowed average-amount feature out of its group by feature name.
customer_avg_of_invoice_amount_14d = customer_avg_of_invoice_amount_14d_28d[
    "CUSTOMER_Avg_of_invoice_Amount_14d"
]
customer_avg_of_invoice_amount_28d = customer_avg_of_invoice_amount_14d_28d[
    "CUSTOMER_Avg_of_invoice_Amount_28d"
]
# Attach a description to each of the two features.
customer_avg_of_invoice_amount_14d.update_description(
    "Avg of invoice Amount for the customer over a 14d period."
)
customer_avg_of_invoice_amount_28d.update_description(
    "Avg of invoice Amount for the customer over a 28d period."
)
In [24]:
Copied!
# Pick each windowed std-of-amount feature out of its group by feature name.
customer_std_of_invoice_amount_14d = customer_std_of_invoice_amount_14d_28d[
    "CUSTOMER_Std_of_invoice_Amount_14d"
]
customer_std_of_invoice_amount_28d = customer_std_of_invoice_amount_14d_28d[
    "CUSTOMER_Std_of_invoice_Amount_28d"
]
# Attach a description to each of the two features.
customer_std_of_invoice_amount_14d.update_description(
    "Std of invoice Amount for the customer over a 14d period."
)
customer_std_of_invoice_amount_28d.update_description(
    "Std of invoice Amount for the customer over a 28d period."
)
# Pick each windowed std-of-amount feature out of its group by feature name.
customer_std_of_invoice_amount_14d = customer_std_of_invoice_amount_14d_28d[
    "CUSTOMER_Std_of_invoice_Amount_14d"
]
customer_std_of_invoice_amount_28d = customer_std_of_invoice_amount_14d_28d[
    "CUSTOMER_Std_of_invoice_Amount_28d"
]
# Attach a description to each of the two features.
customer_std_of_invoice_amount_14d.update_description(
    "Std of invoice Amount for the customer over a 14d period."
)
customer_std_of_invoice_amount_28d.update_description(
    "Std of invoice Amount for the customer over a 28d period."
)
In [ ]:
Copied!