9. Create Window Aggregate Features
Create window aggregate features¶
The next feature type we will consider is the window aggregate feature. These are features generated by aggregating data within a specific time frame.
In [1]:
                Copied!
                
                
            import featurebyte as fb
# Point the SDK at the "tutorial" profile (the tutorial environment)
fb.use_profile("tutorial")
# Activate the tutorial catalog; later cells fetch views and the observation table through `catalog`
catalog_name = "Grocery Dataset Tutorial"
catalog = fb.Catalog.activate(catalog_name)
import featurebyte as fb
# Point the SDK at the "tutorial" profile (the tutorial environment)
fb.use_profile("tutorial")
# Activate the tutorial catalog; later cells fetch views and the observation table through `catalog`
catalog_name = "Grocery Dataset Tutorial"
catalog = fb.Catalog.activate(catalog_name)  
    
        14:21:29 | INFO | SDK version: 3.0.1.dev45 INFO :featurebyte:SDK version: 3.0.1.dev45 14:21:29 | INFO | No catalog activated. INFO :featurebyte:No catalog activated. 14:21:29 | INFO | Using profile: tutorial INFO :featurebyte:Using profile: tutorial 14:21:29 | INFO | Using configuration file at: /Users/gxav/.featurebyte/config.yaml INFO :featurebyte:Using configuration file at: /Users/gxav/.featurebyte/config.yaml 14:21:29 | INFO | Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) INFO :featurebyte:Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) 14:21:29 | INFO | SDK version: 3.0.1.dev45 INFO :featurebyte:SDK version: 3.0.1.dev45 14:21:29 | INFO | No catalog activated. INFO :featurebyte:No catalog activated. 14:21:30 | INFO | Catalog activated: Grocery Dataset Tutorial INFO :featurebyte.api.catalog:Catalog activated: Grocery Dataset Tutorial 16:07:43 | INFO | Using configuration file at: /Users/gxav/.featurebyte/config.yaml 16:07:43 | INFO | Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) 16:07:43 | WARNING | Remote SDK version (1.1.0.dev7) is different from local (1.1.0.dev1). Update local SDK to avoid unexpected behavior. 16:07:43 | INFO | No catalog activated. 16:07:43 | INFO | Catalog activated: Grocery Dataset Tutorial
In [2]:
                Copied!
                
                
            # Window sizes shared by the windowed aggregations below; each value is also appended to the feature names as a suffix
windows = ['14d', '28d']
# Window sizes shared by the windowed aggregations below; each value is also appended to the feature names as a suffix
windows = ['14d', '28d']
    
        Do window aggregation from INVOICEITEMS¶
Let's start with some aggregations from the items view and create features for the interaction between Customer and Product Group.
In [3]:
                Copied!
                
                
            # Get view from GROCERYPRODUCT dimension table.
groceryproduct_view = catalog.get_view("GROCERYPRODUCT")
# Get view from INVOICEITEMS item table.
invoiceitems_view = catalog.get_view("INVOICEITEMS")
# Join GROCERYPRODUCT view to INVOICEITEMS view.
# NOTE(review): rsuffix="" presumably keeps the joined product columns (e.g. ProductGroup,
# used by the groupby in the next cell) under their original names - confirm against the SDK docs.
invoiceitems_view = invoiceitems_view.join(groceryproduct_view, rsuffix="")
# Get view from GROCERYPRODUCT dimension table.
groceryproduct_view = catalog.get_view("GROCERYPRODUCT")
# Get view from INVOICEITEMS item table.
invoiceitems_view = catalog.get_view("INVOICEITEMS")
# Join GROCERYPRODUCT view to INVOICEITEMS view.
# NOTE(review): rsuffix="" presumably keeps the joined product columns (e.g. ProductGroup,
# used by the groupby in the next cell) under their original names - confirm against the SDK docs.
invoiceitems_view = invoiceitems_view.join(groceryproduct_view, rsuffix="")
    
        In [4]:
                Copied!
                
                
            # Group INVOICEITEMS view (already joined with GROCERYPRODUCT) by customer entity (GroceryCustomerGuid) and productgroup entity (ProductGroup).
invoiceitems_view_by_customer_x_productgroup = invoiceitems_view.groupby(
    ['GroceryCustomerGuid', 'ProductGroup']
)
# Group INVOICEITEMS view (already joined with GROCERYPRODUCT) by customer entity (GroceryCustomerGuid) and productgroup entity (ProductGroup).
invoiceitems_view_by_customer_x_productgroup = invoiceitems_view.groupby(
    ['GroceryCustomerGuid', 'ProductGroup']
)
    
        In [5]:
                Copied!
                
                
            # Sum of TotalCost per customer x productgroup: one feature per entry in `windows`.
customer_productgroup_sum_of_totalcost_14d_28d = (
    invoiceitems_view_by_customer_x_productgroup.aggregate_over(
        "TotalCost",
        method="sum",
        windows=windows,
        # Feature names carry the window as a suffix, e.g. "..._TotalCost_14d".
        feature_names=[
            f"CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_{window}"
            for window in windows
        ],
    )
)
# Sum of TotalCost per customer x productgroup: one feature per entry in `windows`.
customer_productgroup_sum_of_totalcost_14d_28d = (
    invoiceitems_view_by_customer_x_productgroup.aggregate_over(
        "TotalCost",
        method="sum",
        windows=windows,
        # Feature names carry the window as a suffix, e.g. "..._TotalCost_14d".
        feature_names=[
            f"CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_{window}"
            for window in windows
        ],
    )
)
    
        In [6]:
                Copied!
                
                
            # Get Latest Interaction between Customer and ProductGroup
# NOTE(review): windows=[None] presumably means "no time limit" (look back over all history) - confirm against the SDK docs.
# The trailing [...] picks the single named feature out of the aggregation result.
customer_x_productgroup_latest_timestamp = \
invoiceitems_view_by_customer_x_productgroup.aggregate_over(
    "Timestamp", method="latest",
    feature_names=["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"],
    windows=[None]
)["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"]
# Get Latest Interaction between Customer and ProductGroup
# NOTE(review): windows=[None] presumably means "no time limit" (look back over all history) - confirm against the SDK docs.
# The trailing [...] picks the single named feature out of the aggregation result.
customer_x_productgroup_latest_timestamp = \
invoiceitems_view_by_customer_x_productgroup.aggregate_over(
    "Timestamp", method="latest",
    feature_names=["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"],
    windows=[None]
)["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"]
    
        In [7]:
                Copied!
                
                
            # Create recency feature: Time Since Latest Interaction between Customer and Product Group
# The feature is (request point-in-time - latest timestamp); .dt.hour expresses that
# difference in hours (the preview output shows fractional values such as 7.715 and 8904.007778).
customer_x_productgroup_time_since_latest_timestamp = (
    fb.RequestColumn.point_in_time()
    - customer_x_productgroup_latest_timestamp
).dt.hour
# A derived feature has no name yet; set one explicitly.
customer_x_productgroup_time_since_latest_timestamp.name = \
"CUSTOMER_x_PRODUCTGROUP_Time_Since_Latest_Timestamp"
# Create recency feature: Time Since Latest Interaction between Customer and Product Group
# The feature is (request point-in-time - latest timestamp); .dt.hour expresses that
# difference in hours (the preview output shows fractional values such as 7.715 and 8904.007778).
customer_x_productgroup_time_since_latest_timestamp = (
    fb.RequestColumn.point_in_time()
    - customer_x_productgroup_latest_timestamp
).dt.hour
# A derived feature has no name yet; set one explicitly.
customer_x_productgroup_time_since_latest_timestamp.name = \
"CUSTOMER_x_PRODUCTGROUP_Time_Since_Latest_Timestamp"
    
        Do window aggregation from GROCERYINVOICE¶
Now, let's do some aggregations on the invoices view for the Customer entity.
In [8]:
                Copied!
                
                
            # Get view from GROCERYINVOICE event table (source of the customer-level invoice features below).
groceryinvoice_view = catalog.get_view("GROCERYINVOICE")
# Get view from GROCERYINVOICE event table (source of the customer-level invoice features below).
groceryinvoice_view = catalog.get_view("GROCERYINVOICE")
    
        In [9]:
                Copied!
                
                
            # Group GROCERYINVOICE view by customer entity (GroceryCustomerGuid); reused by all customer aggregations below.
groceryinvoice_view_by_customer = groceryinvoice_view.groupby(['GroceryCustomerGuid'])
# Group GROCERYINVOICE view by customer entity (GroceryCustomerGuid); reused by all customer aggregations below.
groceryinvoice_view_by_customer = groceryinvoice_view.groupby(['GroceryCustomerGuid'])
    
        In [10]:
                Copied!
                
                
            # Get Latest invoice Amount for the customer
# NOTE(review): windows=[None] presumably means "no time limit" - confirm against the SDK docs.
# The trailing [...] picks the single named feature out of the aggregation result.
customer_latest_invoice_amount = groceryinvoice_view_by_customer.aggregate_over(
    "Amount", method="latest",
    feature_names=["CUSTOMER_Latest_invoice_Amount"],
    windows=[None]
)["CUSTOMER_Latest_invoice_Amount"]
# Get Latest invoice Amount for the customer
# NOTE(review): windows=[None] presumably means "no time limit" - confirm against the SDK docs.
# The trailing [...] picks the single named feature out of the aggregation result.
customer_latest_invoice_amount = groceryinvoice_view_by_customer.aggregate_over(
    "Amount", method="latest",
    feature_names=["CUSTOMER_Latest_invoice_Amount"],
    windows=[None]
)["CUSTOMER_Latest_invoice_Amount"]
    
        In [11]:
                Copied!
                
                
            # Number of invoices per customer: one feature per entry in `windows`.
customer_count_of_invoice_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    value_column=None,  # "count" aggregates rows, so no value column is passed
    method="count",
    windows=windows,
    feature_names=[f"CUSTOMER_Count_of_invoice_{window}" for window in windows],
)
# Number of invoices per customer: one feature per entry in `windows`.
customer_count_of_invoice_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    value_column=None,  # "count" aggregates rows, so no value column is passed
    method="count",
    windows=windows,
    feature_names=[f"CUSTOMER_Count_of_invoice_{window}" for window in windows],
)
    
        In [12]:
                Copied!
                
                
            # Average invoice Amount per customer: one feature per entry in `windows`.
customer_avg_of_invoice_amount_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    "Amount",
    method="avg",
    windows=windows,
    feature_names=[f"CUSTOMER_Avg_of_invoice_Amount_{window}" for window in windows],
)
# Average invoice Amount per customer: one feature per entry in `windows`.
customer_avg_of_invoice_amount_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    "Amount",
    method="avg",
    windows=windows,
    feature_names=[f"CUSTOMER_Avg_of_invoice_Amount_{window}" for window in windows],
)
    
        In [13]:
                Copied!
                
                
            # Standard deviation of invoice Amount per customer: one feature per entry in `windows`.
customer_std_of_invoice_amount_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    "Amount",
    method="std",
    windows=windows,
    feature_names=[f"CUSTOMER_Std_of_invoice_Amount_{window}" for window in windows],
)
# Standard deviation of invoice Amount per customer: one feature per entry in `windows`.
customer_std_of_invoice_amount_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    "Amount",
    method="std",
    windows=windows,
    feature_names=[f"CUSTOMER_Std_of_invoice_Amount_{window}" for window in windows],
)
    
        Preview a feature group¶
For convenience, we can create a feature group to preview and save all the features we just created.
In [14]:
                Copied!
                
                
            # Bundle the single features and the multi-window aggregation results so they can be previewed and saved together.
            feature_group = fb.FeatureGroup([
    customer_x_productgroup_time_since_latest_timestamp,
    customer_productgroup_sum_of_totalcost_14d_28d,
    customer_latest_invoice_amount,
    customer_count_of_invoice_14d_28d,
    customer_avg_of_invoice_amount_14d_28d,
    customer_std_of_invoice_amount_14d_28d,
])
# Bundle the single features and the multi-window aggregation results so they can be previewed and saved together.
feature_group = fb.FeatureGroup([
    customer_x_productgroup_time_since_latest_timestamp,
    customer_productgroup_sum_of_totalcost_14d_28d,
    customer_latest_invoice_amount,
    customer_count_of_invoice_14d_28d,
    customer_avg_of_invoice_amount_14d_28d,
    customer_std_of_invoice_amount_14d_28d,
])
    
        In [15]:
                Copied!
                
                
            # Check the primary entity of the feature group. It should be the interaction Customer x ProductGroup,
# i.e. both the customer and the productgroup entities are listed in the output.
feature_group.primary_entity
# Check the primary entity of the feature group. It should be the interaction Customer x ProductGroup,
# i.e. both the customer and the productgroup entities are listed in the output.
feature_group.primary_entity
    
        Out[15]:
[<featurebyte.api.entity.Entity at 0x17511a520>
 {
   'name': 'customer',
   'created_at': '2025-06-02T06:17:26.675000',
   'updated_at': '2025-06-02T06:17:28.925000',
   'description': None,
   'serving_names': [
     'GROCERYCUSTOMERGUID'
   ],
   'catalog_name': 'Grocery Dataset Tutorial'
 },
 <featurebyte.api.entity.Entity at 0x17511a5c0>
 {
   'name': 'productgroup',
   'created_at': '2025-06-02T06:17:27.207000',
   'updated_at': '2025-06-02T06:17:29.690000',
   'description': None,
   'serving_names': [
     'PRODUCTGROUP'
   ],
   'catalog_name': 'Grocery Dataset Tutorial'
 }]
In [16]:
                Copied!
                
                
            # Get observation table: 'Preview Table with 10 items' (must already exist in the catalog)
preview_table = catalog.get_observation_table("Preview Table with 10 items")
# Get observation table: 'Preview Table with 10 items' (must already exist in the catalog)
preview_table = catalog.get_observation_table("Preview Table with 10 items")
    
        In [17]:
                Copied!
                
                
            # Preview feature_group: show the feature values for each row of the observation table
feature_group.preview(preview_table)
# Preview feature_group: show the feature values for each row of the observation table
feature_group.preview(preview_table)
    
        Out[17]:
| | POINT_IN_TIME | GROCERYINVOICEITEMGUID | CUSTOMER_x_PRODUCTGROUP_Time_Since_Latest_Timestamp | CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_14d | CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d | CUSTOMER_Latest_invoice_Amount | CUSTOMER_Count_of_invoice_14d | CUSTOMER_Count_of_invoice_28d | CUSTOMER_Avg_of_invoice_Amount_14d | CUSTOMER_Avg_of_invoice_Amount_28d | CUSTOMER_Std_of_invoice_Amount_14d | CUSTOMER_Std_of_invoice_Amount_28d |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-07-23 15:32:29 | d87d65b8-4f78-41cc-8bd3-0064f83fe4fb | 7.715000 | 5.00 | 5.00 | 2.50 | 8.0 | 15.0 | 14.246250 | 17.746667 | 14.128027 | 14.277847 | 
| 1 | 2023-05-25 05:21:12 | cf670b7a-c6bf-4598-b0c0-400378b9cab6 | 269.470000 | 0.99 | 2.62 | 2.99 | 6.0 | 9.0 | 4.273333 | 4.196667 | 3.607879 | 3.063727 | 
| 2 | 2023-02-28 10:28:24 | 59b63729-d448-4496-8f36-de26a91e2310 | 334.700556 | 19.10 | 19.10 | 183.32 | 2.0 | 2.0 | 144.770000 | 144.770000 | 38.550000 | 38.550000 | 
| 3 | 2023-04-26 19:36:57 | f867935a-d33a-43d1-b3bc-02c539769836 | 8904.007778 | NaN | NaN | 1.00 | 8.0 | 15.0 | 5.066250 | 5.500667 | 8.702607 | 7.331070 | 
| 4 | 2023-01-06 14:38:32 | e63c0f14-3530-49e9-b73e-f92594e82663 | 315.693056 | 2.79 | 2.79 | 22.94 | 1.0 | 2.0 | 22.940000 | 13.675000 | 0.000000 | 9.265000 | 
| 5 | 2023-06-09 16:38:32 | 6acb20fd-605d-4982-aa39-77054f08103c | 965.136389 | NaN | NaN | 0.88 | 5.0 | 7.0 | 15.198000 | 23.201429 | 19.734034 | 22.005472 | 
| 6 | 2023-03-20 15:08:44 | 6f5299d0-fa38-4707-8108-1b66805d84e5 | 241.958889 | 2.99 | 2.99 | 35.00 | 4.0 | 8.0 | 35.472500 | 37.061250 | 17.705779 | 14.057970 | 
| 7 | 2023-05-04 15:15:25 | 099cb405-5b2d-4dba-9071-a157ff0dbadc | 219.637222 | 0.79 | 0.79 | 24.16 | 2.0 | 5.0 | 14.825000 | 11.268000 | 9.335000 | 8.628357 | 
| 8 | 2023-06-02 14:24:28 | 0154e4b4-25a4-4276-af72-2826bbc64c31 | 745.476667 | NaN | NaN | 3.46 | NaN | NaN | NaN | NaN | NaN | NaN | 
| 9 | 2022-11-01 14:32:22 | 8687e2a4-7f97-4442-873c-5c52d74404f8 | 284.610278 | 3.64 | 11.29 | 1.43 | 6.0 | 15.0 | 14.155000 | 11.654667 | 15.361576 | 16.405865 | 
Save features into catalog¶
With a feature group, we can save all of its features in one call.
In [18]:
                Copied!
                
                
            # Save every feature in the group to the catalog in a single call.
            feature_group.save()
# Save every feature in the group to the catalog in a single call.
feature_group.save()
    
        Done! |████████████████████████████████████████| 100% in 9.1s (0.11%/s) Done! |████████████████████████████████████████| 100% in 6.2s (0.16%/s) Loading Feature(s) |████████████████████████████████████████| 10/10 [100%] in 0. Done! |████████████████████████████████████████| 100% in 9.1s (0.11%/s) Done! |████████████████████████████████████████| 100% in 6.1s (0.17%/s) Loading Feature(s) |████████████████████████████████████████| 10/10 [100%] in 0.
Add description¶
In [19]:
                Copied!
                
                
            # Add a human-readable description to the recency feature.
# NOTE(review): the feature value is expressed in hours (.dt.hour); consider stating the unit in the description.
customer_x_productgroup_time_since_latest_timestamp.update_description(
    "Time Since Latest interaction between the customer and the product group"
)
# Add a human-readable description to the recency feature.
# NOTE(review): the feature value is expressed in hours (.dt.hour); consider stating the unit in the description.
customer_x_productgroup_time_since_latest_timestamp.update_description(
    "Time Since Latest interaction between the customer and the product group"
)
    
        In [20]:
                Copied!
                
                
            # Add descriptions: pull each window's feature out of the aggregation result by name, then describe it.
customer_productgroup_sum_of_totalcost_14d = \
customer_productgroup_sum_of_totalcost_14d_28d["CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_14d"]
customer_productgroup_sum_of_totalcost_14d.update_description(
    "Total spent by the customer on the product group over a 14d period."
)
customer_productgroup_sum_of_totalcost_28d = \
customer_productgroup_sum_of_totalcost_14d_28d["CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d"]
customer_productgroup_sum_of_totalcost_28d.update_description(
    "Total spent by the customer on the product group over a 28d period."
)
# Add descriptions: pull each window's feature out of the aggregation result by name, then describe it.
customer_productgroup_sum_of_totalcost_14d = \
customer_productgroup_sum_of_totalcost_14d_28d["CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_14d"]
customer_productgroup_sum_of_totalcost_14d.update_description(
    "Total spent by the customer on the product group over a 14d period."
)
customer_productgroup_sum_of_totalcost_28d = \
customer_productgroup_sum_of_totalcost_14d_28d["CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d"]
customer_productgroup_sum_of_totalcost_28d.update_description(
    "Total spent by the customer on the product group over a 28d period."
)
    
        In [21]:
                Copied!
                
                
            # Add a human-readable description to the latest-invoice-amount feature.
customer_latest_invoice_amount.update_description("Latest invoice Amount for the customer")
# Add a human-readable description to the latest-invoice-amount feature.
customer_latest_invoice_amount.update_description("Latest invoice Amount for the customer")
    
        In [22]:
                Copied!
                
                
            # Add descriptions: pull each window's feature out of the aggregation result by name, then describe it.
customer_count_of_invoice_14d = customer_count_of_invoice_14d_28d["CUSTOMER_Count_of_invoice_14d"]
customer_count_of_invoice_14d.update_description(
    "Count of invoice for the customer over a 14d period."
)
customer_count_of_invoice_28d = customer_count_of_invoice_14d_28d["CUSTOMER_Count_of_invoice_28d"]
customer_count_of_invoice_28d.update_description(
    "Count of invoice for the customer over a 28d period."
)
# Add descriptions: pull each window's feature out of the aggregation result by name, then describe it.
customer_count_of_invoice_14d = customer_count_of_invoice_14d_28d["CUSTOMER_Count_of_invoice_14d"]
customer_count_of_invoice_14d.update_description(
    "Count of invoice for the customer over a 14d period."
)
customer_count_of_invoice_28d = customer_count_of_invoice_14d_28d["CUSTOMER_Count_of_invoice_28d"]
customer_count_of_invoice_28d.update_description(
    "Count of invoice for the customer over a 28d period."
)
    
        In [23]:
                Copied!
                
                
            # Add descriptions: pull each window's feature out of the aggregation result by name, then describe it.
customer_avg_of_invoice_amount_14d = customer_avg_of_invoice_amount_14d_28d["CUSTOMER_Avg_of_invoice_Amount_14d"]
customer_avg_of_invoice_amount_14d.update_description(
    "Avg of invoice Amount for the customer over a 14d period."
)
customer_avg_of_invoice_amount_28d = customer_avg_of_invoice_amount_14d_28d["CUSTOMER_Avg_of_invoice_Amount_28d"]
customer_avg_of_invoice_amount_28d.update_description(
    "Avg of invoice Amount for the customer over a 28d period."
)
# Add descriptions: pull each window's feature out of the aggregation result by name, then describe it.
customer_avg_of_invoice_amount_14d = customer_avg_of_invoice_amount_14d_28d["CUSTOMER_Avg_of_invoice_Amount_14d"]
customer_avg_of_invoice_amount_14d.update_description(
    "Avg of invoice Amount for the customer over a 14d period."
)
customer_avg_of_invoice_amount_28d = customer_avg_of_invoice_amount_14d_28d["CUSTOMER_Avg_of_invoice_Amount_28d"]
customer_avg_of_invoice_amount_28d.update_description(
    "Avg of invoice Amount for the customer over a 28d period."
)
    
        In [24]:
                Copied!
                
                
            # Add descriptions: pull each window's feature out of the aggregation result by name, then describe it.
customer_std_of_invoice_amount_14d = customer_std_of_invoice_amount_14d_28d["CUSTOMER_Std_of_invoice_Amount_14d"]
customer_std_of_invoice_amount_14d.update_description(
    "Std of invoice Amount for the customer over a 14d period."
)
customer_std_of_invoice_amount_28d = customer_std_of_invoice_amount_14d_28d["CUSTOMER_Std_of_invoice_Amount_28d"]
customer_std_of_invoice_amount_28d.update_description(
    "Std of invoice Amount for the customer over a 28d period."
)
# Add descriptions: pull each window's feature out of the aggregation result by name, then describe it.
customer_std_of_invoice_amount_14d = customer_std_of_invoice_amount_14d_28d["CUSTOMER_Std_of_invoice_Amount_14d"]
customer_std_of_invoice_amount_14d.update_description(
    "Std of invoice Amount for the customer over a 14d period."
)
customer_std_of_invoice_amount_28d = customer_std_of_invoice_amount_14d_28d["CUSTOMER_Std_of_invoice_Amount_28d"]
customer_std_of_invoice_amount_28d.update_description(
    "Std of invoice Amount for the customer over a 28d period."
)
    
        In [ ]:
                Copied!