### Create window aggregate features 

The next feature type we will consider is the window aggregate feature.
These are features generated by aggregating data within a specific time frame.


In [1]:
import featurebyte as fb

# Catalog used throughout this tutorial.
catalog_name = "Grocery Dataset Tutorial"

# Switch the SDK to the tutorial profile, then activate the catalog by name.
fb.use_profile("tutorial")
catalog = fb.Catalog.activate(catalog_name)

[32;20m16:42:36[0m | [1m[38;20mINFO    [0m[0m | [1m[38;20mUsing configuration file at: /Users/viktor/.featurebyte/config.yaml[0m[0m
[32;20m16:42:36[0m | [1m[38;20mINFO    [0m[0m | [1m[38;20mActive profile: tutorial (https://tutorials.featurebyte.com/api/v1)[0m[0m
[32;20m16:42:36[0m | [1m[38;20mINFO    [0m[0m | [1m[38;20mSDK version: 0.6.0.dev121[0m[0m
[32;20m16:42:36[0m | [1m[38;20mINFO    [0m[0m | [1m[38;20mNo catalog activated.[0m[0m
[32;20m16:42:36[0m | [1m[38;20mINFO    [0m[0m | [1m[38;20m11 feature lists, 66 features deployed[0m[0m
[32;20m16:42:36[0m | [1m[38;20mINFO    [0m[0m | [1m[38;20mUsing profile: tutorial[0m[0m
[32;20m16:42:36[0m | [1m[38;20mINFO    [0m[0m | [1m[38;20mUsing configuration file at: /Users/viktor/.featurebyte/config.yaml[0m[0m
[32;20m16:42:36[0m | [1m[38;20mINFO    [0m[0m | [1m[38;20mActive profile: tutorial (https://tutorials.featurebyte.com/api/v1)[0m[0m
[32;20m16:42:37[0m | [

In [2]:
# Get a view of the GROCERYINVOICE event table from the active catalog.
# The grouping and aggregation cells that follow are all built on this view.
groceryinvoice_view = catalog.get_view("GROCERYINVOICE")

#### Do window aggregation from GROCERYINVOICE


In [3]:
# Group the invoice view by the customer entity (GroceryCustomerGuid column);
# every aggregation below is computed on this grouped view.
groceryinvoice_view_by_customer = groceryinvoice_view.groupby(["GroceryCustomerGuid"])

In [4]:
# Latest invoice Amount for the customer.
# windows=[None] requests a single, unbounded window, so the feature is not
# limited to a recent period. aggregate_over() returns a group of features;
# indexing by name pulls out the single feature it contains.
customer_latest_invoice_amount = groceryinvoice_view_by_customer.aggregate_over(
    "Amount",
    method="latest",
    windows=[None],
    feature_names=["CUSTOMER_Latest_invoice_Amount"],
)["CUSTOMER_Latest_invoice_Amount"]

In [5]:
# Window lengths reused by the count, avg and std aggregations below.
windows = ["14d", "28d"]

In [6]:
# Count of invoices for the customer, one feature per entry in `windows`.
# Each feature name is the common prefix followed by "_<window>".
customer_count_of_invoice_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    method="count",
    windows=windows,
    feature_names=[f"CUSTOMER_Count_of_invoice_{window}" for window in windows],
)

In [7]:
# Average invoice Amount for the customer, one feature per entry in `windows`.
# Each feature name is the common prefix followed by "_<window>".
customer_avg_of_invoice_amount_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    "Amount",
    method="avg",
    windows=windows,
    feature_names=[f"CUSTOMER_Avg_of_invoice_Amount_{window}" for window in windows],
)

In [8]:
# Standard deviation of invoice Amount for the customer, one feature per entry
# in `windows`. Each feature name is the common prefix followed by "_<window>".
customer_std_of_invoice_amount_14d_28d = groceryinvoice_view_by_customer.aggregate_over(
    "Amount",
    method="std",
    windows=windows,
    feature_names=[f"CUSTOMER_Std_of_invoice_Amount_{window}" for window in windows],
)

#### Preview a feature group

In [9]:
# Bundle the latest-amount feature with the count, avg and std feature groups
# so they can be previewed and saved together.
feature_group = fb.FeatureGroup(
    [
        customer_latest_invoice_amount,
        customer_count_of_invoice_14d_28d,
        customer_avg_of_invoice_amount_14d_28d,
        customer_std_of_invoice_amount_14d_28d,
    ]
)

In [10]:
# Check the primary entity of the feature group.
# Left as the last expression of the cell so that the notebook displays it.
feature_group.primary_entity

[<featurebyte.api.entity.Entity at 0x13b95d600>
 {
   'name': 'customer',
   'created_at': '2023-11-27T15:39:09.477000',
   'updated_at': '2023-11-27T15:39:19.968000',
   'description': None,
   'serving_names': [
     'GROCERYCUSTOMERGUID'
   ],
   'catalog_name': 'Grocery Dataset Tutorial'
 }]

In [11]:
# Get observation table: 'Preview Table with 10 Customers'
# to_pandas() downloads the table; the result is passed to preview() in the next cell.
preview_table = catalog.get_observation_table("Preview Table with 10 Customers").to_pandas()

Downloading table |████████████████████████████████████████| 10/10 [100%] in 0.1


In [12]:
# Preview feature_group: compute every feature in the group for the rows of
# preview_table and display the result.
feature_group.preview(preview_table)

Unnamed: 0,POINT_IN_TIME,GROCERYCUSTOMERGUID,CUSTOMER_Latest_invoice_Amount,CUSTOMER_Count_of_invoice_14d,CUSTOMER_Count_of_invoice_28d,CUSTOMER_Avg_of_invoice_Amount_14d,CUSTOMER_Avg_of_invoice_Amount_28d,CUSTOMER_Std_of_invoice_Amount_14d,CUSTOMER_Std_of_invoice_Amount_28d
0,2022-11-28 11:36:31,d4559f7d-eb28-42c6-b47d-847de24952c2,6.72,0,1,,6.72,,0.0
1,2022-10-09 15:47:55,3f8c7c4c-f2c2-408e-a08e-622de3d3a0b9,12.28,0,0,,,,
2,2022-09-14 15:42:42,35390325-8443-43c1-a934-18db923d9a47,10.02,0,4,,26.415,,19.563101
3,2022-12-26 18:39:46,4eb4ee84-ee13-4eec-9c26-61b6eb4ba35b,53.09,5,10,15.626,26.45,19.000809,26.837185
4,2022-12-06 08:47:43,e42fa5f3-7737-4c6a-9ef4-856f113e60bd,21.74,3,6,15.56,10.555,6.681337,7.307384
5,2022-11-09 12:14:40,8440debb-6abc-4adc-8c6c-749928141fd0,15.3,1,1,15.3,15.3,0.0,0.0
6,2022-10-12 17:32:15,8a54e527-e9a4-47a9-a28f-8b3c6ecc02db,14.55,2,4,14.56,15.05,0.01,10.109369
7,2023-01-01 11:51:28,cea213d4-36e4-48c3-ae8d-c7a25911e11c,0.89,12,29,3.209167,5.220345,4.154236,8.163223
8,2023-02-05 15:48:23,3b4f2821-b761-40e9-a32a-5f09685cc597,11.43,4,4,11.3175,11.3175,4.526297,4.526297
9,2023-03-10 16:15:46,91a64566-e212-4e36-8f23-c1f1f324a301,2.0,6,8,4.568333,10.43125,2.832763,11.335775


#### Save features into catalog 
With a feature group, all of its features can be saved in one call.

In [13]:
# Save all features of the group into the catalog in a single call.
feature_group.save()

Done! |████████████████████████████████████████| 100% in 13.0s (0.08%/s)        
Loading Feature(s) |████████████████████████████████████████| 7/7 [100%] in 0.6s


#### Add description and see feature definition files

In [14]:
# Add description
customer_latest_invoice_amount.update_description("Latest invoice Amount for the customer")
# See feature definition file (last expression of the cell, so the notebook displays it)
customer_latest_invoice_amount.definition

In [15]:
# Add a description to each count feature.
# Both features come from the method="count" aggregation, so both descriptions
# must say "Count of invoice"; only the window length differs.
customer_count_of_invoice_14d = customer_count_of_invoice_14d_28d["CUSTOMER_Count_of_invoice_14d"]
customer_count_of_invoice_14d.update_description(
    "Count of invoice for the customer over a 14d period."
)
customer_count_of_invoice_28d = customer_count_of_invoice_14d_28d["CUSTOMER_Count_of_invoice_28d"]
customer_count_of_invoice_28d.update_description(
    "Count of invoice for the customer over a 28d period."
)
# See feature definition file (last expression of the cell, so the notebook displays it)
customer_count_of_invoice_28d.definition

In [16]:
# Pick the two avg features out of the group by name.
customer_avg_of_invoice_amount_14d = customer_avg_of_invoice_amount_14d_28d[
    "CUSTOMER_Avg_of_invoice_Amount_14d"
]
customer_avg_of_invoice_amount_28d = customer_avg_of_invoice_amount_14d_28d[
    "CUSTOMER_Avg_of_invoice_Amount_28d"
]

# Add a description to each of them.
customer_avg_of_invoice_amount_14d.update_description(
    "Avg of invoice Amount for the customer over a 14d period."
)
customer_avg_of_invoice_amount_28d.update_description(
    "Avg of invoice Amount for the customer over a 28d period."
)

# See feature definition file
customer_avg_of_invoice_amount_28d.definition

In [17]:
# Pick the two std features out of the group by name.
customer_std_of_invoice_amount_14d = customer_std_of_invoice_amount_14d_28d[
    "CUSTOMER_Std_of_invoice_Amount_14d"
]
customer_std_of_invoice_amount_28d = customer_std_of_invoice_amount_14d_28d[
    "CUSTOMER_Std_of_invoice_Amount_28d"
]

# Add a description to each of them.
customer_std_of_invoice_amount_14d.update_description(
    "Std of invoice Amount for the customer over a 14d period."
)
customer_std_of_invoice_amount_28d.update_description(
    "Std of invoice Amount for the customer over a 28d period."
)

# See feature definition file
customer_std_of_invoice_amount_28d.definition

### Concepts in this tutorial
- [Aggregate Features](https://docs.featurebyte.com/latest/about/glossary/#aggregate-features)
- [Window Aggregate Features](https://docs.featurebyte.com/latest/about/glossary/#aggregates-over-a-window)

#### SDK reference
- [View.groupby()](https://docs.featurebyte.com/latest/reference/featurebyte.api.view.GroupByMixin.groupby/#see-also)
- [Groupby.aggregate_over()](https://docs.featurebyte.com/latest/reference/featurebyte.api.groupby.GroupBy.aggregate_over/)
- [List of aggregate functions](https://docs.featurebyte.com/latest/reference/featurebyte.enum.AggFunc/)
- [FeatureGroup](https://docs.featurebyte.com/latest/reference/core/feature_group/)
- [FeatureGroup.save()](https://docs.featurebyte.com/latest/reference/featurebyte.api.feature_group.FeatureGroup.save/)