PRODUCT vs PRODUCTGROUP item TotalCost across customer Age bands 28d
SDK code to create PRODUCT_vs_PRODUCTGROUP_item_TotalCost_across_customer_Age_bands_28d¶
Feature description:
Similarity between the product and all products in that product's productgroup, measured by the cosine similarity between two distributions: the cumulative TotalCost of items, categorized by their respective customer_Age_band, over 28d, computed once for the product and once for the productgroup.
In [ ]:
Copied!
# Load the FeatureByte SDK and select the "tutorial" profile.
# (The cell body was duplicated in the page export; one copy is sufficient.)
import featurebyte as fb

fb.use_profile("tutorial")
Activate catalog¶
In [ ]:
Copied!
# Activate the catalog from which the views and the observation table are fetched.
catalog = fb.Catalog.activate("Grocery Dataset Tutorial")
Set windows for aggregation¶
In [ ]:
Copied!
# Aggregation windows. Each entry is passed to aggregate_over() and is also
# appended (after an underscore) to the generated feature names.
windows = ["28d"]
Get view from table¶
In [ ]:
Copied!
# Get view from GROCERYPRODUCT dimension table.
groceryproduct_view = catalog.get_view("GROCERYPRODUCT")
In [ ]:
Copied!
# Get view from INVOICEITEMS item table.
invoiceitems_view = catalog.get_view("INVOICEITEMS")
In [ ]:
Copied!
# Get view from GROCERYCUSTOMER scd table.
grocerycustomer_view = catalog.get_view("GROCERYCUSTOMER")
Join views¶
In [ ]:
Copied!
# Join GROCERYPRODUCT view to INVOICEITEMS view.
# The join must run exactly once: with rsuffix="" the joined columns keep their
# original names, so repeating the same join would add the same column names again.
invoiceitems_view = invoiceitems_view.join(groceryproduct_view, rsuffix="")
In [ ]:
Copied!
# Join GROCERYCUSTOMER view to INVOICEITEMS view.
# The join must run exactly once: with rsuffix="" the joined columns keep their
# original names, so repeating the same join would add the same column names again.
invoiceitems_view = invoiceitems_view.join(grocerycustomer_view, rsuffix="")
Derive Timedelta from datetime column¶
In [ ]:
Copied!
# Derive Age from the event timestamp and the date of birth:
# (Timestamp - DateOfBirth) taken in days, divided by 365.25, rounded down.
# NOTE(review): relies on `.dt.day` of a timestamp difference yielding the
# elapsed days rather than a calendar day-of-month - confirm against the SDK docs.
invoiceitems_view["customer_Age"] = (
    (
        invoiceitems_view["Timestamp"]
        - invoiceitems_view["DateOfBirth"]
    ).dt.day / 365.25
).floor()

# Transform age into a 5 year age band, stored first as the band's lower bound.
# For whole-number ages, ceil((age + 1) / 5) - 1 is the zero-based band index,
# so multiplying by 5 yields 0, 5, 10, ...
invoiceitems_view["customer_Age_band"] = (
    ((invoiceitems_view["customer_Age"] + 1) / 5).ceil() - 1
) * 5

# Replace the numeric lower bound with a "<lower>-<upper>" label, upper = lower + 4.
invoiceitems_view["customer_Age_band"] = (
    invoiceitems_view["customer_Age_band"].astype(str)
    + "-" + (invoiceitems_view["customer_Age_band"] + 4).astype(str)
)
Do window aggregation from INVOICEITEMS¶
See SDK reference for features
See SDK reference to groupby a view
See SDK reference to do aggregation over time
In [ ]:
Copied!
# Group INVOICEITEMS view by product entity (GroceryProductGuid) across different
# customer_Age_bands. The `category` argument makes later aggregations produce one
# value per customer_Age_band for each product.
invoiceitems_view_by_product_across_customer_age_band = invoiceitems_view.groupby(
    ["GroceryProductGuid"], category="customer_Age_band"
)
In [ ]:
Copied!
# Distribution representing the cumulative TotalCost of item, categorized by their respective
# customer_Age_band, for the product over time.
# One feature is created per entry in `windows`, named "<prefix>_<window>".
feature_group = invoiceitems_view_by_product_across_customer_age_band.aggregate_over(
    "TotalCost",
    method=fb.AggFunc.SUM,
    feature_names=[
        f"PRODUCT_item_TotalCost_across_customer_Age_bands_{w}" for w in windows
    ],
    windows=windows,
)

# Get PRODUCT_item_TotalCost_across_customer_Age_bands_28d object from feature group.
product_item_totalcost_across_customer_age_bands_28d = feature_group[
    "PRODUCT_item_TotalCost_across_customer_Age_bands_28d"
]
In [ ]:
Copied!
# Group INVOICEITEMS view by productgroup entity (ProductGroup) across different
# customer_Age_bands. The `category` argument makes later aggregations produce one
# value per customer_Age_band for each product group.
invoiceitems_view_by_productgroup_across_customer_age_band = invoiceitems_view.groupby(
    ["ProductGroup"], category="customer_Age_band"
)
In [ ]:
Copied!
# Distribution representing the cumulative TotalCost of item, categorized by their respective
# customer_Age_band, for the productgroup over time.
# One feature is created per entry in `windows`, named "<prefix>_<window>".
feature_group = invoiceitems_view_by_productgroup_across_customer_age_band.aggregate_over(
    "TotalCost",
    method=fb.AggFunc.SUM,
    feature_names=[
        f"PRODUCTGROUP_item_TotalCost_across_customer_Age_bands_{w}" for w in windows
    ],
    windows=windows,
)

# Get PRODUCTGROUP_item_TotalCost_across_customer_Age_bands_28d object from feature group.
productgroup_item_totalcost_across_customer_age_bands_28d = feature_group[
    "PRODUCTGROUP_item_TotalCost_across_customer_Age_bands_28d"
]
Derive Similarity feature across entities¶
In [ ]:
Copied!
# Derive Similarity feature from cosine similarity between
# PRODUCT_item_TotalCost_across_customer_Age_bands_28d
# and PRODUCTGROUP_item_TotalCost_across_customer_Age_bands_28d
product_vs_productgroup_item_totalcost_across_customer_age_bands_28d = (
    product_item_totalcost_across_customer_age_bands_28d.cd.cosine_similarity(
        productgroup_item_totalcost_across_customer_age_bands_28d
    )
)

# Give a name to new feature
product_vs_productgroup_item_totalcost_across_customer_age_bands_28d.name = (
    "PRODUCT_vs_PRODUCTGROUP_item_TotalCost_across_customer_Age_bands_28d"
)
Preview feature¶
Read about the feature primary entity concept
Read about the serving entity concept
In [ ]:
Copied!
# Check the primary entity of the feature.
# As the last expression of a notebook cell, its value is displayed as the cell output.
product_vs_productgroup_item_totalcost_across_customer_age_bands_28d.primary_entity
In [ ]:
Copied!
# Get observation table: 'Preview Table with 10 items'
preview_table = catalog.get_observation_table("Preview Table with 10 items")
In [ ]:
Copied!
# Preview PRODUCT_vs_PRODUCTGROUP_item_TotalCost_across_customer_Age_bands_28d
# using the observation table fetched above as input.
product_vs_productgroup_item_totalcost_across_customer_age_bands_28d.preview(
    preview_table
)
Save feature¶
In [ ]:
Copied!
# Save feature
# Called once only. NOTE(review): a second save() of the same, already-saved
# feature is presumably rejected by the SDK - confirm against Feature.save docs.
product_vs_productgroup_item_totalcost_across_customer_age_bands_28d.save()
Add description and see feature definition file¶
In [ ]:
Copied!
# Add description
# Adjacent string literals are concatenated into a single description string.
product_vs_productgroup_item_totalcost_across_customer_age_bands_28d.update_description(
    "Similarity between the product and all products of the product "
    "productgroup measured by the Cosine Similarity between the "
    "Distribution representing the cumulative TotalCost of item, "
    "categorized by their respective customer_Age_band, over 28d for both "
    "entities."
)

# See feature definition file
# As the last expression of a notebook cell, its value is displayed as the cell output.
product_vs_productgroup_item_totalcost_across_customer_age_bands_28d.definition