CUSTOMER vs OVERALL item TotalCost across product ProductGroups 28d
SDK code to create CUSTOMER_vs_OVERALL_item_TotalCost_across_product_ProductGroups_28d
Feature description:
Similarity between the customer and all customers measured by the Cosine Similarity between the Distribution representing the cumulative TotalCost of item, categorized by their respective product's ProductGroup, over 28d for both entities.
In [ ]:
Copied!
import featurebyte as fb
fb.use_profile("tutorial")
import featurebyte as fb
fb.use_profile("tutorial")
Activate catalog
In [ ]:
Copied!
# Activate the "Grocery Dataset Tutorial" catalog and keep a handle to it;
# the handle is used later to look up views and the observation table.
catalog = fb.Catalog.activate("Grocery Dataset Tutorial")
# Activate the "Grocery Dataset Tutorial" catalog and keep a handle to it;
# the handle is used later to look up views and the observation table.
catalog = fb.Catalog.activate("Grocery Dataset Tutorial")
Set windows for aggregation
In [ ]:
Copied!
# Window labels: each entry is appended to the feature names as a suffix
# and the list is passed as `windows` to aggregate_over.
windows = ['28d']
# Window labels: each entry is appended to the feature names as a suffix
# and the list is passed as `windows` to aggregate_over.
windows = ['28d']
Get view from table
In [ ]:
Copied!
# Get view from GROCERYPRODUCT dimension table.
# Presumably this table supplies the ProductGroup column used later as the
# groupby category -- confirm against the table schema.
groceryproduct_view = catalog.get_view("GROCERYPRODUCT")
# Get view from GROCERYPRODUCT dimension table.
# Presumably this table supplies the ProductGroup column used later as the
# groupby category -- confirm against the table schema.
groceryproduct_view = catalog.get_view("GROCERYPRODUCT")
In [ ]:
Copied!
# Get view from INVOICEITEMS item table.
# This is the view that is joined, grouped and aggregated in the later cells.
invoiceitems_view = catalog.get_view("INVOICEITEMS")
# Get view from INVOICEITEMS item table.
# This is the view that is joined, grouped and aggregated in the later cells.
invoiceitems_view = catalog.get_view("INVOICEITEMS")
Join views
In [ ]:
Copied!
# Join GROCERYPRODUCT view to INVOICEITEMS view.
# The result is rebound to invoiceitems_view, so later cells operate on the joined view.
# rsuffix is an empty string; presumably the joined product columns keep their
# original names -- confirm against the SDK reference for View.join.
invoiceitems_view = invoiceitems_view.join(groceryproduct_view, rsuffix="")
# Join GROCERYPRODUCT view to INVOICEITEMS view.
# The result is rebound to invoiceitems_view, so later cells operate on the joined view.
# rsuffix is an empty string; presumably the joined product columns keep their
# original names -- confirm against the SDK reference for View.join.
invoiceitems_view = invoiceitems_view.join(groceryproduct_view, rsuffix="")
Do window aggregation from INVOICEITEMS
See SDK reference for features
See SDK reference to groupby a view
See SDK reference to do aggregation over time
In [ ]:
Copied!
# Group the INVOICEITEMS view on the customer key (GroceryCustomerGuid),
# with ProductGroup as the category.
invoiceitems_view_by_customer_across_productgroup = invoiceitems_view.groupby(
    ["GroceryCustomerGuid"],
    category="ProductGroup",
)
# Group the INVOICEITEMS view on the customer key (GroceryCustomerGuid),
# with ProductGroup as the category.
invoiceitems_view_by_customer_across_productgroup = invoiceitems_view.groupby(
    ["GroceryCustomerGuid"],
    category="ProductGroup",
)
In [ ]:
Copied!
# Per-customer SUM of TotalCost, categorized by ProductGroup, computed for
# every entry in `windows`; one feature name is generated per window.
feature_group = invoiceitems_view_by_customer_across_productgroup.aggregate_over(
    "TotalCost",
    method=fb.AggFunc.SUM,
    feature_names=[
        f"CUSTOMER_item_TotalCost_across_product_ProductGroups_{window}"
        for window in windows
    ],
    windows=windows,
)
# Pick the 28d feature out of the feature group by name.
customer_item_totalcost_across_product_productgroups_28d = feature_group[
    "CUSTOMER_item_TotalCost_across_product_ProductGroups_28d"
]
# Per-customer SUM of TotalCost, categorized by ProductGroup, computed for
# every entry in `windows`; one feature name is generated per window.
feature_group = invoiceitems_view_by_customer_across_productgroup.aggregate_over(
    "TotalCost",
    method=fb.AggFunc.SUM,
    feature_names=[
        f"CUSTOMER_item_TotalCost_across_product_ProductGroups_{window}"
        for window in windows
    ],
    windows=windows,
)
# Pick the 28d feature out of the feature group by name.
customer_item_totalcost_across_product_productgroups_28d = feature_group[
    "CUSTOMER_item_TotalCost_across_product_ProductGroups_28d"
]
In [ ]:
Copied!
# Group the INVOICEITEMS view with an empty key list (no entity),
# with ProductGroup as the category.
invoiceitems_view_by_overall_across_productgroup = invoiceitems_view.groupby(
    [],
    category="ProductGroup",
)
# Group the INVOICEITEMS view with an empty key list (no entity),
# with ProductGroup as the category.
invoiceitems_view_by_overall_across_productgroup = invoiceitems_view.groupby(
    [],
    category="ProductGroup",
)
In [ ]:
Copied!
# Overall (no entity key) SUM of TotalCost, categorized by ProductGroup,
# computed for every entry in `windows`; one feature name per window.
feature_group = invoiceitems_view_by_overall_across_productgroup.aggregate_over(
    "TotalCost",
    method=fb.AggFunc.SUM,
    feature_names=[
        f"OVERALL_item_TotalCost_across_product_ProductGroups_{window}"
        for window in windows
    ],
    windows=windows,
)
# Pick the 28d feature out of the feature group by name.
overall_item_totalcost_across_product_productgroups_28d = feature_group[
    "OVERALL_item_TotalCost_across_product_ProductGroups_28d"
]
# Overall (no entity key) SUM of TotalCost, categorized by ProductGroup,
# computed for every entry in `windows`; one feature name per window.
feature_group = invoiceitems_view_by_overall_across_productgroup.aggregate_over(
    "TotalCost",
    method=fb.AggFunc.SUM,
    feature_names=[
        f"OVERALL_item_TotalCost_across_product_ProductGroups_{window}"
        for window in windows
    ],
    windows=windows,
)
# Pick the 28d feature out of the feature group by name.
overall_item_totalcost_across_product_productgroups_28d = feature_group[
    "OVERALL_item_TotalCost_across_product_ProductGroups_28d"
]
Derive Similarity feature across entities
In [ ]:
Copied!
# Cosine similarity between the customer-level and the overall
# TotalCost-by-ProductGroup features over 28d.
customer_vs_overall_item_totalcost_across_product_productgroups_28d = (
    customer_item_totalcost_across_product_productgroups_28d.cd.cosine_similarity(
        overall_item_totalcost_across_product_productgroups_28d
    )
)
# Name the derived feature.
customer_vs_overall_item_totalcost_across_product_productgroups_28d.name = (
    "CUSTOMER_vs_OVERALL_item_TotalCost_across_product_ProductGroups_28d"
)
# Cosine similarity between the customer-level and the overall
# TotalCost-by-ProductGroup features over 28d.
customer_vs_overall_item_totalcost_across_product_productgroups_28d = (
    customer_item_totalcost_across_product_productgroups_28d.cd.cosine_similarity(
        overall_item_totalcost_across_product_productgroups_28d
    )
)
# Name the derived feature.
customer_vs_overall_item_totalcost_across_product_productgroups_28d.name = (
    "CUSTOMER_vs_OVERALL_item_TotalCost_across_product_ProductGroups_28d"
)
Preview feature
Read about the feature primary entity concept
Read about the serving entity concept
In [ ]:
Copied!
# Check the primary entity of the feature.
# As a bare expression this only displays a value in a notebook/REPL.
customer_vs_overall_item_totalcost_across_product_productgroups_28d.primary_entity
# Check the primary entity of the feature.
# As a bare expression this only displays a value in a notebook/REPL.
customer_vs_overall_item_totalcost_across_product_productgroups_28d.primary_entity
In [ ]:
Copied!
# Look up the observation table named 'Preview Table with 10 items' in the catalog.
preview_table = catalog.get_observation_table("Preview Table with 10 items")
# Look up the observation table named 'Preview Table with 10 items' in the catalog.
preview_table = catalog.get_observation_table("Preview Table with 10 items")
In [ ]:
Copied!
# Preview CUSTOMER_vs_OVERALL_item_TotalCost_across_product_ProductGroups_28d
# against the observation table fetched above.
customer_vs_overall_item_totalcost_across_product_productgroups_28d.preview(preview_table)
# Preview CUSTOMER_vs_OVERALL_item_TotalCost_across_product_ProductGroups_28d
# against the observation table fetched above.
customer_vs_overall_item_totalcost_across_product_productgroups_28d.preview(preview_table)
Save feature
In [ ]:
Copied!
# Save the named similarity feature via its save() method.
customer_vs_overall_item_totalcost_across_product_productgroups_28d.save()
# Save the named similarity feature via its save() method.
# NOTE(review): this statement appears twice on the page; a second save() on an
# already-saved feature may raise -- confirm before running this as a script.
customer_vs_overall_item_totalcost_across_product_productgroups_28d.save()
Add description and see feature definition file
In [ ]:
Copied!
# Add description.
# The adjacent string literals are concatenated by Python into a single sentence.
customer_vs_overall_item_totalcost_across_product_productgroups_28d.update_description(
"Similarity between the customer and all customers measured by the "
"Cosine Similarity between the Distribution representing the cumulative"
" TotalCost of item, categorized by their respective product's "
"ProductGroup, over 28d for both entities."
)
# See feature definition file.
# As a bare expression this only displays a value in a notebook/REPL.
customer_vs_overall_item_totalcost_across_product_productgroups_28d.definition
# Add description.
# The adjacent string literals are concatenated by Python into a single sentence.
customer_vs_overall_item_totalcost_across_product_productgroups_28d.update_description(
"Similarity between the customer and all customers measured by the "
"Cosine Similarity between the Distribution representing the cumulative"
" TotalCost of item, categorized by their respective product's "
"ProductGroup, over 28d for both entities."
)
# See feature definition file.
# As a bare expression this only displays a value in a notebook/REPL.
customer_vs_overall_item_totalcost_across_product_productgroups_28d.definition