CUSTOMER Avg ProductGroup Embedding 14d vs 183d
Aggregate embedding features over time¶
Another useful way of aggregating embedding vectors is aggregation over time.
This capability makes it possible to compute average/max embeddings over various time windows, which is useful for capturing how a grocery customer's basket changes over time and for computing similarities between the customer's current basket and past baskets.
In [ ]:
Copied!
# Import the featurebyte SDK and select the "tutorial" profile for this session.
import featurebyte as fb
fb.use_profile("tutorial")
# NOTE(review): the two statements below repeat the ones above verbatim —
# this looks like a notebook-rendering artifact; confirm before running.
import featurebyte as fb
fb.use_profile("tutorial")
In [ ]:
Copied!
# Activate the "Grocery Dataset Tutorial" catalog and keep a handle to it;
# later cells fetch views and the observation table through `catalog`.
catalog = fb.Catalog.activate("Grocery Dataset Tutorial")
# NOTE(review): verbatim repeat of the line above — likely a rendering artifact.
catalog = fb.Catalog.activate("Grocery Dataset Tutorial")
Create UDF function¶
F_SBERT_EMBEDDING
is the name of a SQL function available in the Data Warehouse, which calls a deployed transformer model.
In [ ]:
Copied!
# Register a user-defined function called 'embedding' that wraps the
# warehouse SQL function F_SBERT_EMBEDDING. It takes one VARCHAR argument
# ("x") and is declared to return an ARRAY; is_global=False is passed
# (presumably scoping the UDF to the active catalog — confirm in the
# featurebyte docs).
embedding_udf = fb.UserDefinedFunction.create(
name='embedding',
sql_function_name='F_SBERT_EMBEDDING',
function_parameters=[fb.FunctionParameter(name="x", dtype=fb.enum.DBVarType.VARCHAR)],
output_dtype=fb.enum.DBVarType.ARRAY,
is_global=False,
)
# NOTE(review): the call below repeats the one above with the same name
# ('embedding'). It looks like a rendering artifact; if executed as written,
# the second create may conflict with the first — verify before running.
embedding_udf = fb.UserDefinedFunction.create(
name='embedding',
sql_function_name='F_SBERT_EMBEDDING',
function_parameters=[fb.FunctionParameter(name="x", dtype=fb.enum.DBVarType.VARCHAR)],
output_dtype=fb.enum.DBVarType.ARRAY,
is_global=False,
)
Get views¶
In [ ]:
Copied!
# Fetch the two views used below from the active catalog.
# Get view from GROCERYPRODUCT dimension table.
groceryproduct_view = catalog.get_view("GROCERYPRODUCT")
# Get view from INVOICEITEMS item table.
invoiceitems_view = catalog.get_view("INVOICEITEMS")
# NOTE(review): the lines below repeat the lookups above verbatim —
# likely a rendering artifact.
# Get view from GROCERYPRODUCT dimension table.
groceryproduct_view = catalog.get_view("GROCERYPRODUCT")
# Get view from INVOICEITEMS item table.
invoiceitems_view = catalog.get_view("INVOICEITEMS")
Run embedding UDF on the ProductGroup column¶
In [ ]:
Copied!
# Apply the embedding UDF to the ProductGroup column and store the result
# as a new "ProductGroupEmbedding" column on the product view.
groceryproduct_view["ProductGroupEmbedding"] = embedding_udf(groceryproduct_view["ProductGroup"])
# NOTE(review): verbatim repeat of the assignment above — likely a rendering artifact.
groceryproduct_view["ProductGroupEmbedding"] = embedding_udf(groceryproduct_view["ProductGroup"])
Join views¶
In [ ]:
Copied!
# Bring the product columns (including ProductGroupEmbedding) onto the
# invoice-items view so they can be aggregated per customer later.
# rsuffix="" presumably keeps the joined column names unchanged — confirm
# against the featurebyte join documentation.
# Join GROCERYPRODUCT view to INVOICEITEMS view.
invoiceitems_view = invoiceitems_view.join(groceryproduct_view, rsuffix="")
# NOTE(review): the join below repeats the one above on the already-joined
# view; this looks like a rendering artifact — verify before running as written.
# Join GROCERYPRODUCT view to INVOICEITEMS view.
invoiceitems_view = invoiceitems_view.join(groceryproduct_view, rsuffix="")
Create observation table¶
In [ ]:
Copied!
# Build a small observation table (sample_rows=10) from the invoice-items
# view, keeping only the timestamp and item-id columns and renaming them to
# POINT_IN_TIME and GROCERYINVOICEITEMGUID. The table is stored under the
# name "Preview tables with Invoice Items" and is looked up by that name in
# the preview cell.
observation_table = invoiceitems_view.create_observation_table(
name="Preview tables with Invoice Items",
sample_rows=10,
columns=["Timestamp", "GroceryInvoiceItemGuid"],
columns_rename_mapping={
"Timestamp": "POINT_IN_TIME",
"GroceryInvoiceItemGuid": "GROCERYINVOICEITEMGUID",
},
)
# NOTE(review): the call below repeats the one above with the same table
# name. It looks like a rendering artifact; a second create under the same
# name may conflict — verify before running.
observation_table = invoiceitems_view.create_observation_table(
name="Preview tables with Invoice Items",
sample_rows=10,
columns=["Timestamp", "GroceryInvoiceItemGuid"],
columns_rename_mapping={
"Timestamp": "POINT_IN_TIME",
"GroceryInvoiceItemGuid": "GROCERYINVOICEITEMGUID",
},
)
Create features from embedding column¶
In [ ]:
Copied!
# Per customer (grouped by GroceryCustomerGuid), average the
# ProductGroupEmbedding column over two windows, "14d" and "183d".
# feature_names and windows are parallel lists: the first name goes with the
# first window, and so on.
customer_avg_product_groups = invoiceitems_view.groupby("GroceryCustomerGuid").aggregate_over(
"ProductGroupEmbedding",
method=fb.AggFunc.AVG,
feature_names=["CUSTOMER_Avg_of_ProductGroup_Embedding_14d", "CUSTOMER_Avg_of_ProductGroup_Embedding_183d"],
windows=["14d", "183d"]
)
# Derive a single feature: cosine similarity between the 14d average
# embedding and the 183d average embedding.
customer_avg_product_groups_cosine = customer_avg_product_groups["CUSTOMER_Avg_of_ProductGroup_Embedding_14d"].vec.cosine_similarity(
customer_avg_product_groups["CUSTOMER_Avg_of_ProductGroup_Embedding_183d"]
)
# Name the derived feature; it is previewed and saved in later cells.
customer_avg_product_groups_cosine.name = "CUSTOMER_Consistency_of_Avg_of_ProductGroup_Embedding_14d_183d"
# NOTE(review): everything below repeats the statements above verbatim —
# likely a rendering artifact.
customer_avg_product_groups = invoiceitems_view.groupby("GroceryCustomerGuid").aggregate_over(
"ProductGroupEmbedding",
method=fb.AggFunc.AVG,
feature_names=["CUSTOMER_Avg_of_ProductGroup_Embedding_14d", "CUSTOMER_Avg_of_ProductGroup_Embedding_183d"],
windows=["14d", "183d"]
)
customer_avg_product_groups_cosine = customer_avg_product_groups["CUSTOMER_Avg_of_ProductGroup_Embedding_14d"].vec.cosine_similarity(
customer_avg_product_groups["CUSTOMER_Avg_of_ProductGroup_Embedding_183d"]
)
customer_avg_product_groups_cosine.name = "CUSTOMER_Consistency_of_Avg_of_ProductGroup_Embedding_14d_183d"
In [ ]:
Copied!
# Look up the observation table by name from the catalog, convert it to a
# pandas DataFrame, and preview the cosine-similarity feature on those rows.
observation_table = catalog.get_observation_table("Preview tables with Invoice Items")
customer_avg_product_groups_cosine.preview(observation_table.to_pandas())
# NOTE(review): verbatim repeat of the two lines above — likely a rendering artifact.
observation_table = catalog.get_observation_table("Preview tables with Invoice Items")
customer_avg_product_groups_cosine.preview(observation_table.to_pandas())
Save feature and view definition file¶
In [ ]:
Copied!
# Save the cosine-similarity feature (presumably to the active catalog — confirm).
customer_avg_product_groups_cosine.save()
# NOTE(review): verbatim repeat of the save above — likely a rendering
# artifact; a second save of the same feature may raise. Verify before running.
customer_avg_product_groups_cosine.save()
In [ ]:
Copied!
# Attach a human-readable description to the feature, then display its
# definition.
# Add description
customer_avg_product_groups_cosine.update_description(
"Similarity between average customer's baskets in 14 and 183 days periods"
)
# See feature definition file
customer_avg_product_groups_cosine.definition
# NOTE(review): the lines below repeat the statements above verbatim —
# likely a rendering artifact.
# Add description
customer_avg_product_groups_cosine.update_description(
"Similarity between average customer's baskets in 14 and 183 days periods"
)
# See feature definition file
customer_avg_product_groups_cosine.definition
In [ ]:
Copied!