SDK DLT example
In [1]:
                Copied!
                
                
            import time
import os 
import featurebyte as fb
import time
import os 
import featurebyte as fb
    
        10:51:42 | INFO | Using configuration file at: /Users/viktor/.featurebyte/config.yaml
        10:51:42 | INFO | Active profile: local (http://127.0.0.1:8088)
        10:51:42 | INFO | SDK version: 0.6.0
        10:51:42 | INFO | No catalog activated.
        10:51:42 | INFO | 0 feature list, 0 feature deployed
Connect to Databricks data warehouse¶
In [2]:
                Copied!
                
                
            # Register the Databricks feature store under the name "databricks" and obtain a
# data source handle (`ds`) used below to look up warehouse tables.
# Every "<...>" value is a placeholder: substitute your own workspace/storage
# settings, and load the secrets from the environment or a secrets manager
# rather than committing them to the notebook.
ds = fb.FeatureStore.get_or_create(
    name="databricks",
    source_type=fb.SourceType.DATABRICKS,
    details=fb.DatabricksDetails(
        host="<host>",
        http_path="<http_path>",
        featurebyte_catalog="hive_metastore",
        featurebyte_schema="grocery",
        storage_type=fb.StorageType.S3,
        storage_url="<storage_url>",
        storage_spark_url="<storage_spark_url>",
    ),
    database_credential=fb.AccessTokenCredential(
        access_token="<databricks_access_token>",
    ),
    storage_credential=fb.S3StorageCredential(
        s3_access_key_id="<s3_access_key_id>",
        s3_secret_access_key="<s3_secret_access_key>",
    ),
).get_data_source()
    
        Create catalog¶
In [3]:
                Copied!
                
                
            # Create the catalog on top of the "databricks" feature store, then activate it
# and keep a handle to it for the entity definitions further down.
# NOTE: `create` is issued exactly once; repeating it with the same name is not
# idempotent.
catalog_name = "DLT Example Catalog"
fb.Catalog.create(catalog_name, feature_store_name="databricks")
catalog = fb.Catalog.activate(catalog_name)
    
        10:51:44 | INFO | Catalog activated: DLT Example Catalog
Create tables¶
In [4]:
                Copied!
                
                
            %%capture 
customer_table = ds.get_source_table(
    database_name="hive_metastore", schema_name="grocery", table_name="grocerycustomer"
).create_scd_table(
    name="grocerycustomer",
    surrogate_key_column='RowID',
    natural_key_column="GroceryCustomerGuid",
    effective_timestamp_column="ValidFrom",
    current_flag_column="CurrentRecord",
    record_creation_timestamp_column="record_available_at",
)
invoice_table = ds.get_source_table(
    database_name="hive_metastore", schema_name="grocery", table_name="groceryinvoice"
).create_event_table(
    name="groceryinvoice",
    event_id_column="GroceryInvoiceGuid",
    event_timestamp_column="Timestamp",
    event_timestamp_timezone_offset_column="tz_offset",
    record_creation_timestamp_column="record_available_at",
)
invoice_table.initialize_default_feature_job_setting()
items_table = ds.get_source_table(
    database_name="hive_metastore", schema_name="grocery", table_name="invoiceitems"
).create_item_table(
    name="invoiceitems",
    event_id_column="GroceryInvoiceGuid",
    item_id_column="GroceryInvoiceItemGuid",
    event_table_name="groceryinvoice",
    record_creation_timestamp_column="record_available_at",
)
product_table = ds.get_source_table(
    database_name="hive_metastore", schema_name="grocery", table_name="groceryproduct_embedding"
).create_dimension_table(name="groceryproduct", dimension_id_column="GroceryProductGuid")
%%capture 
customer_table = ds.get_source_table(
    database_name="hive_metastore", schema_name="grocery", table_name="grocerycustomer"
).create_scd_table(
    name="grocerycustomer",
    surrogate_key_column='RowID',
    natural_key_column="GroceryCustomerGuid",
    effective_timestamp_column="ValidFrom",
    current_flag_column="CurrentRecord",
    record_creation_timestamp_column="record_available_at",
)
invoice_table = ds.get_source_table(
    database_name="hive_metastore", schema_name="grocery", table_name="groceryinvoice"
).create_event_table(
    name="groceryinvoice",
    event_id_column="GroceryInvoiceGuid",
    event_timestamp_column="Timestamp",
    event_timestamp_timezone_offset_column="tz_offset",
    record_creation_timestamp_column="record_available_at",
)
invoice_table.initialize_default_feature_job_setting()
items_table = ds.get_source_table(
    database_name="hive_metastore", schema_name="grocery", table_name="invoiceitems"
).create_item_table(
    name="invoiceitems",
    event_id_column="GroceryInvoiceGuid",
    item_id_column="GroceryInvoiceItemGuid",
    event_table_name="groceryinvoice",
    record_creation_timestamp_column="record_available_at",
)
product_table = ds.get_source_table(
    database_name="hive_metastore", schema_name="grocery", table_name="groceryproduct_embedding"
).create_dimension_table(name="groceryproduct", dimension_id_column="GroceryProductGuid")
    
        Create and tag entities¶
In [5]:
                Copied!
                
                
            # Create the entities (each is created exactly once) with their serving names.
catalog.create_entity(name="grocerycustomer", serving_names=["GROCERYCUSTOMERGUID"])
catalog.create_entity(name="groceryinvoice", serving_names=["GROCERYINVOICEGUID"])
catalog.create_entity(name="groceryitem", serving_names=["GROCERYINVOICEITEMGUID"])
catalog.create_entity(name="groceryproduct", serving_names=["GROCERYPRODUCTGUID"])
catalog.create_entity(name="productgroup", serving_names=["PRODUCTGROUP"])
catalog.create_entity(name="frenchstate", serving_names=["FRENCHSTATE"])

# Tag the entity columns of the grocery customer table.
customer_table.GroceryCustomerGuid.as_entity("grocerycustomer")
customer_table.State.as_entity("frenchstate")

# Tag the entity columns of the grocery invoice table.
invoice_table.GroceryInvoiceGuid.as_entity("groceryinvoice")
invoice_table.GroceryCustomerGuid.as_entity("grocerycustomer")

# Tag the entity columns of the grocery items table.
items_table.GroceryInvoiceItemGuid.as_entity("groceryitem")
items_table.GroceryInvoiceGuid.as_entity("groceryinvoice")
items_table.GroceryProductGuid.as_entity("groceryproduct")

# Tag the entity columns of the grocery product table.
product_table.GroceryProductGuid.as_entity("groceryproduct")
product_table.ProductGroup.as_entity("productgroup")
    
        Get and join views¶
In [6]:
                Copied!
                
                
            # Obtain a view of each registered table; the joins and aggregations below
# operate on these views.
invoice_view = invoice_table.get_view()
items_view = items_table.get_view()
product_view = product_table.get_view()
    
        In [7]:
                Copied!
                
                
            # Enrich the item rows with product columns, then with invoice columns.
# Invoice columns receive the "_invoice" suffix. Each join is applied exactly
# once: `items_view` is rebound, so repeating these lines would join again.
items_view = items_view.join(product_view)
items_view = items_view.join(invoice_view, rsuffix="_invoice")
    
        Create invoice product group embedding¶
In [8]:
                Copied!
                
                
            # Per-invoice feature: average of the "ProductGroupEmbedding" column over the
# items of each invoice (grouped by GroceryInvoiceGuid, no time window).
# NOTE(review): "ProductGroupEmbedding" presumably comes from the product view
# joined into `items_view` earlier — confirm against the source table schema.
invoice_product_group_embedding = items_view.groupby("GroceryInvoiceGuid").aggregate(
    "ProductGroupEmbedding",
    method=fb.AggFunc.AVG,
    feature_name="INVOICE_Avg_of_ProductGroup_Embedding",
)
    
        Create customer-level product group embeddings¶
In [9]:
                Copied!
                
                
            # Per-customer features: average "ProductGroupEmbedding" over the trailing
# 14-day and 183-day windows (one feature name per window, in the same order).
customer_avg_product_groups = items_view.groupby("GroceryCustomerGuid").aggregate_over(
    "ProductGroupEmbedding",
    method=fb.AggFunc.AVG,
    feature_names=[
        "CUSTOMER_Avg_of_ProductGroup_Embedding_14d",
        "CUSTOMER_Avg_of_ProductGroup_Embedding_183d",
    ],
    windows=["14d", "183d"],
)

# Derived feature: cosine similarity between the 14-day and the 183-day average
# embeddings of the same customer.
customer_avg_product_groups_cosine = customer_avg_product_groups[
    "CUSTOMER_Avg_of_ProductGroup_Embedding_14d"
].vec.cosine_similarity(
    customer_avg_product_groups["CUSTOMER_Avg_of_ProductGroup_Embedding_183d"]
)
customer_avg_product_groups_cosine.name = (
    "CUSTOMER_Consistency_of_Avg_of_ProductGroup_Embedding_14d_183d"
)
    
        Preview features¶
In [10]:
                Copied!
                
                
            # Bundle the features defined above into one group so they can be previewed
# together: the consistency feature, the two windowed customer averages, and
# the per-invoice average.
features = fb.FeatureGroup(
    [
        customer_avg_product_groups_cosine,
        customer_avg_product_groups,
        invoice_product_group_embedding,
    ]
)
    
        In [11]:
                Copied!
                
                
            # Build a 10-row observation table from the joined items view. The selected
# columns are renamed to POINT_IN_TIME and to the serving names declared for the
# customer and invoice entities. The table is created exactly once under the
# name "Preview table".
observation_table = items_view.create_observation_table(
    name="Preview table",
    sample_rows=10,
    columns=["Timestamp", "GroceryCustomerGuid", "GroceryInvoiceGuid"],
    columns_rename_mapping={
        "Timestamp": "POINT_IN_TIME",
        "GroceryCustomerGuid": "GROCERYCUSTOMERGUID",
        "GroceryInvoiceGuid": "GROCERYINVOICEGUID",
    },
)
    
        In [12]:
                Copied!
                
                
            # Preview the feature group on the sampled observation rows; the resulting
# frame is the cell's displayed output.
features.preview(observation_table.to_pandas())
    
        Out[12]:
| | POINT_IN_TIME | GROCERYCUSTOMERGUID | GROCERYINVOICEGUID | CUSTOMER_Consistency_of_Avg_of_ProductGroup_Embedding_14d_183d | CUSTOMER_Avg_of_ProductGroup_Embedding_14d | CUSTOMER_Avg_of_ProductGroup_Embedding_183d | INVOICE_Avg_of_ProductGroup_Embedding |
|---|---|---|---|---|---|---|---|
| 0 | 2023-01-25 16:17:02+00:00 | 967e4beb-c889-4ff9-9140-66655248bbde | 452619d8-da2c-4b87-8869-fabd8617f0b5 | 0.947687 | [-0.07795123751428601, 0.026106864757143, -0.0... | [-0.05989266951781601, 0.028268971296264003, -... | [-0.024409980539999997, 0.0467800437, 0.022734... | 
| 1 | 2023-01-25 17:58:34+00:00 | ed957eca-941a-47df-93ee-30ab1ae45ee5 | 3b7f52a4-eab9-409b-b329-11547eff92a3 | NaN | NaN | [-0.050461753724194, 0.03191136753629, -0.0203... | [-0.056538398775, 0.038149968064286, -0.025998... | 
| 2 | 2023-06-22 16:39:56+00:00 | 7de2b5ba-8078-4c60-aa69-ef7b68c42349 | ebe828ea-44af-4865-b2ed-eea02857b889 | 0.943922 | [-0.054724733771875006, 0.04170257735937501, 0... | [-0.054099960254567, 0.036855927644231005, -0.... | [-0.0376417868, 0.0208652486275, -0.0210912462... | 
| 3 | 2022-08-01 09:44:05+00:00 | 52476713-ad96-42e3-983d-31b1d3745a3f | 47ec017f-c65b-4bc9-917d-5a7b211c3621 | 0.866544 | [-0.05683987525, 0.045103899862500005, -0.0329... | [-0.053429381090463, 0.018712235034605, -0.015... | [-0.0524657229375, 0.02363832113125, -0.018923... | 
| 4 | 2022-08-16 17:13:05+00:00 | e5d71d02-7fec-4eaf-8327-84b3583b7cec | bccd2292-1920-407f-a307-79d6237eab7c | 0.956710 | [-0.058180038666667, 0.035606893166667, -0.016... | [-0.044520215697211006, 0.031701440613546, -0.... | [-0.020503219, 0.03539911665625, -0.0233798176... | 
| 5 | 2022-08-18 15:43:51+00:00 | 8ad0c1f9-e6cb-43d2-a459-3297f1b29f55 | ac5c7e47-8f65-4538-9146-f9edf130ea35 | NaN | NaN | [-0.039852984969097005, 0.030179503280556002, ... | [-0.0536430575, 0.037170080630769, -0.01672707... | 
| 6 | 2023-03-14 19:55:25+00:00 | 00b5a352-7300-4fad-adea-907f8d68f393 | 79e801ba-0bec-48fa-b505-6da5bc159434 | 0.756868 | [-0.008574453749999999, 0.06710812775, -0.0338... | [-0.050725246419149, 0.036322272146809005, -0.... | [-0.054892299269231004, 0.047410778044231, -0.... | 
| 7 | 2023-04-05 18:02:15+00:00 | 1b82b9eb-cc54-4cc4-a7e3-9a7417faa8a5 | 17745065-0092-4a56-852c-39e8dde94479 | 0.920511 | [-0.05063521202381, 0.033994672869048004, -0.0... | [-0.054906137815789006, 0.037337559488596, -0.... | [-0.048078875181818005, 0.015896207727273, -0.... | 
| 8 | 2023-04-08 17:43:14+00:00 | 34be2f38-fe5b-4c18-863d-178b7ad6ff4e | 08b15987-d39c-4719-adbb-72e29059c81f | 0.955495 | [-0.059002682755263006, 0.033196180236842, -0.... | [-0.057579961551508005, 0.033872317339907004, ... | [-0.075094953, 0.036157493033333, -0.055885230... | 
| 9 | 2023-04-16 14:53:28+00:00 | 2b068f1d-d99b-4c2f-a737-46f619a76cc8 | 512a824c-641a-4e9e-a259-6fbe700e805f | 0.861831 | [-0.058782766133333005, 0.0196883025, -0.00864... | [-0.051915258536648, 0.02764649977358, -0.0113... | [-0.06907756024999999, 0.01242989875, -0.01187... | 
In [ ]:
                Copied!