9. Create Window Aggregate Features
Create window aggregate features¶
The next feature type we will consider is the window aggregate feature. These are features generated by aggregating data within a specific time frame.
In [1]:
Copied!
import featurebyte as fb
# Set your profile to the tutorial environment
fb.use_profile("tutorial")
# Activate the tutorial catalog; the returned handle is used below to fetch views and observation tables.
catalog_name = "Grocery Dataset SDK Tutorial"
catalog = fb.Catalog.activate(catalog_name)
import featurebyte as fb
# Set your profile to the tutorial environment
fb.use_profile("tutorial")
# Activate the tutorial catalog; the returned handle is used below to fetch views and observation tables.
catalog_name = "Grocery Dataset SDK Tutorial"
catalog = fb.Catalog.activate(catalog_name)
11:12:41 | INFO | SDK version: 3.2.0.dev66 INFO :featurebyte:SDK version: 3.2.0.dev66 11:12:41 | INFO | No catalog activated. INFO :featurebyte:No catalog activated. 11:12:41 | INFO | Using profile: staging INFO :featurebyte:Using profile: staging 11:12:41 | INFO | Using configuration file at: /Users/gxav/.featurebyte/config.yaml INFO :featurebyte:Using configuration file at: /Users/gxav/.featurebyte/config.yaml 11:12:41 | INFO | Active profile: staging (https://staging.featurebyte.com/api/v1) INFO :featurebyte:Active profile: staging (https://staging.featurebyte.com/api/v1) 11:12:41 | INFO | SDK version: 3.2.0.dev66 INFO :featurebyte:SDK version: 3.2.0.dev66 11:12:41 | INFO | No catalog activated. INFO :featurebyte:No catalog activated. 11:12:41 | INFO | Catalog activated: Grocery Dataset SDK Tutorial INFO :featurebyte.api.catalog:Catalog activated: Grocery Dataset SDK Tutorial 16:07:43 | INFO | Using configuration file at: /Users/gxav/.featurebyte/config.yaml 16:07:43 | INFO | Active profile: tutorial (https://tutorials.featurebyte.com/api/v1) 16:07:43 | WARNING | Remote SDK version (1.1.0.dev7) is different from local (1.1.0.dev1). Update local SDK to avoid unexpected behavior. 16:07:43 | INFO | No catalog activated. 16:07:43 | INFO | Catalog activated: Grocery Dataset Tutorial
In [2]:
Copied!
# Set desired windows: each windowed aggregation below produces one feature per entry (suffixes _14d and _28d).
windows = ['14d', '28d']
# Set desired windows: each windowed aggregation below produces one feature per entry (suffixes _14d and _28d).
windows = ['14d', '28d']
Do window aggregation from INVOICEITEMS¶
Let's start with some aggregations from the items view and create features for the interaction between Customer and Product Group.
In [3]:
Copied!
# Load the GROCERYPRODUCT dimension view (presumably the source of the ProductGroup
# column used by the later group-by — confirm against the table schema).
groceryproduct_view = catalog.get_view("GROCERYPRODUCT")
# Load the INVOICEITEMS item view and join GROCERYPRODUCT onto it in one chained expression.
invoiceitems_view = catalog.get_view("INVOICEITEMS").join(
    groceryproduct_view, rsuffix=""
)
# Load the GROCERYPRODUCT dimension view (presumably the source of the ProductGroup
# column used by the later group-by — confirm against the table schema).
groceryproduct_view = catalog.get_view("GROCERYPRODUCT")
# Load the INVOICEITEMS item view and join GROCERYPRODUCT onto it in one chained expression.
invoiceitems_view = catalog.get_view("INVOICEITEMS").join(
    groceryproduct_view, rsuffix=""
)
In [4]:
Copied!
# Group the INVOICEITEMS view (already joined with GROCERYPRODUCT) by customer entity (GroceryCustomerGuid) and productgroup entity (ProductGroup).
invoiceitems_view_by_customer_x_productgroup = invoiceitems_view.groupby(
['GroceryCustomerGuid', 'ProductGroup']
)
# Group the INVOICEITEMS view (already joined with GROCERYPRODUCT) by customer entity (GroceryCustomerGuid) and productgroup entity (ProductGroup).
invoiceitems_view_by_customer_x_productgroup = invoiceitems_view.groupby(
['GroceryCustomerGuid', 'ProductGroup']
)
In [5]:
Copied!
# Sum of TotalCost per customer x productgroup, one feature per entry in `windows`.
customer_productgroup_sum_of_totalcost_14d_28d = (
    invoiceitems_view_by_customer_x_productgroup.aggregate_over(
        value_column="TotalCost",
        method="sum",
        windows=windows,
        # Feature names are the common prefix followed by "_<window>".
        feature_names=[
            "_".join(("CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost", window))
            for window in windows
        ],
    )
)
# Sum of TotalCost per customer x productgroup, one feature per entry in `windows`.
customer_productgroup_sum_of_totalcost_14d_28d = (
    invoiceitems_view_by_customer_x_productgroup.aggregate_over(
        value_column="TotalCost",
        method="sum",
        windows=windows,
        # Feature names are the common prefix followed by "_<window>".
        feature_names=[
            "_".join(("CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost", window))
            for window in windows
        ],
    )
)
In [6]:
Copied!
# Latest interaction timestamp between Customer and ProductGroup.
# The window list holds a single None entry, and the one resulting feature is
# picked out of the returned group by its name.
customer_x_productgroup_latest_timestamp = (
    invoiceitems_view_by_customer_x_productgroup.aggregate_over(
        value_column="Timestamp",
        method="latest",
        windows=[None],
        feature_names=["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"],
    )["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"]
)
# Latest interaction timestamp between Customer and ProductGroup.
# The window list holds a single None entry, and the one resulting feature is
# picked out of the returned group by its name.
customer_x_productgroup_latest_timestamp = (
    invoiceitems_view_by_customer_x_productgroup.aggregate_over(
        value_column="Timestamp",
        method="latest",
        windows=[None],
        feature_names=["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"],
    )["CUSTOMER_x_PRODUCTGROUP_Latest_Timestamp"]
)
In [7]:
Copied!
# Create recency feature: Time Since Latest Interaction between Customer and Product Group
# The request's point in time minus the latest interaction timestamp, read through `.dt.hour`.
# NOTE(review): the preview output shows fractional values (e.g. 261.460556), so this looks like
# elapsed time expressed in hours rather than an hour-of-day component — confirm against the SDK docs.
customer_x_productgroup_time_since_latest_timestamp = (
fb.RequestColumn.point_in_time()
- customer_x_productgroup_latest_timestamp
).dt.hour
# The derived feature is given an explicit name; the preview output uses it as the column header.
customer_x_productgroup_time_since_latest_timestamp.name = \
"CUSTOMER_x_PRODUCTGROUP_Time_Since_Latest_Timestamp"
# Create recency feature: Time Since Latest Interaction between Customer and Product Group
# The request's point in time minus the latest interaction timestamp, read through `.dt.hour`.
# NOTE(review): the preview output shows fractional values (e.g. 261.460556), so this looks like
# elapsed time expressed in hours rather than an hour-of-day component — confirm against the SDK docs.
customer_x_productgroup_time_since_latest_timestamp = (
fb.RequestColumn.point_in_time()
- customer_x_productgroup_latest_timestamp
).dt.hour
# The derived feature is given an explicit name; the preview output uses it as the column header.
customer_x_productgroup_time_since_latest_timestamp.name = \
"CUSTOMER_x_PRODUCTGROUP_Time_Since_Latest_Timestamp"
Do window aggregation from GROCERYINVOICE¶
Now, let's do some aggregations on the invoices view for the Customer entity.
In [8]:
Copied!
# Get view from GROCERYINVOICE event table (input for the customer-level aggregations that follow).
groceryinvoice_view = catalog.get_view("GROCERYINVOICE")
# Get view from GROCERYINVOICE event table (input for the customer-level aggregations that follow).
groceryinvoice_view = catalog.get_view("GROCERYINVOICE")
In [9]:
Copied!
# Group GROCERYINVOICE view by customer entity (GroceryCustomerGuid); this group-by object is reused by the aggregations below.
groceryinvoice_view_by_customer = groceryinvoice_view.groupby(['GroceryCustomerGuid'])
# Group GROCERYINVOICE view by customer entity (GroceryCustomerGuid); this group-by object is reused by the aggregations below.
groceryinvoice_view_by_customer = groceryinvoice_view.groupby(['GroceryCustomerGuid'])
In [10]:
Copied!
# Latest invoice Amount for the customer.
# The window list holds a single None entry, and the one resulting feature is
# picked out of the returned group by its name.
customer_latest_invoice_amount = (
    groceryinvoice_view_by_customer.aggregate_over(
        value_column="Amount",
        method="latest",
        windows=[None],
        feature_names=["CUSTOMER_Latest_invoice_Amount"],
    )["CUSTOMER_Latest_invoice_Amount"]
)
# Latest invoice Amount for the customer.
# The window list holds a single None entry, and the one resulting feature is
# picked out of the returned group by its name.
customer_latest_invoice_amount = (
    groceryinvoice_view_by_customer.aggregate_over(
        value_column="Amount",
        method="latest",
        windows=[None],
        feature_names=["CUSTOMER_Latest_invoice_Amount"],
    )["CUSTOMER_Latest_invoice_Amount"]
)
In [11]:
Copied!
# Count of invoices for the customer, one feature per entry in `windows`.
# No value column is supplied for the "count" method.
customer_count_of_invoice_14d_28d = (
    groceryinvoice_view_by_customer.aggregate_over(
        value_column=None,
        method="count",
        windows=windows,
        feature_names=[
            "_".join(("CUSTOMER_Count_of_invoice", window))
            for window in windows
        ],
    )
)
# Count of invoices for the customer, one feature per entry in `windows`.
# No value column is supplied for the "count" method.
customer_count_of_invoice_14d_28d = (
    groceryinvoice_view_by_customer.aggregate_over(
        value_column=None,
        method="count",
        windows=windows,
        feature_names=[
            "_".join(("CUSTOMER_Count_of_invoice", window))
            for window in windows
        ],
    )
)
In [12]:
Copied!
# Average invoice Amount for the customer, one feature per entry in `windows`.
customer_avg_of_invoice_amount_14d_28d = (
    groceryinvoice_view_by_customer.aggregate_over(
        value_column="Amount",
        method="avg",
        windows=windows,
        feature_names=[
            "_".join(("CUSTOMER_Avg_of_invoice_Amount", window))
            for window in windows
        ],
    )
)
# Average invoice Amount for the customer, one feature per entry in `windows`.
customer_avg_of_invoice_amount_14d_28d = (
    groceryinvoice_view_by_customer.aggregate_over(
        value_column="Amount",
        method="avg",
        windows=windows,
        feature_names=[
            "_".join(("CUSTOMER_Avg_of_invoice_Amount", window))
            for window in windows
        ],
    )
)
In [13]:
Copied!
# Standard deviation of invoice Amount for the customer, one feature per entry in `windows`.
customer_std_of_invoice_amount_14d_28d = (
    groceryinvoice_view_by_customer.aggregate_over(
        value_column="Amount",
        method="std",
        windows=windows,
        feature_names=[
            "_".join(("CUSTOMER_Std_of_invoice_Amount", window))
            for window in windows
        ],
    )
)
# Standard deviation of invoice Amount for the customer, one feature per entry in `windows`.
customer_std_of_invoice_amount_14d_28d = (
    groceryinvoice_view_by_customer.aggregate_over(
        value_column="Amount",
        method="std",
        windows=windows,
        feature_names=[
            "_".join(("CUSTOMER_Std_of_invoice_Amount", window))
            for window in windows
        ],
    )
)
Preview a feature group¶
For convenience, we can create a feature group to preview/save all features we just created.
In [14]:
Copied!
# Bundle the features created above into one group so they can be previewed and saved together.
# The multi-window aggregation results are passed as-is; the preview output lists one column per window.
feature_group = fb.FeatureGroup([
customer_x_productgroup_time_since_latest_timestamp,
customer_productgroup_sum_of_totalcost_14d_28d,
customer_latest_invoice_amount,
customer_count_of_invoice_14d_28d,
customer_avg_of_invoice_amount_14d_28d,
customer_std_of_invoice_amount_14d_28d,
])
# Bundle the features created above into one group so they can be previewed and saved together.
# The multi-window aggregation results are passed as-is; the preview output lists one column per window.
feature_group = fb.FeatureGroup([
customer_x_productgroup_time_since_latest_timestamp,
customer_productgroup_sum_of_totalcost_14d_28d,
customer_latest_invoice_amount,
customer_count_of_invoice_14d_28d,
customer_avg_of_invoice_amount_14d_28d,
customer_std_of_invoice_amount_14d_28d,
])
In [15]:
Copied!
# Check the primary entity of the feature group. It should be the interaction Customer x ProductGroup,
# i.e. both the customer and the productgroup entities are listed.
feature_group.primary_entity
# Check the primary entity of the feature group. It should be the interaction Customer x ProductGroup,
# i.e. both the customer and the productgroup entities are listed.
feature_group.primary_entity
Out[15]:
[<featurebyte.api.entity.Entity at 0x107eb5440> { 'name': 'customer', 'created_at': '2025-10-15T03:06:58.482000', 'updated_at': '2025-10-15T03:07:01.153000', 'description': None, 'serving_names': [ 'GROCERYCUSTOMERGUID' ], 'catalog_name': 'Grocery Dataset SDK Tutorial' }, <featurebyte.api.entity.Entity at 0x31ac27880> { 'name': 'productgroup', 'created_at': '2025-10-15T03:06:59.192000', 'updated_at': '2025-10-15T03:07:02.278000', 'description': None, 'serving_names': [ 'PRODUCTGROUP' ], 'catalog_name': 'Grocery Dataset SDK Tutorial' }]
In [16]:
Copied!
# Get observation table: 'Preview Table with 10 items' (passed to the preview call below).
preview_table = catalog.get_observation_table("Preview Table with 10 items")
# Get observation table: 'Preview Table with 10 items' (passed to the preview call below).
preview_table = catalog.get_observation_table("Preview Table with 10 items")
In [17]:
Copied!
# Preview feature_group: show the value of every feature in the group for the rows of preview_table.
feature_group.preview(preview_table)
# Preview feature_group: show the value of every feature in the group for the rows of preview_table.
feature_group.preview(preview_table)
Out[17]:
POINT_IN_TIME | GROCERYINVOICEITEMGUID | CUSTOMER_x_PRODUCTGROUP_Time_Since_Latest_Timestamp | CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_14d | CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d | CUSTOMER_Latest_invoice_Amount | CUSTOMER_Count_of_invoice_14d | CUSTOMER_Count_of_invoice_28d | CUSTOMER_Avg_of_invoice_Amount_14d | CUSTOMER_Avg_of_invoice_Amount_28d | CUSTOMER_Std_of_invoice_Amount_14d | CUSTOMER_Std_of_invoice_Amount_28d | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2023-04-07 13:41:35 | f017e72e-1645-4fbe-988e-a9c09920c506 | 261.460556 | 4.57 | 9.74 | 2.29 | 6 | 12 | 14.563333 | 11.569167 | 9.129048 | 7.539124 |
1 | 2023-01-08 15:40:02 | 879de04d-36ef-49a3-b1f7-a96495100dbe | 2974.097500 | NaN | NaN | 1.47 | 4 | 5 | 7.605000 | 7.040000 | 5.901197 | 5.397796 |
2 | 2022-12-01 13:28:57 | 600a7549-9f3e-42f9-8408-e3996d6c4750 | 385.071944 | NaN | 4.45 | 8.00 | 1 | 3 | 8.000000 | 24.470000 | 0.000000 | 13.905124 |
3 | 2022-07-14 18:12:49 | a3245668-aeba-4259-87e1-1b99a1e7391c | 1400.391944 | NaN | NaN | 3.28 | 1 | 1 | 3.280000 | 3.280000 | 0.000000 | 0.000000 |
4 | 2022-12-12 10:27:30 | 9526bbd7-3b85-4bd5-99d7-2eecda85ada2 | 73.632222 | 5.99 | 22.64 | 29.00 | 2 | 4 | 19.200000 | 35.765000 | 9.800000 | 19.740801 |
5 | 2022-10-03 12:02:23 | f25a7864-4e0d-43bb-8191-0ffdf56b5a21 | 2017.952500 | NaN | NaN | 16.21 | 3 | 5 | 15.686667 | 20.564000 | 10.151717 | 10.307305 |
6 | 2022-09-12 16:29:49 | f7de9fec-9e01-4478-8b2e-5a17427a53c1 | 217.573333 | 6.79 | 9.78 | 63.04 | 7 | 15 | 20.120000 | 23.150667 | 22.723340 | 20.312497 |
7 | 2023-03-18 09:43:40 | 7ec0da36-b85f-47cf-873e-461b5c2b1cbf | 1458.053889 | NaN | NaN | 26.01 | 8 | 10 | 21.716250 | 18.890000 | 18.080614 | 17.198734 |
8 | 2022-10-30 09:08:11 | 5ee681b9-583c-4968-88bb-993704a6c54e | 113.858889 | 7.47 | 7.47 | 51.02 | 2 | 2 | 87.385000 | 87.385000 | 36.365000 | 36.365000 |
9 | 2023-05-14 15:41:46 | e904b0bc-b342-4491-a7d9-216489b6ac01 | 768.540278 | NaN | NaN | 67.87 | 1 | 2 | 67.870000 | 57.170000 | 0.000000 | 10.700000 |
Save features into catalog¶
With a feature group, we can save all of its features in one call.
In [18]:
Copied!
# Save every feature in the group to the catalog in one call.
feature_group.save()
# Save every feature in the group to the catalog in one call.
feature_group.save()
Done! |████████████████████████████████████████| 100% in 9.1s (0.11%/s) Done! |████████████████████████████████████████| 100% in 6.1s (0.17%/s) Loading Feature(s) |████████████████████████████████████████| 10/10 [100%] in 0. Done! |████████████████████████████████████████| 100% in 9.1s (0.11%/s) Done! |████████████████████████████████████████| 100% in 6.1s (0.17%/s) Loading Feature(s) |████████████████████████████████████████| 10/10 [100%] in 0.
Add description¶
In [19]:
Copied!
# Add description to the recency feature (time since the latest customer x productgroup interaction).
customer_x_productgroup_time_since_latest_timestamp.update_description(
"Time Since Latest interaction between the customer and the product group"
)
# Add description to the recency feature (time since the latest customer x productgroup interaction).
customer_x_productgroup_time_since_latest_timestamp.update_description(
"Time Since Latest interaction between the customer and the product group"
)
In [20]:
Copied!
# Describe each windowed total-spend feature, looked up in the aggregation result by name.
customer_productgroup_sum_of_totalcost_14d = (
    customer_productgroup_sum_of_totalcost_14d_28d[
        "CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_14d"
    ]
)
customer_productgroup_sum_of_totalcost_14d.update_description(
    "Total spent by the customer on the product group over a 14d period."
)
customer_productgroup_sum_of_totalcost_28d = (
    customer_productgroup_sum_of_totalcost_14d_28d[
        "CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d"
    ]
)
customer_productgroup_sum_of_totalcost_28d.update_description(
    "Total spent by the customer on the product group over a 28d period."
)
# Describe each windowed total-spend feature, looked up in the aggregation result by name.
customer_productgroup_sum_of_totalcost_14d = (
    customer_productgroup_sum_of_totalcost_14d_28d[
        "CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_14d"
    ]
)
customer_productgroup_sum_of_totalcost_14d.update_description(
    "Total spent by the customer on the product group over a 14d period."
)
customer_productgroup_sum_of_totalcost_28d = (
    customer_productgroup_sum_of_totalcost_14d_28d[
        "CUSTOMER_x_PRODUCTGROUP_Sum_of_item_TotalCost_28d"
    ]
)
customer_productgroup_sum_of_totalcost_28d.update_description(
    "Total spent by the customer on the product group over a 28d period."
)
In [21]:
Copied!
# Add description to the latest-invoice-amount feature.
customer_latest_invoice_amount.update_description("Latest invoice Amount for the customer")
# Add description to the latest-invoice-amount feature.
customer_latest_invoice_amount.update_description("Latest invoice Amount for the customer")
In [22]:
Copied!
# Add description to each windowed invoice-count feature, looked up in the aggregation result by name.
customer_count_of_invoice_14d = customer_count_of_invoice_14d_28d["CUSTOMER_Count_of_invoice_14d"]
customer_count_of_invoice_14d.update_description(
"Count of invoice for the customer over a 14d period."
)
customer_count_of_invoice_28d = customer_count_of_invoice_14d_28d["CUSTOMER_Count_of_invoice_28d"]
customer_count_of_invoice_28d.update_description(
"Count of invoice for the customer over a 28d period."
)
# Add description to each windowed invoice-count feature, looked up in the aggregation result by name.
customer_count_of_invoice_14d = customer_count_of_invoice_14d_28d["CUSTOMER_Count_of_invoice_14d"]
customer_count_of_invoice_14d.update_description(
"Count of invoice for the customer over a 14d period."
)
customer_count_of_invoice_28d = customer_count_of_invoice_14d_28d["CUSTOMER_Count_of_invoice_28d"]
customer_count_of_invoice_28d.update_description(
"Count of invoice for the customer over a 28d period."
)
In [23]:
Copied!
# Add description to each windowed average-amount feature, looked up in the aggregation result by name.
customer_avg_of_invoice_amount_14d = customer_avg_of_invoice_amount_14d_28d["CUSTOMER_Avg_of_invoice_Amount_14d"]
customer_avg_of_invoice_amount_14d.update_description(
"Avg of invoice Amount for the customer over a 14d period."
)
customer_avg_of_invoice_amount_28d = customer_avg_of_invoice_amount_14d_28d["CUSTOMER_Avg_of_invoice_Amount_28d"]
customer_avg_of_invoice_amount_28d.update_description(
"Avg of invoice Amount for the customer over a 28d period."
)
# Add description to each windowed average-amount feature, looked up in the aggregation result by name.
customer_avg_of_invoice_amount_14d = customer_avg_of_invoice_amount_14d_28d["CUSTOMER_Avg_of_invoice_Amount_14d"]
customer_avg_of_invoice_amount_14d.update_description(
"Avg of invoice Amount for the customer over a 14d period."
)
customer_avg_of_invoice_amount_28d = customer_avg_of_invoice_amount_14d_28d["CUSTOMER_Avg_of_invoice_Amount_28d"]
customer_avg_of_invoice_amount_28d.update_description(
"Avg of invoice Amount for the customer over a 28d period."
)
In [24]:
Copied!
# Add description to each windowed standard-deviation feature, looked up in the aggregation result by name.
customer_std_of_invoice_amount_14d = customer_std_of_invoice_amount_14d_28d["CUSTOMER_Std_of_invoice_Amount_14d"]
customer_std_of_invoice_amount_14d.update_description(
"Std of invoice Amount for the customer over a 14d period."
)
customer_std_of_invoice_amount_28d = customer_std_of_invoice_amount_14d_28d["CUSTOMER_Std_of_invoice_Amount_28d"]
customer_std_of_invoice_amount_28d.update_description(
"Std of invoice Amount for the customer over a 28d period."
)
# Add description to each windowed standard-deviation feature, looked up in the aggregation result by name.
customer_std_of_invoice_amount_14d = customer_std_of_invoice_amount_14d_28d["CUSTOMER_Std_of_invoice_Amount_14d"]
customer_std_of_invoice_amount_14d.update_description(
"Std of invoice Amount for the customer over a 14d period."
)
customer_std_of_invoice_amount_28d = customer_std_of_invoice_amount_14d_28d["CUSTOMER_Std_of_invoice_Amount_28d"]
customer_std_of_invoice_amount_28d.update_description(
"Std of invoice Amount for the customer over a 28d period."
)
In [ ]:
Copied!