Skip to content

Commit

Permalink
[FSTORE-1001] [APPEND] Clean up recsys tutorial and fix deployment (#213)
Browse files Browse the repository at this point in the history
  • Loading branch information
davitbzh authored Nov 16, 2023
1 parent 8ddaad7 commit 3465ce2
Show file tree
Hide file tree
Showing 11 changed files with 292 additions and 473 deletions.
74 changes: 59 additions & 15 deletions advanced_tutorials/recommender-system/1_feature_engineering.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/articles.py')\n",
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/customers.py')\n",
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/transactions.py')\n",
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/ranking.py') \n",
" print('✅ Done!')\n",
"else:\n",
" print(\"Local environment\")"
Expand All @@ -68,16 +69,19 @@
" from features.articles import prepare_articles\n",
" from features.customers import prepare_customers\n",
" from features.transactions import prepare_transactions\n",
" from features.ranking import compute_ranking_dataset\n",
"except ImportError:\n",
" print(\"⚙️ Downloading modules...\")\n",
" os.system('mkdir -p features')\n",
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/articles.py')\n",
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/customers.py')\n",
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/transactions.py')\n",
" os.system('cd features && wget https://raw.githubusercontent.com/logicalclocks/hopsworks-tutorials/master/advanced_tutorials/recommender-system/features/ranking.py') \n",
" print('✅ Done!')\n",
" from features.articles import prepare_articles\n",
" from features.customers import prepare_customers\n",
" from features.transactions import prepare_transactions"
" from features.transactions import prepare_transactions\n",
" from features.ranking import compute_ranking_dataset "
]
},
{
Expand Down Expand Up @@ -215,7 +219,7 @@
"metadata": {},
"outputs": [],
"source": [
"trans_df = pd.read_parquet('https://repo.hops.works/dev/jdowling/transactions_train.parquet')[:15788324]\n",
"trans_df = pd.read_parquet('https://repo.hops.works/dev/jdowling/transactions_train.parquet')[:600000]\n",
"print(trans_df.shape)\n",
"trans_df.head(3)"
]
Expand Down Expand Up @@ -379,7 +383,32 @@
" online_enabled=True,\n",
" event_time=\"t_dat\",\n",
")\n",
"trans_fg.insert(trans_df)"
"trans_fg.insert(trans_df, write_options={\"wait_for_job\": True})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ranking_df = compute_ranking_dataset(trans_fg, articles_fg, customers_fg)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rank_fg = fs.get_or_create_feature_group(\n",
" name=\"ranking\",\n",
" version=1,\n",
" description=\"Derived feature group for ranking\",\n",
" primary_key=[\"customer_id\", \"article_id\"], \n",
" parents=[articles_fg, customers_fg, trans_fg],\n",
")\n",
"rank_fg.insert(ranking_df)"
]
},
{
Expand Down Expand Up @@ -419,7 +448,11 @@
"outputs": [],
"source": [
"customers_query = customers_fg.select_all()\n",
"customers_query"
"fs.get_or_create_feature_view( \n",
" name='customers',\n",
" query=customers_query,\n",
" version=1,\n",
")"
]
},
{
Expand All @@ -428,9 +461,10 @@
"metadata": {},
"outputs": [],
"source": [
"customers_feature_view = fs.get_or_create_feature_view( \n",
" name='customers',\n",
" query=customers_query,\n",
"articles_query = articles_fg.select_all()\n",
"fs.get_or_create_feature_view(\n",
" name='articles',\n",
" query=articles_query,\n",
" version=1,\n",
")"
]
Expand All @@ -441,8 +475,9 @@
"metadata": {},
"outputs": [],
"source": [
"articles_query = articles_fg.select_all()\n",
"articles_query"
"rank_fg = fs.get_or_create_feature_group(\n",
" name=\"ranking\",\n",
" version=1)"
]
},
{
Expand All @@ -451,9 +486,11 @@
"metadata": {},
"outputs": [],
"source": [
"articles_feature_view = fs.get_or_create_feature_view(\n",
" name='articles',\n",
" query=articles_query,\n",
"ranking_query = rank_fg.select_except([\"customer_id\", \"article_id\"])\n",
"fs.get_or_create_feature_view(\n",
" name='ranking',\n",
" query=ranking_query,\n",
" labels = [\"label\"],\n",
" version=1,\n",
")"
]
Expand All @@ -466,14 +503,21 @@
"## <span style=\"color:#ff5f27\">⏩️ Next Steps </span>\n",
"In the next notebook you'll train a retrieval model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
},
"kernelspec": {
"display_name": "Python",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -487,9 +531,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -690,9 +690,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
}
13 changes: 10 additions & 3 deletions advanced_tutorials/recommender-system/3_build_index.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -343,14 +343,21 @@
"\n",
"At this point we have a recommender system that is able to generate a set of candidate items for a customer. However, many of these could be poor, as the candidate model was trained with only a few subset of the features. In the next notebook, we'll create a ranking dataset to train a *ranking model* to do more fine-grained predictions."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
},
"kernelspec": {
"display_name": "Python",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -364,9 +371,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,8 @@
"metadata": {},
"outputs": [],
"source": [
"ranking_train_fv = fs.get_feature_view(\n",
" name='ranking_train',\n",
" version=1,\n",
")\n",
"ranking_val_fv = fs.get_feature_view(\n",
" name='ranking_val',\n",
"feature_view_ranking = fs.get_feature_view(\n",
" name='ranking',\n",
" version=1,\n",
")"
]
Expand All @@ -77,13 +73,12 @@
"metadata": {},
"outputs": [],
"source": [
"X_train = ranking_train_fv.get_batch_data()\n",
"X_val = ranking_val_fv.get_batch_data()\n",
"\n",
"y_train = X_train.pop(\"label\")\n",
"y_val = X_val.pop(\"label\")\n",
"X_train, X_val, y_train, y_val = feature_view_ranking.train_test_split(\n",
" test_size=0.1,\n",
" description='Ranking training dataset',\n",
")\n",
"\n",
"X_train.head(3)"
"#X_train, X_val, y_train, y_val = feature_view_ranking.get_train_test_split(1)"
]
},
{
Expand Down Expand Up @@ -260,7 +255,7 @@
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
},
"kernelspec": {
"display_name": "Python",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -274,9 +269,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
}
Loading

0 comments on commit 3465ce2

Please sign in to comment.