From 02555dc7cdf0f06bb4790dc57d5219dec5ec82a0 Mon Sep 17 00:00:00 2001 From: Jules Damji Date: Mon, 26 Jul 2021 13:51:42 -0700 Subject: [PATCH 1/8] Added a Google Golab version of the tutorial Signed-off-by: Jules Damji --- Driver_Ranking_Tutorial.ipynb | 766 ++++++++++++++++++++++++++++++++++ 1 file changed, 766 insertions(+) create mode 100644 Driver_Ranking_Tutorial.ipynb diff --git a/Driver_Ranking_Tutorial.ipynb b/Driver_Ranking_Tutorial.ipynb new file mode 100644 index 0000000..38026f5 --- /dev/null +++ b/Driver_Ranking_Tutorial.ipynb @@ -0,0 +1,766 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Driver Ranking Tutorial", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "A7ffktm_Ty80" + }, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qa-41097T0vH" + }, + "source": [ + "### Overview\n", + "Making a prediction using a linear regression model is a common use case in ML. 
In this guide tutorial, we build the model that predicts if a driver will complete a trip based on a number of features ingested into Feast.\n", + "\n", + "The basic local mode gives you ability to quickly try Feast, while the advanced mode shows how you can use Feast in a production setting, in particular for the Google Cloud Platform (GCP) cloud.\n", + "\n", + "This tutorial uses Feast with scikit learn to:\n", + "\n", + "* Train a model locally using data from BigQuery\n", + "* Test the model for online inference using SQLite (for fast iteration)\n", + "* Test the model for online inference using Firestore (to represent production)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j1Qipu_GUYdA" + }, + "source": [ + "## Step 1: Install feast, scikit-learn\n", + "\n", + "Install feast, gcp dependencies and scikit-learn\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "gxuVxKG3Ua6z", + "outputId": "09ba5826-ef68-400c-a7a5-6a2ac5b9ebb5" + }, + "source": [ + "!pip install feast scikit-learn 'feast[gcp]'" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Collecting feast\n", + " Downloading feast-0.11.0-py3-none-any.whl (190 kB)\n", + "\u001b[?25l\r\u001b[K |█▊ | 10 kB 27.9 MB/s eta 0:00:01\r\u001b[K |███▌ | 20 kB 28.6 MB/s eta 0:00:01\r\u001b[K |█████▏ | 30 kB 12.4 MB/s eta 0:00:01\r\u001b[K |███████ | 40 kB 9.2 MB/s eta 0:00:01\r\u001b[K |████████▋ | 51 kB 4.2 MB/s eta 0:00:01\r\u001b[K |██████████▍ | 61 kB 4.5 MB/s eta 0:00:01\r\u001b[K |████████████ | 71 kB 4.6 MB/s eta 0:00:01\r\u001b[K |█████████████▉ | 81 kB 4.7 MB/s eta 0:00:01\r\u001b[K |███████████████▌ | 92 kB 4.9 MB/s eta 0:00:01\r\u001b[K |█████████████████▎ | 102 kB 5.0 MB/s eta 0:00:01\r\u001b[K |███████████████████ | 112 kB 5.0 MB/s eta 0:00:01\r\u001b[K |████████████████████▊ | 122 kB 5.0 MB/s eta 0:00:01\r\u001b[K |██████████████████████▍ | 133 kB 
5.0 MB/s eta 0:00:01\r\u001b[K |████████████████████████▏ | 143 kB 5.0 MB/s eta 0:00:01\r\u001b[K |█████████████████████████▉ | 153 kB 5.0 MB/s eta 0:00:01\r\u001b[K |███████████████████████████▋ | 163 kB 5.0 MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▎ | 174 kB 5.0 MB/s eta 0:00:01\r\u001b[K |███████████████████████████████ | 184 kB 5.0 MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 190 kB 5.0 MB/s \n", + "\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.7/dist-packages (0.22.2.post1)\n", + "Requirement already satisfied: pandas>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from feast) (1.1.5)\n", + "Requirement already satisfied: Click==7.* in /usr/local/lib/python3.7/dist-packages (from feast) (7.1.2)\n", + "Requirement already satisfied: protobuf>=3.10 in /usr/local/lib/python3.7/dist-packages (from feast) (3.17.3)\n", + "Requirement already satisfied: tabulate==0.8.* in /usr/local/lib/python3.7/dist-packages (from feast) (0.8.9)\n", + "Collecting fastavro>=1.1.0\n", + " Downloading fastavro-1.4.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n", + "\u001b[K |████████████████████████████████| 2.3 MB 23.0 MB/s \n", + "\u001b[?25hCollecting tenacity>=7.*\n", + " Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)\n", + "Collecting pydantic>=1.0.0\n", + " Downloading pydantic-1.8.2-cp37-cp37m-manylinux2014_x86_64.whl (10.1 MB)\n", + "\u001b[K |████████████████████████████████| 10.1 MB 26.1 MB/s \n", + "\u001b[?25hCollecting mmh3\n", + " Downloading mmh3-3.0.0-cp37-cp37m-manylinux2010_x86_64.whl (50 kB)\n", + "\u001b[K |████████████████████████████████| 50 kB 6.5 MB/s \n", + "\u001b[?25hRequirement already satisfied: toml==0.10.* in /usr/local/lib/python3.7/dist-packages (from feast) (0.10.2)\n", + "Collecting colorama>=0.3.9\n", + " Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)\n", + "Collecting PyYAML==5.3.*\n", + " Downloading PyYAML-5.3.1.tar.gz (269 kB)\n", + 
"\u001b[K |████████████████████████████████| 269 kB 52.1 MB/s \n", + "\u001b[?25hRequirement already satisfied: google-api-core>=1.23.0 in /usr/local/lib/python3.7/dist-packages (from feast) (1.26.3)\n", + "Requirement already satisfied: Jinja2>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from feast) (2.11.3)\n", + "Requirement already satisfied: grpcio>=1.34.0 in /usr/local/lib/python3.7/dist-packages (from feast) (1.34.1)\n", + "Collecting googleapis-common-protos==1.52.*\n", + " Downloading googleapis_common_protos-1.52.0-py2.py3-none-any.whl (100 kB)\n", + "\u001b[K |████████████████████████████████| 100 kB 7.6 MB/s \n", + "\u001b[?25hRequirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from feast) (2.6.0)\n", + "Requirement already satisfied: tqdm==4.* in /usr/local/lib/python3.7/dist-packages (from feast) (4.41.1)\n", + "Collecting pandavro==1.5.*\n", + " Downloading pandavro-1.5.2.tar.gz (3.8 kB)\n", + "Requirement already satisfied: pyarrow>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from feast) (3.0.0)\n", + "Requirement already satisfied: numpy>=1.7.0 in /usr/local/lib/python3.7/dist-packages (from pandavro==1.5.*->feast) (1.19.5)\n", + "Requirement already satisfied: six>=1.9 in /usr/local/lib/python3.7/dist-packages (from pandavro==1.5.*->feast) (1.15.0)\n", + "Requirement already satisfied: setuptools>=40.3.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (57.2.0)\n", + "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (21.0)\n", + "Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (2018.9)\n", + "Requirement already satisfied: requests<3.0.0dev,>=2.18.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (2.23.0)\n", + "Requirement already satisfied: google-auth<2.0dev,>=1.21.1 in 
/usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (1.32.1)\n", + "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (4.2.2)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (0.2.8)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (4.7.2)\n", + "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2>=2.0.0->feast) (2.0.1)\n", + "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=14.3->google-api-core>=1.23.0->feast) (2.4.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.0.0->feast) (2.8.1)\n", + "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (0.4.8)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from pydantic>=1.0.0->feast) (3.7.4.3)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (3.0.4)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (1.24.3)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in 
/usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (2021.5.30)\n", + "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn) (1.0.1)\n", + "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn) (1.4.1)\n", + "Collecting google-cloud-core==1.4.*\n", + " Downloading google_cloud_core-1.4.4-py2.py3-none-any.whl (27 kB)\n", + "Collecting google-cloud-datastore>=2.1.*\n", + " Downloading google_cloud_datastore-2.1.5-py2.py3-none-any.whl (127 kB)\n", + "\u001b[K |████████████████████████████████| 127 kB 58.5 MB/s \n", + "\u001b[?25hCollecting google-cloud-storage>=1.20.*\n", + " Downloading google_cloud_storage-1.41.1-py2.py3-none-any.whl (105 kB)\n", + "\u001b[K |████████████████████████████████| 105 kB 54.2 MB/s \n", + "\u001b[?25hCollecting google-cloud-bigquery-storage>=2.0.0\n", + " Downloading google_cloud_bigquery_storage-2.6.1-py2.py3-none-any.whl (125 kB)\n", + "\u001b[K |████████████████████████████████| 125 kB 60.5 MB/s \n", + "\u001b[?25hCollecting google-cloud-bigquery>=2.0.*\n", + " Downloading google_cloud_bigquery-2.22.1-py2.py3-none-any.whl (195 kB)\n", + "\u001b[K |████████████████████████████████| 195 kB 57.9 MB/s \n", + "\u001b[?25hCollecting grpcio>=1.34.0\n", + " Downloading grpcio-1.39.0-cp37-cp37m-manylinux2014_x86_64.whl (4.3 MB)\n", + "\u001b[K |████████████████████████████████| 4.3 MB 53.3 MB/s \n", + "\u001b[?25hCollecting proto-plus>=1.10.0\n", + " Downloading proto_plus-1.19.0-py3-none-any.whl (42 kB)\n", + "\u001b[K |████████████████████████████████| 42 kB 1.3 MB/s \n", + "\u001b[?25hCollecting google-resumable-media<3.0dev,>=0.6.0\n", + " Downloading google_resumable_media-1.3.1-py2.py3-none-any.whl (75 kB)\n", + "\u001b[K |████████████████████████████████| 75 kB 4.8 MB/s \n", + "\u001b[?25hCollecting google-api-core[grpc]<3.0.0dev,>=1.29.0\n", + " Downloading 
google_api_core-1.31.0-py2.py3-none-any.whl (93 kB)\n", + "\u001b[K |████████████████████████████████| 93 kB 1.4 MB/s \n", + "\u001b[?25hCollecting libcst>=0.2.5\n", + " Downloading libcst-0.3.19-py3-none-any.whl (513 kB)\n", + "\u001b[K |████████████████████████████████| 513 kB 50.1 MB/s \n", + "\u001b[?25hCollecting google-cloud-storage>=1.20.*\n", + " Downloading google_cloud_storage-1.41.0-py2.py3-none-any.whl (104 kB)\n", + "\u001b[K |████████████████████████████████| 104 kB 64.4 MB/s \n", + "\u001b[?25h Downloading google_cloud_storage-1.40.0-py2.py3-none-any.whl (104 kB)\n", + "\u001b[K |████████████████████████████████| 104 kB 75.0 MB/s \n", + "\u001b[?25hCollecting google-crc32c<2.0dev,>=1.0\n", + " Downloading google_crc32c-1.1.2-cp37-cp37m-manylinux2014_x86_64.whl (38 kB)\n", + "Requirement already satisfied: cffi>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from google-crc32c<2.0dev,>=1.0->google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery>=2.0.*->feast) (1.14.6)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.7/dist-packages (from cffi>=1.0.0->google-crc32c<2.0dev,>=1.0->google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery>=2.0.*->feast) (2.20)\n", + "Collecting typing-inspect>=0.4.0\n", + " Downloading typing_inspect-0.7.1-py3-none-any.whl (8.4 kB)\n", + "Collecting mypy-extensions>=0.3.0\n", + " Downloading mypy_extensions-0.4.3-py2.py3-none-any.whl (4.5 kB)\n", + "Building wheels for collected packages: pandavro, PyYAML\n", + " Building wheel for pandavro (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for pandavro: filename=pandavro-1.5.2-py3-none-any.whl size=2953 sha256=ef61d7c0b4e22b55a5c39c933ee0a88fe71974f888751b75a8baca95690ea171\n", + " Stored in directory: /root/.cache/pip/wheels/33/3f/96/9f1b46a9f7f6043ff4741b1aa1a7b249ba33be4dc1d08843e4\n", + " Building wheel for PyYAML (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for PyYAML: filename=PyYAML-5.3.1-cp37-cp37m-linux_x86_64.whl size=44636 sha256=f01413855e330051c5c6b3f9d8bac9a478552572803aa3418838bd1d1f4cd756\n", + " Stored in directory: /root/.cache/pip/wheels/5e/03/1e/e1e954795d6f35dfc7b637fe2277bff021303bd9570ecea653\n", + "Successfully built pandavro PyYAML\n", + "Installing collected packages: mypy-extensions, googleapis-common-protos, typing-inspect, PyYAML, grpcio, google-crc32c, google-api-core, fastavro, tenacity, pydantic, proto-plus, pandavro, mmh3, libcst, google-resumable-media, google-cloud-core, colorama, google-cloud-storage, google-cloud-datastore, google-cloud-bigquery-storage, google-cloud-bigquery, feast\n", + " Attempting uninstall: googleapis-common-protos\n", + " Found existing installation: googleapis-common-protos 1.53.0\n", + " Uninstalling googleapis-common-protos-1.53.0:\n", + " Successfully uninstalled googleapis-common-protos-1.53.0\n", + " Attempting uninstall: PyYAML\n", + " Found existing installation: PyYAML 3.13\n", + " Uninstalling PyYAML-3.13:\n", + " Successfully uninstalled PyYAML-3.13\n", + " Attempting uninstall: grpcio\n", + " Found existing installation: grpcio 1.34.1\n", + " Uninstalling grpcio-1.34.1:\n", + " Successfully uninstalled grpcio-1.34.1\n", + " Attempting uninstall: google-api-core\n", + " Found existing installation: google-api-core 1.26.3\n", + " Uninstalling google-api-core-1.26.3:\n", + " Successfully uninstalled google-api-core-1.26.3\n", + " Attempting uninstall: google-resumable-media\n", + " Found existing installation: google-resumable-media 0.4.1\n", + " Uninstalling google-resumable-media-0.4.1:\n", + " Successfully uninstalled google-resumable-media-0.4.1\n", + " Attempting uninstall: google-cloud-core\n", + " Found existing installation: google-cloud-core 1.0.3\n", + " Uninstalling google-cloud-core-1.0.3:\n", + " Successfully uninstalled google-cloud-core-1.0.3\n", + " Attempting uninstall: 
google-cloud-storage\n", + " Found existing installation: google-cloud-storage 1.18.1\n", + " Uninstalling google-cloud-storage-1.18.1:\n", + " Successfully uninstalled google-cloud-storage-1.18.1\n", + " Attempting uninstall: google-cloud-datastore\n", + " Found existing installation: google-cloud-datastore 1.8.0\n", + " Uninstalling google-cloud-datastore-1.8.0:\n", + " Successfully uninstalled google-cloud-datastore-1.8.0\n", + " Attempting uninstall: google-cloud-bigquery-storage\n", + " Found existing installation: google-cloud-bigquery-storage 1.1.0\n", + " Uninstalling google-cloud-bigquery-storage-1.1.0:\n", + " Successfully uninstalled google-cloud-bigquery-storage-1.1.0\n", + " Attempting uninstall: google-cloud-bigquery\n", + " Found existing installation: google-cloud-bigquery 1.21.0\n", + " Uninstalling google-cloud-bigquery-1.21.0:\n", + " Successfully uninstalled google-cloud-bigquery-1.21.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "tensorflow 2.5.0 requires grpcio~=1.34.0, but you have grpcio 1.39.0 which is incompatible.\n", + "pandas-gbq 0.13.3 requires google-cloud-bigquery[bqstorage,pandas]<2.0.0dev,>=1.11.1, but you have google-cloud-bigquery 2.22.1 which is incompatible.\u001b[0m\n", + "Successfully installed PyYAML-5.3.1 colorama-0.4.4 fastavro-1.4.4 feast-0.11.0 google-api-core-1.31.0 google-cloud-bigquery-2.22.1 google-cloud-bigquery-storage-2.6.1 google-cloud-core-1.4.4 google-cloud-datastore-2.1.5 google-cloud-storage-1.40.0 google-crc32c-1.1.2 google-resumable-media-1.3.1 googleapis-common-protos-1.52.0 grpcio-1.39.0 libcst-0.3.19 mmh3-3.0.0 mypy-extensions-0.4.3 pandavro-1.5.2 proto-plus-1.19.0 pydantic-1.8.2 tenacity-8.0.1 typing-inspect-0.7.1\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "google" + ] + } + } + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P8pFSVUp34W5" + }, + "source": [ + "#### Check feast version" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "soTYiMPXcNco" + }, + "source": [ + "" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOSAfdZiUnFa", + "outputId": "2462ce77-242b-4018-b5d6-fd0baa239836" + }, + "source": [ + "!feast version " + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Feast is an open source project that collects anonymized error reporting and usage statistics. 
To opt out or learn more see https://docs.feast.dev/reference/usage\n", + "Feast SDK Version: \"feast 0.11.0\"\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pC4AzJ_b396l" + }, + "source": [ + "## Step 2: Clone the Git repo\n", + "\n", + "Clone the Driver Ranking Git repo into your Colab Folder" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4Qim_qbtUyGA", + "outputId": "6556ee99-aac3-468c-a9d5-8a643387712d" + }, + "source": [ + "!git clone https://github.com/feast-dev/feast-driver-ranking-tutorial.git" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Cloning into 'feast-driver-ranking-tutorial'...\n", + "remote: Enumerating objects: 34, done.\u001b[K\n", + "remote: Counting objects: 100% (34/34), done.\u001b[K\n", + "remote: Compressing objects: 100% (24/24), done.\u001b[K\n", + "remote: Total 34 (delta 13), reused 28 (delta 8), pack-reused 0\u001b[K\n", + "Unpacking objects: 100% (34/34), done.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tLnM3IOy5C5l" + }, + "source": [ + "## Step 3: Set up your Google Cloud Platform (GCP) Configurations" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KiNaOTKzWIcb" + }, + "source": [ + "## Authenticate into GCP\n", + "This will allow you to do the advanced section of the tutorial, where you materialize remotely on GCP.\n", + "Feast spins up infrastructure on GCP using the credentials in our environment. 
Run the following cell to log into GCP:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8Tj3MUPHWPTF" + }, + "source": [ + "from google.colab import auth\n", + "auth.authenticate_user()" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d8yzazB-Wfqc" + }, + "source": [ + "Set configurations\n", + "Set the following configuration, which we'll be using throughout the tutorial:\n", + "\n", + "PROJECT_ID: Your project.\n", + "BUCKET_NAME: The name of a bucket which will be used to store the feature store registry and model artifacts.\n", + "BIGQUERY_DATASET_NAME: The name of a dataset which will be used to create tables containing features.\n", + "AI_PLATFORM_MODEL_NAME: The name of the model which will be created in AI Platform." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zV0sgfOTWeXz", + "outputId": "d324609f-83e9-444c-c581-f2ecfd103bb6" + }, + "source": [ + "PROJECT_ID= \"kf-feast\" #@param {type:\"string\"}\n", + "BUCKET_NAME= \"driver_ranking_tutorial\" #@param {type:\"string\"} custom\n", + "BIGQUERY_DATASET_NAME=\"feast_driver_ranking_tutorial\" #@param {type:\"string\"} custom\n", + "AI_PLATFORM_MODEL_NAME=\"feast_driver_rankin_jsd_model\" #@param {type:\"string\"}\n", + "\n", + "! gcloud config set project $PROJECT_ID\n", + "%env GOOGLE_CLOUD_PROJECT=$PROJECT_ID\n", + "!echo project_id = $PROJECT_ID > ~/.bigqueryrc" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Updated property [core/project].\n", + "env: GOOGLE_CLOUD_PROJECT=kf-feast\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6Cs_SYr2gOjR", + "outputId": "4b3d811b-6685-46f8-9830-b0378962bbef" + }, + "source": [ + "# Only run if your bucket doesn't already exist!\n", + "! 
gsutil mb gs://$BUCKET_NAME" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Creating gs://driver_ranking_tutorial/...\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ohWMCVhS5PPN" + }, + "source": [ + "## Step 4: Apply and deploy feature definitions\n", + "\n", + "`feast apply` scans python files in the current directory for feature definitions and deploys infrastructure according to `feature_store.yaml`" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "izhTk0WWX3Tx", + "outputId": "444a11d3-db43-4170-c28b-51d6ea618660" + }, + "source": [ + "%%shell\n", + "cd /content/feast-driver-ranking-tutorial/driver_ranking/\n", + "feast apply" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Registered entity \u001b[1m\u001b[32mdriver_id\u001b[0m\n", + "Registered feature view \u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m\n", + "Deploying infrastructure for \u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 9 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lJlrf2Iu53BR" + }, + "source": [ + "### Inspect the files created under your local folder" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IrJ6gqtdmKk7", + "outputId": "db32950c-9a73-4c06-fde3-52c753929c9b" + }, + "source": [ + "%%shell\n", + "cd /content/feast-driver-ranking-tutorial/driver_ranking/data/\n", + "ls -l " + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "text": [ + "total 20\n", + "-rw-r--r-- 1 root root 16384 Jul 26 20:43 online.db\n", + "-rw-r--r-- 1 root root 310 Jul 26 20:43 registry.db\n" + ], + "name": "stdout" + }, + { 
+ "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 10 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bHBfTEau6Qt9" + }, + "source": [ + "## Step 5: Train your model" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "F-Pc4Jo4kzBL", + "outputId": "6414795f-9330-44b3-ee5c-992c9dd55db7" + }, + "source": [ + "import feast\n", + "from joblib import dump\n", + "import pandas as pd\n", + "from sklearn.linear_model import LinearRegression\n", + "\n", + "# Load driver order data\n", + "orders = pd.read_csv(\"/content/feast-driver-ranking-tutorial/driver_orders.csv\", sep=\"\\t\")\n", + "orders[\"event_timestamp\"] = pd.to_datetime(orders[\"event_timestamp\"])\n", + "\n", + "# Connect to your feature store provider\n", + "fs = feast.FeatureStore(repo_path=\"/content/feast-driver-ranking-tutorial/driver_ranking\")\n", + " \n", + "# Retrieve training data from BigQuery\n", + "training_df = fs.get_historical_features(\n", + " entity_df=orders,\n", + " feature_refs=[\n", + " \"driver_hourly_stats:conv_rate\",\n", + " \"driver_hourly_stats:acc_rate\",\n", + " \"driver_hourly_stats:avg_daily_trips\",\n", + " ],\n", + ").to_df()\n", + "\n", + "print(\"----- Feature schema -----\\n\")\n", + "print(training_df.info())\n", + "\n", + "print()\n", + "print(\"----- Example features -----\\n\")\n", + "print(training_df.head())\n", + "\n", + "# Train model\n", + "target = \"trip_completed\"\n", + "\n", + "reg = LinearRegression()\n", + "train_X = training_df[training_df.columns.drop(target).drop(\"event_timestamp\")]\n", + "train_Y = training_df.loc[:, target]\n", + "reg.fit(train_X[sorted(train_X)], train_Y)\n", + "\n", + "# Save model\n", + "dump(reg, \"driver_model.bin\")" + ], + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "text": [ + "----- Feature schema -----\n", + "\n", + "\n", + 
"RangeIndex: 10 entries, 0 to 9\n", + "Data columns (total 6 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 event_timestamp 10 non-null datetime64[ns, UTC]\n", + " 1 driver_id 10 non-null int64 \n", + " 2 trip_completed 10 non-null int64 \n", + " 3 driver_hourly_stats__conv_rate 10 non-null float64 \n", + " 4 driver_hourly_stats__acc_rate 10 non-null float64 \n", + " 5 driver_hourly_stats__avg_daily_trips 10 non-null int64 \n", + "dtypes: datetime64[ns, UTC](1), float64(2), int64(3)\n", + "memory usage: 608.0 bytes\n", + "None\n", + "\n", + "----- Example features -----\n", + "\n", + " event_timestamp ... driver_hourly_stats__avg_daily_trips\n", + "0 2021-04-17 04:29:28+00:00 ... 982\n", + "1 2021-04-18 04:29:28+00:00 ... 982\n", + "2 2021-04-19 04:29:28+00:00 ... 982\n", + "3 2021-04-17 12:29:28+00:00 ... 551\n", + "4 2021-04-18 12:29:28+00:00 ... 551\n", + "\n", + "[5 rows x 6 columns]\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['driver_model.bin']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 13 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HpHacyo47Are" + }, + "source": [ + "## Step 6: Materialize your online store\n", + "Change the provider field in `driver_ranking/feature_store.yaml` from `local` to `gcp`\n", + "\n", + "Then apply and materialize data to Firestore" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "67627WRSajIk", + "outputId": "056ce886-36a5-48dc-dcbb-23e032695708" + }, + "source": [ + "!cd /content/feast-driver-ranking-tutorial/driver_ranking/ && feast materialize-incremental 2022-01-01T00:00:00" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2022-01-01 00:00:00+00:00\u001b[0m into the 
\u001b[1m\u001b[32mdatastore\u001b[0m online store.\n", + "\n", + "\u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m from \u001b[1m\u001b[32m2020-07-27 20:45:14+00:00\u001b[0m to \u001b[1m\u001b[32m2022-01-01 00:00:00+00:00\u001b[0m:\n", + "100%|███████████████████████████████████████████████████████████████| 10/10 [00:01<00:00, 6.16it/s]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-869cxQO2ana" + }, + "source": [ + "### Step 7: Make Prediction" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "VP85XeGFzNYl" + }, + "source": [ + "import pandas as pd\n", + "import feast\n", + "from joblib import load\n", + "\n", + "\n", + "class DriverRankingModel:\n", + " def __init__(self):\n", + " # Load model\n", + " self.model = load(\"/content/driver_model.bin\")\n", + "\n", + " # Set up feature store\n", + " self.fs = feast.FeatureStore(repo_path=\"/content/feast-driver-ranking-tutorial/driver_ranking/\")\n", + "\n", + " def predict(self, driver_ids):\n", + " # Read features from Feast\n", + " driver_features = self.fs.get_online_features(\n", + " entity_rows=[{\"driver_id\": driver_id} for driver_id in driver_ids],\n", + " feature_refs=[\n", + " \"driver_hourly_stats:conv_rate\",\n", + " \"driver_hourly_stats:acc_rate\",\n", + " \"driver_hourly_stats:avg_daily_trips\",\n", + " ],\n", + " )\n", + " df = pd.DataFrame.from_dict(driver_features.to_dict())\n", + "\n", + " # Make prediction\n", + " df[\"prediction\"] = self.model.predict(df[sorted(df)])\n", + "\n", + " # Choose best driver\n", + " best_driver_id = df[\"driver_id\"].iloc[df[\"prediction\"].argmax()]\n", + "\n", + " # return best driver\n", + " return best_driver_id" + ], + "execution_count": 19, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "f9AJ842Rk3E9" + }, + "source": [ + "def make_drivers_prediction():\n", + " drivers = [1001, 1002, 1003, 1004]\n", + " model = DriverRankingModel()\n", + " best_driver = 
model.predict(drivers)\n", + " print(f\"Prediction for best driver id: {best_driver}\")" + ], + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lq2TNXfjbb8e", + "outputId": "7c163361-491b-4eb7-87e0-6b68eccc9030" + }, + "source": [ + "make_drivers_prediction()" + ], + "execution_count": 21, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Prediction for best driver id: 1001\n" + ], + "name": "stdout" + } + ] + } + ] +} \ No newline at end of file From 6f89aaea47866d655256fa94ee5c5edce3d1e32f Mon Sep 17 00:00:00 2001 From: Jules Damji Date: Mon, 26 Jul 2021 13:54:05 -0700 Subject: [PATCH 2/8] moved notebook to diretory Signed-off-by: Jules Damji --- notebooks/Driver_Ranking_Tutorial.ipynb | 766 ++++++++++++++++++++++++ 1 file changed, 766 insertions(+) create mode 100644 notebooks/Driver_Ranking_Tutorial.ipynb diff --git a/notebooks/Driver_Ranking_Tutorial.ipynb b/notebooks/Driver_Ranking_Tutorial.ipynb new file mode 100644 index 0000000..38026f5 --- /dev/null +++ b/notebooks/Driver_Ranking_Tutorial.ipynb @@ -0,0 +1,766 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Driver Ranking Tutorial", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "A7ffktm_Ty80" + }, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qa-41097T0vH" + }, + "source": [ + "### Overview\n", + "Making a prediction using a linear regression model is a common use case in ML. 
In this guide tutorial, we build the model that predicts if a driver will complete a trip based on a number of features ingested into Feast.\n", + "\n", + "The basic local mode gives you ability to quickly try Feast, while the advanced mode shows how you can use Feast in a production setting, in particular for the Google Cloud Platform (GCP) cloud.\n", + "\n", + "This tutorial uses Feast with scikit learn to:\n", + "\n", + "* Train a model locally using data from BigQuery\n", + "* Test the model for online inference using SQLite (for fast iteration)\n", + "* Test the model for online inference using Firestore (to represent production)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j1Qipu_GUYdA" + }, + "source": [ + "## Step 1: Install feast, scikit-learn\n", + "\n", + "Install feast, gcp dependencies and scikit-learn\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "gxuVxKG3Ua6z", + "outputId": "09ba5826-ef68-400c-a7a5-6a2ac5b9ebb5" + }, + "source": [ + "!pip install feast scikit-learn 'feast[gcp]'" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Collecting feast\n", + " Downloading feast-0.11.0-py3-none-any.whl (190 kB)\n", + "\u001b[?25l\r\u001b[K |█▊ | 10 kB 27.9 MB/s eta 0:00:01\r\u001b[K |███▌ | 20 kB 28.6 MB/s eta 0:00:01\r\u001b[K |█████▏ | 30 kB 12.4 MB/s eta 0:00:01\r\u001b[K |███████ | 40 kB 9.2 MB/s eta 0:00:01\r\u001b[K |████████▋ | 51 kB 4.2 MB/s eta 0:00:01\r\u001b[K |██████████▍ | 61 kB 4.5 MB/s eta 0:00:01\r\u001b[K |████████████ | 71 kB 4.6 MB/s eta 0:00:01\r\u001b[K |█████████████▉ | 81 kB 4.7 MB/s eta 0:00:01\r\u001b[K |███████████████▌ | 92 kB 4.9 MB/s eta 0:00:01\r\u001b[K |█████████████████▎ | 102 kB 5.0 MB/s eta 0:00:01\r\u001b[K |███████████████████ | 112 kB 5.0 MB/s eta 0:00:01\r\u001b[K |████████████████████▊ | 122 kB 5.0 MB/s eta 0:00:01\r\u001b[K |██████████████████████▍ | 133 kB 
5.0 MB/s eta 0:00:01\r\u001b[K |████████████████████████▏ | 143 kB 5.0 MB/s eta 0:00:01\r\u001b[K |█████████████████████████▉ | 153 kB 5.0 MB/s eta 0:00:01\r\u001b[K |███████████████████████████▋ | 163 kB 5.0 MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▎ | 174 kB 5.0 MB/s eta 0:00:01\r\u001b[K |███████████████████████████████ | 184 kB 5.0 MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 190 kB 5.0 MB/s \n", + "\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.7/dist-packages (0.22.2.post1)\n", + "Requirement already satisfied: pandas>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from feast) (1.1.5)\n", + "Requirement already satisfied: Click==7.* in /usr/local/lib/python3.7/dist-packages (from feast) (7.1.2)\n", + "Requirement already satisfied: protobuf>=3.10 in /usr/local/lib/python3.7/dist-packages (from feast) (3.17.3)\n", + "Requirement already satisfied: tabulate==0.8.* in /usr/local/lib/python3.7/dist-packages (from feast) (0.8.9)\n", + "Collecting fastavro>=1.1.0\n", + " Downloading fastavro-1.4.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n", + "\u001b[K |████████████████████████████████| 2.3 MB 23.0 MB/s \n", + "\u001b[?25hCollecting tenacity>=7.*\n", + " Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)\n", + "Collecting pydantic>=1.0.0\n", + " Downloading pydantic-1.8.2-cp37-cp37m-manylinux2014_x86_64.whl (10.1 MB)\n", + "\u001b[K |████████████████████████████████| 10.1 MB 26.1 MB/s \n", + "\u001b[?25hCollecting mmh3\n", + " Downloading mmh3-3.0.0-cp37-cp37m-manylinux2010_x86_64.whl (50 kB)\n", + "\u001b[K |████████████████████████████████| 50 kB 6.5 MB/s \n", + "\u001b[?25hRequirement already satisfied: toml==0.10.* in /usr/local/lib/python3.7/dist-packages (from feast) (0.10.2)\n", + "Collecting colorama>=0.3.9\n", + " Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)\n", + "Collecting PyYAML==5.3.*\n", + " Downloading PyYAML-5.3.1.tar.gz (269 kB)\n", + 
"\u001b[K |████████████████████████████████| 269 kB 52.1 MB/s \n", + "\u001b[?25hRequirement already satisfied: google-api-core>=1.23.0 in /usr/local/lib/python3.7/dist-packages (from feast) (1.26.3)\n", + "Requirement already satisfied: Jinja2>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from feast) (2.11.3)\n", + "Requirement already satisfied: grpcio>=1.34.0 in /usr/local/lib/python3.7/dist-packages (from feast) (1.34.1)\n", + "Collecting googleapis-common-protos==1.52.*\n", + " Downloading googleapis_common_protos-1.52.0-py2.py3-none-any.whl (100 kB)\n", + "\u001b[K |████████████████████████████████| 100 kB 7.6 MB/s \n", + "\u001b[?25hRequirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from feast) (2.6.0)\n", + "Requirement already satisfied: tqdm==4.* in /usr/local/lib/python3.7/dist-packages (from feast) (4.41.1)\n", + "Collecting pandavro==1.5.*\n", + " Downloading pandavro-1.5.2.tar.gz (3.8 kB)\n", + "Requirement already satisfied: pyarrow>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from feast) (3.0.0)\n", + "Requirement already satisfied: numpy>=1.7.0 in /usr/local/lib/python3.7/dist-packages (from pandavro==1.5.*->feast) (1.19.5)\n", + "Requirement already satisfied: six>=1.9 in /usr/local/lib/python3.7/dist-packages (from pandavro==1.5.*->feast) (1.15.0)\n", + "Requirement already satisfied: setuptools>=40.3.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (57.2.0)\n", + "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (21.0)\n", + "Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (2018.9)\n", + "Requirement already satisfied: requests<3.0.0dev,>=2.18.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (2.23.0)\n", + "Requirement already satisfied: google-auth<2.0dev,>=1.21.1 in 
/usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (1.32.1)\n", + "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (4.2.2)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (0.2.8)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (4.7.2)\n", + "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2>=2.0.0->feast) (2.0.1)\n", + "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=14.3->google-api-core>=1.23.0->feast) (2.4.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.0.0->feast) (2.8.1)\n", + "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (0.4.8)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from pydantic>=1.0.0->feast) (3.7.4.3)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (3.0.4)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (1.24.3)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in 
/usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (2021.5.30)\n", + "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn) (1.0.1)\n", + "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn) (1.4.1)\n", + "Collecting google-cloud-core==1.4.*\n", + " Downloading google_cloud_core-1.4.4-py2.py3-none-any.whl (27 kB)\n", + "Collecting google-cloud-datastore>=2.1.*\n", + " Downloading google_cloud_datastore-2.1.5-py2.py3-none-any.whl (127 kB)\n", + "\u001b[K |████████████████████████████████| 127 kB 58.5 MB/s \n", + "\u001b[?25hCollecting google-cloud-storage>=1.20.*\n", + " Downloading google_cloud_storage-1.41.1-py2.py3-none-any.whl (105 kB)\n", + "\u001b[K |████████████████████████████████| 105 kB 54.2 MB/s \n", + "\u001b[?25hCollecting google-cloud-bigquery-storage>=2.0.0\n", + " Downloading google_cloud_bigquery_storage-2.6.1-py2.py3-none-any.whl (125 kB)\n", + "\u001b[K |████████████████████████████████| 125 kB 60.5 MB/s \n", + "\u001b[?25hCollecting google-cloud-bigquery>=2.0.*\n", + " Downloading google_cloud_bigquery-2.22.1-py2.py3-none-any.whl (195 kB)\n", + "\u001b[K |████████████████████████████████| 195 kB 57.9 MB/s \n", + "\u001b[?25hCollecting grpcio>=1.34.0\n", + " Downloading grpcio-1.39.0-cp37-cp37m-manylinux2014_x86_64.whl (4.3 MB)\n", + "\u001b[K |████████████████████████████████| 4.3 MB 53.3 MB/s \n", + "\u001b[?25hCollecting proto-plus>=1.10.0\n", + " Downloading proto_plus-1.19.0-py3-none-any.whl (42 kB)\n", + "\u001b[K |████████████████████████████████| 42 kB 1.3 MB/s \n", + "\u001b[?25hCollecting google-resumable-media<3.0dev,>=0.6.0\n", + " Downloading google_resumable_media-1.3.1-py2.py3-none-any.whl (75 kB)\n", + "\u001b[K |████████████████████████████████| 75 kB 4.8 MB/s \n", + "\u001b[?25hCollecting google-api-core[grpc]<3.0.0dev,>=1.29.0\n", + " Downloading 
google_api_core-1.31.0-py2.py3-none-any.whl (93 kB)\n", + "\u001b[K |████████████████████████████████| 93 kB 1.4 MB/s \n", + "\u001b[?25hCollecting libcst>=0.2.5\n", + " Downloading libcst-0.3.19-py3-none-any.whl (513 kB)\n", + "\u001b[K |████████████████████████████████| 513 kB 50.1 MB/s \n", + "\u001b[?25hCollecting google-cloud-storage>=1.20.*\n", + " Downloading google_cloud_storage-1.41.0-py2.py3-none-any.whl (104 kB)\n", + "\u001b[K |████████████████████████████████| 104 kB 64.4 MB/s \n", + "\u001b[?25h Downloading google_cloud_storage-1.40.0-py2.py3-none-any.whl (104 kB)\n", + "\u001b[K |████████████████████████████████| 104 kB 75.0 MB/s \n", + "\u001b[?25hCollecting google-crc32c<2.0dev,>=1.0\n", + " Downloading google_crc32c-1.1.2-cp37-cp37m-manylinux2014_x86_64.whl (38 kB)\n", + "Requirement already satisfied: cffi>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from google-crc32c<2.0dev,>=1.0->google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery>=2.0.*->feast) (1.14.6)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.7/dist-packages (from cffi>=1.0.0->google-crc32c<2.0dev,>=1.0->google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery>=2.0.*->feast) (2.20)\n", + "Collecting typing-inspect>=0.4.0\n", + " Downloading typing_inspect-0.7.1-py3-none-any.whl (8.4 kB)\n", + "Collecting mypy-extensions>=0.3.0\n", + " Downloading mypy_extensions-0.4.3-py2.py3-none-any.whl (4.5 kB)\n", + "Building wheels for collected packages: pandavro, PyYAML\n", + " Building wheel for pandavro (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for pandavro: filename=pandavro-1.5.2-py3-none-any.whl size=2953 sha256=ef61d7c0b4e22b55a5c39c933ee0a88fe71974f888751b75a8baca95690ea171\n", + " Stored in directory: /root/.cache/pip/wheels/33/3f/96/9f1b46a9f7f6043ff4741b1aa1a7b249ba33be4dc1d08843e4\n", + " Building wheel for PyYAML (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for PyYAML: filename=PyYAML-5.3.1-cp37-cp37m-linux_x86_64.whl size=44636 sha256=f01413855e330051c5c6b3f9d8bac9a478552572803aa3418838bd1d1f4cd756\n", + " Stored in directory: /root/.cache/pip/wheels/5e/03/1e/e1e954795d6f35dfc7b637fe2277bff021303bd9570ecea653\n", + "Successfully built pandavro PyYAML\n", + "Installing collected packages: mypy-extensions, googleapis-common-protos, typing-inspect, PyYAML, grpcio, google-crc32c, google-api-core, fastavro, tenacity, pydantic, proto-plus, pandavro, mmh3, libcst, google-resumable-media, google-cloud-core, colorama, google-cloud-storage, google-cloud-datastore, google-cloud-bigquery-storage, google-cloud-bigquery, feast\n", + " Attempting uninstall: googleapis-common-protos\n", + " Found existing installation: googleapis-common-protos 1.53.0\n", + " Uninstalling googleapis-common-protos-1.53.0:\n", + " Successfully uninstalled googleapis-common-protos-1.53.0\n", + " Attempting uninstall: PyYAML\n", + " Found existing installation: PyYAML 3.13\n", + " Uninstalling PyYAML-3.13:\n", + " Successfully uninstalled PyYAML-3.13\n", + " Attempting uninstall: grpcio\n", + " Found existing installation: grpcio 1.34.1\n", + " Uninstalling grpcio-1.34.1:\n", + " Successfully uninstalled grpcio-1.34.1\n", + " Attempting uninstall: google-api-core\n", + " Found existing installation: google-api-core 1.26.3\n", + " Uninstalling google-api-core-1.26.3:\n", + " Successfully uninstalled google-api-core-1.26.3\n", + " Attempting uninstall: google-resumable-media\n", + " Found existing installation: google-resumable-media 0.4.1\n", + " Uninstalling google-resumable-media-0.4.1:\n", + " Successfully uninstalled google-resumable-media-0.4.1\n", + " Attempting uninstall: google-cloud-core\n", + " Found existing installation: google-cloud-core 1.0.3\n", + " Uninstalling google-cloud-core-1.0.3:\n", + " Successfully uninstalled google-cloud-core-1.0.3\n", + " Attempting uninstall: 
google-cloud-storage\n", + " Found existing installation: google-cloud-storage 1.18.1\n", + " Uninstalling google-cloud-storage-1.18.1:\n", + " Successfully uninstalled google-cloud-storage-1.18.1\n", + " Attempting uninstall: google-cloud-datastore\n", + " Found existing installation: google-cloud-datastore 1.8.0\n", + " Uninstalling google-cloud-datastore-1.8.0:\n", + " Successfully uninstalled google-cloud-datastore-1.8.0\n", + " Attempting uninstall: google-cloud-bigquery-storage\n", + " Found existing installation: google-cloud-bigquery-storage 1.1.0\n", + " Uninstalling google-cloud-bigquery-storage-1.1.0:\n", + " Successfully uninstalled google-cloud-bigquery-storage-1.1.0\n", + " Attempting uninstall: google-cloud-bigquery\n", + " Found existing installation: google-cloud-bigquery 1.21.0\n", + " Uninstalling google-cloud-bigquery-1.21.0:\n", + " Successfully uninstalled google-cloud-bigquery-1.21.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "tensorflow 2.5.0 requires grpcio~=1.34.0, but you have grpcio 1.39.0 which is incompatible.\n", + "pandas-gbq 0.13.3 requires google-cloud-bigquery[bqstorage,pandas]<2.0.0dev,>=1.11.1, but you have google-cloud-bigquery 2.22.1 which is incompatible.\u001b[0m\n", + "Successfully installed PyYAML-5.3.1 colorama-0.4.4 fastavro-1.4.4 feast-0.11.0 google-api-core-1.31.0 google-cloud-bigquery-2.22.1 google-cloud-bigquery-storage-2.6.1 google-cloud-core-1.4.4 google-cloud-datastore-2.1.5 google-cloud-storage-1.40.0 google-crc32c-1.1.2 google-resumable-media-1.3.1 googleapis-common-protos-1.52.0 grpcio-1.39.0 libcst-0.3.19 mmh3-3.0.0 mypy-extensions-0.4.3 pandavro-1.5.2 proto-plus-1.19.0 pydantic-1.8.2 tenacity-8.0.1 typing-inspect-0.7.1\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "google" + ] + } + } + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P8pFSVUp34W5" + }, + "source": [ + "#### Check feast version" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "soTYiMPXcNco" + }, + "source": [ + "" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOSAfdZiUnFa", + "outputId": "2462ce77-242b-4018-b5d6-fd0baa239836" + }, + "source": [ + "!feast version " + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Feast is an open source project that collects anonymized error reporting and usage statistics. 
To opt out or learn more see https://docs.feast.dev/reference/usage\n", + "Feast SDK Version: \"feast 0.11.0\"\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pC4AzJ_b396l" + }, + "source": [ + "## Step 2: Clone the Git repo\n", + "\n", + "Clone the Driver Ranking Git repo into your Colab Folder" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4Qim_qbtUyGA", + "outputId": "6556ee99-aac3-468c-a9d5-8a643387712d" + }, + "source": [ + "!git clone https://github.com/feast-dev/feast-driver-ranking-tutorial.git" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Cloning into 'feast-driver-ranking-tutorial'...\n", + "remote: Enumerating objects: 34, done.\u001b[K\n", + "remote: Counting objects: 100% (34/34), done.\u001b[K\n", + "remote: Compressing objects: 100% (24/24), done.\u001b[K\n", + "remote: Total 34 (delta 13), reused 28 (delta 8), pack-reused 0\u001b[K\n", + "Unpacking objects: 100% (34/34), done.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tLnM3IOy5C5l" + }, + "source": [ + "## Step 3: Set up your Goggle Cloud Platform (GCP) Configurations" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KiNaOTKzWIcb" + }, + "source": [ + "## Authenticate into GCP\n", + "This will allow you to do the advanced section of the tutorial, where you materialize remotely on a GCP\n", + "Feast spins up infrastructure on GCP using the credentials in our environment. 
Run the following cell to log into GCP:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8Tj3MUPHWPTF" + }, + "source": [ + "from google.colab import auth\n", + "auth.authenticate_user()" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d8yzazB-Wfqc" + }, + "source": [ + "Set configurations\n", + "Set the following configuration, which we'll be using throughout the tutorial:\n", + "\n", + "PROJECT_ID: Your project.\n", + "BUCKET_NAME: The name of a bucket which will be used to store the feature store registry and model artifacts.\n", + "BIGQUERY_DATASET_NAME: The name of a dataset which will be used to create tables containing features.\n", + "AI_PLATFORM_MODEL_NAME: The name of a model name which will be created in AI Platform." + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zV0sgfOTWeXz", + "outputId": "d324609f-83e9-444c-c581-f2ecfd103bb6" + }, + "source": [ + "PROJECT_ID= \"kf-feast\" #@param {type:\"string\"}\n", + "BUCKET_NAME= \"driver_ranking_tutorial\" #@param {type:\"string\"} custom\n", + "BIGQUERY_DATASET_NAME=\"feast_driver_ranking_tutorial\" #@param {type:\"string\"} custom\n", + "AI_PLATFORM_MODEL_NAME=\"feast_driver_rankin_jsd_model\" #@param {type:\"string\"\n", + "\n", + "! gcloud config set project $PROJECT_ID\n", + "%env GOOGLE_CLOUD_PROJECT=$PROJECT_ID\n", + "!echo project_id = $PROJECT_ID > ~/.bigqueryrc" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Updated property [core/project].\n", + "env: GOOGLE_CLOUD_PROJECT=kf-feast\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6Cs_SYr2gOjR", + "outputId": "4b3d811b-6685-46f8-9830-b0378962bbef" + }, + "source": [ + "# Only run if your bucket doesn't already exist!\n", + "! 
gsutil mb gs://$BUCKET_NAME" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Creating gs://driver_ranking_tutorial/...\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ohWMCVhS5PPN" + }, + "source": [ + "## Step 4: Apply and deploy feature definitions\n", + "\n", + "`feast apply` scans python files in the current directory for feature definitions and deploys infrastructure according to `feature_store.yaml`" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "izhTk0WWX3Tx", + "outputId": "444a11d3-db43-4170-c28b-51d6ea618660" + }, + "source": [ + "%%shell\n", + "cd /content/feast-driver-ranking-tutorial/driver_ranking/\n", + "feast apply" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Registered entity \u001b[1m\u001b[32mdriver_id\u001b[0m\n", + "Registered feature view \u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m\n", + "Deploying infrastructure for \u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 9 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lJlrf2Iu53BR" + }, + "source": [ + "### Inspect the files created under your local folder" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IrJ6gqtdmKk7", + "outputId": "db32950c-9a73-4c06-fde3-52c753929c9b" + }, + "source": [ + "%%shell\n", + "cd /content/feast-driver-ranking-tutorial/driver_ranking/data/\n", + "ls -l " + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "text": [ + "total 20\n", + "-rw-r--r-- 1 root root 16384 Jul 26 20:43 online.db\n", + "-rw-r--r-- 1 root root 310 Jul 26 20:43 registry.db\n" + ], + "name": "stdout" + }, + { 
+ "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 10 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bHBfTEau6Qt9" + }, + "source": [ + "## Step 5: Train your model" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "F-Pc4Jo4kzBL", + "outputId": "6414795f-9330-44b3-ee5c-992c9dd55db7" + }, + "source": [ + "import feast\n", + "from joblib import dump\n", + "import pandas as pd\n", + "from sklearn.linear_model import LinearRegression\n", + "\n", + "# Load driver order data\n", + "orders = pd.read_csv(\"/content/feast-driver-ranking-tutorial/driver_orders.csv\", sep=\"\\t\")\n", + "orders[\"event_timestamp\"] = pd.to_datetime(orders[\"event_timestamp\"])\n", + "\n", + "# Connect to your feature store provider\n", + "fs = feast.FeatureStore(repo_path=\"/content/feast-driver-ranking-tutorial/driver_ranking\")\n", + " \n", + "# Retrieve training data from BigQuery\n", + "training_df = fs.get_historical_features(\n", + " entity_df=orders,\n", + " feature_refs=[\n", + " \"driver_hourly_stats:conv_rate\",\n", + " \"driver_hourly_stats:acc_rate\",\n", + " \"driver_hourly_stats:avg_daily_trips\",\n", + " ],\n", + ").to_df()\n", + "\n", + "print(\"----- Feature schema -----\\n\")\n", + "print(training_df.info())\n", + "\n", + "print()\n", + "print(\"----- Example features -----\\n\")\n", + "print(training_df.head())\n", + "\n", + "# Train model\n", + "target = \"trip_completed\"\n", + "\n", + "reg = LinearRegression()\n", + "train_X = training_df[training_df.columns.drop(target).drop(\"event_timestamp\")]\n", + "train_Y = training_df.loc[:, target]\n", + "reg.fit(train_X[sorted(train_X)], train_Y)\n", + "\n", + "# Save model\n", + "dump(reg, \"driver_model.bin\")" + ], + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "text": [ + "----- Feature schema -----\n", + "\n", + "\n", + 
"RangeIndex: 10 entries, 0 to 9\n", + "Data columns (total 6 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 event_timestamp 10 non-null datetime64[ns, UTC]\n", + " 1 driver_id 10 non-null int64 \n", + " 2 trip_completed 10 non-null int64 \n", + " 3 driver_hourly_stats__conv_rate 10 non-null float64 \n", + " 4 driver_hourly_stats__acc_rate 10 non-null float64 \n", + " 5 driver_hourly_stats__avg_daily_trips 10 non-null int64 \n", + "dtypes: datetime64[ns, UTC](1), float64(2), int64(3)\n", + "memory usage: 608.0 bytes\n", + "None\n", + "\n", + "----- Example features -----\n", + "\n", + " event_timestamp ... driver_hourly_stats__avg_daily_trips\n", + "0 2021-04-17 04:29:28+00:00 ... 982\n", + "1 2021-04-18 04:29:28+00:00 ... 982\n", + "2 2021-04-19 04:29:28+00:00 ... 982\n", + "3 2021-04-17 12:29:28+00:00 ... 551\n", + "4 2021-04-18 12:29:28+00:00 ... 551\n", + "\n", + "[5 rows x 6 columns]\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['driver_model.bin']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 13 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HpHacyo47Are" + }, + "source": [ + "## Step 6: Materialize your online store\n", + "Change the provider field in `driver_ranking/feature_store.yam` from `local` to `gcp`\n", + "\n", + "Then apply and materialize data to Firestore" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "67627WRSajIk", + "outputId": "056ce886-36a5-48dc-dcbb-23e032695708" + }, + "source": [ + "!cd /content/feast-driver-ranking-tutorial/driver_ranking/ && feast materialize-incremental 2022-01-01T00:00:00" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2022-01-01 00:00:00+00:00\u001b[0m into the 
\u001b[1m\u001b[32mdatastore\u001b[0m online store.\n", + "\n", + "\u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m from \u001b[1m\u001b[32m2020-07-27 20:45:14+00:00\u001b[0m to \u001b[1m\u001b[32m2022-01-01 00:00:00+00:00\u001b[0m:\n", + "100%|███████████████████████████████████████████████████████████████| 10/10 [00:01<00:00, 6.16it/s]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-869cxQO2ana" + }, + "source": [ + "### Step 7: Make Prediction" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "VP85XeGFzNYl" + }, + "source": [ + "import pandas as pd\n", + "import feast\n", + "from joblib import load\n", + "\n", + "\n", + "class DriverRankingModel:\n", + " def __init__(self):\n", + " # Load model\n", + " self.model = load(\"/content/driver_model.bin\")\n", + "\n", + " # Set up feature store\n", + " self.fs = feast.FeatureStore(repo_path=\"/content/feast-driver-ranking-tutorial/driver_ranking/\")\n", + "\n", + " def predict(self, driver_ids):\n", + " # Read features from Feast\n", + " driver_features = self.fs.get_online_features(\n", + " entity_rows=[{\"driver_id\": driver_id} for driver_id in driver_ids],\n", + " feature_refs=[\n", + " \"driver_hourly_stats:conv_rate\",\n", + " \"driver_hourly_stats:acc_rate\",\n", + " \"driver_hourly_stats:avg_daily_trips\",\n", + " ],\n", + " )\n", + " df = pd.DataFrame.from_dict(driver_features.to_dict())\n", + "\n", + " # Make prediction\n", + " df[\"prediction\"] = self.model.predict(df[sorted(df)])\n", + "\n", + " # Choose best driver\n", + " best_driver_id = df[\"driver_id\"].iloc[df[\"prediction\"].argmax()]\n", + "\n", + " # return best driver\n", + " return best_driver_id" + ], + "execution_count": 19, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "f9AJ842Rk3E9" + }, + "source": [ + "def make_drivers_prediction():\n", + " drivers = [1001, 1002, 1003, 1004]\n", + " model = DriverRankingModel()\n", + " best_driver = 
model.predict(drivers)\n", + " print(f\"Prediction for best driver id: {best_driver}\")" + ], + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lq2TNXfjbb8e", + "outputId": "7c163361-491b-4eb7-87e0-6b68eccc9030" + }, + "source": [ + "make_drivers_prediction()" + ], + "execution_count": 21, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Prediction for best driver id: 1001\n" + ], + "name": "stdout" + } + ] + } + ] +} \ No newline at end of file From 4cba6fec93dc305092aad191a5202759dceba44c Mon Sep 17 00:00:00 2001 From: Jules Damji Date: Mon, 26 Jul 2021 13:57:12 -0700 Subject: [PATCH 3/8] deleted the notebook from top-level directory Signed-off-by: Jules Damji --- Driver_Ranking_Tutorial.ipynb | 766 ---------------------------------- 1 file changed, 766 deletions(-) delete mode 100644 Driver_Ranking_Tutorial.ipynb diff --git a/Driver_Ranking_Tutorial.ipynb b/Driver_Ranking_Tutorial.ipynb deleted file mode 100644 index 38026f5..0000000 --- a/Driver_Ranking_Tutorial.ipynb +++ /dev/null @@ -1,766 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "Driver Ranking Tutorial", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "A7ffktm_Ty80" - }, - "source": [ - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qa-41097T0vH" - }, - "source": [ - "### Overview\n", - "Making a prediction using a linear regression model is a common use case in ML. 
In this guide tutorial, we build the model that predicts if a driver will complete a trip based on a number of features ingested into Feast.\n", - "\n", - "The basic local mode gives you ability to quickly try Feast, while the advanced mode shows how you can use Feast in a production setting, in particular for the Google Cloud Platform (GCP) cloud.\n", - "\n", - "This tutorial uses Feast with scikit learn to:\n", - "\n", - "* Train a model locally using data from BigQuery\n", - "* Test the model for online inference using SQLite (for fast iteration)\n", - "* Test the model for online inference using Firestore (to represent production)\n", - " " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j1Qipu_GUYdA" - }, - "source": [ - "## Step 1: Install feast, scikit-learn\n", - "\n", - "Install feast, gcp dependencies and scikit-learn\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "gxuVxKG3Ua6z", - "outputId": "09ba5826-ef68-400c-a7a5-6a2ac5b9ebb5" - }, - "source": [ - "!pip install feast scikit-learn 'feast[gcp]'" - ], - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Collecting feast\n", - " Downloading feast-0.11.0-py3-none-any.whl (190 kB)\n", - "\u001b[?25l\r\u001b[K |█▊ | 10 kB 27.9 MB/s eta 0:00:01\r\u001b[K |███▌ | 20 kB 28.6 MB/s eta 0:00:01\r\u001b[K |█████▏ | 30 kB 12.4 MB/s eta 0:00:01\r\u001b[K |███████ | 40 kB 9.2 MB/s eta 0:00:01\r\u001b[K |████████▋ | 51 kB 4.2 MB/s eta 0:00:01\r\u001b[K |██████████▍ | 61 kB 4.5 MB/s eta 0:00:01\r\u001b[K |████████████ | 71 kB 4.6 MB/s eta 0:00:01\r\u001b[K |█████████████▉ | 81 kB 4.7 MB/s eta 0:00:01\r\u001b[K |███████████████▌ | 92 kB 4.9 MB/s eta 0:00:01\r\u001b[K |█████████████████▎ | 102 kB 5.0 MB/s eta 0:00:01\r\u001b[K |███████████████████ | 112 kB 5.0 MB/s eta 0:00:01\r\u001b[K |████████████████████▊ | 122 kB 5.0 MB/s eta 0:00:01\r\u001b[K |██████████████████████▍ | 133 kB 
5.0 MB/s eta 0:00:01\r\u001b[K |████████████████████████▏ | 143 kB 5.0 MB/s eta 0:00:01\r\u001b[K |█████████████████████████▉ | 153 kB 5.0 MB/s eta 0:00:01\r\u001b[K |███████████████████████████▋ | 163 kB 5.0 MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▎ | 174 kB 5.0 MB/s eta 0:00:01\r\u001b[K |███████████████████████████████ | 184 kB 5.0 MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 190 kB 5.0 MB/s \n", - "\u001b[?25hRequirement already satisfied: scikit-learn in /usr/local/lib/python3.7/dist-packages (0.22.2.post1)\n", - "Requirement already satisfied: pandas>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from feast) (1.1.5)\n", - "Requirement already satisfied: Click==7.* in /usr/local/lib/python3.7/dist-packages (from feast) (7.1.2)\n", - "Requirement already satisfied: protobuf>=3.10 in /usr/local/lib/python3.7/dist-packages (from feast) (3.17.3)\n", - "Requirement already satisfied: tabulate==0.8.* in /usr/local/lib/python3.7/dist-packages (from feast) (0.8.9)\n", - "Collecting fastavro>=1.1.0\n", - " Downloading fastavro-1.4.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n", - "\u001b[K |████████████████████████████████| 2.3 MB 23.0 MB/s \n", - "\u001b[?25hCollecting tenacity>=7.*\n", - " Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)\n", - "Collecting pydantic>=1.0.0\n", - " Downloading pydantic-1.8.2-cp37-cp37m-manylinux2014_x86_64.whl (10.1 MB)\n", - "\u001b[K |████████████████████████████████| 10.1 MB 26.1 MB/s \n", - "\u001b[?25hCollecting mmh3\n", - " Downloading mmh3-3.0.0-cp37-cp37m-manylinux2010_x86_64.whl (50 kB)\n", - "\u001b[K |████████████████████████████████| 50 kB 6.5 MB/s \n", - "\u001b[?25hRequirement already satisfied: toml==0.10.* in /usr/local/lib/python3.7/dist-packages (from feast) (0.10.2)\n", - "Collecting colorama>=0.3.9\n", - " Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)\n", - "Collecting PyYAML==5.3.*\n", - " Downloading PyYAML-5.3.1.tar.gz (269 kB)\n", - 
"\u001b[K |████████████████████████████████| 269 kB 52.1 MB/s \n", - "\u001b[?25hRequirement already satisfied: google-api-core>=1.23.0 in /usr/local/lib/python3.7/dist-packages (from feast) (1.26.3)\n", - "Requirement already satisfied: Jinja2>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from feast) (2.11.3)\n", - "Requirement already satisfied: grpcio>=1.34.0 in /usr/local/lib/python3.7/dist-packages (from feast) (1.34.1)\n", - "Collecting googleapis-common-protos==1.52.*\n", - " Downloading googleapis_common_protos-1.52.0-py2.py3-none-any.whl (100 kB)\n", - "\u001b[K |████████████████████████████████| 100 kB 7.6 MB/s \n", - "\u001b[?25hRequirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from feast) (2.6.0)\n", - "Requirement already satisfied: tqdm==4.* in /usr/local/lib/python3.7/dist-packages (from feast) (4.41.1)\n", - "Collecting pandavro==1.5.*\n", - " Downloading pandavro-1.5.2.tar.gz (3.8 kB)\n", - "Requirement already satisfied: pyarrow>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from feast) (3.0.0)\n", - "Requirement already satisfied: numpy>=1.7.0 in /usr/local/lib/python3.7/dist-packages (from pandavro==1.5.*->feast) (1.19.5)\n", - "Requirement already satisfied: six>=1.9 in /usr/local/lib/python3.7/dist-packages (from pandavro==1.5.*->feast) (1.15.0)\n", - "Requirement already satisfied: setuptools>=40.3.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (57.2.0)\n", - "Requirement already satisfied: packaging>=14.3 in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (21.0)\n", - "Requirement already satisfied: pytz in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (2018.9)\n", - "Requirement already satisfied: requests<3.0.0dev,>=2.18.0 in /usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (2.23.0)\n", - "Requirement already satisfied: google-auth<2.0dev,>=1.21.1 in 
/usr/local/lib/python3.7/dist-packages (from google-api-core>=1.23.0->feast) (1.32.1)\n", - "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (4.2.2)\n", - "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (0.2.8)\n", - "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (4.7.2)\n", - "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2>=2.0.0->feast) (2.0.1)\n", - "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=14.3->google-api-core>=1.23.0->feast) (2.4.7)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.0.0->feast) (2.8.1)\n", - "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2.0dev,>=1.21.1->google-api-core>=1.23.0->feast) (0.4.8)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from pydantic>=1.0.0->feast) (3.7.4.3)\n", - "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (3.0.4)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (1.24.3)\n", - "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (2.10)\n", - "Requirement already satisfied: certifi>=2017.4.17 in 
/usr/local/lib/python3.7/dist-packages (from requests<3.0.0dev,>=2.18.0->google-api-core>=1.23.0->feast) (2021.5.30)\n", - "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn) (1.0.1)\n", - "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn) (1.4.1)\n", - "Collecting google-cloud-core==1.4.*\n", - " Downloading google_cloud_core-1.4.4-py2.py3-none-any.whl (27 kB)\n", - "Collecting google-cloud-datastore>=2.1.*\n", - " Downloading google_cloud_datastore-2.1.5-py2.py3-none-any.whl (127 kB)\n", - "\u001b[K |████████████████████████████████| 127 kB 58.5 MB/s \n", - "\u001b[?25hCollecting google-cloud-storage>=1.20.*\n", - " Downloading google_cloud_storage-1.41.1-py2.py3-none-any.whl (105 kB)\n", - "\u001b[K |████████████████████████████████| 105 kB 54.2 MB/s \n", - "\u001b[?25hCollecting google-cloud-bigquery-storage>=2.0.0\n", - " Downloading google_cloud_bigquery_storage-2.6.1-py2.py3-none-any.whl (125 kB)\n", - "\u001b[K |████████████████████████████████| 125 kB 60.5 MB/s \n", - "\u001b[?25hCollecting google-cloud-bigquery>=2.0.*\n", - " Downloading google_cloud_bigquery-2.22.1-py2.py3-none-any.whl (195 kB)\n", - "\u001b[K |████████████████████████████████| 195 kB 57.9 MB/s \n", - "\u001b[?25hCollecting grpcio>=1.34.0\n", - " Downloading grpcio-1.39.0-cp37-cp37m-manylinux2014_x86_64.whl (4.3 MB)\n", - "\u001b[K |████████████████████████████████| 4.3 MB 53.3 MB/s \n", - "\u001b[?25hCollecting proto-plus>=1.10.0\n", - " Downloading proto_plus-1.19.0-py3-none-any.whl (42 kB)\n", - "\u001b[K |████████████████████████████████| 42 kB 1.3 MB/s \n", - "\u001b[?25hCollecting google-resumable-media<3.0dev,>=0.6.0\n", - " Downloading google_resumable_media-1.3.1-py2.py3-none-any.whl (75 kB)\n", - "\u001b[K |████████████████████████████████| 75 kB 4.8 MB/s \n", - "\u001b[?25hCollecting google-api-core[grpc]<3.0.0dev,>=1.29.0\n", - " Downloading 
google_api_core-1.31.0-py2.py3-none-any.whl (93 kB)\n", - "\u001b[K |████████████████████████████████| 93 kB 1.4 MB/s \n", - "\u001b[?25hCollecting libcst>=0.2.5\n", - " Downloading libcst-0.3.19-py3-none-any.whl (513 kB)\n", - "\u001b[K |████████████████████████████████| 513 kB 50.1 MB/s \n", - "\u001b[?25hCollecting google-cloud-storage>=1.20.*\n", - " Downloading google_cloud_storage-1.41.0-py2.py3-none-any.whl (104 kB)\n", - "\u001b[K |████████████████████████████████| 104 kB 64.4 MB/s \n", - "\u001b[?25h Downloading google_cloud_storage-1.40.0-py2.py3-none-any.whl (104 kB)\n", - "\u001b[K |████████████████████████████████| 104 kB 75.0 MB/s \n", - "\u001b[?25hCollecting google-crc32c<2.0dev,>=1.0\n", - " Downloading google_crc32c-1.1.2-cp37-cp37m-manylinux2014_x86_64.whl (38 kB)\n", - "Requirement already satisfied: cffi>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from google-crc32c<2.0dev,>=1.0->google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery>=2.0.*->feast) (1.14.6)\n", - "Requirement already satisfied: pycparser in /usr/local/lib/python3.7/dist-packages (from cffi>=1.0.0->google-crc32c<2.0dev,>=1.0->google-resumable-media<3.0dev,>=0.6.0->google-cloud-bigquery>=2.0.*->feast) (2.20)\n", - "Collecting typing-inspect>=0.4.0\n", - " Downloading typing_inspect-0.7.1-py3-none-any.whl (8.4 kB)\n", - "Collecting mypy-extensions>=0.3.0\n", - " Downloading mypy_extensions-0.4.3-py2.py3-none-any.whl (4.5 kB)\n", - "Building wheels for collected packages: pandavro, PyYAML\n", - " Building wheel for pandavro (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for pandavro: filename=pandavro-1.5.2-py3-none-any.whl size=2953 sha256=ef61d7c0b4e22b55a5c39c933ee0a88fe71974f888751b75a8baca95690ea171\n", - " Stored in directory: /root/.cache/pip/wheels/33/3f/96/9f1b46a9f7f6043ff4741b1aa1a7b249ba33be4dc1d08843e4\n", - " Building wheel for PyYAML (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for PyYAML: filename=PyYAML-5.3.1-cp37-cp37m-linux_x86_64.whl size=44636 sha256=f01413855e330051c5c6b3f9d8bac9a478552572803aa3418838bd1d1f4cd756\n", - " Stored in directory: /root/.cache/pip/wheels/5e/03/1e/e1e954795d6f35dfc7b637fe2277bff021303bd9570ecea653\n", - "Successfully built pandavro PyYAML\n", - "Installing collected packages: mypy-extensions, googleapis-common-protos, typing-inspect, PyYAML, grpcio, google-crc32c, google-api-core, fastavro, tenacity, pydantic, proto-plus, pandavro, mmh3, libcst, google-resumable-media, google-cloud-core, colorama, google-cloud-storage, google-cloud-datastore, google-cloud-bigquery-storage, google-cloud-bigquery, feast\n", - " Attempting uninstall: googleapis-common-protos\n", - " Found existing installation: googleapis-common-protos 1.53.0\n", - " Uninstalling googleapis-common-protos-1.53.0:\n", - " Successfully uninstalled googleapis-common-protos-1.53.0\n", - " Attempting uninstall: PyYAML\n", - " Found existing installation: PyYAML 3.13\n", - " Uninstalling PyYAML-3.13:\n", - " Successfully uninstalled PyYAML-3.13\n", - " Attempting uninstall: grpcio\n", - " Found existing installation: grpcio 1.34.1\n", - " Uninstalling grpcio-1.34.1:\n", - " Successfully uninstalled grpcio-1.34.1\n", - " Attempting uninstall: google-api-core\n", - " Found existing installation: google-api-core 1.26.3\n", - " Uninstalling google-api-core-1.26.3:\n", - " Successfully uninstalled google-api-core-1.26.3\n", - " Attempting uninstall: google-resumable-media\n", - " Found existing installation: google-resumable-media 0.4.1\n", - " Uninstalling google-resumable-media-0.4.1:\n", - " Successfully uninstalled google-resumable-media-0.4.1\n", - " Attempting uninstall: google-cloud-core\n", - " Found existing installation: google-cloud-core 1.0.3\n", - " Uninstalling google-cloud-core-1.0.3:\n", - " Successfully uninstalled google-cloud-core-1.0.3\n", - " Attempting uninstall: 
google-cloud-storage\n", - " Found existing installation: google-cloud-storage 1.18.1\n", - " Uninstalling google-cloud-storage-1.18.1:\n", - " Successfully uninstalled google-cloud-storage-1.18.1\n", - " Attempting uninstall: google-cloud-datastore\n", - " Found existing installation: google-cloud-datastore 1.8.0\n", - " Uninstalling google-cloud-datastore-1.8.0:\n", - " Successfully uninstalled google-cloud-datastore-1.8.0\n", - " Attempting uninstall: google-cloud-bigquery-storage\n", - " Found existing installation: google-cloud-bigquery-storage 1.1.0\n", - " Uninstalling google-cloud-bigquery-storage-1.1.0:\n", - " Successfully uninstalled google-cloud-bigquery-storage-1.1.0\n", - " Attempting uninstall: google-cloud-bigquery\n", - " Found existing installation: google-cloud-bigquery 1.21.0\n", - " Uninstalling google-cloud-bigquery-1.21.0:\n", - " Successfully uninstalled google-cloud-bigquery-1.21.0\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "tensorflow 2.5.0 requires grpcio~=1.34.0, but you have grpcio 1.39.0 which is incompatible.\n", - "pandas-gbq 0.13.3 requires google-cloud-bigquery[bqstorage,pandas]<2.0.0dev,>=1.11.1, but you have google-cloud-bigquery 2.22.1 which is incompatible.\u001b[0m\n", - "Successfully installed PyYAML-5.3.1 colorama-0.4.4 fastavro-1.4.4 feast-0.11.0 google-api-core-1.31.0 google-cloud-bigquery-2.22.1 google-cloud-bigquery-storage-2.6.1 google-cloud-core-1.4.4 google-cloud-datastore-2.1.5 google-cloud-storage-1.40.0 google-crc32c-1.1.2 google-resumable-media-1.3.1 googleapis-common-protos-1.52.0 grpcio-1.39.0 libcst-0.3.19 mmh3-3.0.0 mypy-extensions-0.4.3 pandavro-1.5.2 proto-plus-1.19.0 pydantic-1.8.2 tenacity-8.0.1 typing-inspect-0.7.1\n" - ], - "name": "stdout" - }, - { - "output_type": "display_data", - "data": { - "application/vnd.colab-display-data+json": { - "pip_warning": { - "packages": [ - "google" - ] - } - } - }, - "metadata": { - "tags": [] - } - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "P8pFSVUp34W5" - }, - "source": [ - "#### Check feast version" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "soTYiMPXcNco" - }, - "source": [ - "" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "cOSAfdZiUnFa", - "outputId": "2462ce77-242b-4018-b5d6-fd0baa239836" - }, - "source": [ - "!feast version " - ], - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Feast is an open source project that collects anonymized error reporting and usage statistics. 
To opt out or learn more see https://docs.feast.dev/reference/usage\n", - "Feast SDK Version: \"feast 0.11.0\"\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pC4AzJ_b396l" - }, - "source": [ - "## Step 2: Clone the Git repo\n", - "\n", - "Clone the Driver Ranking Git repo into your Colab Folder" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "4Qim_qbtUyGA", - "outputId": "6556ee99-aac3-468c-a9d5-8a643387712d" - }, - "source": [ - "!git clone https://github.com/feast-dev/feast-driver-ranking-tutorial.git" - ], - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Cloning into 'feast-driver-ranking-tutorial'...\n", - "remote: Enumerating objects: 34, done.\u001b[K\n", - "remote: Counting objects: 100% (34/34), done.\u001b[K\n", - "remote: Compressing objects: 100% (24/24), done.\u001b[K\n", - "remote: Total 34 (delta 13), reused 28 (delta 8), pack-reused 0\u001b[K\n", - "Unpacking objects: 100% (34/34), done.\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tLnM3IOy5C5l" - }, - "source": [ - "## Step 3: Set up your Google Cloud Platform (GCP) Configurations" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KiNaOTKzWIcb" - }, - "source": [ - "## Authenticate into GCP\n", - "This will allow you to do the advanced section of the tutorial, where you materialize remotely on GCP.\n", - "Feast spins up infrastructure on GCP using the credentials in our environment. 
Run the following cell to log into GCP:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "8Tj3MUPHWPTF" - }, - "source": [ - "from google.colab import auth\n", - "auth.authenticate_user()" - ], - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "d8yzazB-Wfqc" - }, - "source": [ - "### Set configurations\n", - "Set the following configuration, which we'll be using throughout the tutorial:\n", - "\n", - "* PROJECT_ID: Your project.\n", - "* BUCKET_NAME: The name of a bucket which will be used to store the feature store registry and model artifacts.\n", - "* BIGQUERY_DATASET_NAME: The name of a dataset which will be used to create tables containing features.\n", - "* AI_PLATFORM_MODEL_NAME: The name of the model which will be created in AI Platform." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "zV0sgfOTWeXz", - "outputId": "d324609f-83e9-444c-c581-f2ecfd103bb6" - }, - "source": [ - "PROJECT_ID= \"kf-feast\" #@param {type:\"string\"}\n", - "BUCKET_NAME= \"driver_ranking_tutorial\" #@param {type:\"string\"} custom\n", - "BIGQUERY_DATASET_NAME=\"feast_driver_ranking_tutorial\" #@param {type:\"string\"} custom\n", - "AI_PLATFORM_MODEL_NAME=\"feast_driver_rankin_jsd_model\" #@param {type:\"string\"\n", - "\n", - "! gcloud config set project $PROJECT_ID\n", - "%env GOOGLE_CLOUD_PROJECT=$PROJECT_ID\n", - "!echo project_id = $PROJECT_ID > ~/.bigqueryrc" - ], - "execution_count": 7, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Updated property [core/project].\n", - "env: GOOGLE_CLOUD_PROJECT=kf-feast\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "6Cs_SYr2gOjR", - "outputId": "4b3d811b-6685-46f8-9830-b0378962bbef" - }, - "source": [ - "# Only run if your bucket doesn't already exist!\n", - "! 
gsutil mb gs://$BUCKET_NAME" - ], - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Creating gs://driver_ranking_tutorial/...\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ohWMCVhS5PPN" - }, - "source": [ - "## Step 4: Apply and deploy feature definitions\n", - "\n", - "`feast apply` scans python files in the current directory for feature definitions and deploys infrastructure according to `feature_store.yaml`" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "izhTk0WWX3Tx", - "outputId": "444a11d3-db43-4170-c28b-51d6ea618660" - }, - "source": [ - "%%shell\n", - "cd /content/feast-driver-ranking-tutorial/driver_ranking/\n", - "feast apply" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Registered entity \u001b[1m\u001b[32mdriver_id\u001b[0m\n", - "Registered feature view \u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m\n", - "Deploying infrastructure for \u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 9 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lJlrf2Iu53BR" - }, - "source": [ - "### Inspect the files created under your local folder" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "IrJ6gqtdmKk7", - "outputId": "db32950c-9a73-4c06-fde3-52c753929c9b" - }, - "source": [ - "%%shell\n", - "cd /content/feast-driver-ranking-tutorial/driver_ranking/data/\n", - "ls -l " - ], - "execution_count": 10, - "outputs": [ - { - "output_type": "stream", - "text": [ - "total 20\n", - "-rw-r--r-- 1 root root 16384 Jul 26 20:43 online.db\n", - "-rw-r--r-- 1 root root 310 Jul 26 20:43 registry.db\n" - ], - "name": "stdout" - }, - { 
- "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 10 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bHBfTEau6Qt9" - }, - "source": [ - "## Step 5: Train your model" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "F-Pc4Jo4kzBL", - "outputId": "6414795f-9330-44b3-ee5c-992c9dd55db7" - }, - "source": [ - "import feast\n", - "from joblib import dump\n", - "import pandas as pd\n", - "from sklearn.linear_model import LinearRegression\n", - "\n", - "# Load driver order data\n", - "orders = pd.read_csv(\"/content/feast-driver-ranking-tutorial/driver_orders.csv\", sep=\"\\t\")\n", - "orders[\"event_timestamp\"] = pd.to_datetime(orders[\"event_timestamp\"])\n", - "\n", - "# Connect to your feature store provider\n", - "fs = feast.FeatureStore(repo_path=\"/content/feast-driver-ranking-tutorial/driver_ranking\")\n", - " \n", - "# Retrieve training data from BigQuery\n", - "training_df = fs.get_historical_features(\n", - " entity_df=orders,\n", - " feature_refs=[\n", - " \"driver_hourly_stats:conv_rate\",\n", - " \"driver_hourly_stats:acc_rate\",\n", - " \"driver_hourly_stats:avg_daily_trips\",\n", - " ],\n", - ").to_df()\n", - "\n", - "print(\"----- Feature schema -----\\n\")\n", - "print(training_df.info())\n", - "\n", - "print()\n", - "print(\"----- Example features -----\\n\")\n", - "print(training_df.head())\n", - "\n", - "# Train model\n", - "target = \"trip_completed\"\n", - "\n", - "reg = LinearRegression()\n", - "train_X = training_df[training_df.columns.drop(target).drop(\"event_timestamp\")]\n", - "train_Y = training_df.loc[:, target]\n", - "reg.fit(train_X[sorted(train_X)], train_Y)\n", - "\n", - "# Save model\n", - "dump(reg, \"driver_model.bin\")" - ], - "execution_count": 13, - "outputs": [ - { - "output_type": "stream", - "text": [ - "----- Feature schema -----\n", - "\n", - "\n", - 
"RangeIndex: 10 entries, 0 to 9\n", - "Data columns (total 6 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 event_timestamp 10 non-null datetime64[ns, UTC]\n", - " 1 driver_id 10 non-null int64 \n", - " 2 trip_completed 10 non-null int64 \n", - " 3 driver_hourly_stats__conv_rate 10 non-null float64 \n", - " 4 driver_hourly_stats__acc_rate 10 non-null float64 \n", - " 5 driver_hourly_stats__avg_daily_trips 10 non-null int64 \n", - "dtypes: datetime64[ns, UTC](1), float64(2), int64(3)\n", - "memory usage: 608.0 bytes\n", - "None\n", - "\n", - "----- Example features -----\n", - "\n", - " event_timestamp ... driver_hourly_stats__avg_daily_trips\n", - "0 2021-04-17 04:29:28+00:00 ... 982\n", - "1 2021-04-18 04:29:28+00:00 ... 982\n", - "2 2021-04-19 04:29:28+00:00 ... 982\n", - "3 2021-04-17 12:29:28+00:00 ... 551\n", - "4 2021-04-18 12:29:28+00:00 ... 551\n", - "\n", - "[5 rows x 6 columns]\n" - ], - "name": "stdout" - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['driver_model.bin']" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 13 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HpHacyo47Are" - }, - "source": [ - "## Step 6: Materialize your online store\n", - "Change the provider field in `driver_ranking/feature_store.yam` from `local` to `gcp`\n", - "\n", - "Then apply and materialize data to Firestore" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "67627WRSajIk", - "outputId": "056ce886-36a5-48dc-dcbb-23e032695708" - }, - "source": [ - "!cd /content/feast-driver-ranking-tutorial/driver_ranking/ && feast materialize-incremental 2022-01-01T00:00:00" - ], - "execution_count": 14, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Materializing \u001b[1m\u001b[32m1\u001b[0m feature views to \u001b[1m\u001b[32m2022-01-01 00:00:00+00:00\u001b[0m into the 
\u001b[1m\u001b[32mdatastore\u001b[0m online store.\n", - "\n", - "\u001b[1m\u001b[32mdriver_hourly_stats\u001b[0m from \u001b[1m\u001b[32m2020-07-27 20:45:14+00:00\u001b[0m to \u001b[1m\u001b[32m2022-01-01 00:00:00+00:00\u001b[0m:\n", - "100%|███████████████████████████████████████████████████████████████| 10/10 [00:01<00:00, 6.16it/s]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-869cxQO2ana" - }, - "source": [ - "### Step 7: Make Prediction" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "VP85XeGFzNYl" - }, - "source": [ - "import pandas as pd\n", - "import feast\n", - "from joblib import load\n", - "\n", - "\n", - "class DriverRankingModel:\n", - " def __init__(self):\n", - " # Load model\n", - " self.model = load(\"/content/driver_model.bin\")\n", - "\n", - " # Set up feature store\n", - " self.fs = feast.FeatureStore(repo_path=\"/content/feast-driver-ranking-tutorial/driver_ranking/\")\n", - "\n", - " def predict(self, driver_ids):\n", - " # Read features from Feast\n", - " driver_features = self.fs.get_online_features(\n", - " entity_rows=[{\"driver_id\": driver_id} for driver_id in driver_ids],\n", - " feature_refs=[\n", - " \"driver_hourly_stats:conv_rate\",\n", - " \"driver_hourly_stats:acc_rate\",\n", - " \"driver_hourly_stats:avg_daily_trips\",\n", - " ],\n", - " )\n", - " df = pd.DataFrame.from_dict(driver_features.to_dict())\n", - "\n", - " # Make prediction\n", - " df[\"prediction\"] = self.model.predict(df[sorted(df)])\n", - "\n", - " # Choose best driver\n", - " best_driver_id = df[\"driver_id\"].iloc[df[\"prediction\"].argmax()]\n", - "\n", - " # return best driver\n", - " return best_driver_id" - ], - "execution_count": 19, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "f9AJ842Rk3E9" - }, - "source": [ - "def make_drivers_prediction():\n", - " drivers = [1001, 1002, 1003, 1004]\n", - " model = DriverRankingModel()\n", - " best_driver = 
model.predict(drivers)\n", - " print(f\"Prediction for best driver id: {best_driver}\")" - ], - "execution_count": 20, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "lq2TNXfjbb8e", - "outputId": "7c163361-491b-4eb7-87e0-6b68eccc9030" - }, - "source": [ - "make_drivers_prediction()" - ], - "execution_count": 21, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Prediction for best driver id: 1001\n" - ], - "name": "stdout" - } - ] - } - ] -} \ No newline at end of file From 8a21034a384344daeed64818a47b86bc6d9fcdd7 Mon Sep 17 00:00:00 2001 From: Jules Damji Date: Mon, 26 Jul 2021 14:45:32 -0700 Subject: [PATCH 4/8] Added a Colab button link Signed-off-by: Jules Damji --- notebooks/Driver_Ranking_Tutorial.ipynb | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/notebooks/Driver_Ranking_Tutorial.ipynb b/notebooks/Driver_Ranking_Tutorial.ipynb index 38026f5..7a54805 100644 --- a/notebooks/Driver_Ranking_Tutorial.ipynb +++ b/notebooks/Driver_Ranking_Tutorial.ipynb @@ -5,7 +5,9 @@ "colab": { "name": "Driver Ranking Tutorial", "provenance": [], - "collapsed_sections": [] + "collapsed_sections": [], + "authorship_tag": "ABX9TyPxfJo7miJVPRo4IDVQvAqB", + "include_colab_link": true }, "kernelspec": { "name": "python3", @@ -16,6 +18,16 @@ } }, "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, { "cell_type": "markdown", "metadata": { From a8bfa4f099907a08fd8719d48ee2f0923edcc57a Mon Sep 17 00:00:00 2001 From: Jules Damji Date: Mon, 26 Jul 2021 16:37:55 -0700 Subject: [PATCH 5/8] Fixed colab link to point to the origi/master Signed-off-by: Jules Damji --- notebooks/Driver_Ranking_Tutorial.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/Driver_Ranking_Tutorial.ipynb b/notebooks/Driver_Ranking_Tutorial.ipynb index 
7a54805..f0a22a2 100644 --- a/notebooks/Driver_Ranking_Tutorial.ipynb +++ b/notebooks/Driver_Ranking_Tutorial.ipynb @@ -25,7 +25,7 @@ "colab_type": "text" }, "source": [ - "\"Open" + "\"Open" ] }, { @@ -775,4 +775,4 @@ ] } ] -} \ No newline at end of file +} From b38d205b38765fe7403e09a2801ff077ba5c690c Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Tue, 30 Nov 2021 15:17:38 -0800 Subject: [PATCH 6/8] Switch from local to gcp provider Signed-off-by: Felix Wang --- driver_ranking/feature_store.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver_ranking/feature_store.yaml b/driver_ranking/feature_store.yaml index f8dac74..c2d0a74 100644 --- a/driver_ranking/feature_store.yaml +++ b/driver_ranking/feature_store.yaml @@ -1,3 +1,3 @@ project: driver_ranking registry: data/registry.db -provider: local \ No newline at end of file +provider: gcp From d9aa09636a2ec890029340cc1639fc05be70000b Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Tue, 30 Nov 2021 15:22:42 -0800 Subject: [PATCH 7/8] Update Driver_Ranking_Tutorial.ipynb --- notebooks/Driver_Ranking_Tutorial.ipynb | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/notebooks/Driver_Ranking_Tutorial.ipynb b/notebooks/Driver_Ranking_Tutorial.ipynb index f0a22a2..4aece9e 100644 --- a/notebooks/Driver_Ranking_Tutorial.ipynb +++ b/notebooks/Driver_Ranking_Tutorial.ipynb @@ -653,9 +653,7 @@ }, "source": [ "## Step 6: Materialize your online store\n", - "Change the provider field in `driver_ranking/feature_store.yam` from `local` to `gcp`\n", - "\n", - "Then apply and materialize data to Firestore" + "Apply and materialize data to Firestore" ] }, { @@ -716,7 +714,7 @@ " # Read features from Feast\n", " driver_features = self.fs.get_online_features(\n", " entity_rows=[{\"driver_id\": driver_id} for driver_id in driver_ids],\n", - " feature_refs=[\n", + " features=[\n", " \"driver_hourly_stats:conv_rate\",\n", " \"driver_hourly_stats:acc_rate\",\n", " 
\"driver_hourly_stats:avg_daily_trips\",\n", From 18979e7af56f573c56c83caf8b320af57cab9500 Mon Sep 17 00:00:00 2001 From: Felix Wang Date: Tue, 30 Nov 2021 15:29:16 -0800 Subject: [PATCH 8/8] Update Driver_Ranking_Tutorial.ipynb --- notebooks/Driver_Ranking_Tutorial.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/Driver_Ranking_Tutorial.ipynb b/notebooks/Driver_Ranking_Tutorial.ipynb index 4aece9e..8d37c59 100644 --- a/notebooks/Driver_Ranking_Tutorial.ipynb +++ b/notebooks/Driver_Ranking_Tutorial.ipynb @@ -293,7 +293,7 @@ "output_type": "stream", "text": [ "Feast is an open source project that collects anonymized error reporting and usage statistics. To opt out or learn more see https://docs.feast.dev/reference/usage\n", - "Feast SDK Version: \"feast 0.11.0\"\n" + "Feast SDK Version: \"feast 0.15.1\"\n" ], "name": "stdout" }