From 0d3770495150660cd9045f3d64ae91d409b5e82f Mon Sep 17 00:00:00 2001
From: bryangalindo <galindo.bryan08@gmail.com>
Date: Sun, 10 Sep 2023 23:34:11 -0400
Subject: [PATCH 1/2] Adds Dask Jupyter Notebook example for e.g., data
 scientists

---
 examples/dask/hello_world/notebook.ipynb | 340 +++++++++++++++++++++++
 1 file changed, 340 insertions(+)
 create mode 100644 examples/dask/hello_world/notebook.ipynb

diff --git a/examples/dask/hello_world/notebook.ipynb b/examples/dask/hello_world/notebook.ipynb
new file mode 100644
index 000000000..60d6000b6
--- /dev/null
+++ b/examples/dask/hello_world/notebook.ipynb
@@ -0,0 +1,340 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    }
+   },
+   "source": [
+    "Uncomment and run the cell below if you are in a Google Colab environment. It will:\n",
+    "1. Mount google drive. You will be asked to authenticate and give permissions.\n",
+    "2. Change directory to google drive.\n",
+    "3. Make a directory \"hamilton-tutorials\"\n",
+    "4. Change directory to it.\n",
+    "5. Clone this repository to your google drive\n",
+    "6. Move your current directory to the hello_world example\n",
+    "7. Install requirements.\n",
+    "\n",
+    "This means that any modifications will be saved, and you won't lose them if you close your browser."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "## 1. Mount google drive\n",
+    "# from google.colab import drive\n",
+    "# drive.mount('/content/drive')\n",
+    "## 2. Change directory to google drive.\n",
+    "# %cd /content/drive/MyDrive\n",
+    "## 3. Make a directory \"hamilton-tutorials\"\n",
+    "# !mkdir hamilton-tutorials\n",
+    "## 4. Change directory to it.\n",
+    "# %cd hamilton-tutorials\n",
+    "## 5. Clone this repository to your google drive\n",
+    "# !git clone https://github.com/DAGWorks-Inc/hamilton/\n",
+    "## 6. Move your current directory to the hello_world example\n",
+    "# %cd hamilton/examples/hello_world\n",
+    "## 7. Install requirements.\n",
+    "# %pip install -r requirements.txt\n",
+    "# clear_output()  # optionally clear outputs\n",
+    "# To check your current working directory you can type `!pwd` in a cell and run it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Cell 2 - import modules to create part of the DAG from\n",
+    "# We use the autoreload extension that comes with ipython to automatically reload modules when\n",
+    "# the code in them changes.\n",
+    "\n",
+    "# import the jupyter extension\n",
+    "%load_ext autoreload\n",
+    "# set it to only reload the modules imported\n",
+    "%autoreload 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# Import modules\n",
+    "import pandas as pd\n",
+    "from dask import dataframe\n",
+    "from dask.distributed import (\n",
+    "    Client,\n",
+    "    LocalCluster,\n",
+    ")\n",
+    "\n",
+    "from hamilton import (\n",
+    "    ad_hoc_utils,\n",
+    "    driver,\n",
+    ")\n",
+    "from hamilton.plugins import h_dask"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# We'll place the data loaders into a new module\n",
+    "\n",
+    "def spend(spend_location: str, spend_partitions: int) -> dataframe.Series:\n",
+    "    \"\"\"Dummy function showing how to wire through loading data.\n",
+    "\n",
+    "    :param spend_location:\n",
+    "    :param spend_partitions: number of partitions to segment the data into\n",
+    "    :return:\n",
+    "    \"\"\"\n",
+    "    return dataframe.from_pandas(\n",
+    "        pd.Series([10, 10, 20, 40, 40, 50]), name=\"spend\", npartitions=spend_partitions\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "def signups(signups_location: str, signups_partitions: int) -> dataframe.Series:\n",
+    "    \"\"\"Dummy function showing how to wire through loading data.\n",
+    "\n",
+    "    :param signups_location:\n",
+    "    :param signups_partitions: number of partitions to segment the data into\n",
+    "    :return:\n",
+    "    \"\"\"\n",
+    "    return dataframe.from_pandas(\n",
+    "        pd.Series([1, 10, 50, 100, 200, 400]), name=\"signups\", npartitions=signups_partitions\n",
+    "    )\n",
+    "\n",
+    "data_loaders = ad_hoc_utils.create_temporary_module(\n",
+    "    spend,\n",
+    "    signups,\n",
+    "    module_name=\"data_loaders\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# We'll place the spend calculations into a new module\n",
+    "\n",
+    "def avg_3wk_spend(spend: pd.Series) -> pd.Series:\n",
+    "    \"\"\"Rolling 3 week average spend.\"\"\"\n",
+    "    return spend.rolling(3).mean()\n",
+    "\n",
+    "\n",
+    "def spend_per_signup(spend: pd.Series, signups: pd.Series) -> pd.Series:\n",
+    "    \"\"\"The cost per signup in relation to spend.\"\"\"\n",
+    "    return spend / signups\n",
+    "\n",
+    "\n",
+    "def spend_mean(spend: pd.Series) -> float:\n",
+    "    \"\"\"Shows function creating a scalar. In this case it computes the mean of the entire column.\"\"\"\n",
+    "    return spend.mean()\n",
+    "\n",
+    "\n",
+    "def spend_zero_mean(spend: pd.Series, spend_mean: float) -> pd.Series:\n",
+    "    \"\"\"Shows function that takes a scalar. In this case to zero mean spend.\"\"\"\n",
+    "    return spend - spend_mean\n",
+    "\n",
+    "\n",
+    "def spend_std_dev(spend: pd.Series) -> float:\n",
+    "    \"\"\"Function that computes the standard deviation of the spend column.\"\"\"\n",
+    "    return spend.std()\n",
+    "\n",
+    "\n",
+    "def spend_zero_mean_unit_variance(spend_zero_mean: pd.Series, spend_std_dev: float) -> pd.Series:\n",
+    "    \"\"\"Function showing one way to make spend have zero mean and unit variance.\"\"\"\n",
+    "    return spend_zero_mean / spend_std_dev\n",
+    "\n",
+    "spend_calculations = ad_hoc_utils.create_temporary_module(\n",
+    "    avg_3wk_spend,\n",
+    "    spend_per_signup,\n",
+    "    spend_mean,\n",
+    "    spend_zero_mean,\n",
+    "    spend_std_dev,\n",
+    "    spend_zero_mean_unit_variance,\n",
+    "    module_name=\"spend_calculations\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Note: Hamilton collects completely anonymous data about usage. This will help us improve Hamilton over time. See https://github.com/dagworks-inc/hamilton#usage-analytics--data-privacy for details.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "LocalCluster(c13c9b83, 'tcp://127.0.0.1:56333', workers=4, threads=8, memory=16.00 GiB)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Set up the driver, input and output columns\n",
+    "\n",
+    "cluster = LocalCluster()\n",
+    "client  = Client(cluster)\n",
+    "\n",
+    "print(client.cluster)\n",
+    "\n",
+    "adapter = h_dask.DaskGraphAdapter(\n",
+    "    client,\n",
+    "    h_dask.DaskDataFrameResult(),\n",
+    "    visualize_kwargs={\"filename\": \"run_dask.png\", \"format\": \"png\"},\n",
+    "    use_delayed=False,\n",
+    "    compute_at_end=False,\n",
+    ")\n",
+    "\n",
+    "config = {\n",
+    "    \"spend_location\": \"some file path\",\n",
+    "    \"spend_partitions\": 2,\n",
+    "    \"signups_location\": \"some file path\",\n",
+    "    \"signups_partitions\": 2,\n",
+    "    \"foobar\": \"some_other_data\",\n",
+    "}\n",
+    "\n",
+    "dr = driver.Driver(config, spend_calculations, data_loaders, adapter=adapter)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   spend  signups  avg_3wk_spend  spend_per_signup  spend_mean  spend_zero_mean_unit_variance           foobar\n",
+      "0     10        1            NaN            10.000   28.333333                      -1.064405  some_other_data\n",
+      "1     10       10            NaN             1.000   28.333333                      -1.064405  some_other_data\n",
+      "2     20       50      13.333333             0.400   28.333333                      -0.483821  some_other_data\n",
+      "3     40      100      23.333333             0.400   28.333333                       0.677349  some_other_data\n",
+      "4     40      200      33.333333             0.200   28.333333                       0.677349  some_other_data\n",
+      "5     50      400      43.333333             0.125   28.333333                       1.257934  some_other_data\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Execute the driver.\n",
+    "\n",
+    "output_columns = [\n",
+    "    \"spend\",\n",
+    "    \"signups\",\n",
+    "    \"avg_3wk_spend\",\n",
+    "    \"spend_per_signup\",\n",
+    "    \"spend_mean\",\n",
+    "    \"spend_zero_mean_unit_variance\",\n",
+    "    \"foobar\",\n",
+    "]\n",
+    "\n",
+    "dask_df = dr.execute(output_columns)  # it's dask dataframe -- it hasn't been evaluated yet.\n",
+    "df = dask_df.compute()\n",
+    "\n",
+    "# To visualize do `pip install \"sf-hamilton[visualization]\"` if you want these to work\n",
+    "dr.visualize_execution(output_columns, './hello_world_dask', {\"format\": \"png\"})\n",
+    "dr.display_all_functions(\"./my_full_dag.dot\")\n",
+    "print(df.to_string())\n",
+    "client.shutdown()\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

From 9b5214f428081a8f6da27c11b56c82ae0ca50bb2 Mon Sep 17 00:00:00 2001
From: bryangalindo <galindo.bryan08@gmail.com>
Date: Tue, 12 Sep 2023 00:40:31 -0400
Subject: [PATCH 2/2] Updates temp module with %%writefile to make prod push
 easier

---
 examples/dask/hello_world/notebook.ipynb | 286 +++++++++++++++++++----
 1 file changed, 245 insertions(+), 41 deletions(-)

diff --git a/examples/dask/hello_world/notebook.ipynb b/examples/dask/hello_world/notebook.ipynb
index 60d6000b6..cc36baf1d 100644
--- a/examples/dask/hello_world/notebook.ipynb
+++ b/examples/dask/hello_world/notebook.ipynb
@@ -93,6 +93,7 @@
    "outputs": [],
    "source": [
     "# Import modules\n",
+    "\n",
     "import pandas as pd\n",
     "from dask import dataframe\n",
     "from dask.distributed import (\n",
@@ -100,10 +101,7 @@
     "    LocalCluster,\n",
     ")\n",
     "\n",
-    "from hamilton import (\n",
-    "    ad_hoc_utils,\n",
-    "    driver,\n",
-    ")\n",
+    "from hamilton import driver\n",
     "from hamilton.plugins import h_dask"
    ]
   },
@@ -119,8 +117,21 @@
      "name": "#%%\n"
     }
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting data_loaders.py\n"
+     ]
+    }
+   ],
    "source": [
+    "%%writefile data_loaders.py\n",
+    "\n",
+    "import pandas as pd\n",
+    "from dask import dataframe\n",
+    "\n",
     "# We'll place the data loaders into a new module\n",
     "\n",
     "def spend(spend_location: str, spend_partitions: int) -> dataframe.Series:\n",
@@ -144,13 +155,7 @@
     "    \"\"\"\n",
     "    return dataframe.from_pandas(\n",
     "        pd.Series([1, 10, 50, 100, 200, 400]), name=\"signups\", npartitions=signups_partitions\n",
-    "    )\n",
-    "\n",
-    "data_loaders = ad_hoc_utils.create_temporary_module(\n",
-    "    spend,\n",
-    "    signups,\n",
-    "    module_name=\"data_loaders\",\n",
-    ")"
+    "    )"
    ]
   },
   {
@@ -165,10 +170,22 @@
      "name": "#%%\n"
     }
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Overwriting spend_calculations.py\n"
+     ]
+    }
+   ],
    "source": [
+    "%%writefile spend_calculations.py\n",
+    "\n",
     "# We'll place the spend calculations into a new module\n",
     "\n",
+    "import pandas as pd\n",
+    "\n",
     "def avg_3wk_spend(spend: pd.Series) -> pd.Series:\n",
     "    \"\"\"Rolling 3 week average spend.\"\"\"\n",
     "    return spend.rolling(3).mean()\n",
@@ -196,17 +213,7 @@
     "\n",
     "def spend_zero_mean_unit_variance(spend_zero_mean: pd.Series, spend_std_dev: float) -> pd.Series:\n",
     "    \"\"\"Function showing one way to make spend have zero mean and unit variance.\"\"\"\n",
-    "    return spend_zero_mean / spend_std_dev\n",
-    "\n",
-    "spend_calculations = ad_hoc_utils.create_temporary_module(\n",
-    "    avg_3wk_spend,\n",
-    "    spend_per_signup,\n",
-    "    spend_mean,\n",
-    "    spend_zero_mean,\n",
-    "    spend_std_dev,\n",
-    "    spend_zero_mean_unit_variance,\n",
-    "    module_name=\"spend_calculations\",\n",
-    ")"
+    "    return spend_zero_mean / spend_std_dev"
    ]
   },
   {
@@ -233,12 +240,14 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "LocalCluster(c13c9b83, 'tcp://127.0.0.1:56333', workers=4, threads=8, memory=16.00 GiB)\n"
+      "LocalCluster(94f8c394, 'tcp://127.0.0.1:50070', workers=4, threads=8, memory=16.00 GiB)\n"
      ]
     }
    ],
    "source": [
-    "# Set up the driver, input and output columns\n",
+    "%aimport data_loaders, spend_calculations\n",
+    "\n",
+    "# Set up the local Dask cluster, adapter, and driver.\n",
     "\n",
     "cluster = LocalCluster()\n",
     "client  = Client(cluster)\n",
@@ -278,17 +287,184 @@
    },
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "   spend  signups  avg_3wk_spend  spend_per_signup  spend_mean  spend_zero_mean_unit_variance           foobar\n",
-      "0     10        1            NaN            10.000   28.333333                      -1.064405  some_other_data\n",
-      "1     10       10            NaN             1.000   28.333333                      -1.064405  some_other_data\n",
-      "2     20       50      13.333333             0.400   28.333333                      -0.483821  some_other_data\n",
-      "3     40      100      23.333333             0.400   28.333333                       0.677349  some_other_data\n",
-      "4     40      200      33.333333             0.200   28.333333                       0.677349  some_other_data\n",
-      "5     50      400      43.333333             0.125   28.333333                       1.257934  some_other_data\n"
-     ]
+     "data": {
+      "image/svg+xml": [
+       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
+       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
+       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
+       "<!-- Generated by graphviz version 8.1.0 (20230707.0739)\n",
+       " -->\n",
+       "<!-- Pages: 1 -->\n",
+       "<svg width=\"945pt\" height=\"332pt\"\n",
+       " viewBox=\"0.00 0.00 945.02 332.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
+       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 328)\">\n",
+       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-328 941.02,-328 941.02,4 -4,4\"/>\n",
+       "<!-- signups -->\n",
+       "<g id=\"node1\" class=\"node\">\n",
+       "<title>signups</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"538.02,-252 480.77,-252 480.77,-216 538.02,-216 538.02,-252\"/>\n",
+       "<text text-anchor=\"middle\" x=\"509.39\" y=\"-228.95\" font-family=\"Times,serif\" font-size=\"14.00\">signups</text>\n",
+       "</g>\n",
+       "<!-- spend_per_signup -->\n",
+       "<g id=\"node7\" class=\"node\">\n",
+       "<title>spend_per_signup</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"556.52,-180 442.27,-180 442.27,-144 556.52,-144 556.52,-180\"/>\n",
+       "<text text-anchor=\"middle\" x=\"499.39\" y=\"-156.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_per_signup</text>\n",
+       "</g>\n",
+       "<!-- signups&#45;&gt;spend_per_signup -->\n",
+       "<g id=\"edge9\" class=\"edge\">\n",
+       "<title>signups&#45;&gt;spend_per_signup</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M506.92,-215.7C505.85,-208.24 504.58,-199.32 503.39,-190.97\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"506.71,-190.51 501.83,-181.1 499.78,-191.5 506.71,-190.51\"/>\n",
+       "</g>\n",
+       "<!-- spend_partitions -->\n",
+       "<g id=\"node2\" class=\"node\">\n",
+       "<title>spend_partitions</title>\n",
+       "<ellipse fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" cx=\"96.39\" cy=\"-306\" rx=\"96.39\" ry=\"18\"/>\n",
+       "<text text-anchor=\"middle\" x=\"96.39\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: spend_partitions</text>\n",
+       "</g>\n",
+       "<!-- spend -->\n",
+       "<g id=\"node5\" class=\"node\">\n",
+       "<title>spend</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"329.39,-252 275.39,-252 275.39,-216 329.39,-216 329.39,-252\"/>\n",
+       "<text text-anchor=\"middle\" x=\"302.39\" y=\"-228.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend</text>\n",
+       "</g>\n",
+       "<!-- spend_partitions&#45;&gt;spend -->\n",
+       "<g id=\"edge7\" class=\"edge\">\n",
+       "<title>spend_partitions&#45;&gt;spend</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M141.6,-289.64C178.52,-277.09 230.09,-259.57 264.79,-247.78\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"265.63,-250.85 273.98,-244.32 263.38,-244.22 265.63,-250.85\"/>\n",
+       "</g>\n",
+       "<!-- spend_std_dev -->\n",
+       "<g id=\"node3\" class=\"node\">\n",
+       "<title>spend_std_dev</title>\n",
+       "<ellipse fill=\"none\" stroke=\"black\" cx=\"131.39\" cy=\"-90\" rx=\"65.68\" ry=\"18\"/>\n",
+       "<text text-anchor=\"middle\" x=\"131.39\" y=\"-84.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_std_dev</text>\n",
+       "</g>\n",
+       "<!-- spend_zero_mean_unit_variance -->\n",
+       "<g id=\"node4\" class=\"node\">\n",
+       "<title>spend_zero_mean_unit_variance</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"309.64,-36 115.14,-36 115.14,0 309.64,0 309.64,-36\"/>\n",
+       "<text text-anchor=\"middle\" x=\"212.39\" y=\"-12.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_zero_mean_unit_variance</text>\n",
+       "</g>\n",
+       "<!-- spend_std_dev&#45;&gt;spend_zero_mean_unit_variance -->\n",
+       "<g id=\"edge5\" class=\"edge\">\n",
+       "<title>spend_std_dev&#45;&gt;spend_zero_mean_unit_variance</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M150.59,-72.41C160.58,-63.78 172.99,-53.05 184.06,-43.48\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"185.92,-45.63 191.2,-36.45 181.35,-40.34 185.92,-45.63\"/>\n",
+       "</g>\n",
+       "<!-- spend&#45;&gt;spend_std_dev -->\n",
+       "<g id=\"edge3\" class=\"edge\">\n",
+       "<title>spend&#45;&gt;spend_std_dev</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M274.9,-231.33C240.54,-227.72 182.37,-216.26 150.39,-180 135.78,-163.44 131.46,-138.43 130.55,-119.13\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"134.03,-119.2 130.36,-109.26 127.04,-119.32 134.03,-119.2\"/>\n",
+       "</g>\n",
+       "<!-- spend&#45;&gt;spend_per_signup -->\n",
+       "<g id=\"edge8\" class=\"edge\">\n",
+       "<title>spend&#45;&gt;spend_per_signup</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M329.78,-223.27C358.19,-213.17 403.51,-197.07 440.15,-184.05\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"440.98,-187.11 449.23,-180.47 438.64,-180.52 440.98,-187.11\"/>\n",
+       "</g>\n",
+       "<!-- spend_mean -->\n",
+       "<g id=\"node9\" class=\"node\">\n",
+       "<title>spend_mean</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"243.52,-180 159.27,-180 159.27,-144 243.52,-144 243.52,-180\"/>\n",
+       "<text text-anchor=\"middle\" x=\"201.39\" y=\"-156.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_mean</text>\n",
+       "</g>\n",
+       "<!-- spend&#45;&gt;spend_mean -->\n",
+       "<g id=\"edge10\" class=\"edge\">\n",
+       "<title>spend&#45;&gt;spend_mean</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M277.16,-215.52C264.42,-206.68 248.77,-195.84 235.01,-186.3\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"237.51,-183.08 227.3,-180.26 233.52,-188.83 237.51,-183.08\"/>\n",
+       "</g>\n",
+       "<!-- avg_3wk_spend -->\n",
+       "<g id=\"node11\" class=\"node\">\n",
+       "<title>avg_3wk_spend</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" points=\"365.27,-180 261.52,-180 261.52,-144 365.27,-144 365.27,-180\"/>\n",
+       "<text text-anchor=\"middle\" x=\"313.39\" y=\"-156.95\" font-family=\"Times,serif\" font-size=\"14.00\">avg_3wk_spend</text>\n",
+       "</g>\n",
+       "<!-- spend&#45;&gt;avg_3wk_spend -->\n",
+       "<g id=\"edge11\" class=\"edge\">\n",
+       "<title>spend&#45;&gt;avg_3wk_spend</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M305.11,-215.7C306.28,-208.24 307.68,-199.32 309,-190.97\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"312.61,-191.53 310.7,-181.1 305.69,-190.44 312.61,-191.53\"/>\n",
+       "</g>\n",
+       "<!-- spend_zero_mean -->\n",
+       "<g id=\"node13\" class=\"node\">\n",
+       "<title>spend_zero_mean</title>\n",
+       "<ellipse fill=\"none\" stroke=\"black\" cx=\"293.39\" cy=\"-90\" rx=\"77.97\" ry=\"18\"/>\n",
+       "<text text-anchor=\"middle\" x=\"293.39\" y=\"-84.95\" font-family=\"Times,serif\" font-size=\"14.00\">spend_zero_mean</text>\n",
+       "</g>\n",
+       "<!-- spend&#45;&gt;spend_zero_mean -->\n",
+       "<g id=\"edge12\" class=\"edge\">\n",
+       "<title>spend&#45;&gt;spend_zero_mean</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M329.83,-220.81C346.01,-212.01 365.08,-198.42 374.39,-180 381.61,-165.72 382,-158.07 374.39,-144 366.92,-130.19 354.11,-119.4 340.86,-111.26\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"343.1,-107.94 332.67,-106.05 339.64,-114.03 343.1,-107.94\"/>\n",
+       "</g>\n",
+       "<!-- signups_location -->\n",
+       "<g id=\"node6\" class=\"node\">\n",
+       "<title>signups_location</title>\n",
+       "<ellipse fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" cx=\"509.39\" cy=\"-306\" rx=\"97.93\" ry=\"18\"/>\n",
+       "<text text-anchor=\"middle\" x=\"509.39\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: signups_location</text>\n",
+       "</g>\n",
+       "<!-- signups_location&#45;&gt;signups -->\n",
+       "<g id=\"edge1\" class=\"edge\">\n",
+       "<title>signups_location&#45;&gt;signups</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M509.39,-287.7C509.39,-280.24 509.39,-271.32 509.39,-262.97\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"512.89,-263.1 509.39,-253.1 505.89,-263.1 512.89,-263.1\"/>\n",
+       "</g>\n",
+       "<!-- foobar -->\n",
+       "<g id=\"node8\" class=\"node\">\n",
+       "<title>foobar</title>\n",
+       "<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"937.02,-324 849.77,-324 849.77,-288 937.02,-288 937.02,-324\"/>\n",
+       "<text text-anchor=\"middle\" x=\"893.39\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: foobar</text>\n",
+       "</g>\n",
+       "<!-- spend_mean&#45;&gt;spend_zero_mean -->\n",
+       "<g id=\"edge13\" class=\"edge\">\n",
+       "<title>spend_mean&#45;&gt;spend_zero_mean</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M224.13,-143.7C235.89,-134.75 250.4,-123.71 263.1,-114.05\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"264.71,-116.46 270.54,-107.62 260.47,-110.89 264.71,-116.46\"/>\n",
+       "</g>\n",
+       "<!-- spend_location -->\n",
+       "<g id=\"node10\" class=\"node\">\n",
+       "<title>spend_location</title>\n",
+       "<ellipse fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" cx=\"302.39\" cy=\"-306\" rx=\"91.27\" ry=\"18\"/>\n",
+       "<text text-anchor=\"middle\" x=\"302.39\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: spend_location</text>\n",
+       "</g>\n",
+       "<!-- spend_location&#45;&gt;spend -->\n",
+       "<g id=\"edge6\" class=\"edge\">\n",
+       "<title>spend_location&#45;&gt;spend</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M302.39,-287.7C302.39,-280.24 302.39,-271.32 302.39,-262.97\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"305.89,-263.1 302.39,-253.1 298.89,-263.1 305.89,-263.1\"/>\n",
+       "</g>\n",
+       "<!-- signups_partitions -->\n",
+       "<g id=\"node12\" class=\"node\">\n",
+       "<title>signups_partitions</title>\n",
+       "<ellipse fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" cx=\"728.39\" cy=\"-306\" rx=\"103.04\" ry=\"18\"/>\n",
+       "<text text-anchor=\"middle\" x=\"728.39\" y=\"-300.95\" font-family=\"Times,serif\" font-size=\"14.00\">Input: signups_partitions</text>\n",
+       "</g>\n",
+       "<!-- signups_partitions&#45;&gt;signups -->\n",
+       "<g id=\"edge2\" class=\"edge\">\n",
+       "<title>signups_partitions&#45;&gt;signups</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M680.33,-289.64C640.79,-277 585.46,-259.31 548.57,-247.52\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"549.81,-243.93 539.22,-244.22 547.68,-250.59 549.81,-243.93\"/>\n",
+       "</g>\n",
+       "<!-- spend_zero_mean&#45;&gt;spend_zero_mean_unit_variance -->\n",
+       "<g id=\"edge4\" class=\"edge\">\n",
+       "<title>spend_zero_mean&#45;&gt;spend_zero_mean_unit_variance</title>\n",
+       "<path fill=\"none\" stroke=\"black\" d=\"M274.19,-72.41C264.2,-63.78 251.79,-53.05 240.72,-43.48\"/>\n",
+       "<polygon fill=\"black\" stroke=\"black\" points=\"243.43,-40.34 233.58,-36.45 238.86,-45.63 243.43,-40.34\"/>\n",
+       "</g>\n",
+       "</g>\n",
+       "</svg>\n"
+      ],
+      "text/plain": [
+       "<graphviz.graphs.Digraph at 0x11b250490>"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -308,11 +484,39 @@
     "df = dask_df.compute()\n",
     "\n",
     "# To visualize do `pip install \"sf-hamilton[visualization]\"` if you want these to work\n",
-    "dr.visualize_execution(output_columns, './hello_world_dask', {\"format\": \"png\"})\n",
-    "dr.display_all_functions(\"./my_full_dag.dot\")\n",
+    "dr.visualize_execution(output_columns)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "collapsed": false,
+    "jupyter": {
+     "outputs_hidden": false
+    },
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   spend  signups  avg_3wk_spend  spend_per_signup  spend_mean  spend_zero_mean_unit_variance           foobar\n",
+      "0     10        1            NaN            10.000   28.333333                      -1.064405  some_other_data\n",
+      "1     10       10            NaN             1.000   28.333333                      -1.064405  some_other_data\n",
+      "2     20       50      13.333333             0.400   28.333333                      -0.483821  some_other_data\n",
+      "3     40      100      23.333333             0.400   28.333333                       0.677349  some_other_data\n",
+      "4     40      200      33.333333             0.200   28.333333                       0.677349  some_other_data\n",
+      "5     50      400      43.333333             0.125   28.333333                       1.257934  some_other_data\n"
+     ]
+    }
+   ],
+   "source": [
     "print(df.to_string())\n",
-    "client.shutdown()\n",
-    "\n"
+    "client.shutdown()"
    ]
   }
  ],