diff --git a/examples/caching/in_memory_tutorial.ipynb b/examples/caching/in_memory_tutorial.ipynb index 605eea902..754c69b18 100644 --- a/examples/caching/in_memory_tutorial.ipynb +++ b/examples/caching/in_memory_tutorial.ipynb @@ -70,9 +70,97 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "raw_data\n", + "\n", + "raw_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "processed_data\n", + "\n", + "processed_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "raw_data->processed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "amount_per_country\n", + "\n", + "amount_per_country\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "processed_data->amount_per_country\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_processed_data_inputs\n", + "\n", + "cutoff_date\n", + "str\n", + "\n", + "\n", + "\n", + "_processed_data_inputs->processed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "input\n", + "\n", + "input\n", + "\n", + "\n", + "\n", + "function\n", + "\n", + "function\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "%%cell_to_module dataflow_module --display\n", "import pandas as pd\n", @@ -140,9 +228,112 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "raw_data::adapter::execute_node\n", + "processed_data::adapter::execute_node\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " cities date amount country currency amound_in_usd\n", + "0 New York 2024-09-13 478.23 USA USD 478.2300\n", + "1 Los Angeles 2024-09-12 251.67 USA USD 251.6700\n", + "2 Chicago 2024-09-11 989.34 USA USD 989.3400\n", + "3 Montréal 2024-09-11 742.14 Canada CAD 526.9194\n", + "4 Vancouver 2024-09-09 584.56 Canada CAD 415.0376\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "raw_data\n", + "\n", + "raw_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "processed_data\n", + "\n", + "processed_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "raw_data->processed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_processed_data_inputs\n", + "\n", + "cutoff_date\n", + "str\n", + "\n", + "\n", + "\n", + "_processed_data_inputs->processed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "input\n", + "\n", + "input\n", + "\n", + "\n", + "\n", + "function\n", + "\n", + "function\n", + "\n", + "\n", + "\n", + "output\n", + "\n", + "output\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "results = dr.execute([\"processed_data\"], inputs={\"cutoff_date\": \"2024-09-01\"})\n", "print()\n", @@ -160,9 +351,112 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "raw_data::result_store::get_result::hit\n", + "processed_data::result_store::get_result::hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " cities date amount country currency amound_in_usd\n", + "0 New York 2024-09-13 478.23 USA USD 478.2300\n", + "1 Los Angeles 2024-09-12 251.67 USA USD 251.6700\n", + "2 Chicago 2024-09-11 989.34 USA USD 989.3400\n", + "3 Montréal 2024-09-11 742.14 Canada CAD 526.9194\n", + "4 Vancouver 2024-09-09 584.56 Canada CAD 415.0376\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "raw_data\n", + "\n", + "raw_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "processed_data\n", + "\n", + "processed_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "raw_data->processed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_processed_data_inputs\n", + "\n", + "cutoff_date\n", + "str\n", + "\n", + "\n", + "\n", + "_processed_data_inputs->processed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "input\n", + "\n", + "input\n", + "\n", + "\n", + "\n", + "output\n", + "\n", + "output\n", + "\n", + "\n", + "\n", + "from cache\n", + "\n", + "from cache\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "results = dr.execute([\"processed_data\"], inputs={\"cutoff_date\": \"2024-09-01\"})\n", "print()\n", @@ -238,9 +532,113 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "raw_data::result_store::get_result::hit\n", + "processed_data::result_store::get_result::hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "\n", + " cities date amount country currency amound_in_usd\n", + "0 New York 2024-09-13 478.23 USA USD 478.2300\n", + "1 Los Angeles 2024-09-12 251.67 USA USD 251.6700\n", + "2 Chicago 2024-09-11 989.34 USA USD 989.3400\n", + "3 Montréal 2024-09-11 742.14 Canada CAD 526.9194\n", + "4 Vancouver 2024-09-09 584.56 Canada CAD 415.0376\n" + ] + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "%3\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "raw_data\n", + "\n", + "raw_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "processed_data\n", + "\n", + "processed_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "raw_data->processed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_processed_data_inputs\n", + "\n", + "cutoff_date\n", + "str\n", + "\n", + "\n", + "\n", + "_processed_data_inputs->processed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "input\n", + "\n", + "input\n", + "\n", + "\n", + "\n", + "output\n", + "\n", + "output\n", + "\n", + "\n", + "\n", + "from cache\n", + "\n", + "from cache\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "print(dr.cache.metadata_store.size)\n", "\n", diff --git a/examples/validate_examples.py b/examples/validate_examples.py index 55dae11ef..d85dcbbf7 100644 --- a/examples/validate_examples.py +++ b/examples/validate_examples.py @@ -65,7 +65,7 @@ def validate_notebook(notebook_path: pathlib.Path) -> int: return SUCCESS if first_cell.cell_type != "code": - issues.append("The first cell should be cell to set up the notebook.") + issues.append("The first cell should be a code cell to set up the notebook.") RETURN_VALUE |= FAILURE if "%pip install" not in first_cell.source: @@ -128,17 +128,10 @@ def add_badges_to_title(path: pathlib.Path): if notebook.cells[1].cell_type != "markdown": return - colab_url = f"https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/{path}" - colab_badge = ( - f"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)]({colab_url})" - ) - github_url = f"https://github.com/dagworks-inc/hamilton/blob/main/{path}" - github_badge = f"[![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)]({github_url})" - updated_content = "" for idx, line in enumerate(notebook.cells[1].source.splitlines()): if idx == 0: - updated_content += f"{line} {colab_badge} {github_badge}\n" + updated_content += f"{line} {_create_colab_badge(path)} {_create_github_badge(path)}\n" else: updated_content += f"\n{line}"