diff --git a/examples/model_examples/modular_example/notebook.ipynb b/examples/model_examples/modular_example/notebook.ipynb index 57b6cdde8..aae090fb6 100644 --- a/examples/model_examples/modular_example/notebook.ipynb +++ b/examples/model_examples/modular_example/notebook.ipynb @@ -40,6 +40,16 @@ "source": "%load_ext hamilton.plugins.jupyter_magic", "id": "initial_id" }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# Define features module\n", + "\n", + "This is the common data preprocessing step." + ], + "id": "29ebd0ec7fc5b800" + }, { "metadata": { "ExecuteTime": { @@ -74,6 +84,16 @@ ], "execution_count": 2 }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# Define train module\n", + "\n", + "This is the training bit of the dataflow." + ], + "id": "ee170ce894848eae" + }, { "metadata": { "ExecuteTime": { @@ -126,6 +146,16 @@ ], "execution_count": 3 }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# Define the inference module\n", + "\n", + "This houses what we need for inference." + ], + "id": "8cae5e1a9c682ea5" + }, { "metadata": { "ExecuteTime": { @@ -159,6 +189,103 @@ ], "execution_count": 4 }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# We can combine the modules independently with different drivers\n", + "\n", + "But this won't provide us with a single dataflow or DAG." 
+ ], + "id": "3a1a0d9aca3944b1" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-07T18:08:40.538779Z", + "start_time": "2024-12-07T18:08:39.642181Z" + } + }, + "cell_type": "code", + "source": [ + "# train\n", + "from hamilton import driver\n", + "\n", + "train_dr = (\n", + " driver.Builder()\n", + " .with_config({\"model\": \"RandomForest\", \"model_params\": {\"n_estimators\": 100}})\n", + " .with_modules(features, train, inference)\n", + " .build()\n", + ")\n", + "train_dr.display_all_functions()" + ], + "id": "9ac29701bdd31fb5", + "outputs": [ + { + "data": { + "image/svg+xml": "\n\n\n\n\n\n\n\ncluster__legend\n\nLegend\n\n\n\nmodel\n\n\n\nmodel\nRandomForest\n\n\n\nmodel_params\n\n\n\nmodel_params\n{'n_estimators': 100}\n\n\n\npredicted_data\n\npredicted_data\nDataFrame\n\n\n\nbase_model\n\nbase_model: model\ntyping.Any\n\n\n\nfit_model\n\nfit_model\ntyping.Any\n\n\n\nbase_model->fit_model\n\n\n\n\n\ntransformed_data\n\ntransformed_data\nDataFrame\n\n\n\ntransformed_data->predicted_data\n\n\n\n\n\ntransformed_data->fit_model\n\n\n\n\n\nfit_model->predicted_data\n\n\n\n\n\nraw_data\n\nraw_data\nDataFrame\n\n\n\nraw_data->transformed_data\n\n\n\n\n\n_base_model_inputs\n\nmodel_params\ndict\n\n\n\n_base_model_inputs->base_model\n\n\n\n\n\n_raw_data_inputs\n\npath\nstr\n\n\n\n_raw_data_inputs->raw_data\n\n\n\n\n\nconfig\n\n\n\nconfig\n\n\n\ninput\n\ninput\n\n\n\nfunction\n\nfunction\n\n\n\n", + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 9 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-07T18:09:13.265102Z", + "start_time": "2024-12-07T18:09:12.750662Z" + } + }, + "cell_type": "code", + "source": [ + "# Inference\n", + "from hamilton import driver\n", + "\n", + "inference_dr = (\n", + " driver.Builder()\n", + " .with_config({})\n", + " .with_modules(features, inference)\n", + " .build()\n", + ")\n", + 
"inference_dr.display_all_functions()" + ], + "id": "cc9401ed081df22f", + "outputs": [ + { + "data": { + "image/svg+xml": "\n\n\n\n\n\n\n\ncluster__legend\n\nLegend\n\n\n\npredicted_data\n\npredicted_data\nDataFrame\n\n\n\ntransformed_data\n\ntransformed_data\nDataFrame\n\n\n\ntransformed_data->predicted_data\n\n\n\n\n\nraw_data\n\nraw_data\nDataFrame\n\n\n\nraw_data->transformed_data\n\n\n\n\n\n_predicted_data_inputs\n\nfit_model\ntyping.Any\n\n\n\n_predicted_data_inputs->predicted_data\n\n\n\n\n\n_raw_data_inputs\n\npath\nstr\n\n\n\n_raw_data_inputs->raw_data\n\n\n\n\n\ninput\n\ninput\n\n\n\nfunction\n\nfunction\n\n\n\n", + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 10 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# To combine into a single dataflow we can use @subdag\n", + "\n", + "Suppose we want a single pipeline that enables us to:\n", + "\n", + "1. train the model & get training set predictions.\n", + "2. then use the fit model to predict on a separate dataset.\n", + "\n", + "To do that, we define another module that uses `@subdag` constructs to wire the modules together into a single dataflow." + ], + "id": "d85c51388733ce96" + }, { "metadata": { "ExecuteTime": { @@ -187,7 +314,8 @@ " \"path\": source(\"path\"),\n", " \"model_params\": source(\"model_params\"),\n", " },\n", - " # config={\n", + " # there are several ways to pass in configuration.\n", + " # config={ \n", " # \"model\": source(\"model\")\n", " # },\n", ")\n",