diff --git a/examples/plotly/README.md b/examples/plotly/README.md new file mode 100644 index 000000000..99bf30044 --- /dev/null +++ b/examples/plotly/README.md @@ -0,0 +1,45 @@ +# Plotly materializer extension + +By importing `hamilton.plugins.plotly_extensions`, you can register two additional materializers for Plotly figures. `to.plotly()` creates static image files ([docs](https://plotly.com/python/static-image-export/)) and `to.html()` outputs interactive HTML files ([docs](https://plotly.com/python/interactive-html-export/)). + +## How to +You need to install `plotly` (low-level API) to annotate your function with `plotly.graph_objects.Figure` even if you are using `plotly_express` (high-level API) to generate figures. +```python +# 1. define a function returning a `plotly.graph_objects.Figure` in a python module. +def confusion_matrix_figure(...) -> plotly.graph_objects.Figure: + return plotly.express.imshow(...) + +# 2. import the module and create the Hamilton driver +dr = ( + driver.Builder() + .with_config({...}) + .with_modules(MODULE_NAME) + .build() +) + +# 3. define the materializers +from hamilton.io.materialization import to + +materializers = [ + to.plotly( + dependencies=["confusion_matrix_figure"], + id="confusion_matrix_png", + path="./static.png", + ), + to.html( + dependencies=["confusion_matrix_figure"], + id="confusion_matrix_html", + path="./interactive.html", + ), +] + +# 4. materialize figures +dr.materialize(*materializers) +``` + +## Notes +Here are a few things to consider when using the plotly materializers: +- Any plotly figure is a subclass of `plotly.graph_objects.Figure`, including anything from `plotly.express`, `plotly.graph_objects`, `plotly.figure_factory`. +- `to.plotly()` supports all filetypes of the plotly rendering engine (PNG, SVG, etc.). The output type will be automatically inferred from the `path` value passed to the materializer. Alternatively, you can specify the file type explicitly as a `kwarg`. 
+- `to.html()` outputs an interactive HTML file. These files will be at least ~3 MB each since they bundle the plotly JS library. You can reduce that by using the `include_plotlyjs` `kwarg`. Read more about it in the documentation at `https://plotly.com/python/interactive-html-export/`. +- `to.html()` will include the data that's being visually displayed, including what's part of the tooltips, which can grow filesize quickly. diff --git a/examples/plotly/interactive.html b/examples/plotly/interactive.html new file mode 100644 index 000000000..efc2c6028 --- /dev/null +++ b/examples/plotly/interactive.html @@ -0,0 +1,14 @@ + + + +
+
+
+
diff --git a/examples/plotly/model_training.py b/examples/plotly/model_training.py
new file mode 100644
index 000000000..5d148c9df
--- /dev/null
+++ b/examples/plotly/model_training.py
@@ -0,0 +1,135 @@
+from typing import Dict
+
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from sklearn import base, datasets, linear_model, metrics, svm, utils
+from sklearn.model_selection import train_test_split
+
+from hamilton import function_modifiers
+
+
+@function_modifiers.config.when(data_loader="iris")
+def data__iris() -> utils.Bunch:
+    return datasets.load_iris()
+
+
+@function_modifiers.config.when(data_loader="digits")
+def data__digits() -> utils.Bunch:
+    return datasets.load_digits()
+
+
+def target(data: utils.Bunch) -> np.ndarray:
+    return data.target
+
+
+def target_names(data: utils.Bunch) -> np.ndarray:
+    return data.target_names
+
+
+def feature_matrix(data: utils.Bunch) -> np.ndarray:
+    return data.data
+
+
+@function_modifiers.config.when(clf="svm")
+def prefit_clf__svm(gamma: float = 0.001) -> base.ClassifierMixin:
+    """Returns an unfitted SVM classifier object.
+
+    :param gamma: value forwarded to ``svm.SVC`` as ``gamma``.
+    :return: an unfitted ``svm.SVC`` instance.
+    """
+    return svm.SVC(gamma=gamma)
+
+
+@function_modifiers.config.when(clf="logistic")
+def prefit_clf__logreg(penalty: str) -> base.ClassifierMixin:
+    """Returns an unfitted Logistic Regression classifier object.
+
+    :param penalty: value forwarded to ``LogisticRegression`` as ``penalty``.
+    :return: an unfitted ``linear_model.LogisticRegression`` instance.
+    """
+    return linear_model.LogisticRegression(penalty=penalty)
+
+
+@function_modifiers.extract_fields(
+    {"X_train": np.ndarray, "X_test": np.ndarray, "y_train": np.ndarray, "y_test": np.ndarray}
+)
+def train_test_split_func(
+    feature_matrix: np.ndarray,
+    target: np.ndarray,
+    test_size_fraction: float,
+    shuffle_train_test_split: bool,
+) -> Dict[str, np.ndarray]:
+    """Function that creates the training & test splits.
+
+    It is then extracted out into constituent components and used downstream.
+
+    :param feature_matrix: feature array to split.
+    :param target: target array, split alongside ``feature_matrix``.
+    :param test_size_fraction: passed to ``train_test_split`` as ``test_size``.
+    :param shuffle_train_test_split: passed to ``train_test_split`` as ``shuffle``.
+    :return: dict with the keys ``X_train``, ``X_test``, ``y_train`` and ``y_test``.
+    """
+    X_train, X_test, y_train, y_test = train_test_split(
+        feature_matrix, target, test_size=test_size_fraction, shuffle=shuffle_train_test_split
+    )
+    return {"X_train": X_train, "X_test": X_test, "y_train": y_train, "y_test": y_test}
+
+
+def y_test_with_labels(y_test: np.ndarray, target_names: np.ndarray) -> np.ndarray:
+    """Adds labels to the target output."""
+    return np.array([target_names[idx] for idx in y_test])
+
+
+def fit_clf(
+    prefit_clf: base.ClassifierMixin, X_train: np.ndarray, y_train: np.ndarray
+) -> base.ClassifierMixin:
+    """Calls fit on the classifier object; it mutates it."""
+    prefit_clf.fit(X_train, y_train)
+    return prefit_clf
+
+
+def predicted_output(fit_clf: base.ClassifierMixin, X_test: np.ndarray) -> np.ndarray:
+    """Exercises the fit classifier to perform a prediction."""
+    return fit_clf.predict(X_test)
+
+
+def predicted_output_with_labels(
+    predicted_output: np.ndarray, target_names: np.ndarray
+) -> np.ndarray:
+    """Replaces the predictions with the desired labels."""
+    return np.array([target_names[idx] for idx in predicted_output])
+
+
+def classification_report(
+    predicted_output_with_labels: np.ndarray, y_test_with_labels: np.ndarray
+) -> str:
+    """Returns a classification report."""
+    return metrics.classification_report(y_test_with_labels, predicted_output_with_labels)
+
+
+def confusion_matrix(
+    predicted_output_with_labels: np.ndarray, y_test_with_labels: np.ndarray
+) -> np.ndarray:
+    """Returns a confusion matrix report."""
+    return metrics.confusion_matrix(y_test_with_labels, predicted_output_with_labels)
+
+
+def confusion_matrix_figure(confusion_matrix: np.ndarray, target_names: np.ndarray) -> go.Figure:
+    """Create a plotly interactive heatmap of the confusion matrix"""
+    class_indices = np.arange(len(target_names))
+    return px.imshow(
+        confusion_matrix,
+        x=class_indices,
+        y=class_indices,
+        labels=dict(
+            x="Predicted labels",
+            y="True labels",
+            color="Count",
+        ),
+    )
+
+
+def model_parameters(fit_clf: base.ClassifierMixin) -> dict:
+    """Returns a dictionary of model parameters."""
+    return fit_clf.get_params()
diff --git a/examples/plotly/notebook.ipynb b/examples/plotly/notebook.ipynb
new file mode 100644
index 000000000..955ff36f4
--- /dev/null
+++ b/examples/plotly/notebook.ipynb
@@ -0,0 +1,150 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "7bf6a40d",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2023-11-20T06:21:27.406089Z",
+     "start_time": "2023-11-20T06:21:25.198718Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import model_training\n",
+    "\n",
+    "from hamilton import driver\n",
+    "from hamilton.io.materialization import to"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "7a449245",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2023-11-20T06:21:27.443007Z",
+     "start_time": "2023-11-20T06:21:27.440097Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Note: Hamilton collects completely anonymous data about usage. This will help us improve Hamilton over time. 
See https://github.com/dagworks-inc/hamilton#usage-analytics--data-privacy for details.\n" + ] + } + ], + "source": [ + "dag_config = {\n", + " \"test_size_fraction\": 0.95,\n", + " \"shuffle_train_test_split\": True,\n", + " \"data_loader\" : \"digits\",\n", + " \"clf\" : \"svm\",\n", + " \"penalty\" : \"l2\"\n", + "}\n", + "dr = (\n", + " driver.Builder()\n", + " .with_config(dag_config)\n", + " .with_modules(model_training)\n", + " .build()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "397b09bc", + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-20T06:21:27.907847Z", + "start_time": "2023-11-20T06:21:27.440712Z" + } + }, + "outputs": [ + { + "data": { + "image/svg+xml": "\n\n\n\n\n\n\n\ncluster__legend\n\nLegend\n\n\n\ny_train\n\ny_train\nndarray\n\n\n\nfit_clf\n\nfit_clf\nClassifierMixin\n\n\n\ny_train->fit_clf\n\n\n\n\n\ny_test\n\ny_test\nndarray\n\n\n\ny_test_with_labels\n\ny_test_with_labels\nndarray\n\n\n\ny_test->y_test_with_labels\n\n\n\n\n\nconfusion_matrix_png\n\n\nconfusion_matrix_png\nPlotlyStaticWriter\n\n\n\nprefit_clf\n\nprefit_clf\nClassifierMixin\n\n\n\nprefit_clf->fit_clf\n\n\n\n\n\nconfusion_matrix_html\n\n\nconfusion_matrix_html\nPlotlyInteractiveWriter\n\n\n\npredicted_output\n\npredicted_output\nndarray\n\n\n\nfit_clf->predicted_output\n\n\n\n\n\ntrain_test_split_func\n\ntrain_test_split_func\ndict\n\n\n\ntrain_test_split_func->y_train\n\n\n\n\n\ntrain_test_split_func->y_test\n\n\n\n\n\nX_test\n\nX_test\nndarray\n\n\n\ntrain_test_split_func->X_test\n\n\n\n\n\nX_train\n\nX_train\nndarray\n\n\n\ntrain_test_split_func->X_train\n\n\n\n\n\ndata\n\ndata\nBunch\n\n\n\nfeature_matrix\n\nfeature_matrix\nndarray\n\n\n\ndata->feature_matrix\n\n\n\n\n\ntarget\n\ntarget\nndarray\n\n\n\ndata->target\n\n\n\n\n\ntarget_names\n\ntarget_names\nndarray\n\n\n\ndata->target_names\n\n\n\n\n\nconfusion_matrix\n\nconfusion_matrix\nndarray\n\n\n\ny_test_with_labels->confusion_matrix\n\n\n\n\n\nfeature_matrix->train_test_split_
func\n\n\n\n\n\nconfusion_matrix_figure\n\nconfusion_matrix_figure\nFigure\n\n\n\nconfusion_matrix->confusion_matrix_figure\n\n\n\n\n\npredicted_output_with_labels\n\npredicted_output_with_labels\nndarray\n\n\n\npredicted_output_with_labels->confusion_matrix\n\n\n\n\n\npredicted_output->predicted_output_with_labels\n\n\n\n\n\ntarget->train_test_split_func\n\n\n\n\n\ntarget_names->y_test_with_labels\n\n\n\n\n\ntarget_names->predicted_output_with_labels\n\n\n\n\n\ntarget_names->confusion_matrix_figure\n\n\n\n\n\nconfusion_matrix_figure->confusion_matrix_png\n\n\n\n\n\nconfusion_matrix_figure->confusion_matrix_html\n\n\n\n\n\nX_test->predicted_output\n\n\n\n\n\nX_train->fit_clf\n\n\n\n\n\n_prefit_clf_inputs\n\ngamma\nfloat\n\n\n\n_prefit_clf_inputs->prefit_clf\n\n\n\n\n\n_train_test_split_func_inputs\n\nshuffle_train_test_split\nbool\ntest_size_fraction\nfloat\n\n\n\n_train_test_split_func_inputs->train_test_split_func\n\n\n\n\n\ninput\n\ninput\n\n\n\nfunction\n\nfunction\n\n\n\noutput\n\noutput\n\n\n\nmaterializer\n\n\nmaterializer\n\n\n\n", + "text/plain": "" + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "materializers = [\n", + " to.plotly(\n", + " dependencies=[\"confusion_matrix_figure\"],\n", + " id=\"confusion_matrix_png\",\n", + " path=\"./static.png\",\n", + " ),\n", + " to.html(\n", + " dependencies=[\"confusion_matrix_figure\"],\n", + " id=\"confusion_matrix_html\",\n", + " path=\"./interactive.html\",\n", + " ),\n", + " ]\n", + "\n", + "dr.visualize_materialization(*materializers)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7ee3a7b2", + "metadata": { + "ExecuteTime": { + "end_time": "2023-11-20T06:21:29.472853Z", + "start_time": "2023-11-20T06:21:27.912356Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": "({'confusion_matrix_png': {'size': 29058,\n 'path': './static.png',\n 'last_modified': 1700461289.1922433,\n 'timestamp': 1700490089.192551},\n 
'confusion_matrix_html': {'size': 3607064,\n 'path': './interactive.html',\n 'last_modified': 1700461289.2231884,\n 'timestamp': 1700490089.425375}},\n {})" + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dr.materialize(*materializers)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "6760ac6885343fb8" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/plotly/requirements.txt b/examples/plotly/requirements.txt new file mode 100644 index 000000000..84b849067 --- /dev/null +++ b/examples/plotly/requirements.txt @@ -0,0 +1,2 @@ +plotly +sf-hamilton[visualization] diff --git a/examples/plotly/static.png b/examples/plotly/static.png new file mode 100644 index 000000000..d4aef9b45 Binary files /dev/null and b/examples/plotly/static.png differ