diff --git a/experimentation/Diabetes Ridge Regression Scoring.ipynb b/experimentation/Diabetes Ridge Regression Scoring.ipynb
new file mode 100644
index 00000000..9ac340ed
--- /dev/null
+++ b/experimentation/Diabetes Ridge Regression Scoring.ipynb
@@ -0,0 +1,112 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Score Data with a Ridge Regression Model Trained on the Diabetes Dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This notebook loads the model trained in the Diabetes Ridge Regression Training notebook, prepares the data, and scores the data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import numpy\n",
+    "from azureml.core.model import Model\n",
+    "import joblib"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_path = Model.get_model_path(model_name=\"sklearn_regression_model.pkl\")\n",
+    "model = joblib.load(model_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Prepare Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "raw_data = '{\"data\":[[1,2,3,4,5,6,7,8,9,10],[10,9,8,7,6,5,4,3,2,1]]}'\n",
+    "\n",
+    "data = json.loads(raw_data)[\"data\"]\n",
+    "data = numpy.array(data)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Score Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Test result:  {'result': [5113.099642122813, 3713.6329271385353]}\n"
+     ]
+    }
+   ],
+   "source": [
+    "result = model.predict(data)\n",
+    "print(\"Test result: \", {\"result\": result.tolist()})"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (storedna)",
+   "language": "python",
+   "name": "storedna"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb
new file mode 100644
index 00000000..7ae84e38
--- /dev/null
+++ b/experimentation/Diabetes Ridge Regression Training.ipynb
@@ -0,0 +1,171 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Train a Ridge Regression Model on the Diabetes Dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This notebook loads the Diabetes dataset from sklearn, splits the data into training and validation sets, trains a Ridge regression model, validates the model on the validation set, and saves the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.datasets import load_diabetes\n",
+    "from sklearn.linear_model import Ridge\n",
+    "from sklearn.metrics import mean_squared_error\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "import joblib"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X, y = load_diabetes(return_X_y=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Split Data into Training and Validation Sets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
+    "data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
+    "        \"test\": {\"X\": X_test, \"y\": y_test}}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Train Model on Training Set"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,\n",
+       "      normalize=False, random_state=None, solver='auto', tol=0.001)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "alpha = 0.5\n",
+    "\n",
+    "reg = Ridge(alpha=alpha)\n",
+    "reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Validate Model on Validation Set"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "mse:  3298.9096058070622\n"
+     ]
+    }
+   ],
+   "source": [
+    "preds = reg.predict(data[\"test\"][\"X\"])\n",
+    "print(\"mse: \", mean_squared_error(data[\"test\"][\"y\"], preds))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Save Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['sklearn_regression_model.pkl']"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model_name = \"sklearn_regression_model.pkl\"\n",
+    "\n",
+    "joblib.dump(value=reg, filename=model_name)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (storedna)",
+   "language": "python",
+   "name": "storedna"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}