diff --git a/examples/pandas/split-apply-combine/README.md b/examples/pandas/split-apply-combine/README.md index 4b81db7ec..216b78ee8 100644 --- a/examples/pandas/split-apply-combine/README.md +++ b/examples/pandas/split-apply-combine/README.md @@ -63,6 +63,16 @@ You can run the example doing: python my_script.py ``` +or running the notebook: + +```bash +# cd examples/pandas/split-apply-combine/ +jupyter notebook # pip install jupyter if you don't have it +``` +Or you can open up the notebook in Colab: + +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/pandas/split-apply-combine/notebook.ipynb) + The expected result is : | Name | Income | Children | Tax Rate | Tax Credit | Tax | Tax Formula | diff --git a/examples/pandas/split-apply-combine/notebook.ipynb b/examples/pandas/split-apply-combine/notebook.ipynb new file mode 100644 index 000000000..2e0c55197 --- /dev/null +++ b/examples/pandas/split-apply-combine/notebook.ipynb @@ -0,0 +1,901 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "fa0e0e81-4182-4479-9d96-7f4e6a2ea702", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install pandas \"sf-hamilton[visualization]\"" + ] + }, + { + "cell_type": "markdown", + "id": "b7a1d7e3-df0d-4b40-a823-11858b2224ea", + "metadata": {}, + "source": [ + "# Run me in google colab\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/pandas/split-apply-combine/notebook.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2d232bbc-a5c2-4459-b1e5-343e5817f35d", + "metadata": { + "ExecuteTime": { + "end_time": "2024-06-28T16:32:44.995610Z", + "start_time": "2024-06-28T16:32:38.286618Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The 
hamilton.plugins.jupyter_magic extension is already loaded. To reload it, use:\n", + " %reload_ext hamilton.plugins.jupyter_magic\n" + ] + } + ], + "source": [ + "%load_ext hamilton.plugins.jupyter_magic" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cbeb58d9-e484-43c3-8c4f-96f66bc07c25", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "tax\n", + "\n", + "tax\n", + "Series\n", + "\n", + "\n", + "\n", + "final_tax_dataframe\n", + "\n", + "final_tax_dataframe\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "tax->final_tax_dataframe\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Tax Credit\n", + "\n", + "Tax Credit\n", + "Series\n", + "\n", + "\n", + "\n", + "tax_formula\n", + "\n", + "tax_formula\n", + "Series\n", + "\n", + "\n", + "\n", + "Tax Credit->tax_formula\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "over_100k_tax\n", + "\n", + "over_100k_tax\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "combined_dataframe\n", + "\n", + "combined_dataframe\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "over_100k_tax->combined_dataframe\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Income\n", + "\n", + "Income\n", + "Series\n", + "\n", + "\n", + "\n", + "Income->tax_formula\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "split_dataframe\n", + "\n", + "split_dataframe\n", + "dict\n", + "\n", + "\n", + "\n", + "under_100k\n", + "\n", + "under_100k\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "split_dataframe->under_100k\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "over_100k\n", + "\n", + "over_100k\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "split_dataframe->over_100k\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "tax_formula->tax\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "tax_formula->final_tax_dataframe\n", + "\n", + "\n", + "\n", + "\n", + "under_100k_tax\n", + "\n", + 
"under_100k_tax\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "under_100k_tax->combined_dataframe\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "under_100k_tax.with_tax_credit\n", + "\n", + "under_100k_tax.with_tax_credit\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "under_100k_tax.with_tax_credit->under_100k_tax\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Tax Rate\n", + "\n", + "Tax Rate\n", + "Series\n", + "\n", + "\n", + "\n", + "Tax Rate->tax_formula\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "under_100k_tax.with_tax_rate\n", + "\n", + "under_100k_tax.with_tax_rate\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "under_100k->under_100k_tax.with_tax_rate\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "over_100k_tax.with_tax_rate\n", + "\n", + "over_100k_tax.with_tax_rate\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "over_100k_tax.with_tax_rate->over_100k_tax\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "under_100k_tax.with_tax_rate->under_100k_tax.with_tax_credit\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "over_100k->over_100k_tax.with_tax_rate\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "combined_dataframe->Tax Credit\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "combined_dataframe->Income\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "combined_dataframe->final_tax_dataframe\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "combined_dataframe->Tax Rate\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_split_dataframe_inputs\n", + "\n", + "input\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "_split_dataframe_inputs->split_dataframe\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_under_100k_tax.with_tax_credit_inputs\n", + "\n", + "tax_credits\n", + "dict\n", + "\n", + "\n", + "\n", + "_under_100k_tax.with_tax_credit_inputs->under_100k_tax.with_tax_credit\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_over_100k_tax.with_tax_rate_inputs\n", + "\n", + "tax_rates\n", + "dict\n", + "\n", + "\n", + "\n", + 
"_over_100k_tax.with_tax_rate_inputs->over_100k_tax.with_tax_rate\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_under_100k_tax.with_tax_rate_inputs\n", + "\n", + "tax_rates\n", + "dict\n", + "\n", + "\n", + "\n", + "_under_100k_tax.with_tax_rate_inputs->under_100k_tax.with_tax_rate\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "input\n", + "\n", + "input\n", + "\n", + "\n", + "\n", + "function\n", + "\n", + "function\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%cell_to_module my_functions --display\n", + "\n", + "from typing import Dict\n", + "\n", + "import numpy as np\n", + "import pandas\n", + "import pandas as pd\n", + "from pandas import DataFrame, Series\n", + "\n", + "from hamilton.function_modifiers import extract_columns, extract_fields, inject, pipe, source, step\n", + "\n", + "# ----------------------------------------------------------------------------------------------------------------------\n", + "# Tax calculation private functions\n", + "# ----------------------------------------------------------------------------------------------------------------------\n", + "\n", + "\n", + "def _tax_rate(df: DataFrame, tax_rates: Dict[str, float]) -> DataFrame:\n", + " \"\"\"\n", + " Add a series 'Tax Rate' to the DataFrame based on the tax_rates rules.\n", + " :param df: The DataFrame\n", + " :param tax_rates: Tax rates rules\n", + " :return: the DataFrame with the 'Tax Rate' Series\n", + " \"\"\"\n", + " output = DataFrame()\n", + " for tax_rate_formula, tax_rate in tax_rates.items():\n", + " selected = df.query(tax_rate_formula)\n", + " if selected.empty:\n", + " continue\n", + " tmp = DataFrame({\"Tax Rate\": tax_rate}, index=selected.index)\n", + " output = pd.concat([output, tmp], axis=0)\n", + " df = pd.concat([df, output], axis=1)\n", + " return df\n", + "\n", + "\n", + "def _tax_credit(df: DataFrame, tax_credits: Dict[str, float]) -> DataFrame:\n", + " 
\"\"\"\n", + " Add a series 'Tax Credit' to the DataFrame based on the tax_credits rules.\n", + " :param df: The DataFrame\n", + " :param tax_credits: Tax credits rules\n", + " :return: the DataFrame with the 'Tax Credit' Series\n", + " \"\"\"\n", + " output = DataFrame()\n", + " for tax_credit_formula, tax_credit in tax_credits.items():\n", + " selected = df.query(tax_credit_formula)\n", + " if selected.empty:\n", + " continue\n", + " tmp = DataFrame({\"Tax Credit\": tax_credit}, index=selected.index)\n", + " output = pd.concat([output, tmp], axis=0)\n", + " df = pd.concat([df, output], axis=1)\n", + " return df\n", + "\n", + "\n", + "# ----------------------------------------------------------------------------------------------------------------------\n", + "# DataFlow: The functions defined below are displayed in the order of execution\n", + "# ----------------------------------------------------------------------------------------------------------------------\n", + "\n", + "\n", + "@extract_fields({\"under_100k\": DataFrame, \"over_100k\": DataFrame})\n", + "# Step 1: DataFrame is split in 2 DataFrames\n", + "def split_dataframe(input: DataFrame) -> Dict[str, DataFrame]:\n", + " \"\"\"\n", + " That function takes the DataFrame in input and split it in 2 DataFrames:\n", + " - under_100k: Rows where 'Income' is under 100k\n", + " - over_100k: Rows where 'Income' is over 100k\n", + "\n", + " :param input: the DataFrame to process\n", + " :return: a Dict with the DataFrames and the Tax Rates & Credit rules\n", + " \"\"\"\n", + " return {\n", + " \"under_100k\": input.query(\"Income < 100000\"),\n", + " \"over_100k\": input.query(\"Income >= 100000\"),\n", + " }\n", + "\n", + "\n", + "@pipe(\n", + " step(_tax_rate, tax_rates=source(\"tax_rates\")), # apply the _tax_rate step\n", + " step(_tax_credit, tax_credits=source(\"tax_credits\")), # apply the _tax_credit step\n", + ")\n", + "# Step 2: DataFrame for Income under 100k applies a tax calculation pipeline\n", + 
"def under_100k_tax(under_100k: DataFrame) -> DataFrame:\n", + " \"\"\"\n", + " Tax calculation pipeline for 'Income' under 100k.\n", + " :param under_100k: The DataFrame where 'Income' is under 100k\n", + " :return: the DataFrame with the 'Tax' Series\n", + " \"\"\"\n", + " return under_100k\n", + "\n", + "\n", + "@pipe(\n", + " step(_tax_rate, tax_rates=source(\"tax_rates\")), # apply the _tax_rate step\n", + ")\n", + "# Step 2: DataFrame for Income over 100k applies a tax calculation pipeline\n", + "def over_100k_tax(over_100k: DataFrame) -> DataFrame:\n", + " \"\"\"\n", + " Tax calculation pipeline for 'Income' over 100k.\n", + " :param over_100k: The DataFrame where 'Income' is over 100k\n", + " :return: the DataFrame with the 'Tax' Series\n", + " \"\"\"\n", + " return over_100k\n", + "\n", + "\n", + "@extract_columns(\"Income\", \"Tax Rate\", \"Tax Credit\")\n", + "# Step 3: DataFrames are combined. Series 'Income', 'Tax Rate', 'Tax Credit' are extracted for next processing step\n", + "def combined_dataframe(under_100k_tax: DataFrame, over_100k_tax: DataFrame) -> DataFrame:\n", + " \"\"\"\n", + " That function combine the DataFrames under_100k_tax and over_100k_tax\n", + "\n", + " The @extract_columns decorator is making the Series available for processing.\n", + " \"\"\"\n", + " combined = pd.concat([under_100k_tax, over_100k_tax], axis=0).sort_index()\n", + " return combined\n", + "\n", + "\n", + "# We use @inject decorator here because we have spaces in the names of columns.\n", + "# If column names are valid python variable names we wouldn't need this.\n", + "@inject(income=source(\"Income\"), tax_rate=source(\"Tax Rate\"), tax_credit=source(\"Tax Credit\"))\n", + "# Step 4: 'Tax Formula' is calculated from 'Income', 'Tax Rate' and 'Tax Credit' series\n", + "def tax_formula(income: Series, tax_rate: Series, tax_credit: Series) -> Series:\n", + " \"\"\"\n", + " Return a DataFrame with a series 'Tax Formula' from 'Income', 'Tax Rate' and 'Tax Credit' 
series.\n", + "\n", + " :param income: the 'Income' series\n", + " :param tax_rate: the 'Tax Rate' series\n", + " :param tax_credit: the 'Tax Credit' series\n", + "\n", + " :return: the 'Tax Formula' Series\n", + " \"\"\"\n", + " df = DataFrame({\"income\": income, \"tax_rate\": tax_rate, \"tax_credit\": tax_credit})\n", + " df[\"Tax Formula\"] = df.apply(\n", + " lambda x: (\n", + " f\"({int(x['income'])} * {x['tax_rate']})\"\n", + " if np.isnan(x[\"tax_credit\"])\n", + " else f\"({int(x['income'])} * {x['tax_rate']}) - ({int(x['income'])} * {x['tax_rate']}) * {x['tax_credit']}\"\n", + " ),\n", + " axis=1,\n", + " )\n", + " return df[\"Tax Formula\"]\n", + "\n", + "\n", + "# Step 5: 'Tax' is calculated from 'Tax Formula' series\n", + "def tax(tax_formula: Series) -> Series:\n", + " \"\"\"\n", + " Return a series 'Tax' from 'Tax Formula' series.\n", + " :param tax_formula: the 'Tax Formula' series.\n", + " :return: the 'Tax' Series\n", + " \"\"\"\n", + " df = tax_formula.to_frame()\n", + " df[\"Tax\"] = df[\"Tax Formula\"].apply(lambda x: round(pandas.eval(x)))\n", + " return df[\"Tax\"]\n", + "\n", + "\n", + "# Step 6 (final): DataFrame and Series computed are combined\n", + "def final_tax_dataframe(\n", + " combined_dataframe: DataFrame, tax_formula: Series, tax: Series\n", + ") -> DataFrame:\n", + " \"\"\"\n", + " This function combines the DataFrame with the 'Tax' and 'Tax Formula' series\n", + " \"\"\"\n", + " df = combined_dataframe.copy(deep=True)\n", + "\n", + " # Set the 'Tax' and 'Tax Formula' series\n", + " df[\"Tax Formula\"] = tax_formula\n", + " df[\"Tax\"] = tax\n", + "\n", + " # Transform the 'Tax Rate' and 'Tax Credit' series to display percentage\n", + " df[\"Tax Rate\"] = df[\"Tax Rate\"].apply(lambda x: f\"{int(x * 100)} %\")\n", + " df[\"Tax Credit\"] = df[\"Tax Credit\"].apply(\n", + " lambda x: f\"{int(x * 100)} %\" if not np.isnan(x) else \"\"\n", + " )\n", + "\n", + " # Define the order the DataFrame will be 
displayed\n", + " order = [\"Name\", \"Income\", \"Children\", \"Tax Rate\", \"Tax Credit\", \"Tax\", \"Tax Formula\"]\n", + "\n", + " return df.reindex(columns=order)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "6b14bdc3-1aac-43f5-aa8c-50b2990c8969", + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Dict\n", + "\n", + "# import my_functions # this is imported by the cell above\n", + "from pandas import DataFrame\n", + "\n", + "from hamilton import base, driver, lifecycle\n", + "\n", + "## if using the Hamilton UI do `pip install sf-hamilton[ui,sdk]`\n", + "# from hamilton_sdk import adapters\n", + "\n", + "# tracker = adapters.HamiltonTracker(\n", + "# project_id=4, # modify this as needed\n", + "# username=\"elijah@dagworks.io\",\n", + "# dag_name=\"split-apply-combine\",\n", + "# tags={\"environment\": \"DEV\", \"team\": \"MY_TEAM\", \"version\": \"1\"}\n", + "# )\n", + "\n", + "driver = (\n", + " driver.Builder()\n", + " .with_config({})\n", + " .with_modules(my_functions)\n", + " .with_adapters(\n", + " # tracker, # add tracker if you have the UI set up.\n", + " # this is a strict type checker for the input and output of each function.\n", + " lifecycle.FunctionInputOutputTypeChecker(),\n", + " # this will make execute return a pandas dataframe as a result\n", + " base.PandasDataFrameResult(),\n", + " \n", + " )\n", + " .build()\n", + ")\n", + "\n", + "\n", + "class TaxCalculator:\n", + " \"\"\"\n", + " Simple class to wrap Hamilton Driver\n", + " \"\"\"\n", + "\n", + " @staticmethod\n", + " def calculate(\n", + " input: DataFrame, tax_rates: Dict[str, float], tax_credits: Dict[str, float]\n", + " ) -> DataFrame:\n", + " return driver.execute(\n", + " inputs={\"input\": input, \"tax_rates\": tax_rates, \"tax_credits\": tax_credits},\n", + " final_vars=[\"final_tax_dataframe\"],\n", + " )\n", + "\n", + " @staticmethod\n", + " def visualize():\n", + " # To visualize do `pip install \"sf-hamilton[visualization]\"` 
if you want these to work\n", + " return driver.display_all_functions()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "a148497c-22b5-4f09-b088-0278ad252762", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Name Income Children Tax Rate Tax Credit Tax Tax Formula\n", + "0 John 75600 2 20 % 4 % 14515 (75600 * 0.2) - (75600 * 0.2) * 0.04\n", + "1 Bob 34000 1 15 % 2 % 4998 (34000 * 0.15) - (34000 * 0.15) * 0.02\n", + "2 Chloe 111500 3 22 % 24530 (111500 * 0.22)\n", + "3 Thomas 234546 1 28 % 65673 (234546 * 0.28)\n", + "4 Ellis 144865 2 25 % 36216 (144865 * 0.25)\n", + "5 Deane 138500 4 25 % 34625 (138500 * 0.25)\n", + "6 Mariella 69412 5 18 % 10 % 11245 (69412 * 0.18) - (69412 * 0.18) * 0.1\n", + "7 Carlos 65535 0 18 % 0 % 11796 (65535 * 0.18) - (65535 * 0.18) * 0.0\n", + "8 Toney 43642 3 15 % 6 % 6154 (43642 * 0.15) - (43642 * 0.15) * 0.06\n", + "9 Ramiro 117850 2 22 % 25927 (117850 * 0.22)\n" + ] + } + ], + "source": [ + "from inspect import cleandoc\n", + "from io import StringIO\n", + "\n", + "import pandas as pd\n", + "from pandas import DataFrame\n", + "\n", + "\n", + "def read_table(table: str, delimiter=\"|\") -> DataFrame:\n", + " \"\"\"\n", + " Read table from string and return pandas DataFrame.\n", + " \"\"\"\n", + " df = pd.read_table(StringIO(cleandoc(table)), delimiter=delimiter)\n", + " df = df.loc[:, ~df.columns.str.match(\"Unnamed\")]\n", + " df.columns = df.columns.str.strip()\n", + " return df\n", + "\n", + "\n", + "# ----------------------------------------------------------------------------------------------------------------------\n", + "# The Data to process\n", + "# ----------------------------------------------------------------------------------------------------------------------\n", + "input = read_table(\n", + " \"\"\"\n", + " | Name | Income | Children |\n", + " | John | 75600 | 2 |\n", + " | Bob | 34000 | 1 |\n", + " | Chloe | 111500 | 3 |\n", + " | Thomas | 
234546 | 1 |\n", + " | Ellis | 144865 | 2 |\n", + " | Deane | 138500 | 4 |\n", + " | Mariella | 69412 | 5 |\n", + " | Carlos | 65535 | 0 |\n", + " | Toney | 43642 | 3 |\n", + " | Ramiro | 117850 | 2 |\n", + " \"\"\"\n", + ")\n", + "\n", + "# ----------------------------------------------------------------------------------------------------------------------\n", + "# Tax Rate & Credit rules\n", + "# ----------------------------------------------------------------------------------------------------------------------\n", + "tax_rates = {\n", + " \"Income < 50000\": 0.15, # < 50k: Tax rate is 15 %\n", + " \"Income > 50000 and Income < 70000\": 0.18, # 50k to 70k: Tax rate is 18 %\n", + " \"Income > 70000 and Income < 100000\": 0.2, # 70k to 100k: Tax rate is 20 %\n", + " \"Income > 100000 and Income < 120000\": 0.22, # 100k to 120k: Tax rate is 22 %\n", + " \"Income > 120000 and Income < 150000\": 0.25, # 120k to 150k: Tax rate is 25 %\n", + " \"Income > 150000\": 0.28, # over 150k: Tax rate is 28 %\n", + "}\n", + "\n", + "tax_credits = {\n", + " \"Children == 0\": 0.0, # 0 child: Tax credit 0 %\n", + " \"Children == 1\": 0.02, # 1 child: Tax credit 2 %\n", + " \"Children == 2\": 0.04, # 2 children: Tax credit 4 %\n", + " \"Children == 3\": 0.06, # 3 children: Tax credit 6 %\n", + " \"Children == 4\": 0.08, # 4 children: Tax credit 8 %\n", + " \"Children > 4\": 0.1, # over 4 children: Tax credit 10 %\n", + "}\n", + "\n", + "# ----------------------------------------------------------------------------------------------------------------------\n", + "# Run the Tax Calculator\n", + "# ----------------------------------------------------------------------------------------------------------------------\n", + "\n", + "# Calculate the taxes\n", + "output = TaxCalculator.calculate(input, tax_rates, tax_credits)\n", + "print(output.to_string())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "886427e2-b23c-4b49-b510-d9788a5bf1fe", + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameIncomeChildrenTax RateTax CreditTaxTax Formula
0John75600220 %4 %14515(75600 * 0.2) - (75600 * 0.2) * 0.04
1Bob34000115 %2 %4998(34000 * 0.15) - (34000 * 0.15) * 0.02
2Chloe111500322 %24530(111500 * 0.22)
3Thomas234546128 %65673(234546 * 0.28)
4Ellis144865225 %36216(144865 * 0.25)
5Deane138500425 %34625(138500 * 0.25)
6Mariella69412518 %10 %11245(69412 * 0.18) - (69412 * 0.18) * 0.1
7Carlos65535018 %0 %11796(65535 * 0.18) - (65535 * 0.18) * 0.0
8Toney43642315 %6 %6154(43642 * 0.15) - (43642 * 0.15) * 0.06
9Ramiro117850222 %25927(117850 * 0.22)
\n", + "
" + ], + "text/plain": [ + " Name Income Children Tax Rate Tax Credit Tax \\\n", + "0 John 75600 2 20 % 4 % 14515 \n", + "1 Bob 34000 1 15 % 2 % 4998 \n", + "2 Chloe 111500 3 22 % 24530 \n", + "3 Thomas 234546 1 28 % 65673 \n", + "4 Ellis 144865 2 25 % 36216 \n", + "5 Deane 138500 4 25 % 34625 \n", + "6 Mariella 69412 5 18 % 10 % 11245 \n", + "7 Carlos 65535 0 18 % 0 % 11796 \n", + "8 Toney 43642 3 15 % 6 % 6154 \n", + "9 Ramiro 117850 2 22 % 25927 \n", + "\n", + " Tax Formula \n", + "0 (75600 * 0.2) - (75600 * 0.2) * 0.04 \n", + "1 (34000 * 0.15) - (34000 * 0.15) * 0.02 \n", + "2 (111500 * 0.22) \n", + "3 (234546 * 0.28) \n", + "4 (144865 * 0.25) \n", + "5 (138500 * 0.25) \n", + "6 (69412 * 0.18) - (69412 * 0.18) * 0.1 \n", + "7 (65535 * 0.18) - (65535 * 0.18) * 0.0 \n", + "8 (43642 * 0.15) - (43642 * 0.15) * 0.06 \n", + "9 (117850 * 0.22) " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "driver.execute(\n", + " inputs={\"input\": input, \"tax_rates\": tax_rates, \"tax_credits\": tax_credits},\n", + " final_vars=[\"final_tax_dataframe\"],\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdf9969d-7470-4bba-a3f2-b0cd7ed26022", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/pandas/split-apply-combine/requirements.txt b/examples/pandas/split-apply-combine/requirements.txt new file mode 100644 index 000000000..78380e35c --- /dev/null +++ b/examples/pandas/split-apply-combine/requirements.txt @@ -0,0 +1,2 @@ +pandas 
+sf-hamilton[visualization]