forked from catherinedevlin/ipython-sql
-
Notifications
You must be signed in to change notification settings - Fork 77
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
improving duckdb + autopandas performance (#469)
* Using native DuckDB `.df()` method when using `autopandas` * lint * test fixes * adds polars to the benchmark * adds missing unit tests for ResultSet * ResultSet no longer a subclass of list * fix * ci * ci * ci * fix * checking result set with non sqlalchemy * ci * fixes notebook
- Loading branch information
1 parent
ca49215
commit ad625e3
Showing
6 changed files
with
445 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,248 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "d47c0ad1-d9da-476e-aa51-c0c52e764415", | ||
"metadata": { | ||
"user_expressions": [] | ||
}, | ||
"source": [ | ||
"# Native DuckDB vs JupySQL" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "b53a574b", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%pip install pandas polars --quiet" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "1b217aea", | ||
"metadata": {}, | ||
"source": [ | ||
"## Pandas\n", | ||
"\n", | ||
"### Native DuckDB" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "4efc1b4d-52ab-482c-a5aa-a237f82f72e4", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import duckdb\n", | ||
"from pandas import DataFrame\n", | ||
"import numpy as np\n", | ||
"\n", | ||
"num_rows = 1_000_000\n", | ||
"\n", | ||
"df = DataFrame(np.random.randn(num_rows, 20))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "9e5d8f89-9e29-4994-a83f-268befd65ea4", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"86.6 ms ± 1.02 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"with duckdb.connect() as con:\n", | ||
" %timeit con.sql(\"select * from df\").df()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "892bc403-4f35-4489-889e-92eb76bda725", | ||
"metadata": { | ||
"user_expressions": [] | ||
}, | ||
"source": [ | ||
"### JupySQL" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "8dbc3068-2dcf-4937-81f2-2cfbbefbb4e1", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"%load_ext sql\n", | ||
"%sql duckdb://\n", | ||
"%config SqlMagic.displaycon = False\n", | ||
"%config SqlMagic.autopandas = True" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "7780ab33-56e1-44eb-84ce-887fdb96104a", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"90.1 ms ± 1.43 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"%%timeit\n", | ||
"df_ = %sql select * from df" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "1abcf8ab-0542-43fc-9cac-5ea9363e0827", | ||
"metadata": { | ||
"user_expressions": [] | ||
}, | ||
"source": [ | ||
"## Polars\n", | ||
"\n", | ||
"### Native DuckDB" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"id": "2ad79efc-aa17-41b1-a34c-fbed807785d5", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import polars as pl\n", | ||
"\n", | ||
"df_pl = pl.DataFrame(np.random.randn(num_rows, 20))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"id": "38fed499-28c0-4815-8c40-1e38f5c71e69", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"114 ms ± 701 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"with duckdb.connect() as con:\n", | ||
" %timeit con.sql(\"select * from df_pl\").pl()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "e7fb905f-4a13-4ee4-8150-30ee079e4ec4", | ||
"metadata": { | ||
"user_expressions": [] | ||
}, | ||
"source": [ | ||
"### JupySQL" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"id": "bf7ab190-9b21-44bd-9329-9aab5b39a8a3", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Disabled 'autopandas' since 'autopolars' was enabled.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"%config SqlMagic.autopolars = True" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"id": "53a14724-ffad-461f-aeec-674a59b93a0b", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Done.\n", | ||
"Done.\n", | ||
"Done.\n", | ||
"Done.\n", | ||
"Done.\n", | ||
"Done.\n", | ||
"Done.\n", | ||
"Done.\n", | ||
"3.93 s ± 37.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"%%timeit\n", | ||
"df_ = %sql select * from df_pl" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.10" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.