Skip to content

Commit

Permalink
enable multivalues insert (pandas-dev#19664)
Browse files Browse the repository at this point in the history
  • Loading branch information
danfrankj authored and jreback committed Mar 7, 2018
1 parent f33e84c commit 7c7bd56
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 3 deletions.
6 changes: 6 additions & 0 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4711,6 +4711,12 @@ writes ``data`` to the database in batches of 1000 rows at a time:
data.to_sql('data_chunked', engine, chunksize=1000)
.. note::

The function :func:`~pandas.DataFrame.to_sql` will perform a multivalue
insert if the engine dialect ``supports_multivalues_insert``. This will
greatly speed up the insert in some cases.

SQL data types
++++++++++++++

Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ Other Enhancements
- :func:`DataFrame.replace` now supports the ``method`` parameter, which can be used to specify the replacement method when ``to_replace`` is a scalar, list or tuple and ``value`` is ``None`` (:issue:`19632`)
- :meth:`Timestamp.month_name`, :meth:`DatetimeIndex.month_name`, and :meth:`Series.dt.month_name` are now available (:issue:`12805`)
- :meth:`Timestamp.day_name` and :meth:`DatetimeIndex.day_name` are now available to return day names with a specified locale (:issue:`12806`)
- :meth:`DataFrame.to_sql` now performs a multivalue insert if the underlying connection supports it, rather than inserting row by row.
``SQLAlchemy`` dialects supporting multivalue inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. (:issue:`14315`, :issue:`8953`)

.. _whatsnew_0230.api_breaking:

Expand Down
28 changes: 25 additions & 3 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,8 +572,29 @@ def create(self):
else:
self._execute_create()

# Zero-argument form: returns ``self.table.insert()`` with no row data attached.
# NOTE(review): a definition with the same name but the signature
# ``(self, data, conn)`` follows immediately below, so this looks like the
# pre-change side of the diff hunk rather than live code -- confirm.
def insert_statement(self):
return self.table.insert()
def insert_statement(self, data, conn):
    """
    Build the insert statement and the arguments to execute it with.

    The returned tuple is meant to be unpacked straight into the
    connection's ``execute`` call (as ``_execute_insert`` does).

    Parameters
    ----------
    data : list of dict
        The data to be inserted, one dict per row.
    conn : SQLAlchemy connectable (engine/connection)
        Connection to receive the data. Its ``dialect`` attribute, when
        present, is checked for ``supports_multivalues_insert``.

    Returns
    -------
    tuple
        ``(statement,)`` with ``data`` passed to ``self.table.insert``
        when the dialect reports ``supports_multivalues_insert``;
        otherwise ``(statement, data)`` so that the rows are handed to
        ``execute`` as a separate argument.
    """
    # ``conn`` may not expose a dialect at all; treat that the same as a
    # dialect without multivalues support.
    dialect = getattr(conn, 'dialect', None)
    multivalues = bool(
        dialect and getattr(dialect, 'supports_multivalues_insert', False))

    if not multivalues:
        return self.table.insert(), data

    # Explicit one-element tuple: a bare trailing comma is easy to overlook
    # and would silently change the return type if dropped.
    return (self.table.insert(data),)

def insert_data(self):
if self.index is not None:
Expand Down Expand Up @@ -612,8 +633,9 @@ def insert_data(self):
return column_names, data_list

def _execute_insert(self, conn, keys, data_iter):
"""Insert data into this table with database connection"""
# Pair every row's values with the column names in ``keys`` so that each
# row becomes one dict.
data = [{k: v for k, v in zip(keys, row)} for row in data_iter]
# NOTE(review): the two ``conn.execute`` lines below look like the old and
# new sides of the same diff hunk. The first calls ``insert_statement`` with
# no arguments and passes ``data`` separately; the second unpacks the tuple
# returned by ``insert_statement(data, conn)`` into ``execute``. Confirm
# that only the second is meant to be live.
conn.execute(self.insert_statement(), data)
conn.execute(*self.insert_statement(data, conn))

def insert(self, chunksize=None):
keys, data_list = self.insert_data()
Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -1665,6 +1665,29 @@ class Temporary(Base):

tm.assert_frame_equal(df, expected)

def test_insert_multivalues(self):
    """Check that rows are bound into the statement only for multivalues dialects."""
    # Regression coverage for:
    # https://github.com/pandas-dev/pandas/issues/14315
    # https://github.com/pandas-dev/pandas/issues/8953

    pandas_sql = sql.SQLDatabase(self.conn)
    frame = DataFrame({'A': [1, 0, 0], 'B': [1.1, 0.2, 4.3]})
    sql_table = sql.SQLTable("test_table", pandas_sql, frame=frame)

    rows = [{'A': 1, 'B': 0.46}, {'A': 0, 'B': -2.06}]

    # Only the statement (first element of the returned tuple) is inspected.
    result = sql_table.insert_statement(rows, conn=self.conn)
    statement = result[0]

    if not self.supports_multivalues_insert:
        assert statement.parameters is None, (
            'insert statement should not be multivalues'
        )
        return

    assert statement.parameters == rows, (
        'insert statement should be multivalues'
    )


class _TestSQLAlchemyConn(_EngineToConnMixin, _TestSQLAlchemy):

Expand All @@ -1679,6 +1702,7 @@ class _TestSQLiteAlchemy(object):
"""
flavor = 'sqlite'
supports_multivalues_insert = True

@classmethod
def connect(cls):
Expand Down Expand Up @@ -1727,6 +1751,7 @@ class _TestMySQLAlchemy(object):
"""
flavor = 'mysql'
supports_multivalues_insert = True

@classmethod
def connect(cls):
Expand Down Expand Up @@ -1796,6 +1821,7 @@ class _TestPostgreSQLAlchemy(object):
"""
flavor = 'postgresql'
supports_multivalues_insert = True

@classmethod
def connect(