diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 006ab5c49e24c..6e352c52cd60e 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5651,7 +5651,7 @@ the database using :func:`~pandas.DataFrame.to_sql`. data = pd.DataFrame(d, columns=c) data - data.to_sql("data", engine) + data.to_sql("data", con=engine) With some databases, writing large DataFrames can result in errors due to packet size limitations being exceeded. This can be avoided by setting the @@ -5660,7 +5660,7 @@ writes ``data`` to the database in batches of 1000 rows at a time: .. ipython:: python - data.to_sql("data_chunked", engine, chunksize=1000) + data.to_sql("data_chunked", con=engine, chunksize=1000) SQL data types ++++++++++++++ @@ -5680,7 +5680,7 @@ default ``Text`` type for string columns: from sqlalchemy.types import String - data.to_sql("data_dtype", engine, dtype={"Col_1": String}) + data.to_sql("data_dtype", con=engine, dtype={"Col_1": String}) .. note:: @@ -5849,7 +5849,7 @@ have schema's). For example: .. code-block:: python - df.to_sql("table", engine, schema="other_schema") + df.to_sql(name="table", con=engine, schema="other_schema") pd.read_sql_table("table", engine, schema="other_schema") Querying @@ -5876,7 +5876,7 @@ Specifying this will return an iterator through chunks of the query result: .. ipython:: python df = pd.DataFrame(np.random.randn(20, 3), columns=list("abc")) - df.to_sql("data_chunks", engine, index=False) + df.to_sql(name="data_chunks", con=engine, index=False) .. ipython:: python diff --git a/doc/source/whatsnew/v0.14.0.rst b/doc/source/whatsnew/v0.14.0.rst index f33ab3911f231..92c37243b7e81 100644 --- a/doc/source/whatsnew/v0.14.0.rst +++ b/doc/source/whatsnew/v0.14.0.rst @@ -437,7 +437,7 @@ This ``engine`` can then be used to write or read data to/from this database: .. ipython:: python df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']}) - df.to_sql('db_table', engine, index=False) + df.to_sql(name='db_table', con=engine, index=False) You can read data from a database by specifying the table name: diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 5cafaa5759a5b..db58d7ca67619 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -220,6 +220,7 @@ Other enhancements - :meth:`DataFrame.to_parquet` and :func:`read_parquet` will now write and read ``attrs`` respectively (:issue:`54346`) - Added support for the DataFrame Consortium Standard (:issue:`54383`) - Performance improvement in :meth:`GroupBy.quantile` (:issue:`51722`) +- .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: @@ -560,6 +561,7 @@ Other Deprecations - Deprecated the use of non-supported datetime64 and timedelta64 resolutions with :func:`pandas.array`. Supported resolutions are: "s", "ms", "us", "ns" resolutions (:issue:`53058`) - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`) - Deprecated the behavior of :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`Series.argmax`, :meth:`Series.argmin` with either all-NAs and skipna=True or any-NAs and skipna=False returning -1; in a future version this will raise ``ValueError`` (:issue:`33941`, :issue:`33942`) +- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_sql` except ``name``. (:issue:`54229`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8a3a105749800..106dfffa40e3b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -97,7 +97,10 @@ SettingWithCopyWarning, _chained_assignment_method_msg, ) -from pandas.util._decorators import doc +from pandas.util._decorators import ( + deprecate_nonkeyword_arguments, + doc, +) from pandas.util._exceptions import find_stack_level from pandas.util._validators import ( check_dtype_backend, @@ -2792,6 +2795,9 @@ def to_hdf( ) @final + @deprecate_nonkeyword_arguments( + version="3.0", allowed_args=["self", "name"], name="to_sql" + ) def to_sql( self, name: str, @@ -2911,7 +2917,7 @@ def to_sql( 1 User 2 2 User 3 - >>> df.to_sql('users', con=engine) + >>> df.to_sql(name='users', con=engine) 3 >>> from sqlalchemy import text >>> with engine.connect() as conn: @@ -2922,14 +2928,14 @@ def to_sql( >>> with engine.begin() as connection: ... df1 = pd.DataFrame({'name' : ['User 4', 'User 5']}) - ... df1.to_sql('users', con=connection, if_exists='append') + ... df1.to_sql(name='users', con=connection, if_exists='append') 2 This is allowed to support operations that require that the same DBAPI connection is used for the entire operation. >>> df2 = pd.DataFrame({'name' : ['User 6', 'User 7']}) - >>> df2.to_sql('users', con=engine, if_exists='append') + >>> df2.to_sql(name='users', con=engine, if_exists='append') 2 >>> with engine.connect() as conn: ... conn.execute(text("SELECT * FROM users")).fetchall() @@ -2939,7 +2945,7 @@ def to_sql( Overwrite the table with just ``df2``. - >>> df2.to_sql('users', con=engine, if_exists='replace', + >>> df2.to_sql(name='users', con=engine, if_exists='replace', ... index_label='id') 2 >>> with engine.connect() as conn: @@ -2956,7 +2962,7 @@ def to_sql( ... stmt = insert(table.table).values(data).on_conflict_do_nothing(index_elements=["a"]) ... result = conn.execute(stmt) ... return result.rowcount - >>> df_conflict.to_sql("conflict_table", conn, if_exists="append", method=insert_on_conflict_nothing) # doctest: +SKIP + >>> df_conflict.to_sql(name="conflict_table", con=conn, if_exists="append", method=insert_on_conflict_nothing) # doctest: +SKIP 0 For MySQL, a callable to update columns ``b`` and ``c`` if there's a conflict @@ -2973,7 +2979,7 @@ def to_sql( ... stmt = stmt.on_duplicate_key_update(b=stmt.inserted.b, c=stmt.inserted.c) ... result = conn.execute(stmt) ... return result.rowcount - >>> df_conflict.to_sql("conflict_table", conn, if_exists="append", method=insert_on_conflict_update) # doctest: +SKIP + >>> df_conflict.to_sql(name="conflict_table", con=conn, if_exists="append", method=insert_on_conflict_update) # doctest: +SKIP 2 Specify the dtype (especially useful for integers with missing values). @@ -2989,7 +2995,7 @@ def to_sql( 2 2.0 >>> from sqlalchemy.types import Integer - >>> df.to_sql('integers', con=engine, index=False, + >>> df.to_sql(name='integers', con=engine, index=False, ... dtype={"A": Integer()}) 3 diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 2cf9d144eb91c..7669d5aa4cea5 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -621,7 +621,7 @@ def read_sql( >>> conn = connect(':memory:') >>> df = pd.DataFrame(data=[[0, '10/11/12'], [1, '12/11/10']], ... 
columns=['int_column', 'date_column']) - >>> df.to_sql('test_data', conn) + >>> df.to_sql(name='test_data', con=conn) 2 >>> pd.read_sql('SELECT int_column, date_column FROM test_data', conn) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 8160249764272..0b98bcc4d4bec 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -548,7 +548,7 @@ def sqlite_buildin_iris(sqlite_buildin, iris_path): def test_dataframe_to_sql(conn, test_frame1, request): # GH 51086 if conn is sqlite_engine conn = request.getfixturevalue(conn) - test_frame1.to_sql("test", conn, if_exists="append", index=False) + test_frame1.to_sql(name="test", con=conn, if_exists="append", index=False) @pytest.mark.db @@ -569,7 +569,7 @@ def test_dataframe_to_sql_arrow_dtypes(conn, request): ) conn = request.getfixturevalue(conn) with tm.assert_produces_warning(UserWarning, match="the 'timedelta'"): - df.to_sql("test_arrow", conn, if_exists="replace", index=False) + df.to_sql(name="test_arrow", con=conn, if_exists="replace", index=False) @pytest.mark.db @@ -585,7 +585,7 @@ def test_dataframe_to_sql_arrow_dtypes_missing(conn, request, nulls_fixture): } ) conn = request.getfixturevalue(conn) - df.to_sql("test_arrow", conn, if_exists="replace", index=False) + df.to_sql(name="test_arrow", con=conn, if_exists="replace", index=False) @pytest.mark.db @@ -756,7 +756,7 @@ def test_read_procedure(conn, request): from sqlalchemy.engine import Engine df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) - df.to_sql("test_frame", conn, index=False) + df.to_sql(name="test_frame", con=conn, index=False) proc = """DROP PROCEDURE IF EXISTS get_testdb; @@ -811,7 +811,7 @@ def psql_insert_copy(table, conn, keys, data_iter): conn = request.getfixturevalue(conn) expected = DataFrame({"col1": [1, 2], "col2": [0.1, 0.2], "col3": ["a", "n"]}) result_count = expected.to_sql( - "test_frame", conn, index=False, method=psql_insert_copy + name="test_frame", con=conn, index=False, method=psql_insert_copy ) # GH 46891 if expected_count is None: @@ -860,12 +860,14 @@ def insert_on_conflict(table, conn, keys, data_iter): conn.execute(create_sql) expected = DataFrame([[1, 2.1, "a"]], columns=list("abc")) - expected.to_sql("test_insert_conflict", conn, if_exists="append", index=False) + expected.to_sql( + name="test_insert_conflict", con=conn, if_exists="append", index=False + ) df_insert = DataFrame([[1, 3.2, "b"]], columns=list("abc")) inserted = df_insert.to_sql( - "test_insert_conflict", - conn, + name="test_insert_conflict", + con=conn, index=False, if_exists="append", method=insert_on_conflict, @@ -914,12 +916,12 @@ def insert_on_conflict(table, conn, keys, data_iter): conn.execute(create_sql) df = DataFrame([[1, 2.1, "a"]], columns=list("abc")) - df.to_sql("test_insert_conflict", conn, if_exists="append", index=False) + df.to_sql(name="test_insert_conflict", con=conn, if_exists="append", index=False) expected = DataFrame([[1, 3.2, "b"]], columns=list("abc")) inserted = expected.to_sql( - "test_insert_conflict", - conn, + name="test_insert_conflict", + con=conn, index=False, if_exists="append", method=insert_on_conflict, @@ -1439,7 +1441,7 @@ def test_timedelta(self): # see #6921 df = to_timedelta(Series(["00:00:01", "00:00:03"], name="foo")).to_frame() with tm.assert_produces_warning(UserWarning): - result_count = df.to_sql("test_timedelta", self.conn) + result_count = df.to_sql(name="test_timedelta", con=self.conn) assert result_count == 2 result = sql.read_sql_query("SELECT * FROM test_timedelta", self.conn) 
tm.assert_series_equal(result["foo"], df["foo"].view("int64")) @@ -1448,7 +1450,7 @@ def test_complex_raises(self): df = DataFrame({"a": [1 + 1j, 2j]}) msg = "Complex datatypes not supported" with pytest.raises(ValueError, match=msg): - assert df.to_sql("test_complex", self.conn) is None + assert df.to_sql("test_complex", con=self.conn) is None @pytest.mark.parametrize( "index_name,index_label,expected", @@ -1539,7 +1541,7 @@ def test_multiindex_roundtrip(self): index=["A", "B"], ) - df.to_sql("test_multiindex_roundtrip", self.conn) + df.to_sql(name="test_multiindex_roundtrip", con=self.conn) result = sql.read_sql_query( "SELECT * FROM test_multiindex_roundtrip", self.conn, index_col=["A", "B"] ) @@ -1557,7 +1559,7 @@ def test_multiindex_roundtrip(self): def test_dtype_argument(self, dtype): # GH10285 Add dtype argument to read_sql_query df = DataFrame([[1.2, 3.4], [5.6, 7.8]], columns=["A", "B"]) - assert df.to_sql("test_dtype_argument", self.conn) == 2 + assert df.to_sql(name="test_dtype_argument", con=self.conn) == 2 expected = df.astype(dtype) result = sql.read_sql_query( @@ -1609,7 +1611,7 @@ def test_chunksize_read(self): df = DataFrame( np.random.default_rng(2).standard_normal((22, 5)), columns=list("abcde") ) - df.to_sql("test_chunksize", self.conn, index=False) + df.to_sql(name="test_chunksize", con=self.conn, index=False) # reading the query in one time res1 = sql.read_sql_query("select * from test_chunksize", self.conn) @@ -1653,7 +1655,7 @@ def test_categorical(self): df2 = df.copy() df2["person_name"] = df2["person_name"].astype("category") - df2.to_sql("test_categorical", self.conn, index=False) + df2.to_sql(name="test_categorical", con=self.conn, index=False) res = sql.read_sql_query("SELECT * FROM test_categorical", self.conn) tm.assert_frame_equal(res, df) @@ -1661,12 +1663,12 @@ def test_categorical(self): def test_unicode_column_name(self): # GH 11431 df = DataFrame([[1, 2], [3, 4]], columns=["\xe9", "b"]) - df.to_sql("test_unicode", self.conn, index=False) + df.to_sql(name="test_unicode", con=self.conn, index=False) def test_escaped_table_name(self): # GH 13206 df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]}) - df.to_sql("d1187b08-4943-4c8d-a7f6", self.conn, index=False) + df.to_sql(name="d1187b08-4943-4c8d-a7f6", con=self.conn, index=False) res = sql.read_sql_query("SELECT * FROM `d1187b08-4943-4c8d-a7f6`", self.conn) @@ -1675,7 +1677,7 @@ def test_escaped_table_name(self): def test_read_sql_duplicate_columns(self): # GH#53117 df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": 1}) - df.to_sql("test_table", self.conn, index=False) + df.to_sql(name="test_table", con=self.conn, index=False) result = pd.read_sql("SELECT a, b, a +1 as a, c FROM test_table;", self.conn) expected = DataFrame( @@ -1771,7 +1773,7 @@ def test_warning_case_insensitive_table_name(self, test_frame1): # Test that the warning is certainly NOT triggered in a normal case. 
with tm.assert_produces_warning(None): - test_frame1.to_sql("CaseSensitive", self.conn) + test_frame1.to_sql(name="CaseSensitive", con=self.conn) def _get_index_columns(self, tbl_name): from sqlalchemy.engine import reflection @@ -1840,7 +1842,7 @@ def test_database_uri_string(self, test_frame1): with tm.ensure_clean() as name: db_uri = "sqlite:///" + name table = "iris" - test_frame1.to_sql(table, db_uri, if_exists="replace", index=False) + test_frame1.to_sql(name=table, con=db_uri, if_exists="replace", index=False) test_frame2 = sql.read_sql(table, db_uri) test_frame3 = sql.read_sql_table(table, db_uri) query = "SELECT * FROM iris" @@ -1882,7 +1884,7 @@ def test_query_by_select_obj(self): def test_column_with_percentage(self): # GH 37157 df = DataFrame({"A": [0, 1, 2], "%_variation": [3, 4, 5]}) - df.to_sql("test_column_percentage", self.conn, index=False) + df.to_sql(name="test_column_percentage", con=self.conn, index=False) res = sql.read_sql_table("test_column_percentage", self.conn) @@ -2094,7 +2096,7 @@ def test_default_type_conversion(self): def test_bigint(self): # int64 should be converted to BigInteger, GH7433 df = DataFrame(data={"i64": [2**62]}) - assert df.to_sql("test_bigint", self.conn, index=False) == 1 + assert df.to_sql(name="test_bigint", con=self.conn, index=False) == 1 result = sql.read_sql_table("test_bigint", self.conn) tm.assert_frame_equal(df, result) @@ -2193,7 +2195,7 @@ def test_datetime_with_timezone_roundtrip(self): expected = DataFrame( {"A": date_range("2013-01-01 09:00:00", periods=3, tz="US/Pacific")} ) - assert expected.to_sql("test_datetime_tz", self.conn, index=False) == 3 + assert expected.to_sql(name="test_datetime_tz", con=self.conn, index=False) == 3 if self.flavor == "postgresql": # SQLAlchemy "timezones" (i.e. 
offsets) are coerced to UTC @@ -2215,7 +2217,7 @@ def test_datetime_with_timezone_roundtrip(self): def test_out_of_bounds_datetime(self): # GH 26761 data = DataFrame({"date": datetime(9999, 1, 1)}, index=[0]) - assert data.to_sql("test_datetime_obb", self.conn, index=False) == 1 + assert data.to_sql(name="test_datetime_obb", con=self.conn, index=False) == 1 result = sql.read_sql_table("test_datetime_obb", self.conn) expected = DataFrame([pd.NaT], columns=["date"]) tm.assert_frame_equal(result, expected) @@ -2225,7 +2227,10 @@ def test_naive_datetimeindex_roundtrip(self): # Ensure that a naive DatetimeIndex isn't converted to UTC dates = date_range("2018-01-01", periods=5, freq="6H")._with_freq(None) expected = DataFrame({"nums": range(5)}, index=dates) - assert expected.to_sql("foo_table", self.conn, index_label="info_date") == 5 + assert ( + expected.to_sql(name="foo_table", con=self.conn, index_label="info_date") + == 5 + ) result = sql.read_sql_table("foo_table", self.conn, index_col="info_date") # result index with gain a name from a set_index operation; expected tm.assert_frame_equal(result, expected, check_names=False) @@ -2266,7 +2271,7 @@ def test_datetime(self): df = DataFrame( {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)} ) - assert df.to_sql("test_datetime", self.conn) == 3 + assert df.to_sql(name="test_datetime", con=self.conn) == 3 # with read_table -> type information from schema used result = sql.read_sql_table("test_datetime", self.conn) @@ -2288,7 +2293,7 @@ def test_datetime_NaT(self): {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)} ) df.loc[1, "A"] = np.nan - assert df.to_sql("test_datetime", self.conn, index=False) == 3 + assert df.to_sql(name="test_datetime", con=self.conn, index=False) == 3 # with read_table -> type information from schema used result = sql.read_sql_table("test_datetime", self.conn) @@ -2306,7 +2311,7 @@ def test_datetime_NaT(self): def test_datetime_date(self): # test support for datetime.date df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) - assert df.to_sql("test_date", self.conn, index=False) == 2 + assert df.to_sql(name="test_date", con=self.conn, index=False) == 2 res = read_sql_table("test_date", self.conn) result = res["a"] expected = to_datetime(df["a"]) @@ -2316,7 +2321,7 @@ def test_datetime_date(self): def test_datetime_time(self, sqlite_buildin): # test support for datetime.time df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"]) - assert df.to_sql("test_time", self.conn, index=False) == 2 + assert df.to_sql(name="test_time", con=self.conn, index=False) == 2 res = read_sql_table("test_time", self.conn) tm.assert_frame_equal(res, df) @@ -2343,7 +2348,7 @@ def test_mixed_dtype_insert(self): df = DataFrame({"s1": s1, "s2": s2}) # write and read again - assert df.to_sql("test_read_write", self.conn, index=False) == 1 + assert df.to_sql(name="test_read_write", con=self.conn, index=False) == 1 df2 = sql.read_sql_table("test_read_write", self.conn) tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True) @@ -2351,7 +2356,7 @@ def test_mixed_dtype_insert(self): def test_nan_numeric(self): # NaNs in numeric float column df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]}) - assert df.to_sql("test_nan", self.conn, index=False) == 3 + assert df.to_sql(name="test_nan", con=self.conn, index=False) == 3 # with read_table result = sql.read_sql_table("test_nan", self.conn) @@ -2364,7 +2369,7 @@ def test_nan_numeric(self): def test_nan_fullcolumn(self): # 
full NaN column (numeric float column) df = DataFrame({"A": [0, 1, 2], "B": [np.nan, np.nan, np.nan]}) - assert df.to_sql("test_nan", self.conn, index=False) == 3 + assert df.to_sql(name="test_nan", con=self.conn, index=False) == 3 # with read_table result = sql.read_sql_table("test_nan", self.conn) @@ -2379,7 +2384,7 @@ def test_nan_fullcolumn(self): def test_nan_string(self): # NaNs in string column df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", np.nan]}) - assert df.to_sql("test_nan", self.conn, index=False) == 3 + assert df.to_sql(name="test_nan", con=self.conn, index=False) == 3 # NaNs are coming back as None df.loc[2, "B"] = None @@ -2441,25 +2446,27 @@ def test_dtype(self): cols = ["A", "B"] data = [(0.8, True), (0.9, None)] df = DataFrame(data, columns=cols) - assert df.to_sql("dtype_test", self.conn) == 2 - assert df.to_sql("dtype_test2", self.conn, dtype={"B": TEXT}) == 2 + assert df.to_sql(name="dtype_test", con=self.conn) == 2 + assert df.to_sql(name="dtype_test2", con=self.conn, dtype={"B": TEXT}) == 2 meta = MetaData() meta.reflect(bind=self.conn) sqltype = meta.tables["dtype_test2"].columns["B"].type assert isinstance(sqltype, TEXT) msg = "The type of B is not a SQLAlchemy type" with pytest.raises(ValueError, match=msg): - df.to_sql("error", self.conn, dtype={"B": str}) + df.to_sql(name="error", con=self.conn, dtype={"B": str}) # GH9083 - assert df.to_sql("dtype_test3", self.conn, dtype={"B": String(10)}) == 2 + assert ( + df.to_sql(name="dtype_test3", con=self.conn, dtype={"B": String(10)}) == 2 + ) meta.reflect(bind=self.conn) sqltype = meta.tables["dtype_test3"].columns["B"].type assert isinstance(sqltype, String) assert sqltype.length == 10 # single dtype - assert df.to_sql("single_dtype_test", self.conn, dtype=TEXT) == 2 + assert df.to_sql(name="single_dtype_test", con=self.conn, dtype=TEXT) == 2 meta.reflect(bind=self.conn) sqltypea = meta.tables["single_dtype_test"].columns["A"].type sqltypeb = meta.tables["single_dtype_test"].columns["B"].type @@ -2484,7 +2491,7 @@ def test_notna_dtype(self): df = DataFrame(cols) tbl = "notna_dtype_test" - assert df.to_sql(tbl, self.conn) == 2 + assert df.to_sql(name=tbl, con=self.conn) == 2 _ = sql.read_sql_table(tbl, self.conn) meta = MetaData() meta.reflect(bind=self.conn) @@ -2517,8 +2524,8 @@ def test_double_precision(self): assert ( df.to_sql( - "test_dtypes", - self.conn, + name="test_dtypes", + con=self.conn, index=False, if_exists="replace", dtype={"f64_as_f32": Float(precision=23)}, @@ -2567,7 +2574,9 @@ def main(connectable): test_connectable(connectable) assert ( - DataFrame({"test_foo_data": [0, 1, 2]}).to_sql("test_foo_data", self.conn) + DataFrame({"test_foo_data": [0, 1, 2]}).to_sql( + name="test_foo_data", con=self.conn + ) == 3 ) main(self.conn) @@ -2597,9 +2606,9 @@ def test_to_sql_with_negative_npinf(self, input, request): msg = "inf cannot be used with MySQL" with pytest.raises(ValueError, match=msg): - df.to_sql("foobar", self.conn, index=False) + df.to_sql(name="foobar", con=self.conn, index=False) else: - assert df.to_sql("foobar", self.conn, index=False) == 1 + assert df.to_sql(name="foobar", con=self.conn, index=False) == 1 res = sql.read_sql_table("foobar", self.conn) tm.assert_equal(df, res) @@ -2672,7 +2681,7 @@ def test_read_sql_dtype_backend(self, string_storage, func, dtype_backend): # GH#50048 table = "test" df = self.dtype_backend_data() - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(name=table, con=self.conn, index=False, if_exists="replace") with 
pd.option_context("mode.string_storage", string_storage): result = getattr(pd, func)( @@ -2684,7 +2693,7 @@ def test_read_sql_dtype_backend(self, string_storage, func, dtype_backend): with pd.option_context("mode.string_storage", string_storage): iterator = getattr(pd, func)( f"Select * from {table}", - self.conn, + con=self.conn, dtype_backend=dtype_backend, chunksize=3, ) @@ -2697,7 +2706,7 @@ def test_read_sql_dtype_backend_table(self, string_storage, func, dtype_backend) # GH#50048 table = "test" df = self.dtype_backend_data() - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(name=table, con=self.conn, index=False, if_exists="replace") with pd.option_context("mode.string_storage", string_storage): result = getattr(pd, func)(table, self.conn, dtype_backend=dtype_backend) @@ -2719,7 +2728,7 @@ def test_read_sql_dtype_backend_table(self, string_storage, func, dtype_backend) def test_read_sql_invalid_dtype_backend_table(self, func): table = "test" df = self.dtype_backend_data() - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(name=table, con=self.conn, index=False, if_exists="replace") msg = ( "dtype_backend numpy is invalid, only 'numpy_nullable' and " @@ -2784,7 +2793,7 @@ def test_chunksize_empty_dtypes(self): dtypes = {"a": "int64", "b": "object"} df = DataFrame(columns=["a", "b"]).astype(dtypes) expected = df.copy() - df.to_sql("test", self.conn, index=False, if_exists="replace") + df.to_sql(name="test", con=self.conn, index=False, if_exists="replace") for result in read_sql_query( "SELECT * FROM test", @@ -2800,7 +2809,7 @@ def test_read_sql_dtype(self, func, dtype_backend): # GH#50797 table = "test" df = DataFrame({"a": [1, 2, 3], "b": 5}) - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(name=table, con=self.conn, index=False, if_exists="replace") result = getattr(pd, func)( f"Select * from {table}", @@ -2837,6 +2846,17 @@ def setup_driver(cls): # sqlite3 is built-in cls.driver = None + def test_keyword_deprecation(self): + # GH 54397 + msg = ( + "tarting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) + df = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 1, "B": 2, "C": 3}]) + + with tm.assert_produces_warning(FutureWarning, match=msg): + df.to_sql("example", self.conn) + def test_default_type_conversion(self): df = sql.read_sql_table("types", self.conn) @@ -2861,7 +2881,7 @@ def test_default_date_load(self): def test_bigint_warning(self): # test no warning for BIGINT (to support int64) is raised (GH7433) df = DataFrame({"a": [1, 2]}, dtype="int64") - assert df.to_sql("test_bigintwarning", self.conn, index=False) == 2 + assert df.to_sql(name="test_bigintwarning", con=self.conn, index=False) == 2 with tm.assert_produces_warning(None): sql.read_sql_table("test_bigintwarning", self.conn) @@ -2899,7 +2919,9 @@ class Test(BaseModel): with Session() as session: df = DataFrame({"id": [0, 1], "string_column": ["hello", "world"]}) assert ( - df.to_sql("test_frame", con=self.conn, index=False, if_exists="replace") + df.to_sql( + name="test_frame", con=self.conn, index=False, if_exists="replace" + ) == 2 ) session.commit() @@ -2927,7 +2949,7 @@ def test_read_sql_string_inference(self): pa = pytest.importorskip("pyarrow") table = "test" df = DataFrame({"a": ["x", "y"]}) - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(table, con=self.conn, index=False, if_exists="replace") with pd.option_context("future.infer_string", True): result = read_sql_table(table, self.conn) @@ -3010,15 +3032,21 @@ def test_schema_support(self): self.conn.exec_driver_sql("CREATE SCHEMA other;") # write dataframe to different schema's - assert df.to_sql("test_schema_public", self.conn, index=False) == 2 + assert df.to_sql(name="test_schema_public", con=self.conn, index=False) == 2 assert ( df.to_sql( - "test_schema_public_explicit", self.conn, index=False, schema="public" + name="test_schema_public_explicit", + con=self.conn, + index=False, + schema="public", ) == 2 ) assert ( - df.to_sql("test_schema_other", self.conn, index=False, schema="other") == 2 + df.to_sql( + name="test_schema_other", con=self.conn, index=False, schema="other" + ) + == 2 ) # read dataframes back in @@ -3045,19 +3073,22 @@ def test_schema_support(self): # write dataframe with different if_exists options assert ( - df.to_sql("test_schema_other", self.conn, schema="other", index=False) == 2 + df.to_sql( + name="test_schema_other", con=self.conn, schema="other", index=False + ) + == 2 ) df.to_sql( - "test_schema_other", - self.conn, + name="test_schema_other", + con=self.conn, schema="other", index=False, if_exists="replace", ) assert ( df.to_sql( - "test_schema_other", - self.conn, + name="test_schema_other", + con=self.conn, schema="other", index=False, if_exists="append", @@ -3176,7 +3207,7 @@ def test_execute_sql(self): def test_datetime_date(self): # test support for datetime.date df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) - assert df.to_sql("test_date", self.conn, index=False) == 2 + assert df.to_sql(name="test_date", con=self.conn, index=False) == 2 res = read_sql_query("SELECT * FROM test_date", self.conn) if self.flavor == "sqlite": # comes back as strings @@ -3195,7 +3226,7 @@ def test_datetime_time(self, tz_aware): df = DataFrame(tz_times, columns=["a"]) - assert df.to_sql("test_time", self.conn, index=False) == 2 + assert df.to_sql(name="test_time", con=self.conn, index=False) == 2 res = read_sql_query("SELECT * FROM test_time", self.conn) if self.flavor == "sqlite": # comes back as strings @@ -3233,8 +3264,8 @@ def test_dtype(self): cols = ["A", "B"] data = [(0.8, True), (0.9, None)] df = 
DataFrame(data, columns=cols) - assert df.to_sql("dtype_test", self.conn) == 2 - assert df.to_sql("dtype_test2", self.conn, dtype={"B": "STRING"}) == 2 + assert df.to_sql(name="dtype_test", con=self.conn) == 2 + assert df.to_sql(name="dtype_test2", con=self.conn, dtype={"B": "STRING"}) == 2 # sqlite stores Boolean values as INTEGER assert self._get_sqlite_column_type("dtype_test", "B") == "INTEGER" @@ -3242,10 +3273,10 @@ def test_dtype(self): assert self._get_sqlite_column_type("dtype_test2", "B") == "STRING" msg = r"B \(\) not a string" with pytest.raises(ValueError, match=msg): - df.to_sql("error", self.conn, dtype={"B": bool}) + df.to_sql(name="error", con=self.conn, dtype={"B": bool}) # single dtype - assert df.to_sql("single_dtype_test", self.conn, dtype="STRING") == 2 + assert df.to_sql(name="single_dtype_test", con=self.conn, dtype="STRING") == 2 assert self._get_sqlite_column_type("single_dtype_test", "A") == "STRING" assert self._get_sqlite_column_type("single_dtype_test", "B") == "STRING" @@ -3262,7 +3293,7 @@ def test_notna_dtype(self): df = DataFrame(cols) tbl = "notna_dtype_test" - assert df.to_sql(tbl, self.conn) == 2 + assert df.to_sql(name=tbl, con=self.conn) == 2 assert self._get_sqlite_column_type(tbl, "Bool") == "INTEGER" assert self._get_sqlite_column_type(tbl, "Date") == "TIMESTAMP" @@ -3275,7 +3306,7 @@ def test_illegal_names(self): msg = "Empty table or column name specified" with pytest.raises(ValueError, match=msg): - df.to_sql("", self.conn) + df.to_sql(name="", con=self.conn) for ndx, weird_name in enumerate( [ @@ -3291,12 +3322,12 @@ def test_illegal_names(self): "\xe9", ] ): - assert df.to_sql(weird_name, self.conn) == 2 + assert df.to_sql(name=weird_name, con=self.conn) == 2 sql.table_exists(weird_name, self.conn) df2 = DataFrame([[1, 2], [3, 4]], columns=["a", weird_name]) c_tbl = f"test_weird_col_name{ndx:d}" - assert df2.to_sql(c_tbl, self.conn) == 2 + assert df2.to_sql(name=c_tbl, con=self.conn) == 2 sql.table_exists(c_tbl, self.conn)
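The net effect of the changes above is that ``DataFrame.to_sql`` keeps only ``name`` as a positional argument: the decorator is applied with ``allowed_args=["self", "name"]``, so passing ``con`` or anything after it positionally now emits a ``FutureWarning`` and is slated to become an error in pandas 3.0, matching the message asserted in ``test_keyword_deprecation``. A minimal sketch of the before/after call pattern, assuming an in-memory SQLite connection (the ``sqlite3`` setup and the table name ``example_table`` are illustrative only, not taken from this diff)::

    import sqlite3

    import pandas as pd

    df = pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]})
    conn = sqlite3.connect(":memory:")  # illustrative in-memory database

    # Deprecated pattern: ``con`` (and anything after it) passed positionally.
    # With this change it emits a FutureWarning; in pandas 3.0 it is expected
    # to stop working.
    # df.to_sql("example_table", conn, index=False)

    # Recommended pattern: ``name`` may stay positional, everything else keyword.
    df.to_sql(name="example_table", con=conn, index=False, if_exists="replace")

    print(pd.read_sql_query("SELECT * FROM example_table", conn))
    conn.close()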