From 8b0450898ca8bd9ad39d4debdb43179c0bd33eef Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Thu, 3 Aug 2023 21:20:33 -0400 Subject: [PATCH 01/13] Updated method header and whatsnew file --- doc/source/whatsnew/v2.1.0.rst | 2 ++ pandas/core/generic.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 89b79d2e04194..94f9b7e517d78 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -214,6 +214,8 @@ Other enhancements - Reductions :meth:`Series.argmax`, :meth:`Series.argmin`, :meth:`Series.idxmax`, :meth:`Series.idxmin`, :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`DataFrame.idxmax`, :meth:`DataFrame.idxmin` are now supported for object-dtype objects (:issue:`4279`, :issue:`18021`, :issue:`40685`, :issue:`43697`) - :meth:`DataFrame.to_parquet` and :func:`read_parquet` will now write and read ``attrs`` respectively (:issue:`54346`) - Performance improvement in :meth:`GroupBy.quantile` (:issue:`51722`) +- Updated ``con`` parameter for :meth:`DataFrame.to_sql` to be a keyword argument. (:issue:`54229`) +- .. --------------------------------------------------------------------------- .. _whatsnew_210.notable_bug_fixes: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8a3a105749800..ac1e0142f447b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -201,6 +201,9 @@ Mapping, Sequence, ) + import sqlite3 + + import sqlalchemy from pandas._libs.tslibs import BaseOffset @@ -2795,7 +2798,10 @@ def to_hdf( def to_sql( self, name: str, - con, + con: sqlalchemy.engine.Engine + | sqlalchemy.engine.Connection + | sqlite3.Connection + | None = None, schema: str | None = None, if_exists: Literal["fail", "replace", "append"] = "fail", index: bool_t = True, @@ -2956,7 +2962,7 @@ def to_sql( ... stmt = insert(table.table).values(data).on_conflict_do_nothing(index_elements=["a"]) ... result = conn.execute(stmt) ... return result.rowcount - >>> df_conflict.to_sql("conflict_table", conn, if_exists="append", method=insert_on_conflict_nothing) # doctest: +SKIP + >>> df_conflict.to_sql("conflict_table", con=conn, if_exists="append", method=insert_on_conflict_nothing) # doctest: +SKIP 0 For MySQL, a callable to update columns ``b`` and ``c`` if there's a conflict @@ -2973,7 +2979,7 @@ def to_sql( ... stmt = stmt.on_duplicate_key_update(b=stmt.inserted.b, c=stmt.inserted.c) ... result = conn.execute(stmt) ... return result.rowcount - >>> df_conflict.to_sql("conflict_table", conn, if_exists="append", method=insert_on_conflict_update) # doctest: +SKIP + >>> df_conflict.to_sql("conflict_table", con=conn, if_exists="append", method=insert_on_conflict_update) # doctest: +SKIP 2 Specify the dtype (especially useful for integers with missing values). From 432ebbedac74b0cb8b7aace3b86a1646dd7c0430 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Thu, 3 Aug 2023 21:52:51 -0400 Subject: [PATCH 02/13] Updated unit tests to use keyword argument for con parameter. 
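The test-suite changes below are mechanical: every call site that passed the
connection to to_sql positionally now names it explicitly. As a rough
illustration of the before/after calling convention (a hypothetical snippet
with made-up table and connection names, not code taken from the suite):

    import sqlite3

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]})
    conn = sqlite3.connect(":memory:")

    # Before: the connection is passed positionally.
    df.to_sql("example_table", conn, index=False)

    # After: the connection is passed by keyword, matching the
    # keyword-only signature this patch series moves toward.
    df.to_sql("example_table", con=conn, index=False, if_exists="replace")

    conn.close()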
--- pandas/tests/io/test_sql.py | 159 +++++++++++++++++++----------------- 1 file changed, 85 insertions(+), 74 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index e2d985bdf1386..fa0fdfd2047db 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -548,7 +548,7 @@ def sqlite_buildin_iris(sqlite_buildin, iris_path): def test_dataframe_to_sql(conn, test_frame1, request): # GH 51086 if conn is sqlite_engine conn = request.getfixturevalue(conn) - test_frame1.to_sql("test", conn, if_exists="append", index=False) + test_frame1.to_sql("test", con=conn, if_exists="append", index=False) @pytest.mark.db @@ -569,7 +569,7 @@ def test_dataframe_to_sql_arrow_dtypes(conn, request): ) conn = request.getfixturevalue(conn) with tm.assert_produces_warning(UserWarning, match="the 'timedelta'"): - df.to_sql("test_arrow", conn, if_exists="replace", index=False) + df.to_sql("test_arrow", con=conn, if_exists="replace", index=False) @pytest.mark.db @@ -585,7 +585,7 @@ def test_dataframe_to_sql_arrow_dtypes_missing(conn, request, nulls_fixture): } ) conn = request.getfixturevalue(conn) - df.to_sql("test_arrow", conn, if_exists="replace", index=False) + df.to_sql("test_arrow", con=conn, if_exists="replace", index=False) @pytest.mark.db @@ -756,7 +756,7 @@ def test_read_procedure(conn, request): from sqlalchemy.engine import Engine df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) - df.to_sql("test_frame", conn, index=False) + df.to_sql("test_frame", con=conn, index=False) proc = """DROP PROCEDURE IF EXISTS get_testdb; @@ -811,7 +811,7 @@ def psql_insert_copy(table, conn, keys, data_iter): conn = request.getfixturevalue(conn) expected = DataFrame({"col1": [1, 2], "col2": [0.1, 0.2], "col3": ["a", "n"]}) result_count = expected.to_sql( - "test_frame", conn, index=False, method=psql_insert_copy + "test_frame", con=conn, index=False, method=psql_insert_copy ) # GH 46891 if expected_count is None: @@ -860,12 +860,12 @@ def insert_on_conflict(table, conn, keys, data_iter): conn.execute(create_sql) expected = DataFrame([[1, 2.1, "a"]], columns=list("abc")) - expected.to_sql("test_insert_conflict", conn, if_exists="append", index=False) + expected.to_sql("test_insert_conflict", con=conn, if_exists="append", index=False) df_insert = DataFrame([[1, 3.2, "b"]], columns=list("abc")) inserted = df_insert.to_sql( "test_insert_conflict", - conn, + con=conn, index=False, if_exists="append", method=insert_on_conflict, @@ -914,12 +914,12 @@ def insert_on_conflict(table, conn, keys, data_iter): conn.execute(create_sql) df = DataFrame([[1, 2.1, "a"]], columns=list("abc")) - df.to_sql("test_insert_conflict", conn, if_exists="append", index=False) + df.to_sql("test_insert_conflict", con=conn, if_exists="append", index=False) expected = DataFrame([[1, 3.2, "b"]], columns=list("abc")) inserted = expected.to_sql( "test_insert_conflict", - conn, + con=conn, index=False, if_exists="append", method=insert_on_conflict, @@ -1448,7 +1448,7 @@ def test_complex_raises(self): df = DataFrame({"a": [1 + 1j, 2j]}) msg = "Complex datatypes not supported" with pytest.raises(ValueError, match=msg): - assert df.to_sql("test_complex", self.conn) is None + assert df.to_sql("test_complex", con=self.conn) is None @pytest.mark.parametrize( "index_name,index_label,expected", @@ -1471,7 +1471,9 @@ def test_to_sql_index_label(self, index_name, index_label, expected): temp_frame = DataFrame({"col1": range(4)}) temp_frame.index.name = index_name query = "SELECT * FROM test_index_label" - 
sql.to_sql(temp_frame, "test_index_label", self.conn, index_label=index_label) + sql.to_sql( + temp_frame, "test_index_label", con=self.conn, index_label=index_label + ) frame = sql.read_sql_query(query, self.conn) assert frame.columns[0] == expected @@ -1483,7 +1485,7 @@ def test_to_sql_index_label_multiindex(self): ) # no index name, defaults to 'level_0' and 'level_1' - result = sql.to_sql(temp_frame, "test_index_label", self.conn) + result = sql.to_sql(temp_frame, "test_index_label", con=self.conn) assert result == expected_row_count frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn) assert frame.columns[0] == "level_0" @@ -1493,7 +1495,7 @@ def test_to_sql_index_label_multiindex(self): result = sql.to_sql( temp_frame, "test_index_label", - self.conn, + con=self.conn, if_exists="replace", index_label=["A", "B"], ) @@ -1504,7 +1506,7 @@ def test_to_sql_index_label_multiindex(self): # using the index name temp_frame.index.names = ["A", "B"] result = sql.to_sql( - temp_frame, "test_index_label", self.conn, if_exists="replace" + temp_frame, "test_index_label", con=self.conn, if_exists="replace" ) assert result == expected_row_count frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn) @@ -1514,7 +1516,7 @@ def test_to_sql_index_label_multiindex(self): result = sql.to_sql( temp_frame, "test_index_label", - self.conn, + con=self.conn, if_exists="replace", index_label=["C", "D"], ) @@ -1527,7 +1529,7 @@ def test_to_sql_index_label_multiindex(self): sql.to_sql( temp_frame, "test_index_label", - self.conn, + con=self.conn, if_exists="replace", index_label="C", ) @@ -1539,7 +1541,7 @@ def test_multiindex_roundtrip(self): index=["A", "B"], ) - df.to_sql("test_multiindex_roundtrip", self.conn) + df.to_sql("test_multiindex_roundtrip", con=self.conn) result = sql.read_sql_query( "SELECT * FROM test_multiindex_roundtrip", self.conn, index_col=["A", "B"] ) @@ -1557,7 +1559,7 @@ def test_multiindex_roundtrip(self): def test_dtype_argument(self, dtype): # GH10285 Add dtype argument to read_sql_query df = DataFrame([[1.2, 3.4], [5.6, 7.8]], columns=["A", "B"]) - assert df.to_sql("test_dtype_argument", self.conn) == 2 + assert df.to_sql("test_dtype_argument", con=self.conn) == 2 expected = df.astype(dtype) result = sql.read_sql_query( @@ -1568,7 +1570,9 @@ def test_dtype_argument(self, dtype): def test_integer_col_names(self): df = DataFrame([[1, 2], [3, 4]], columns=[0, 1]) - sql.to_sql(df, "test_frame_integer_col_names", self.conn, if_exists="replace") + sql.to_sql( + df, "test_frame_integer_col_names", con=self.conn, if_exists="replace" + ) def test_get_schema(self, test_frame1): create_sql = sql.get_schema(test_frame1, "test", con=self.conn) @@ -1609,7 +1613,7 @@ def test_chunksize_read(self): df = DataFrame( np.random.default_rng(2).standard_normal((22, 5)), columns=list("abcde") ) - df.to_sql("test_chunksize", self.conn, index=False) + df.to_sql("test_chunksize", con=self.conn, index=False) # reading the query in one time res1 = sql.read_sql_query("select * from test_chunksize", self.conn) @@ -1653,7 +1657,7 @@ def test_categorical(self): df2 = df.copy() df2["person_name"] = df2["person_name"].astype("category") - df2.to_sql("test_categorical", self.conn, index=False) + df2.to_sql("test_categorical", con=self.conn, index=False) res = sql.read_sql_query("SELECT * FROM test_categorical", self.conn) tm.assert_frame_equal(res, df) @@ -1661,12 +1665,12 @@ def test_categorical(self): def test_unicode_column_name(self): # GH 11431 df = DataFrame([[1, 2], [3, 4]], 
columns=["\xe9", "b"]) - df.to_sql("test_unicode", self.conn, index=False) + df.to_sql("test_unicode", con=self.conn, index=False) def test_escaped_table_name(self): # GH 13206 df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]}) - df.to_sql("d1187b08-4943-4c8d-a7f6", self.conn, index=False) + df.to_sql("d1187b08-4943-4c8d-a7f6", con=self.conn, index=False) res = sql.read_sql_query("SELECT * FROM `d1187b08-4943-4c8d-a7f6`", self.conn) @@ -1675,7 +1679,7 @@ def test_escaped_table_name(self): def test_read_sql_duplicate_columns(self): # GH#53117 df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": 1}) - df.to_sql("test_table", self.conn, index=False) + df.to_sql("test_table", con=self.conn, index=False) result = pd.read_sql("SELECT a, b, a +1 as a, c FROM test_table;", self.conn) expected = DataFrame( @@ -1704,7 +1708,7 @@ def setup_class(cls): def test_read_table_columns(self, test_frame1): # test columns argument in read_table - sql.to_sql(test_frame1, "test_frame", self.conn) + sql.to_sql(test_frame1, "test_frame", con=self.conn) cols = ["A", "B"] result = sql.read_sql_table("test_frame", self.conn, columns=cols) @@ -1712,7 +1716,7 @@ def test_read_table_columns(self, test_frame1): def test_read_table_index_col(self, test_frame1): # test columns argument in read_table - sql.to_sql(test_frame1, "test_frame", self.conn) + sql.to_sql(test_frame1, "test_frame", con=self.conn) result = sql.read_sql_table("test_frame", self.conn, index_col="index") assert result.index.names == ["index"] @@ -1771,7 +1775,7 @@ def test_warning_case_insensitive_table_name(self, test_frame1): # Test that the warning is certainly NOT triggered in a normal case. with tm.assert_produces_warning(None): - test_frame1.to_sql("CaseSensitive", self.conn) + test_frame1.to_sql("CaseSensitive", con=self.conn) def _get_index_columns(self, tbl_name): from sqlalchemy.engine import reflection @@ -1882,7 +1886,7 @@ def test_query_by_select_obj(self): def test_column_with_percentage(self): # GH 37157 df = DataFrame({"A": [0, 1, 2], "%_variation": [3, 4, 5]}) - df.to_sql("test_column_percentage", self.conn, index=False) + df.to_sql("test_column_percentage", con=self.conn, index=False) res = sql.read_sql_table("test_column_percentage", self.conn) @@ -1908,7 +1912,7 @@ def test_sql_open_close(self, test_frame3): with tm.ensure_clean() as name: with closing(self.connect(name)) as conn: assert ( - sql.to_sql(test_frame3, "test_frame3_legacy", conn, index=False) + sql.to_sql(test_frame3, "test_frame3_legacy", con=conn, index=False) == 4 ) @@ -2094,7 +2098,7 @@ def test_default_type_conversion(self): def test_bigint(self): # int64 should be converted to BigInteger, GH7433 df = DataFrame(data={"i64": [2**62]}) - assert df.to_sql("test_bigint", self.conn, index=False) == 1 + assert df.to_sql("test_bigint", con=self.conn, index=False) == 1 result = sql.read_sql_table("test_bigint", self.conn) tm.assert_frame_equal(df, result) @@ -2193,7 +2197,7 @@ def test_datetime_with_timezone_roundtrip(self): expected = DataFrame( {"A": date_range("2013-01-01 09:00:00", periods=3, tz="US/Pacific")} ) - assert expected.to_sql("test_datetime_tz", self.conn, index=False) == 3 + assert expected.to_sql("test_datetime_tz", con=self.conn, index=False) == 3 if self.flavor == "postgresql": # SQLAlchemy "timezones" (i.e. 
offsets) are coerced to UTC @@ -2215,7 +2219,7 @@ def test_datetime_with_timezone_roundtrip(self): def test_out_of_bounds_datetime(self): # GH 26761 data = DataFrame({"date": datetime(9999, 1, 1)}, index=[0]) - assert data.to_sql("test_datetime_obb", self.conn, index=False) == 1 + assert data.to_sql("test_datetime_obb", con=self.conn, index=False) == 1 result = sql.read_sql_table("test_datetime_obb", self.conn) expected = DataFrame([pd.NaT], columns=["date"]) tm.assert_frame_equal(result, expected) @@ -2225,7 +2229,7 @@ def test_naive_datetimeindex_roundtrip(self): # Ensure that a naive DatetimeIndex isn't converted to UTC dates = date_range("2018-01-01", periods=5, freq="6H")._with_freq(None) expected = DataFrame({"nums": range(5)}, index=dates) - assert expected.to_sql("foo_table", self.conn, index_label="info_date") == 5 + assert expected.to_sql("foo_table", con=self.conn, index_label="info_date") == 5 result = sql.read_sql_table("foo_table", self.conn, index_col="info_date") # result index with gain a name from a set_index operation; expected tm.assert_frame_equal(result, expected, check_names=False) @@ -2266,7 +2270,7 @@ def test_datetime(self): df = DataFrame( {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)} ) - assert df.to_sql("test_datetime", self.conn) == 3 + assert df.to_sql("test_datetime", con=self.conn) == 3 # with read_table -> type information from schema used result = sql.read_sql_table("test_datetime", self.conn) @@ -2288,7 +2292,7 @@ def test_datetime_NaT(self): {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)} ) df.loc[1, "A"] = np.nan - assert df.to_sql("test_datetime", self.conn, index=False) == 3 + assert df.to_sql("test_datetime", con=self.conn, index=False) == 3 # with read_table -> type information from schema used result = sql.read_sql_table("test_datetime", self.conn) @@ -2306,7 +2310,7 @@ def test_datetime_NaT(self): def test_datetime_date(self): # test support for datetime.date df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) - assert df.to_sql("test_date", self.conn, index=False) == 2 + assert df.to_sql("test_date", con=self.conn, index=False) == 2 res = read_sql_table("test_date", self.conn) result = res["a"] expected = to_datetime(df["a"]) @@ -2316,19 +2320,19 @@ def test_datetime_date(self): def test_datetime_time(self, sqlite_buildin): # test support for datetime.time df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"]) - assert df.to_sql("test_time", self.conn, index=False) == 2 + assert df.to_sql("test_time", con=self.conn, index=False) == 2 res = read_sql_table("test_time", self.conn) tm.assert_frame_equal(res, df) # GH8341 # first, use the fallback to have the sqlite adapter put in place sqlite_conn = sqlite_buildin - assert sql.to_sql(df, "test_time2", sqlite_conn, index=False) == 2 + assert sql.to_sql(df, "test_time2", con=sqlite_conn, index=False) == 2 res = sql.read_sql_query("SELECT * FROM test_time2", sqlite_conn) ref = df.map(lambda _: _.strftime("%H:%M:%S.%f")) tm.assert_frame_equal(ref, res) # check if adapter is in place # then test if sqlalchemy is unaffected by the sqlite adapter - assert sql.to_sql(df, "test_time3", self.conn, index=False) == 2 + assert sql.to_sql(df, "test_time3", con=self.conn, index=False) == 2 if self.flavor == "sqlite": res = sql.read_sql_query("SELECT * FROM test_time3", self.conn) ref = df.map(lambda _: _.strftime("%H:%M:%S.%f")) @@ -2343,7 +2347,7 @@ def test_mixed_dtype_insert(self): df = DataFrame({"s1": s1, "s2": s2}) # write and read 
again - assert df.to_sql("test_read_write", self.conn, index=False) == 1 + assert df.to_sql("test_read_write", con=self.conn, index=False) == 1 df2 = sql.read_sql_table("test_read_write", self.conn) tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True) @@ -2351,7 +2355,7 @@ def test_mixed_dtype_insert(self): def test_nan_numeric(self): # NaNs in numeric float column df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]}) - assert df.to_sql("test_nan", self.conn, index=False) == 3 + assert df.to_sql("test_nan", con=self.conn, index=False) == 3 # with read_table result = sql.read_sql_table("test_nan", self.conn) @@ -2364,7 +2368,7 @@ def test_nan_numeric(self): def test_nan_fullcolumn(self): # full NaN column (numeric float column) df = DataFrame({"A": [0, 1, 2], "B": [np.nan, np.nan, np.nan]}) - assert df.to_sql("test_nan", self.conn, index=False) == 3 + assert df.to_sql("test_nan", con=self.conn, index=False) == 3 # with read_table result = sql.read_sql_table("test_nan", self.conn) @@ -2379,7 +2383,7 @@ def test_nan_fullcolumn(self): def test_nan_string(self): # NaNs in string column df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", np.nan]}) - assert df.to_sql("test_nan", self.conn, index=False) == 3 + assert df.to_sql("test_nan", con=self.conn, index=False) == 3 # NaNs are coming back as None df.loc[2, "B"] = None @@ -2441,15 +2445,15 @@ def test_dtype(self): cols = ["A", "B"] data = [(0.8, True), (0.9, None)] df = DataFrame(data, columns=cols) - assert df.to_sql("dtype_test", self.conn) == 2 - assert df.to_sql("dtype_test2", self.conn, dtype={"B": TEXT}) == 2 + assert df.to_sql("dtype_test", con=self.conn) == 2 + assert df.to_sql("dtype_test2", con=self.conn, dtype={"B": TEXT}) == 2 meta = MetaData() meta.reflect(bind=self.conn) sqltype = meta.tables["dtype_test2"].columns["B"].type assert isinstance(sqltype, TEXT) msg = "The type of B is not a SQLAlchemy type" with pytest.raises(ValueError, match=msg): - df.to_sql("error", self.conn, dtype={"B": str}) + df.to_sql("error", con=self.conn, dtype={"B": str}) # GH9083 assert df.to_sql("dtype_test3", self.conn, dtype={"B": String(10)}) == 2 @@ -2459,7 +2463,7 @@ def test_dtype(self): assert sqltype.length == 10 # single dtype - assert df.to_sql("single_dtype_test", self.conn, dtype=TEXT) == 2 + assert df.to_sql("single_dtype_test", con=self.conn, dtype=TEXT) == 2 meta.reflect(bind=self.conn) sqltypea = meta.tables["single_dtype_test"].columns["A"].type sqltypeb = meta.tables["single_dtype_test"].columns["B"].type @@ -2484,7 +2488,7 @@ def test_notna_dtype(self): df = DataFrame(cols) tbl = "notna_dtype_test" - assert df.to_sql(tbl, self.conn) == 2 + assert df.to_sql(tbl, con=self.conn) == 2 _ = sql.read_sql_table(tbl, self.conn) meta = MetaData() meta.reflect(bind=self.conn) @@ -2518,7 +2522,7 @@ def test_double_precision(self): assert ( df.to_sql( "test_dtypes", - self.conn, + con=self.conn, index=False, if_exists="replace", dtype={"f64_as_f32": Float(precision=23)}, @@ -2567,7 +2571,9 @@ def main(connectable): test_connectable(connectable) assert ( - DataFrame({"test_foo_data": [0, 1, 2]}).to_sql("test_foo_data", self.conn) + DataFrame({"test_foo_data": [0, 1, 2]}).to_sql( + "test_foo_data", con=self.conn + ) == 3 ) main(self.conn) @@ -2597,9 +2603,9 @@ def test_to_sql_with_negative_npinf(self, input, request): msg = "inf cannot be used with MySQL" with pytest.raises(ValueError, match=msg): - df.to_sql("foobar", self.conn, index=False) + df.to_sql("foobar", con=self.conn, index=False) else: - assert df.to_sql("foobar", 
self.conn, index=False) == 1 + assert df.to_sql("foobar", con=self.conn, index=False) == 1 res = sql.read_sql_table("foobar", self.conn) tm.assert_equal(df, res) @@ -2672,7 +2678,7 @@ def test_read_sql_dtype_backend(self, string_storage, func, dtype_backend): # GH#50048 table = "test" df = self.dtype_backend_data() - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(table, con=self.conn, index=False, if_exists="replace") with pd.option_context("mode.string_storage", string_storage): result = getattr(pd, func)( @@ -2697,7 +2703,7 @@ def test_read_sql_dtype_backend_table(self, string_storage, func, dtype_backend) # GH#50048 table = "test" df = self.dtype_backend_data() - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(table, con=self.conn, index=False, if_exists="replace") with pd.option_context("mode.string_storage", string_storage): result = getattr(pd, func)(table, self.conn, dtype_backend=dtype_backend) @@ -2719,7 +2725,7 @@ def test_read_sql_dtype_backend_table(self, string_storage, func, dtype_backend) def test_read_sql_invalid_dtype_backend_table(self, func): table = "test" df = self.dtype_backend_data() - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(table, con=self.conn, index=False, if_exists="replace") msg = ( "dtype_backend numpy is invalid, only 'numpy_nullable' and " @@ -2784,7 +2790,7 @@ def test_chunksize_empty_dtypes(self): dtypes = {"a": "int64", "b": "object"} df = DataFrame(columns=["a", "b"]).astype(dtypes) expected = df.copy() - df.to_sql("test", self.conn, index=False, if_exists="replace") + df.to_sql("test", con=self.conn, index=False, if_exists="replace") for result in read_sql_query( "SELECT * FROM test", @@ -2800,7 +2806,7 @@ def test_read_sql_dtype(self, func, dtype_backend): # GH#50797 table = "test" df = DataFrame({"a": [1, 2, 3], "b": 5}) - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(table, con=self.conn, index=False, if_exists="replace") result = getattr(pd, func)( f"Select * from {table}", @@ -2861,7 +2867,7 @@ def test_default_date_load(self): def test_bigint_warning(self): # test no warning for BIGINT (to support int64) is raised (GH7433) df = DataFrame({"a": [1, 2]}, dtype="int64") - assert df.to_sql("test_bigintwarning", self.conn, index=False) == 2 + assert df.to_sql("test_bigintwarning", con=self.conn, index=False) == 2 with tm.assert_produces_warning(None): sql.read_sql_table("test_bigintwarning", self.conn) @@ -2993,15 +2999,19 @@ def test_schema_support(self): self.conn.exec_driver_sql("CREATE SCHEMA other;") # write dataframe to different schema's - assert df.to_sql("test_schema_public", self.conn, index=False) == 2 + assert df.to_sql("test_schema_public", con=self.conn, index=False) == 2 assert ( df.to_sql( - "test_schema_public_explicit", self.conn, index=False, schema="public" + "test_schema_public_explicit", + con=self.conn, + index=False, + schema="public", ) == 2 ) assert ( - df.to_sql("test_schema_other", self.conn, index=False, schema="other") == 2 + df.to_sql("test_schema_other", con=self.conn, index=False, schema="other") + == 2 ) # read dataframes back in @@ -3028,11 +3038,12 @@ def test_schema_support(self): # write dataframe with different if_exists options assert ( - df.to_sql("test_schema_other", self.conn, schema="other", index=False) == 2 + df.to_sql("test_schema_other", con=self.conn, schema="other", index=False) + == 2 ) df.to_sql( "test_schema_other", - self.conn, + con=self.conn, schema="other", index=False, 
if_exists="replace", @@ -3040,7 +3051,7 @@ def test_schema_support(self): assert ( df.to_sql( "test_schema_other", - self.conn, + con=self.conn, schema="other", index=False, if_exists="append", @@ -3159,7 +3170,7 @@ def test_execute_sql(self): def test_datetime_date(self): # test support for datetime.date df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) - assert df.to_sql("test_date", self.conn, index=False) == 2 + assert df.to_sql("test_date", con=self.conn, index=False) == 2 res = read_sql_query("SELECT * FROM test_date", self.conn) if self.flavor == "sqlite": # comes back as strings @@ -3178,7 +3189,7 @@ def test_datetime_time(self, tz_aware): df = DataFrame(tz_times, columns=["a"]) - assert df.to_sql("test_time", self.conn, index=False) == 2 + assert df.to_sql("test_time", con=self.conn, index=False) == 2 res = read_sql_query("SELECT * FROM test_time", self.conn) if self.flavor == "sqlite": # comes back as strings @@ -3216,8 +3227,8 @@ def test_dtype(self): cols = ["A", "B"] data = [(0.8, True), (0.9, None)] df = DataFrame(data, columns=cols) - assert df.to_sql("dtype_test", self.conn) == 2 - assert df.to_sql("dtype_test2", self.conn, dtype={"B": "STRING"}) == 2 + assert df.to_sql("dtype_test", con=self.conn) == 2 + assert df.to_sql("dtype_test2", con=self.conn, dtype={"B": "STRING"}) == 2 # sqlite stores Boolean values as INTEGER assert self._get_sqlite_column_type("dtype_test", "B") == "INTEGER" @@ -3225,10 +3236,10 @@ def test_dtype(self): assert self._get_sqlite_column_type("dtype_test2", "B") == "STRING" msg = r"B \(\) not a string" with pytest.raises(ValueError, match=msg): - df.to_sql("error", self.conn, dtype={"B": bool}) + df.to_sql("error", con=self.conn, dtype={"B": bool}) # single dtype - assert df.to_sql("single_dtype_test", self.conn, dtype="STRING") == 2 + assert df.to_sql("single_dtype_test", con=self.conn, dtype="STRING") == 2 assert self._get_sqlite_column_type("single_dtype_test", "A") == "STRING" assert self._get_sqlite_column_type("single_dtype_test", "B") == "STRING" @@ -3245,7 +3256,7 @@ def test_notna_dtype(self): df = DataFrame(cols) tbl = "notna_dtype_test" - assert df.to_sql(tbl, self.conn) == 2 + assert df.to_sql(tbl, con=self.conn) == 2 assert self._get_sqlite_column_type(tbl, "Bool") == "INTEGER" assert self._get_sqlite_column_type(tbl, "Date") == "TIMESTAMP" @@ -3258,7 +3269,7 @@ def test_illegal_names(self): msg = "Empty table or column name specified" with pytest.raises(ValueError, match=msg): - df.to_sql("", self.conn) + df.to_sql("", con=self.conn) for ndx, weird_name in enumerate( [ @@ -3274,12 +3285,12 @@ def test_illegal_names(self): "\xe9", ] ): - assert df.to_sql(weird_name, self.conn) == 2 + assert df.to_sql(weird_name, con=self.conn) == 2 sql.table_exists(weird_name, self.conn) df2 = DataFrame([[1, 2], [3, 4]], columns=["a", weird_name]) c_tbl = f"test_weird_col_name{ndx:d}" - assert df2.to_sql(c_tbl, self.conn) == 2 + assert df2.to_sql(c_tbl, con=self.conn) == 2 sql.table_exists(c_tbl, self.conn) From 567e9182015c96ab5f278444d640c40f834a2f70 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Sat, 5 Aug 2023 21:08:44 -0400 Subject: [PATCH 03/13] Updating unit tests and implementation. 
--- pandas/core/generic.py | 14 +- pandas/tests/io/test_sql.py | 431 +++++++++++++++++++++++++++--------- 2 files changed, 329 insertions(+), 116 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ac1e0142f447b..e7515d9c0b8b9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -97,7 +97,10 @@ SettingWithCopyWarning, _chained_assignment_method_msg, ) -from pandas.util._decorators import doc +from pandas.util._decorators import ( + deprecate_nonkeyword_arguments, + doc, +) from pandas.util._exceptions import find_stack_level from pandas.util._validators import ( check_dtype_backend, @@ -201,9 +204,6 @@ Mapping, Sequence, ) - import sqlite3 - - import sqlalchemy from pandas._libs.tslibs import BaseOffset @@ -2795,13 +2795,11 @@ def to_hdf( ) @final + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["name"], name="to_sql") def to_sql( self, name: str, - con: sqlalchemy.engine.Engine - | sqlalchemy.engine.Connection - | sqlite3.Connection - | None = None, + con, schema: str | None = None, if_exists: Literal["fail", "replace", "append"] = "fail", index: bool_t = True, diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index fa0fdfd2047db..209779bfaabf5 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -28,6 +28,7 @@ ) from io import StringIO from pathlib import Path +import re import sqlite3 import uuid @@ -548,9 +549,15 @@ def sqlite_buildin_iris(sqlite_buildin, iris_path): def test_dataframe_to_sql(conn, test_frame1, request): # GH 51086 if conn is sqlite_engine conn = request.getfixturevalue(conn) - test_frame1.to_sql("test", con=conn, if_exists="append", index=False) + msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(msg)): + test_frame1.to_sql("test", conn, if_exists="append", index=False) +@pytest.mark.skip @pytest.mark.db @pytest.mark.parametrize("conn", all_connectable) def test_dataframe_to_sql_arrow_dtypes(conn, request): @@ -569,13 +576,17 @@ def test_dataframe_to_sql_arrow_dtypes(conn, request): ) conn = request.getfixturevalue(conn) with tm.assert_produces_warning(UserWarning, match="the 'timedelta'"): - df.to_sql("test_arrow", con=conn, if_exists="replace", index=False) + df.to_sql("test_arrow", conn, if_exists="replace", index=False) @pytest.mark.db @pytest.mark.parametrize("conn", all_connectable) def test_dataframe_to_sql_arrow_dtypes_missing(conn, request, nulls_fixture): # GH 52046 + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) pytest.importorskip("pyarrow") df = DataFrame( { @@ -585,7 +596,8 @@ def test_dataframe_to_sql_arrow_dtypes_missing(conn, request, nulls_fixture): } ) conn = request.getfixturevalue(conn) - df.to_sql("test_arrow", con=conn, if_exists="replace", index=False) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + df.to_sql("test_arrow", conn, if_exists="replace", index=False) @pytest.mark.db @@ -756,7 +768,7 @@ def test_read_procedure(conn, request): from sqlalchemy.engine import Engine df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) - df.to_sql("test_frame", con=conn, index=False) + df.to_sql("test_frame", conn, index=False) proc = """DROP PROCEDURE IF EXISTS get_testdb; @@ -811,7 +823,7 @@ def psql_insert_copy(table, conn, keys, data_iter): conn = request.getfixturevalue(conn) expected = DataFrame({"col1": [1, 2], "col2": [0.1, 0.2], "col3": ["a", "n"]}) result_count = expected.to_sql( - "test_frame", con=conn, index=False, method=psql_insert_copy + "test_frame", conn, index=False, method=psql_insert_copy ) # GH 46891 if expected_count is None: @@ -860,12 +872,12 @@ def insert_on_conflict(table, conn, keys, data_iter): conn.execute(create_sql) expected = DataFrame([[1, 2.1, "a"]], columns=list("abc")) - expected.to_sql("test_insert_conflict", con=conn, if_exists="append", index=False) + expected.to_sql("test_insert_conflict", conn, if_exists="append", index=False) df_insert = DataFrame([[1, 3.2, "b"]], columns=list("abc")) inserted = df_insert.to_sql( "test_insert_conflict", - con=conn, + conn, index=False, if_exists="append", method=insert_on_conflict, @@ -914,12 +926,12 @@ def insert_on_conflict(table, conn, keys, data_iter): conn.execute(create_sql) df = DataFrame([[1, 2.1, "a"]], columns=list("abc")) - df.to_sql("test_insert_conflict", con=conn, if_exists="append", index=False) + df.to_sql("test_insert_conflict", conn, if_exists="append", index=False) expected = DataFrame([[1, 3.2, "b"]], columns=list("abc")) inserted = expected.to_sql( "test_insert_conflict", - con=conn, + conn, index=False, if_exists="append", method=insert_on_conflict, @@ -1435,9 +1447,11 @@ def test_date_and_index(self): assert issubclass(df.index.dtype.type, np.datetime64) assert issubclass(df.IntDateCol.dtype.type, np.datetime64) + @pytest.mark.skip def test_timedelta(self): # see #6921 df = to_timedelta(Series(["00:00:01", "00:00:03"], name="foo")).to_frame() + with tm.assert_produces_warning(UserWarning): result_count = df.to_sql("test_timedelta", self.conn) assert result_count == 2 @@ -1447,8 +1461,13 @@ def test_timedelta(self): def test_complex_raises(self): df = DataFrame({"a": [1 + 1j, 2j]}) msg = "Complex datatypes not supported" + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) with pytest.raises(ValueError, match=msg): - assert df.to_sql("test_complex", con=self.conn) is None + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_complex", self.conn) is None @pytest.mark.parametrize( "index_name,index_label,expected", @@ -1471,9 +1490,7 @@ def test_to_sql_index_label(self, index_name, index_label, expected): temp_frame = DataFrame({"col1": range(4)}) temp_frame.index.name = index_name query = "SELECT * FROM test_index_label" - sql.to_sql( - temp_frame, "test_index_label", con=self.conn, index_label=index_label - ) + sql.to_sql(temp_frame, "test_index_label", self.conn, index_label=index_label) frame = sql.read_sql_query(query, self.conn) assert frame.columns[0] == expected @@ -1485,7 +1502,7 @@ def test_to_sql_index_label_multiindex(self): ) # no index name, defaults to 'level_0' and 'level_1' - result = sql.to_sql(temp_frame, "test_index_label", con=self.conn) + result = sql.to_sql(temp_frame, "test_index_label", self.conn) assert result == expected_row_count frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn) assert frame.columns[0] == "level_0" @@ -1495,7 +1512,7 @@ def test_to_sql_index_label_multiindex(self): result = sql.to_sql( temp_frame, "test_index_label", - con=self.conn, + self.conn, if_exists="replace", index_label=["A", "B"], ) @@ -1506,7 +1523,7 @@ def test_to_sql_index_label_multiindex(self): # using the index name temp_frame.index.names = ["A", "B"] result = sql.to_sql( - temp_frame, "test_index_label", con=self.conn, if_exists="replace" + temp_frame, "test_index_label", self.conn, if_exists="replace" ) assert result == expected_row_count frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn) @@ -1516,7 +1533,7 @@ def test_to_sql_index_label_multiindex(self): result = sql.to_sql( temp_frame, "test_index_label", - con=self.conn, + self.conn, if_exists="replace", index_label=["C", "D"], ) @@ -1529,19 +1546,23 @@ def test_to_sql_index_label_multiindex(self): sql.to_sql( temp_frame, "test_index_label", - con=self.conn, + self.conn, if_exists="replace", index_label="C", ) def test_multiindex_roundtrip(self): + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) df = DataFrame.from_records( [(1, 2.1, "line1"), (2, 1.5, "line2")], columns=["A", "B", "C"], index=["A", "B"], ) - - df.to_sql("test_multiindex_roundtrip", con=self.conn) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + df.to_sql("test_multiindex_roundtrip", self.conn) result = sql.read_sql_query( "SELECT * FROM test_multiindex_roundtrip", self.conn, index_col=["A", "B"] ) @@ -1559,7 +1580,12 @@ def test_multiindex_roundtrip(self): def test_dtype_argument(self, dtype): # GH10285 Add dtype argument to read_sql_query df = DataFrame([[1.2, 3.4], [5.6, 7.8]], columns=["A", "B"]) - assert df.to_sql("test_dtype_argument", con=self.conn) == 2 + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_dtype_argument", self.conn) == 2 expected = df.astype(dtype) result = sql.read_sql_query( @@ -1570,9 +1596,7 @@ def test_dtype_argument(self, dtype): def test_integer_col_names(self): df = DataFrame([[1, 2], [3, 4]], columns=[0, 1]) - sql.to_sql( - df, "test_frame_integer_col_names", con=self.conn, if_exists="replace" - ) + sql.to_sql(df, "test_frame_integer_col_names", self.conn, if_exists="replace") def test_get_schema(self, test_frame1): create_sql = sql.get_schema(test_frame1, "test", con=self.conn) @@ -1613,7 +1637,12 @@ def test_chunksize_read(self): df = DataFrame( np.random.default_rng(2).standard_normal((22, 5)), columns=list("abcde") ) - df.to_sql("test_chunksize", con=self.conn, index=False) + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + df.to_sql("test_chunksize", self.conn, index=False) # reading the query in one time res1 = sql.read_sql_query("select * from test_chunksize", self.conn) @@ -1654,10 +1683,14 @@ def test_categorical(self): "person_name": ["John P. Doe", "Jane Dove", "John P. Doe"], } ) + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) df2 = df.copy() df2["person_name"] = df2["person_name"].astype("category") - - df2.to_sql("test_categorical", con=self.conn, index=False) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + df2.to_sql("test_categorical", self.conn, index=False) res = sql.read_sql_query("SELECT * FROM test_categorical", self.conn) tm.assert_frame_equal(res, df) @@ -1665,12 +1698,22 @@ def test_categorical(self): def test_unicode_column_name(self): # GH 11431 df = DataFrame([[1, 2], [3, 4]], columns=["\xe9", "b"]) - df.to_sql("test_unicode", con=self.conn, index=False) + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + df.to_sql("test_unicode", self.conn, index=False) def test_escaped_table_name(self): # GH 13206 df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]}) - df.to_sql("d1187b08-4943-4c8d-a7f6", con=self.conn, index=False) + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + df.to_sql("d1187b08-4943-4c8d-a7f6", self.conn, index=False) res = sql.read_sql_query("SELECT * FROM `d1187b08-4943-4c8d-a7f6`", self.conn) @@ -1678,8 +1721,13 @@ def test_escaped_table_name(self): def test_read_sql_duplicate_columns(self): # GH#53117 + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": 1}) - df.to_sql("test_table", con=self.conn, index=False) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + df.to_sql("test_table", self.conn, index=False) result = pd.read_sql("SELECT a, b, a +1 as a, c FROM test_table;", self.conn) expected = DataFrame( @@ -1708,7 +1756,7 @@ def setup_class(cls): def test_read_table_columns(self, test_frame1): # test columns argument in read_table - sql.to_sql(test_frame1, "test_frame", con=self.conn) + sql.to_sql(test_frame1, "test_frame", self.conn) cols = ["A", "B"] result = sql.read_sql_table("test_frame", self.conn, columns=cols) @@ -1716,7 +1764,7 @@ def test_read_table_columns(self, test_frame1): def test_read_table_index_col(self, test_frame1): # test columns argument in read_table - sql.to_sql(test_frame1, "test_frame", con=self.conn) + sql.to_sql(test_frame1, "test_frame", self.conn) result = sql.read_sql_table("test_frame", self.conn, index_col="index") assert result.index.names == ["index"] @@ -1763,6 +1811,10 @@ def test_not_reflect_all_tables(self): def test_warning_case_insensitive_table_name(self, test_frame1): # see gh-7815 + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) with tm.assert_produces_warning( UserWarning, match=( @@ -1774,8 +1826,8 @@ def test_warning_case_insensitive_table_name(self, test_frame1): sql.SQLDatabase(self.conn).check_case_sensitive("TABLE1", "") # Test that the warning is certainly NOT triggered in a normal case. - with tm.assert_produces_warning(None): - test_frame1.to_sql("CaseSensitive", con=self.conn) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + test_frame1.to_sql("CaseSensitive", self.conn) def _get_index_columns(self, tbl_name): from sqlalchemy.engine import reflection @@ -1841,10 +1893,15 @@ def test_database_uri_string(self, test_frame1): # db_uri = 'sqlite:///:memory:' # raises # sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) near # "iris": syntax error [SQL: 'iris'] + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) with tm.ensure_clean() as name: db_uri = "sqlite:///" + name table = "iris" - test_frame1.to_sql(table, db_uri, if_exists="replace", index=False) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + test_frame1.to_sql(table, db_uri, if_exists="replace", index=False) test_frame2 = sql.read_sql(table, db_uri) test_frame3 = sql.read_sql_table(table, db_uri) query = "SELECT * FROM iris" @@ -1886,7 +1943,12 @@ def test_query_by_select_obj(self): def test_column_with_percentage(self): # GH 37157 df = DataFrame({"A": [0, 1, 2], "%_variation": [3, 4, 5]}) - df.to_sql("test_column_percentage", con=self.conn, index=False) + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + df.to_sql("test_column_percentage", self.conn, index=False) res = sql.read_sql_table("test_column_percentage", self.conn) @@ -1912,7 +1974,7 @@ def test_sql_open_close(self, test_frame3): with tm.ensure_clean() as name: with closing(self.connect(name)) as conn: assert ( - sql.to_sql(test_frame3, "test_frame3_legacy", con=conn, index=False) + sql.to_sql(test_frame3, "test_frame3_legacy", conn, index=False) == 4 ) @@ -2098,7 +2160,12 @@ def test_default_type_conversion(self): def test_bigint(self): # int64 should be converted to BigInteger, GH7433 df = DataFrame(data={"i64": [2**62]}) - assert df.to_sql("test_bigint", con=self.conn, index=False) == 1 + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_bigint", self.conn, index=False) == 1 result = sql.read_sql_table("test_bigint", self.conn) tm.assert_frame_equal(df, result) @@ -2194,10 +2261,15 @@ def test_datetime_with_timezone_roundtrip(self): # Write datetimetz data to a db and read it back # For dbs that support timestamps with timezones, should get back UTC # otherwise naive data should be returned + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) expected = DataFrame( {"A": date_range("2013-01-01 09:00:00", periods=3, tz="US/Pacific")} ) - assert expected.to_sql("test_datetime_tz", con=self.conn, index=False) == 3 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert expected.to_sql("test_datetime_tz", self.conn, index=False) == 3 if self.flavor == "postgresql": # SQLAlchemy "timezones" (i.e. offsets) are coerced to UTC @@ -2218,8 +2290,13 @@ def test_datetime_with_timezone_roundtrip(self): def test_out_of_bounds_datetime(self): # GH 26761 + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) data = DataFrame({"date": datetime(9999, 1, 1)}, index=[0]) - assert data.to_sql("test_datetime_obb", con=self.conn, index=False) == 1 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert data.to_sql("test_datetime_obb", self.conn, index=False) == 1 result = sql.read_sql_table("test_datetime_obb", self.conn) expected = DataFrame([pd.NaT], columns=["date"]) tm.assert_frame_equal(result, expected) @@ -2227,9 +2304,14 @@ def test_out_of_bounds_datetime(self): def test_naive_datetimeindex_roundtrip(self): # GH 23510 # Ensure that a naive DatetimeIndex isn't converted to UTC + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) dates = date_range("2018-01-01", periods=5, freq="6H")._with_freq(None) expected = DataFrame({"nums": range(5)}, index=dates) - assert expected.to_sql("foo_table", con=self.conn, index_label="info_date") == 5 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert expected.to_sql("foo_table", self.conn, index_label="info_date") == 5 result = sql.read_sql_table("foo_table", self.conn, index_col="info_date") # result index with gain a name from a set_index operation; expected tm.assert_frame_equal(result, expected, check_names=False) @@ -2270,7 +2352,12 @@ def test_datetime(self): df = DataFrame( {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)} ) - assert df.to_sql("test_datetime", con=self.conn) == 3 + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_datetime", self.conn) == 3 # with read_table -> type information from schema used result = sql.read_sql_table("test_datetime", self.conn) @@ -2291,8 +2378,13 @@ def test_datetime_NaT(self): df = DataFrame( {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)} ) + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) df.loc[1, "A"] = np.nan - assert df.to_sql("test_datetime", con=self.conn, index=False) == 3 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_datetime", self.conn, index=False) == 3 # with read_table -> type information from schema used result = sql.read_sql_table("test_datetime", self.conn) @@ -2310,7 +2402,12 @@ def test_datetime_NaT(self): def test_datetime_date(self): # test support for datetime.date df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) - assert df.to_sql("test_date", con=self.conn, index=False) == 2 + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_date", self.conn, index=False) == 2 res = read_sql_table("test_date", self.conn) result = res["a"] expected = to_datetime(df["a"]) @@ -2318,21 +2415,26 @@ def test_datetime_date(self): tm.assert_series_equal(result, expected) def test_datetime_time(self, sqlite_buildin): + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) # test support for datetime.time df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"]) - assert df.to_sql("test_time", con=self.conn, index=False) == 2 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_time", self.conn, index=False) == 2 res = read_sql_table("test_time", self.conn) tm.assert_frame_equal(res, df) # GH8341 # first, use the fallback to have the sqlite adapter put in place sqlite_conn = sqlite_buildin - assert sql.to_sql(df, "test_time2", con=sqlite_conn, index=False) == 2 + assert sql.to_sql(df, "test_time2", sqlite_conn, index=False) == 2 res = sql.read_sql_query("SELECT * FROM test_time2", sqlite_conn) ref = df.map(lambda _: _.strftime("%H:%M:%S.%f")) tm.assert_frame_equal(ref, res) # check if adapter is in place # then test if sqlalchemy is unaffected by the sqlite adapter - assert sql.to_sql(df, "test_time3", con=self.conn, index=False) == 2 + assert sql.to_sql(df, "test_time3", self.conn, index=False) == 2 if self.flavor == "sqlite": res = sql.read_sql_query("SELECT * FROM test_time3", self.conn) ref = df.map(lambda _: _.strftime("%H:%M:%S.%f")) @@ -2345,9 +2447,14 @@ def test_mixed_dtype_insert(self): s1 = Series(2**25 + 1, dtype=np.int32) s2 = Series(0.0, dtype=np.float32) df = DataFrame({"s1": s1, "s2": s2}) + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) # write and read again - assert df.to_sql("test_read_write", con=self.conn, index=False) == 1 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_read_write", self.conn, index=False) == 1 df2 = sql.read_sql_table("test_read_write", self.conn) tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True) @@ -2355,7 +2462,12 @@ def test_mixed_dtype_insert(self): def test_nan_numeric(self): # NaNs in numeric float column df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]}) - assert df.to_sql("test_nan", con=self.conn, index=False) == 3 + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_nan", self.conn, index=False) == 3 # with read_table result = sql.read_sql_table("test_nan", self.conn) @@ -2367,8 +2479,13 @@ def test_nan_numeric(self): def test_nan_fullcolumn(self): # full NaN column (numeric float column) + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) df = DataFrame({"A": [0, 1, 2], "B": [np.nan, np.nan, np.nan]}) - assert df.to_sql("test_nan", con=self.conn, index=False) == 3 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_nan", self.conn, index=False) == 3 # with read_table result = sql.read_sql_table("test_nan", self.conn) @@ -2382,8 +2499,13 @@ def test_nan_fullcolumn(self): def test_nan_string(self): # NaNs in string column + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", np.nan]}) - assert df.to_sql("test_nan", con=self.conn, index=False) == 3 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_nan", self.conn, index=False) == 3 # NaNs are coming back as None df.loc[2, "B"] = None @@ -2436,6 +2558,10 @@ def test_get_schema_create_table(self, test_frame3): self.drop_table(tbl, self.conn) def test_dtype(self): + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) from sqlalchemy import ( TEXT, String, @@ -2445,25 +2571,29 @@ def test_dtype(self): cols = ["A", "B"] data = [(0.8, True), (0.9, None)] df = DataFrame(data, columns=cols) - assert df.to_sql("dtype_test", con=self.conn) == 2 - assert df.to_sql("dtype_test2", con=self.conn, dtype={"B": TEXT}) == 2 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("dtype_test", self.conn) == 2 + assert df.to_sql("dtype_test2", self.conn, dtype={"B": TEXT}) == 2 meta = MetaData() meta.reflect(bind=self.conn) sqltype = meta.tables["dtype_test2"].columns["B"].type assert isinstance(sqltype, TEXT) msg = "The type of B is not a SQLAlchemy type" - with pytest.raises(ValueError, match=msg): - df.to_sql("error", con=self.conn, dtype={"B": str}) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + with pytest.raises(ValueError, match=msg): + df.to_sql("error", self.conn, dtype={"B": str}) # GH9083 - assert df.to_sql("dtype_test3", self.conn, dtype={"B": String(10)}) == 2 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("dtype_test3", self.conn, dtype={"B": String(10)}) == 2 meta.reflect(bind=self.conn) sqltype = meta.tables["dtype_test3"].columns["B"].type assert isinstance(sqltype, String) assert sqltype.length == 10 # single dtype - assert df.to_sql("single_dtype_test", con=self.conn, dtype=TEXT) == 2 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("single_dtype_test", self.conn, dtype=TEXT) == 2 meta.reflect(bind=self.conn) sqltypea = meta.tables["single_dtype_test"].columns["A"].type sqltypeb = meta.tables["single_dtype_test"].columns["B"].type @@ -2471,6 +2601,10 @@ def test_dtype(self): assert isinstance(sqltypeb, TEXT) def test_notna_dtype(self): + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) from sqlalchemy import ( Boolean, DateTime, @@ -2488,7 +2622,8 @@ def test_notna_dtype(self): df = DataFrame(cols) tbl = "notna_dtype_test" - assert df.to_sql(tbl, con=self.conn) == 2 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql(tbl, self.conn) == 2 _ = sql.read_sql_table(tbl, self.conn) meta = MetaData() meta.reflect(bind=self.conn) @@ -2500,6 +2635,10 @@ def test_notna_dtype(self): assert isinstance(col_dict["Float"].type, Float) def test_double_precision(self): + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) from sqlalchemy import ( BigInteger, Float, @@ -2519,16 +2658,17 @@ def test_double_precision(self): } ) - assert ( - df.to_sql( - "test_dtypes", - con=self.conn, - index=False, - if_exists="replace", - dtype={"f64_as_f32": Float(precision=23)}, + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert ( + df.to_sql( + "test_dtypes", + self.conn, + index=False, + if_exists="replace", + dtype={"f64_as_f32": Float(precision=23)}, + ) + == 1 ) - == 1 - ) res = sql.read_sql_table("test_dtypes", self.conn) # check precision of float64 @@ -2549,6 +2689,11 @@ def test_connectable_issue_example(self): # https://github.com/pandas-dev/pandas/issues/10104 from sqlalchemy.engine import Engine + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) + def test_select(connection): query = "SELECT test_foo_data FROM test_foo_data" return sql.read_sql_query(query, con=connection) @@ -2570,12 +2715,13 @@ def main(connectable): else: test_connectable(connectable) - assert ( - DataFrame({"test_foo_data": [0, 1, 2]}).to_sql( - "test_foo_data", con=self.conn + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert ( + DataFrame({"test_foo_data": [0, 1, 2]}).to_sql( + "test_foo_data", self.conn + ) + == 3 ) - == 3 - ) main(self.conn) @pytest.mark.parametrize( @@ -2586,6 +2732,10 @@ def test_to_sql_with_negative_npinf(self, input, request): # GH 34431 df = DataFrame(input) + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) if self.flavor == "mysql": # GH 36465 @@ -2602,10 +2752,12 @@ def test_to_sql_with_negative_npinf(self, input, request): request.node.add_marker(mark) msg = "inf cannot be used with MySQL" - with pytest.raises(ValueError, match=msg): - df.to_sql("foobar", con=self.conn, index=False) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + with pytest.raises(ValueError, match=msg): + df.to_sql("foobar", self.conn, index=False) else: - assert df.to_sql("foobar", con=self.conn, index=False) == 1 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("foobar", self.conn, index=False) == 1 res = sql.read_sql_table("foobar", self.conn) tm.assert_equal(df, res) @@ -2676,9 +2828,14 @@ def test_get_engine_auto_error_message(self): @pytest.mark.parametrize("func", ["read_sql", "read_sql_query"]) def test_read_sql_dtype_backend(self, string_storage, func, dtype_backend): # GH#50048 + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) table = "test" df = self.dtype_backend_data() - df.to_sql(table, con=self.conn, index=False, if_exists="replace") + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + df.to_sql(table, self.conn, index=False, if_exists="replace") with pd.option_context("mode.string_storage", string_storage): result = getattr(pd, func)( @@ -2703,7 +2860,12 @@ def test_read_sql_dtype_backend_table(self, string_storage, func, dtype_backend) # GH#50048 table = "test" df = self.dtype_backend_data() - df.to_sql(table, con=self.conn, index=False, if_exists="replace") + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + df.to_sql(table, self.conn, index=False, if_exists="replace") with pd.option_context("mode.string_storage", string_storage): result = getattr(pd, func)(table, self.conn, dtype_backend=dtype_backend) @@ -2725,7 +2887,12 @@ def test_read_sql_dtype_backend_table(self, string_storage, func, dtype_backend) def test_read_sql_invalid_dtype_backend_table(self, func): table = "test" df = self.dtype_backend_data() - df.to_sql(table, con=self.conn, index=False, if_exists="replace") + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + df.to_sql(table, self.conn, index=False, if_exists="replace") msg = ( "dtype_backend numpy is invalid, only 'numpy_nullable' and " @@ -2790,7 +2957,13 @@ def test_chunksize_empty_dtypes(self): dtypes = {"a": "int64", "b": "object"} df = DataFrame(columns=["a", "b"]).astype(dtypes) expected = df.copy() - df.to_sql("test", con=self.conn, index=False, if_exists="replace") + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) + + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + df.to_sql("test", self.conn, index=False, if_exists="replace") for result in read_sql_query( "SELECT * FROM test", @@ -2804,9 +2977,14 @@ def test_chunksize_empty_dtypes(self): @pytest.mark.parametrize("func", ["read_sql", "read_sql_query"]) def test_read_sql_dtype(self, func, dtype_backend): # GH#50797 + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) table = "test" df = DataFrame({"a": [1, 2, 3], "b": 5}) - df.to_sql(table, con=self.conn, index=False, if_exists="replace") + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + df.to_sql(table, self.conn, index=False, if_exists="replace") result = getattr(pd, func)( f"Select * from {table}", @@ -2866,8 +3044,13 @@ def test_default_date_load(self): def test_bigint_warning(self): # test no warning for BIGINT (to support int64) is raised (GH7433) + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) df = DataFrame({"a": [1, 2]}, dtype="int64") - assert df.to_sql("test_bigintwarning", con=self.conn, index=False) == 2 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_bigintwarning", self.conn, index=False) == 2 with tm.assert_produces_warning(None): sql.read_sql_table("test_bigintwarning", self.conn) @@ -2881,6 +3064,10 @@ def test_row_object_is_named_tuple(self): # GH 40682 # Test for the is_named_tuple() function # Placed here due to its usage of sqlalchemy + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) from sqlalchemy import ( Column, @@ -2904,10 +3091,13 @@ class Test(BaseModel): Session = sessionmaker(bind=self.conn) with Session() as session: df = DataFrame({"id": [0, 1], "string_column": ["hello", "world"]}) - assert ( - df.to_sql("test_frame", con=self.conn, index=False, if_exists="replace") - == 2 - ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert ( + df.to_sql( + "test_frame", con=self.conn, index=False, if_exists="replace" + ) + == 2 + ) session.commit() test_query = session.query(Test.id, Test.string_column) df = DataFrame(test_query) @@ -2999,19 +3189,15 @@ def test_schema_support(self): self.conn.exec_driver_sql("CREATE SCHEMA other;") # write dataframe to different schema's - assert df.to_sql("test_schema_public", con=self.conn, index=False) == 2 + assert df.to_sql("test_schema_public", self.conn, index=False) == 2 assert ( df.to_sql( - "test_schema_public_explicit", - con=self.conn, - index=False, - schema="public", + "test_schema_public_explicit", self.conn, index=False, schema="public" ) == 2 ) assert ( - df.to_sql("test_schema_other", con=self.conn, index=False, schema="other") - == 2 + df.to_sql("test_schema_other", self.conn, index=False, schema="other") == 2 ) # read dataframes back in @@ -3038,12 +3224,11 @@ def test_schema_support(self): # write dataframe with different if_exists options assert ( - df.to_sql("test_schema_other", con=self.conn, schema="other", index=False) - == 2 + df.to_sql("test_schema_other", self.conn, schema="other", index=False) == 2 ) df.to_sql( "test_schema_other", - con=self.conn, + self.conn, schema="other", index=False, if_exists="replace", @@ -3051,7 +3236,7 @@ def test_schema_support(self): assert ( df.to_sql( "test_schema_other", - con=self.conn, + self.conn, schema="other", index=False, if_exists="append", @@ -3168,9 +3353,14 @@ def test_execute_sql(self): self._execute_sql() def test_datetime_date(self): + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) # test support for datetime.date df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) - assert df.to_sql("test_date", con=self.conn, index=False) == 2 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_date", self.conn, index=False) == 2 res = read_sql_query("SELECT * FROM test_date", self.conn) if self.flavor == "sqlite": # comes back as strings @@ -3181,6 +3371,11 @@ def test_datetime_date(self): @pytest.mark.parametrize("tz_aware", [False, True]) def test_datetime_time(self, tz_aware): # test support for datetime.time, GH #8341 + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) + if not tz_aware: tz_times = [time(9, 0, 0), time(9, 1, 30)] else: @@ -3189,7 +3384,8 @@ def test_datetime_time(self, tz_aware): df = DataFrame(tz_times, columns=["a"]) - assert df.to_sql("test_time", con=self.conn, index=False) == 2 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("test_time", self.conn, index=False) == 2 res = read_sql_query("SELECT * FROM test_time", self.conn) if self.flavor == "sqlite": # comes back as strings @@ -3222,28 +3418,39 @@ def _get_sqlite_column_type(self, table, column): raise ValueError(f"Table {table}, column {column} not found") def test_dtype(self): + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) if self.flavor == "mysql": pytest.skip("Not applicable to MySQL legacy") cols = ["A", "B"] data = [(0.8, True), (0.9, None)] df = DataFrame(data, columns=cols) - assert df.to_sql("dtype_test", con=self.conn) == 2 - assert df.to_sql("dtype_test2", con=self.conn, dtype={"B": "STRING"}) == 2 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("dtype_test", self.conn) == 2 + assert df.to_sql("dtype_test2", self.conn, dtype={"B": "STRING"}) == 2 # sqlite stores Boolean values as INTEGER assert self._get_sqlite_column_type("dtype_test", "B") == "INTEGER" assert self._get_sqlite_column_type("dtype_test2", "B") == "STRING" msg = r"B \(\) not a string" - with pytest.raises(ValueError, match=msg): - df.to_sql("error", con=self.conn, dtype={"B": bool}) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + with pytest.raises(ValueError, match=msg): + df.to_sql("error", self.conn, dtype={"B": bool}) # single dtype - assert df.to_sql("single_dtype_test", con=self.conn, dtype="STRING") == 2 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql("single_dtype_test", self.conn, dtype="STRING") == 2 assert self._get_sqlite_column_type("single_dtype_test", "A") == "STRING" assert self._get_sqlite_column_type("single_dtype_test", "B") == "STRING" def test_notna_dtype(self): + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." + ) if self.flavor == "mysql": pytest.skip("Not applicable to MySQL legacy") @@ -3256,7 +3463,8 @@ def test_notna_dtype(self): df = DataFrame(cols) tbl = "notna_dtype_test" - assert df.to_sql(tbl, con=self.conn) == 2 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql(tbl, self.conn) == 2 assert self._get_sqlite_column_type(tbl, "Bool") == "INTEGER" assert self._get_sqlite_column_type(tbl, "Date") == "TIMESTAMP" @@ -3268,8 +3476,13 @@ def test_illegal_names(self): df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) msg = "Empty table or column name specified" - with pytest.raises(ValueError, match=msg): - df.to_sql("", con=self.conn) + sql_msg = ( + "Starting with pandas version 3.0 all arguments of to_sql except for the " + "argument 'name' will be keyword-only." 
+ ) + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + with pytest.raises(ValueError, match=msg): + df.to_sql("", self.conn) for ndx, weird_name in enumerate( [ @@ -3285,12 +3498,14 @@ def test_illegal_names(self): "\xe9", ] ): - assert df.to_sql(weird_name, con=self.conn) == 2 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df.to_sql(weird_name, self.conn) == 2 sql.table_exists(weird_name, self.conn) df2 = DataFrame([[1, 2], [3, 4]], columns=["a", weird_name]) c_tbl = f"test_weird_col_name{ndx:d}" - assert df2.to_sql(c_tbl, con=self.conn) == 2 + with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + assert df2.to_sql(c_tbl, self.conn) == 2 sql.table_exists(c_tbl, self.conn) From a8f9d0b597a5bc60c857f515bd0dd100bf610857 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 7 Aug 2023 21:24:31 -0400 Subject: [PATCH 04/13] Updated documentation and unit tests. --- doc/source/user_guide/io.rst | 10 +- pandas/tests/io/test_sql.py | 370 +++++++---------------------------- 2 files changed, 77 insertions(+), 303 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 006ab5c49e24c..02d0c85327e2f 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5651,7 +5651,7 @@ the database using :func:`~pandas.DataFrame.to_sql`. data = pd.DataFrame(d, columns=c) data - data.to_sql("data", engine) + data.to_sql(name="data", con=engine) With some databases, writing large DataFrames can result in errors due to packet size limitations being exceeded. This can be avoided by setting the @@ -5660,7 +5660,7 @@ writes ``data`` to the database in batches of 1000 rows at a time: .. ipython:: python - data.to_sql("data_chunked", engine, chunksize=1000) + data.to_sql(name="data_chunked", con=engine, chunksize=1000) SQL data types ++++++++++++++ @@ -5680,7 +5680,7 @@ default ``Text`` type for string columns: from sqlalchemy.types import String - data.to_sql("data_dtype", engine, dtype={"Col_1": String}) + data.to_sql(name="data_dtype", con=engine, dtype={"Col_1": String}) .. note:: @@ -5849,7 +5849,7 @@ have schema's). For example: .. code-block:: python - df.to_sql("table", engine, schema="other_schema") + df.to_sql(name="table", con=engine, schema="other_schema") pd.read_sql_table("table", engine, schema="other_schema") Querying @@ -5876,7 +5876,7 @@ Specifying this will return an iterator through chunks of the query result: .. ipython:: python df = pd.DataFrame(np.random.randn(20, 3), columns=list("abc")) - df.to_sql("data_chunks", engine, index=False) + df.to_sql(name="data_chunks", con=engine, index=False) .. ipython:: python diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 209779bfaabf5..e2d985bdf1386 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -28,7 +28,6 @@ ) from io import StringIO from pathlib import Path -import re import sqlite3 import uuid @@ -549,15 +548,9 @@ def sqlite_buildin_iris(sqlite_buildin, iris_path): def test_dataframe_to_sql(conn, test_frame1, request): # GH 51086 if conn is sqlite_engine conn = request.getfixturevalue(conn) - msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." 
- ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(msg)): - test_frame1.to_sql("test", conn, if_exists="append", index=False) + test_frame1.to_sql("test", conn, if_exists="append", index=False) -@pytest.mark.skip @pytest.mark.db @pytest.mark.parametrize("conn", all_connectable) def test_dataframe_to_sql_arrow_dtypes(conn, request): @@ -583,10 +576,6 @@ def test_dataframe_to_sql_arrow_dtypes(conn, request): @pytest.mark.parametrize("conn", all_connectable) def test_dataframe_to_sql_arrow_dtypes_missing(conn, request, nulls_fixture): # GH 52046 - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) pytest.importorskip("pyarrow") df = DataFrame( { @@ -596,8 +585,7 @@ def test_dataframe_to_sql_arrow_dtypes_missing(conn, request, nulls_fixture): } ) conn = request.getfixturevalue(conn) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - df.to_sql("test_arrow", conn, if_exists="replace", index=False) + df.to_sql("test_arrow", conn, if_exists="replace", index=False) @pytest.mark.db @@ -1447,11 +1435,9 @@ def test_date_and_index(self): assert issubclass(df.index.dtype.type, np.datetime64) assert issubclass(df.IntDateCol.dtype.type, np.datetime64) - @pytest.mark.skip def test_timedelta(self): # see #6921 df = to_timedelta(Series(["00:00:01", "00:00:03"], name="foo")).to_frame() - with tm.assert_produces_warning(UserWarning): result_count = df.to_sql("test_timedelta", self.conn) assert result_count == 2 @@ -1461,13 +1447,8 @@ def test_timedelta(self): def test_complex_raises(self): df = DataFrame({"a": [1 + 1j, 2j]}) msg = "Complex datatypes not supported" - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_complex", self.conn) is None + assert df.to_sql("test_complex", self.conn) is None @pytest.mark.parametrize( "index_name,index_label,expected", @@ -1552,17 +1533,13 @@ def test_to_sql_index_label_multiindex(self): ) def test_multiindex_roundtrip(self): - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) df = DataFrame.from_records( [(1, 2.1, "line1"), (2, 1.5, "line2")], columns=["A", "B", "C"], index=["A", "B"], ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - df.to_sql("test_multiindex_roundtrip", self.conn) + + df.to_sql("test_multiindex_roundtrip", self.conn) result = sql.read_sql_query( "SELECT * FROM test_multiindex_roundtrip", self.conn, index_col=["A", "B"] ) @@ -1580,12 +1557,7 @@ def test_multiindex_roundtrip(self): def test_dtype_argument(self, dtype): # GH10285 Add dtype argument to read_sql_query df = DataFrame([[1.2, 3.4], [5.6, 7.8]], columns=["A", "B"]) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." 
- ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_dtype_argument", self.conn) == 2 + assert df.to_sql("test_dtype_argument", self.conn) == 2 expected = df.astype(dtype) result = sql.read_sql_query( @@ -1637,12 +1609,7 @@ def test_chunksize_read(self): df = DataFrame( np.random.default_rng(2).standard_normal((22, 5)), columns=list("abcde") ) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - df.to_sql("test_chunksize", self.conn, index=False) + df.to_sql("test_chunksize", self.conn, index=False) # reading the query in one time res1 = sql.read_sql_query("select * from test_chunksize", self.conn) @@ -1683,14 +1650,10 @@ def test_categorical(self): "person_name": ["John P. Doe", "Jane Dove", "John P. Doe"], } ) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) df2 = df.copy() df2["person_name"] = df2["person_name"].astype("category") - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - df2.to_sql("test_categorical", self.conn, index=False) + + df2.to_sql("test_categorical", self.conn, index=False) res = sql.read_sql_query("SELECT * FROM test_categorical", self.conn) tm.assert_frame_equal(res, df) @@ -1698,22 +1661,12 @@ def test_categorical(self): def test_unicode_column_name(self): # GH 11431 df = DataFrame([[1, 2], [3, 4]], columns=["\xe9", "b"]) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - df.to_sql("test_unicode", self.conn, index=False) + df.to_sql("test_unicode", self.conn, index=False) def test_escaped_table_name(self): # GH 13206 df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]}) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - df.to_sql("d1187b08-4943-4c8d-a7f6", self.conn, index=False) + df.to_sql("d1187b08-4943-4c8d-a7f6", self.conn, index=False) res = sql.read_sql_query("SELECT * FROM `d1187b08-4943-4c8d-a7f6`", self.conn) @@ -1721,13 +1674,8 @@ def test_escaped_table_name(self): def test_read_sql_duplicate_columns(self): # GH#53117 - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": 1}) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - df.to_sql("test_table", self.conn, index=False) + df.to_sql("test_table", self.conn, index=False) result = pd.read_sql("SELECT a, b, a +1 as a, c FROM test_table;", self.conn) expected = DataFrame( @@ -1811,10 +1759,6 @@ def test_not_reflect_all_tables(self): def test_warning_case_insensitive_table_name(self, test_frame1): # see gh-7815 - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." 
- ) with tm.assert_produces_warning( UserWarning, match=( @@ -1826,7 +1770,7 @@ def test_warning_case_insensitive_table_name(self, test_frame1): sql.SQLDatabase(self.conn).check_case_sensitive("TABLE1", "") # Test that the warning is certainly NOT triggered in a normal case. - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): + with tm.assert_produces_warning(None): test_frame1.to_sql("CaseSensitive", self.conn) def _get_index_columns(self, tbl_name): @@ -1893,15 +1837,10 @@ def test_database_uri_string(self, test_frame1): # db_uri = 'sqlite:///:memory:' # raises # sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) near # "iris": syntax error [SQL: 'iris'] - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) with tm.ensure_clean() as name: db_uri = "sqlite:///" + name table = "iris" - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - test_frame1.to_sql(table, db_uri, if_exists="replace", index=False) + test_frame1.to_sql(table, db_uri, if_exists="replace", index=False) test_frame2 = sql.read_sql(table, db_uri) test_frame3 = sql.read_sql_table(table, db_uri) query = "SELECT * FROM iris" @@ -1943,12 +1882,7 @@ def test_query_by_select_obj(self): def test_column_with_percentage(self): # GH 37157 df = DataFrame({"A": [0, 1, 2], "%_variation": [3, 4, 5]}) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - df.to_sql("test_column_percentage", self.conn, index=False) + df.to_sql("test_column_percentage", self.conn, index=False) res = sql.read_sql_table("test_column_percentage", self.conn) @@ -2160,12 +2094,7 @@ def test_default_type_conversion(self): def test_bigint(self): # int64 should be converted to BigInteger, GH7433 df = DataFrame(data={"i64": [2**62]}) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_bigint", self.conn, index=False) == 1 + assert df.to_sql("test_bigint", self.conn, index=False) == 1 result = sql.read_sql_table("test_bigint", self.conn) tm.assert_frame_equal(df, result) @@ -2261,15 +2190,10 @@ def test_datetime_with_timezone_roundtrip(self): # Write datetimetz data to a db and read it back # For dbs that support timestamps with timezones, should get back UTC # otherwise naive data should be returned - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) expected = DataFrame( {"A": date_range("2013-01-01 09:00:00", periods=3, tz="US/Pacific")} ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert expected.to_sql("test_datetime_tz", self.conn, index=False) == 3 + assert expected.to_sql("test_datetime_tz", self.conn, index=False) == 3 if self.flavor == "postgresql": # SQLAlchemy "timezones" (i.e. offsets) are coerced to UTC @@ -2290,13 +2214,8 @@ def test_datetime_with_timezone_roundtrip(self): def test_out_of_bounds_datetime(self): # GH 26761 - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." 
- ) data = DataFrame({"date": datetime(9999, 1, 1)}, index=[0]) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert data.to_sql("test_datetime_obb", self.conn, index=False) == 1 + assert data.to_sql("test_datetime_obb", self.conn, index=False) == 1 result = sql.read_sql_table("test_datetime_obb", self.conn) expected = DataFrame([pd.NaT], columns=["date"]) tm.assert_frame_equal(result, expected) @@ -2304,14 +2223,9 @@ def test_out_of_bounds_datetime(self): def test_naive_datetimeindex_roundtrip(self): # GH 23510 # Ensure that a naive DatetimeIndex isn't converted to UTC - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) dates = date_range("2018-01-01", periods=5, freq="6H")._with_freq(None) expected = DataFrame({"nums": range(5)}, index=dates) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert expected.to_sql("foo_table", self.conn, index_label="info_date") == 5 + assert expected.to_sql("foo_table", self.conn, index_label="info_date") == 5 result = sql.read_sql_table("foo_table", self.conn, index_col="info_date") # result index with gain a name from a set_index operation; expected tm.assert_frame_equal(result, expected, check_names=False) @@ -2352,12 +2266,7 @@ def test_datetime(self): df = DataFrame( {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)} ) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_datetime", self.conn) == 3 + assert df.to_sql("test_datetime", self.conn) == 3 # with read_table -> type information from schema used result = sql.read_sql_table("test_datetime", self.conn) @@ -2378,13 +2287,8 @@ def test_datetime_NaT(self): df = DataFrame( {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)} ) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) df.loc[1, "A"] = np.nan - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_datetime", self.conn, index=False) == 3 + assert df.to_sql("test_datetime", self.conn, index=False) == 3 # with read_table -> type information from schema used result = sql.read_sql_table("test_datetime", self.conn) @@ -2402,12 +2306,7 @@ def test_datetime_NaT(self): def test_datetime_date(self): # test support for datetime.date df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_date", self.conn, index=False) == 2 + assert df.to_sql("test_date", self.conn, index=False) == 2 res = read_sql_table("test_date", self.conn) result = res["a"] expected = to_datetime(df["a"]) @@ -2415,14 +2314,9 @@ def test_datetime_date(self): tm.assert_series_equal(result, expected) def test_datetime_time(self, sqlite_buildin): - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." 
- ) # test support for datetime.time df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"]) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_time", self.conn, index=False) == 2 + assert df.to_sql("test_time", self.conn, index=False) == 2 res = read_sql_table("test_time", self.conn) tm.assert_frame_equal(res, df) @@ -2447,14 +2341,9 @@ def test_mixed_dtype_insert(self): s1 = Series(2**25 + 1, dtype=np.int32) s2 = Series(0.0, dtype=np.float32) df = DataFrame({"s1": s1, "s2": s2}) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) # write and read again - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_read_write", self.conn, index=False) == 1 + assert df.to_sql("test_read_write", self.conn, index=False) == 1 df2 = sql.read_sql_table("test_read_write", self.conn) tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True) @@ -2462,12 +2351,7 @@ def test_mixed_dtype_insert(self): def test_nan_numeric(self): # NaNs in numeric float column df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]}) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_nan", self.conn, index=False) == 3 + assert df.to_sql("test_nan", self.conn, index=False) == 3 # with read_table result = sql.read_sql_table("test_nan", self.conn) @@ -2479,13 +2363,8 @@ def test_nan_numeric(self): def test_nan_fullcolumn(self): # full NaN column (numeric float column) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) df = DataFrame({"A": [0, 1, 2], "B": [np.nan, np.nan, np.nan]}) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_nan", self.conn, index=False) == 3 + assert df.to_sql("test_nan", self.conn, index=False) == 3 # with read_table result = sql.read_sql_table("test_nan", self.conn) @@ -2499,13 +2378,8 @@ def test_nan_fullcolumn(self): def test_nan_string(self): # NaNs in string column - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", np.nan]}) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_nan", self.conn, index=False) == 3 + assert df.to_sql("test_nan", self.conn, index=False) == 3 # NaNs are coming back as None df.loc[2, "B"] = None @@ -2558,10 +2432,6 @@ def test_get_schema_create_table(self, test_frame3): self.drop_table(tbl, self.conn) def test_dtype(self): - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." 
- ) from sqlalchemy import ( TEXT, String, @@ -2571,29 +2441,25 @@ def test_dtype(self): cols = ["A", "B"] data = [(0.8, True), (0.9, None)] df = DataFrame(data, columns=cols) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("dtype_test", self.conn) == 2 - assert df.to_sql("dtype_test2", self.conn, dtype={"B": TEXT}) == 2 + assert df.to_sql("dtype_test", self.conn) == 2 + assert df.to_sql("dtype_test2", self.conn, dtype={"B": TEXT}) == 2 meta = MetaData() meta.reflect(bind=self.conn) sqltype = meta.tables["dtype_test2"].columns["B"].type assert isinstance(sqltype, TEXT) msg = "The type of B is not a SQLAlchemy type" - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - with pytest.raises(ValueError, match=msg): - df.to_sql("error", self.conn, dtype={"B": str}) + with pytest.raises(ValueError, match=msg): + df.to_sql("error", self.conn, dtype={"B": str}) # GH9083 - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("dtype_test3", self.conn, dtype={"B": String(10)}) == 2 + assert df.to_sql("dtype_test3", self.conn, dtype={"B": String(10)}) == 2 meta.reflect(bind=self.conn) sqltype = meta.tables["dtype_test3"].columns["B"].type assert isinstance(sqltype, String) assert sqltype.length == 10 # single dtype - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("single_dtype_test", self.conn, dtype=TEXT) == 2 + assert df.to_sql("single_dtype_test", self.conn, dtype=TEXT) == 2 meta.reflect(bind=self.conn) sqltypea = meta.tables["single_dtype_test"].columns["A"].type sqltypeb = meta.tables["single_dtype_test"].columns["B"].type @@ -2601,10 +2467,6 @@ def test_dtype(self): assert isinstance(sqltypeb, TEXT) def test_notna_dtype(self): - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) from sqlalchemy import ( Boolean, DateTime, @@ -2622,8 +2484,7 @@ def test_notna_dtype(self): df = DataFrame(cols) tbl = "notna_dtype_test" - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql(tbl, self.conn) == 2 + assert df.to_sql(tbl, self.conn) == 2 _ = sql.read_sql_table(tbl, self.conn) meta = MetaData() meta.reflect(bind=self.conn) @@ -2635,10 +2496,6 @@ def test_notna_dtype(self): assert isinstance(col_dict["Float"].type, Float) def test_double_precision(self): - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) from sqlalchemy import ( BigInteger, Float, @@ -2658,17 +2515,16 @@ def test_double_precision(self): } ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert ( - df.to_sql( - "test_dtypes", - self.conn, - index=False, - if_exists="replace", - dtype={"f64_as_f32": Float(precision=23)}, - ) - == 1 + assert ( + df.to_sql( + "test_dtypes", + self.conn, + index=False, + if_exists="replace", + dtype={"f64_as_f32": Float(precision=23)}, ) + == 1 + ) res = sql.read_sql_table("test_dtypes", self.conn) # check precision of float64 @@ -2689,11 +2545,6 @@ def test_connectable_issue_example(self): # https://github.com/pandas-dev/pandas/issues/10104 from sqlalchemy.engine import Engine - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." 
- ) - def test_select(connection): query = "SELECT test_foo_data FROM test_foo_data" return sql.read_sql_query(query, con=connection) @@ -2715,13 +2566,10 @@ def main(connectable): else: test_connectable(connectable) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert ( - DataFrame({"test_foo_data": [0, 1, 2]}).to_sql( - "test_foo_data", self.conn - ) - == 3 - ) + assert ( + DataFrame({"test_foo_data": [0, 1, 2]}).to_sql("test_foo_data", self.conn) + == 3 + ) main(self.conn) @pytest.mark.parametrize( @@ -2732,10 +2580,6 @@ def test_to_sql_with_negative_npinf(self, input, request): # GH 34431 df = DataFrame(input) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) if self.flavor == "mysql": # GH 36465 @@ -2752,12 +2596,10 @@ def test_to_sql_with_negative_npinf(self, input, request): request.node.add_marker(mark) msg = "inf cannot be used with MySQL" - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - with pytest.raises(ValueError, match=msg): - df.to_sql("foobar", self.conn, index=False) + with pytest.raises(ValueError, match=msg): + df.to_sql("foobar", self.conn, index=False) else: - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("foobar", self.conn, index=False) == 1 + assert df.to_sql("foobar", self.conn, index=False) == 1 res = sql.read_sql_table("foobar", self.conn) tm.assert_equal(df, res) @@ -2828,14 +2670,9 @@ def test_get_engine_auto_error_message(self): @pytest.mark.parametrize("func", ["read_sql", "read_sql_query"]) def test_read_sql_dtype_backend(self, string_storage, func, dtype_backend): # GH#50048 - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) table = "test" df = self.dtype_backend_data() - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(table, self.conn, index=False, if_exists="replace") with pd.option_context("mode.string_storage", string_storage): result = getattr(pd, func)( @@ -2860,12 +2697,7 @@ def test_read_sql_dtype_backend_table(self, string_storage, func, dtype_backend) # GH#50048 table = "test" df = self.dtype_backend_data() - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(table, self.conn, index=False, if_exists="replace") with pd.option_context("mode.string_storage", string_storage): result = getattr(pd, func)(table, self.conn, dtype_backend=dtype_backend) @@ -2887,12 +2719,7 @@ def test_read_sql_dtype_backend_table(self, string_storage, func, dtype_backend) def test_read_sql_invalid_dtype_backend_table(self, func): table = "test" df = self.dtype_backend_data() - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." 
- ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(table, self.conn, index=False, if_exists="replace") msg = ( "dtype_backend numpy is invalid, only 'numpy_nullable' and " @@ -2957,13 +2784,7 @@ def test_chunksize_empty_dtypes(self): dtypes = {"a": "int64", "b": "object"} df = DataFrame(columns=["a", "b"]).astype(dtypes) expected = df.copy() - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) - - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - df.to_sql("test", self.conn, index=False, if_exists="replace") + df.to_sql("test", self.conn, index=False, if_exists="replace") for result in read_sql_query( "SELECT * FROM test", @@ -2977,14 +2798,9 @@ def test_chunksize_empty_dtypes(self): @pytest.mark.parametrize("func", ["read_sql", "read_sql_query"]) def test_read_sql_dtype(self, func, dtype_backend): # GH#50797 - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) table = "test" df = DataFrame({"a": [1, 2, 3], "b": 5}) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(table, self.conn, index=False, if_exists="replace") result = getattr(pd, func)( f"Select * from {table}", @@ -3044,13 +2860,8 @@ def test_default_date_load(self): def test_bigint_warning(self): # test no warning for BIGINT (to support int64) is raised (GH7433) - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) df = DataFrame({"a": [1, 2]}, dtype="int64") - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_bigintwarning", self.conn, index=False) == 2 + assert df.to_sql("test_bigintwarning", self.conn, index=False) == 2 with tm.assert_produces_warning(None): sql.read_sql_table("test_bigintwarning", self.conn) @@ -3064,10 +2875,6 @@ def test_row_object_is_named_tuple(self): # GH 40682 # Test for the is_named_tuple() function # Placed here due to its usage of sqlalchemy - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) from sqlalchemy import ( Column, @@ -3091,13 +2898,10 @@ class Test(BaseModel): Session = sessionmaker(bind=self.conn) with Session() as session: df = DataFrame({"id": [0, 1], "string_column": ["hello", "world"]}) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert ( - df.to_sql( - "test_frame", con=self.conn, index=False, if_exists="replace" - ) - == 2 - ) + assert ( + df.to_sql("test_frame", con=self.conn, index=False, if_exists="replace") + == 2 + ) session.commit() test_query = session.query(Test.id, Test.string_column) df = DataFrame(test_query) @@ -3353,14 +3157,9 @@ def test_execute_sql(self): self._execute_sql() def test_datetime_date(self): - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." 
- ) # test support for datetime.date df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_date", self.conn, index=False) == 2 + assert df.to_sql("test_date", self.conn, index=False) == 2 res = read_sql_query("SELECT * FROM test_date", self.conn) if self.flavor == "sqlite": # comes back as strings @@ -3371,11 +3170,6 @@ def test_datetime_date(self): @pytest.mark.parametrize("tz_aware", [False, True]) def test_datetime_time(self, tz_aware): # test support for datetime.time, GH #8341 - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) - if not tz_aware: tz_times = [time(9, 0, 0), time(9, 1, 30)] else: @@ -3384,8 +3178,7 @@ def test_datetime_time(self, tz_aware): df = DataFrame(tz_times, columns=["a"]) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("test_time", self.conn, index=False) == 2 + assert df.to_sql("test_time", self.conn, index=False) == 2 res = read_sql_query("SELECT * FROM test_time", self.conn) if self.flavor == "sqlite": # comes back as strings @@ -3418,39 +3211,28 @@ def _get_sqlite_column_type(self, table, column): raise ValueError(f"Table {table}, column {column} not found") def test_dtype(self): - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) if self.flavor == "mysql": pytest.skip("Not applicable to MySQL legacy") cols = ["A", "B"] data = [(0.8, True), (0.9, None)] df = DataFrame(data, columns=cols) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("dtype_test", self.conn) == 2 - assert df.to_sql("dtype_test2", self.conn, dtype={"B": "STRING"}) == 2 + assert df.to_sql("dtype_test", self.conn) == 2 + assert df.to_sql("dtype_test2", self.conn, dtype={"B": "STRING"}) == 2 # sqlite stores Boolean values as INTEGER assert self._get_sqlite_column_type("dtype_test", "B") == "INTEGER" assert self._get_sqlite_column_type("dtype_test2", "B") == "STRING" msg = r"B \(\) not a string" - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - with pytest.raises(ValueError, match=msg): - df.to_sql("error", self.conn, dtype={"B": bool}) + with pytest.raises(ValueError, match=msg): + df.to_sql("error", self.conn, dtype={"B": bool}) # single dtype - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql("single_dtype_test", self.conn, dtype="STRING") == 2 + assert df.to_sql("single_dtype_test", self.conn, dtype="STRING") == 2 assert self._get_sqlite_column_type("single_dtype_test", "A") == "STRING" assert self._get_sqlite_column_type("single_dtype_test", "B") == "STRING" def test_notna_dtype(self): - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." 
- ) if self.flavor == "mysql": pytest.skip("Not applicable to MySQL legacy") @@ -3463,8 +3245,7 @@ def test_notna_dtype(self): df = DataFrame(cols) tbl = "notna_dtype_test" - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql(tbl, self.conn) == 2 + assert df.to_sql(tbl, self.conn) == 2 assert self._get_sqlite_column_type(tbl, "Bool") == "INTEGER" assert self._get_sqlite_column_type(tbl, "Date") == "TIMESTAMP" @@ -3476,13 +3257,8 @@ def test_illegal_names(self): df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) msg = "Empty table or column name specified" - sql_msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "argument 'name' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - with pytest.raises(ValueError, match=msg): - df.to_sql("", self.conn) + with pytest.raises(ValueError, match=msg): + df.to_sql("", self.conn) for ndx, weird_name in enumerate( [ @@ -3498,14 +3274,12 @@ def test_illegal_names(self): "\xe9", ] ): - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df.to_sql(weird_name, self.conn) == 2 + assert df.to_sql(weird_name, self.conn) == 2 sql.table_exists(weird_name, self.conn) df2 = DataFrame([[1, 2], [3, 4]], columns=["a", weird_name]) c_tbl = f"test_weird_col_name{ndx:d}" - with tm.assert_produces_warning(FutureWarning, match=re.escape(sql_msg)): - assert df2.to_sql(c_tbl, self.conn) == 2 + assert df2.to_sql(c_tbl, self.conn) == 2 sql.table_exists(c_tbl, self.conn) From e8862d31e4aadb80bc2ac116cc21474c3de7224a Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 7 Aug 2023 22:13:29 -0400 Subject: [PATCH 05/13] Updating documentation and fixing unit tests. --- doc/source/whatsnew/v0.14.0.rst | 2 +- pandas/tests/io/test_sql.py | 150 +++++++++++++++++--------------- 2 files changed, 83 insertions(+), 69 deletions(-) diff --git a/doc/source/whatsnew/v0.14.0.rst b/doc/source/whatsnew/v0.14.0.rst index f33ab3911f231..92c37243b7e81 100644 --- a/doc/source/whatsnew/v0.14.0.rst +++ b/doc/source/whatsnew/v0.14.0.rst @@ -437,7 +437,7 @@ This ``engine`` can then be used to write or read data to/from this database: .. 
ipython:: python df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']}) - df.to_sql('db_table', engine, index=False) + df.to_sql(name='db_table', con=engine, index=False) You can read data from a database by specifying the table name: diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index e2d985bdf1386..0df8621351715 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -548,7 +548,7 @@ def sqlite_buildin_iris(sqlite_buildin, iris_path): def test_dataframe_to_sql(conn, test_frame1, request): # GH 51086 if conn is sqlite_engine conn = request.getfixturevalue(conn) - test_frame1.to_sql("test", conn, if_exists="append", index=False) + test_frame1.to_sql(name="test", con=conn, if_exists="append", index=False) @pytest.mark.db @@ -569,7 +569,7 @@ def test_dataframe_to_sql_arrow_dtypes(conn, request): ) conn = request.getfixturevalue(conn) with tm.assert_produces_warning(UserWarning, match="the 'timedelta'"): - df.to_sql("test_arrow", conn, if_exists="replace", index=False) + df.to_sql(name="test_arrow", con=conn, if_exists="replace", index=False) @pytest.mark.db @@ -585,7 +585,7 @@ def test_dataframe_to_sql_arrow_dtypes_missing(conn, request, nulls_fixture): } ) conn = request.getfixturevalue(conn) - df.to_sql("test_arrow", conn, if_exists="replace", index=False) + df.to_sql(name="test_arrow", con=conn, if_exists="replace", index=False) @pytest.mark.db @@ -756,7 +756,7 @@ def test_read_procedure(conn, request): from sqlalchemy.engine import Engine df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) - df.to_sql("test_frame", conn, index=False) + df.to_sql("test_frame", con=conn, index=False) proc = """DROP PROCEDURE IF EXISTS get_testdb; @@ -811,7 +811,7 @@ def psql_insert_copy(table, conn, keys, data_iter): conn = request.getfixturevalue(conn) expected = DataFrame({"col1": [1, 2], "col2": [0.1, 0.2], "col3": ["a", "n"]}) result_count = expected.to_sql( - "test_frame", conn, index=False, method=psql_insert_copy + "test_frame", con=conn, index=False, method=psql_insert_copy ) # GH 46891 if expected_count is None: @@ -860,12 +860,12 @@ def insert_on_conflict(table, conn, keys, data_iter): conn.execute(create_sql) expected = DataFrame([[1, 2.1, "a"]], columns=list("abc")) - expected.to_sql("test_insert_conflict", conn, if_exists="append", index=False) + expected.to_sql("test_insert_conflict", con=conn, if_exists="append", index=False) df_insert = DataFrame([[1, 3.2, "b"]], columns=list("abc")) inserted = df_insert.to_sql( "test_insert_conflict", - conn, + con=conn, index=False, if_exists="append", method=insert_on_conflict, @@ -914,12 +914,12 @@ def insert_on_conflict(table, conn, keys, data_iter): conn.execute(create_sql) df = DataFrame([[1, 2.1, "a"]], columns=list("abc")) - df.to_sql("test_insert_conflict", conn, if_exists="append", index=False) + df.to_sql("test_insert_conflict", con=conn, if_exists="append", index=False) expected = DataFrame([[1, 3.2, "b"]], columns=list("abc")) inserted = expected.to_sql( "test_insert_conflict", - conn, + con=conn, index=False, if_exists="append", method=insert_on_conflict, @@ -1439,7 +1439,7 @@ def test_timedelta(self): # see #6921 df = to_timedelta(Series(["00:00:01", "00:00:03"], name="foo")).to_frame() with tm.assert_produces_warning(UserWarning): - result_count = df.to_sql("test_timedelta", self.conn) + result_count = df.to_sql(name="test_timedelta", con=self.conn) assert result_count == 2 result = sql.read_sql_query("SELECT * FROM test_timedelta", self.conn) tm.assert_series_equal(result["foo"], 
df["foo"].view("int64")) @@ -1448,7 +1448,7 @@ def test_complex_raises(self): df = DataFrame({"a": [1 + 1j, 2j]}) msg = "Complex datatypes not supported" with pytest.raises(ValueError, match=msg): - assert df.to_sql("test_complex", self.conn) is None + assert df.to_sql(name="test_complex", con=self.conn) is None @pytest.mark.parametrize( "index_name,index_label,expected", @@ -1539,7 +1539,7 @@ def test_multiindex_roundtrip(self): index=["A", "B"], ) - df.to_sql("test_multiindex_roundtrip", self.conn) + df.to_sql(name="test_multiindex_roundtrip", con=self.conn) result = sql.read_sql_query( "SELECT * FROM test_multiindex_roundtrip", self.conn, index_col=["A", "B"] ) @@ -1557,7 +1557,7 @@ def test_multiindex_roundtrip(self): def test_dtype_argument(self, dtype): # GH10285 Add dtype argument to read_sql_query df = DataFrame([[1.2, 3.4], [5.6, 7.8]], columns=["A", "B"]) - assert df.to_sql("test_dtype_argument", self.conn) == 2 + assert df.to_sql(name="test_dtype_argument", con=self.conn) == 2 expected = df.astype(dtype) result = sql.read_sql_query( @@ -1609,7 +1609,7 @@ def test_chunksize_read(self): df = DataFrame( np.random.default_rng(2).standard_normal((22, 5)), columns=list("abcde") ) - df.to_sql("test_chunksize", self.conn, index=False) + df.to_sql(name="test_chunksize", con=self.conn, index=False) # reading the query in one time res1 = sql.read_sql_query("select * from test_chunksize", self.conn) @@ -1653,7 +1653,7 @@ def test_categorical(self): df2 = df.copy() df2["person_name"] = df2["person_name"].astype("category") - df2.to_sql("test_categorical", self.conn, index=False) + df2.to_sql(name="test_categorical", con=self.conn, index=False) res = sql.read_sql_query("SELECT * FROM test_categorical", self.conn) tm.assert_frame_equal(res, df) @@ -1661,12 +1661,12 @@ def test_categorical(self): def test_unicode_column_name(self): # GH 11431 df = DataFrame([[1, 2], [3, 4]], columns=["\xe9", "b"]) - df.to_sql("test_unicode", self.conn, index=False) + df.to_sql(name="test_unicode", con=self.conn, index=False) def test_escaped_table_name(self): # GH 13206 df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]}) - df.to_sql("d1187b08-4943-4c8d-a7f6", self.conn, index=False) + df.to_sql(name="d1187b08-4943-4c8d-a7f6", con=self.conn, index=False) res = sql.read_sql_query("SELECT * FROM `d1187b08-4943-4c8d-a7f6`", self.conn) @@ -1675,7 +1675,7 @@ def test_escaped_table_name(self): def test_read_sql_duplicate_columns(self): # GH#53117 df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": 1}) - df.to_sql("test_table", self.conn, index=False) + df.to_sql(name="test_table", con=self.conn, index=False) result = pd.read_sql("SELECT a, b, a +1 as a, c FROM test_table;", self.conn) expected = DataFrame( @@ -1771,7 +1771,7 @@ def test_warning_case_insensitive_table_name(self, test_frame1): # Test that the warning is certainly NOT triggered in a normal case. 
with tm.assert_produces_warning(None): - test_frame1.to_sql("CaseSensitive", self.conn) + test_frame1.to_sql(name="CaseSensitive", con=self.conn) def _get_index_columns(self, tbl_name): from sqlalchemy.engine import reflection @@ -1840,7 +1840,7 @@ def test_database_uri_string(self, test_frame1): with tm.ensure_clean() as name: db_uri = "sqlite:///" + name table = "iris" - test_frame1.to_sql(table, db_uri, if_exists="replace", index=False) + test_frame1.to_sql(name=table, con=db_uri, if_exists="replace", index=False) test_frame2 = sql.read_sql(table, db_uri) test_frame3 = sql.read_sql_table(table, db_uri) query = "SELECT * FROM iris" @@ -1882,7 +1882,7 @@ def test_query_by_select_obj(self): def test_column_with_percentage(self): # GH 37157 df = DataFrame({"A": [0, 1, 2], "%_variation": [3, 4, 5]}) - df.to_sql("test_column_percentage", self.conn, index=False) + df.to_sql(name="test_column_percentage", con=self.conn, index=False) res = sql.read_sql_table("test_column_percentage", self.conn) @@ -2094,7 +2094,7 @@ def test_default_type_conversion(self): def test_bigint(self): # int64 should be converted to BigInteger, GH7433 df = DataFrame(data={"i64": [2**62]}) - assert df.to_sql("test_bigint", self.conn, index=False) == 1 + assert df.to_sql(name="test_bigint", con=self.conn, index=False) == 1 result = sql.read_sql_table("test_bigint", self.conn) tm.assert_frame_equal(df, result) @@ -2193,7 +2193,7 @@ def test_datetime_with_timezone_roundtrip(self): expected = DataFrame( {"A": date_range("2013-01-01 09:00:00", periods=3, tz="US/Pacific")} ) - assert expected.to_sql("test_datetime_tz", self.conn, index=False) == 3 + assert expected.to_sql(name="test_datetime_tz", con=self.conn, index=False) == 3 if self.flavor == "postgresql": # SQLAlchemy "timezones" (i.e. 
offsets) are coerced to UTC @@ -2215,7 +2215,7 @@ def test_datetime_with_timezone_roundtrip(self): def test_out_of_bounds_datetime(self): # GH 26761 data = DataFrame({"date": datetime(9999, 1, 1)}, index=[0]) - assert data.to_sql("test_datetime_obb", self.conn, index=False) == 1 + assert data.to_sql(name="test_datetime_obb", con=self.conn, index=False) == 1 result = sql.read_sql_table("test_datetime_obb", self.conn) expected = DataFrame([pd.NaT], columns=["date"]) tm.assert_frame_equal(result, expected) @@ -2225,7 +2225,10 @@ def test_naive_datetimeindex_roundtrip(self): # Ensure that a naive DatetimeIndex isn't converted to UTC dates = date_range("2018-01-01", periods=5, freq="6H")._with_freq(None) expected = DataFrame({"nums": range(5)}, index=dates) - assert expected.to_sql("foo_table", self.conn, index_label="info_date") == 5 + assert ( + expected.to_sql(name="foo_table", con=self.conn, index_label="info_date") + == 5 + ) result = sql.read_sql_table("foo_table", self.conn, index_col="info_date") # result index with gain a name from a set_index operation; expected tm.assert_frame_equal(result, expected, check_names=False) @@ -2266,7 +2269,7 @@ def test_datetime(self): df = DataFrame( {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)} ) - assert df.to_sql("test_datetime", self.conn) == 3 + assert df.to_sql(name="test_datetime", con=self.conn) == 3 # with read_table -> type information from schema used result = sql.read_sql_table("test_datetime", self.conn) @@ -2288,7 +2291,7 @@ def test_datetime_NaT(self): {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)} ) df.loc[1, "A"] = np.nan - assert df.to_sql("test_datetime", self.conn, index=False) == 3 + assert df.to_sql(name="test_datetime", con=self.conn, index=False) == 3 # with read_table -> type information from schema used result = sql.read_sql_table("test_datetime", self.conn) @@ -2306,7 +2309,7 @@ def test_datetime_NaT(self): def test_datetime_date(self): # test support for datetime.date df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) - assert df.to_sql("test_date", self.conn, index=False) == 2 + assert df.to_sql(name="test_date", con=self.conn, index=False) == 2 res = read_sql_table("test_date", self.conn) result = res["a"] expected = to_datetime(df["a"]) @@ -2316,7 +2319,7 @@ def test_datetime_date(self): def test_datetime_time(self, sqlite_buildin): # test support for datetime.time df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"]) - assert df.to_sql("test_time", self.conn, index=False) == 2 + assert df.to_sql(name="test_time", con=self.conn, index=False) == 2 res = read_sql_table("test_time", self.conn) tm.assert_frame_equal(res, df) @@ -2343,7 +2346,7 @@ def test_mixed_dtype_insert(self): df = DataFrame({"s1": s1, "s2": s2}) # write and read again - assert df.to_sql("test_read_write", self.conn, index=False) == 1 + assert df.to_sql(name="test_read_write", con=self.conn, index=False) == 1 df2 = sql.read_sql_table("test_read_write", self.conn) tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True) @@ -2351,7 +2354,7 @@ def test_mixed_dtype_insert(self): def test_nan_numeric(self): # NaNs in numeric float column df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]}) - assert df.to_sql("test_nan", self.conn, index=False) == 3 + assert df.to_sql(name="test_nan", con=self.conn, index=False) == 3 # with read_table result = sql.read_sql_table("test_nan", self.conn) @@ -2364,7 +2367,7 @@ def test_nan_numeric(self): def test_nan_fullcolumn(self): # 
full NaN column (numeric float column) df = DataFrame({"A": [0, 1, 2], "B": [np.nan, np.nan, np.nan]}) - assert df.to_sql("test_nan", self.conn, index=False) == 3 + assert df.to_sql(name="test_nan", con=self.conn, index=False) == 3 # with read_table result = sql.read_sql_table("test_nan", self.conn) @@ -2379,7 +2382,7 @@ def test_nan_fullcolumn(self): def test_nan_string(self): # NaNs in string column df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", np.nan]}) - assert df.to_sql("test_nan", self.conn, index=False) == 3 + assert df.to_sql(name="test_nan", con=self.conn, index=False) == 3 # NaNs are coming back as None df.loc[2, "B"] = None @@ -2441,25 +2444,27 @@ def test_dtype(self): cols = ["A", "B"] data = [(0.8, True), (0.9, None)] df = DataFrame(data, columns=cols) - assert df.to_sql("dtype_test", self.conn) == 2 - assert df.to_sql("dtype_test2", self.conn, dtype={"B": TEXT}) == 2 + assert df.to_sql(name="dtype_test", con=self.conn) == 2 + assert df.to_sql(name="dtype_test2", con=self.conn, dtype={"B": TEXT}) == 2 meta = MetaData() meta.reflect(bind=self.conn) sqltype = meta.tables["dtype_test2"].columns["B"].type assert isinstance(sqltype, TEXT) msg = "The type of B is not a SQLAlchemy type" with pytest.raises(ValueError, match=msg): - df.to_sql("error", self.conn, dtype={"B": str}) + df.to_sql(name="error", con=self.conn, dtype={"B": str}) # GH9083 - assert df.to_sql("dtype_test3", self.conn, dtype={"B": String(10)}) == 2 + assert ( + df.to_sql(name="dtype_test3", con=self.conn, dtype={"B": String(10)}) == 2 + ) meta.reflect(bind=self.conn) sqltype = meta.tables["dtype_test3"].columns["B"].type assert isinstance(sqltype, String) assert sqltype.length == 10 # single dtype - assert df.to_sql("single_dtype_test", self.conn, dtype=TEXT) == 2 + assert df.to_sql(name="single_dtype_test", con=self.conn, dtype=TEXT) == 2 meta.reflect(bind=self.conn) sqltypea = meta.tables["single_dtype_test"].columns["A"].type sqltypeb = meta.tables["single_dtype_test"].columns["B"].type @@ -2484,7 +2489,7 @@ def test_notna_dtype(self): df = DataFrame(cols) tbl = "notna_dtype_test" - assert df.to_sql(tbl, self.conn) == 2 + assert df.to_sql(name=tbl, con=self.conn) == 2 _ = sql.read_sql_table(tbl, self.conn) meta = MetaData() meta.reflect(bind=self.conn) @@ -2517,8 +2522,8 @@ def test_double_precision(self): assert ( df.to_sql( - "test_dtypes", - self.conn, + name="test_dtypes", + con=self.conn, index=False, if_exists="replace", dtype={"f64_as_f32": Float(precision=23)}, @@ -2567,7 +2572,9 @@ def main(connectable): test_connectable(connectable) assert ( - DataFrame({"test_foo_data": [0, 1, 2]}).to_sql("test_foo_data", self.conn) + DataFrame({"test_foo_data": [0, 1, 2]}).to_sql( + name="test_foo_data", con=self.conn + ) == 3 ) main(self.conn) @@ -2597,9 +2604,9 @@ def test_to_sql_with_negative_npinf(self, input, request): msg = "inf cannot be used with MySQL" with pytest.raises(ValueError, match=msg): - df.to_sql("foobar", self.conn, index=False) + df.to_sql(name="foobar", con=self.conn, index=False) else: - assert df.to_sql("foobar", self.conn, index=False) == 1 + assert df.to_sql(name="foobar", con=self.conn, index=False) == 1 res = sql.read_sql_table("foobar", self.conn) tm.assert_equal(df, res) @@ -2672,7 +2679,7 @@ def test_read_sql_dtype_backend(self, string_storage, func, dtype_backend): # GH#50048 table = "test" df = self.dtype_backend_data() - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(name=table, con=self.conn, index=False, if_exists="replace") with 
pd.option_context("mode.string_storage", string_storage): result = getattr(pd, func)( @@ -2684,7 +2691,7 @@ def test_read_sql_dtype_backend(self, string_storage, func, dtype_backend): with pd.option_context("mode.string_storage", string_storage): iterator = getattr(pd, func)( f"Select * from {table}", - self.conn, + con=self.conn, dtype_backend=dtype_backend, chunksize=3, ) @@ -2697,7 +2704,7 @@ def test_read_sql_dtype_backend_table(self, string_storage, func, dtype_backend) # GH#50048 table = "test" df = self.dtype_backend_data() - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(name=table, con=self.conn, index=False, if_exists="replace") with pd.option_context("mode.string_storage", string_storage): result = getattr(pd, func)(table, self.conn, dtype_backend=dtype_backend) @@ -2719,7 +2726,7 @@ def test_read_sql_dtype_backend_table(self, string_storage, func, dtype_backend) def test_read_sql_invalid_dtype_backend_table(self, func): table = "test" df = self.dtype_backend_data() - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(name=table, con=self.conn, index=False, if_exists="replace") msg = ( "dtype_backend numpy is invalid, only 'numpy_nullable' and " @@ -2784,7 +2791,7 @@ def test_chunksize_empty_dtypes(self): dtypes = {"a": "int64", "b": "object"} df = DataFrame(columns=["a", "b"]).astype(dtypes) expected = df.copy() - df.to_sql("test", self.conn, index=False, if_exists="replace") + df.to_sql(name="test", con=self.conn, index=False, if_exists="replace") for result in read_sql_query( "SELECT * FROM test", @@ -2800,7 +2807,7 @@ def test_read_sql_dtype(self, func, dtype_backend): # GH#50797 table = "test" df = DataFrame({"a": [1, 2, 3], "b": 5}) - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(name=table, con=self.conn, index=False, if_exists="replace") result = getattr(pd, func)( f"Select * from {table}", @@ -2861,7 +2868,7 @@ def test_default_date_load(self): def test_bigint_warning(self): # test no warning for BIGINT (to support int64) is raised (GH7433) df = DataFrame({"a": [1, 2]}, dtype="int64") - assert df.to_sql("test_bigintwarning", self.conn, index=False) == 2 + assert df.to_sql(name="test_bigintwarning", con=self.conn, index=False) == 2 with tm.assert_produces_warning(None): sql.read_sql_table("test_bigintwarning", self.conn) @@ -2899,7 +2906,9 @@ class Test(BaseModel): with Session() as session: df = DataFrame({"id": [0, 1], "string_column": ["hello", "world"]}) assert ( - df.to_sql("test_frame", con=self.conn, index=False, if_exists="replace") + df.to_sql( + name="test_frame", con=self.conn, index=False, if_exists="replace" + ) == 2 ) session.commit() @@ -2993,15 +3002,19 @@ def test_schema_support(self): self.conn.exec_driver_sql("CREATE SCHEMA other;") # write dataframe to different schema's - assert df.to_sql("test_schema_public", self.conn, index=False) == 2 + assert df.to_sql("test_schema_public", con=self.conn, index=False) == 2 assert ( df.to_sql( - "test_schema_public_explicit", self.conn, index=False, schema="public" + "test_schema_public_explicit", + con=self.conn, + index=False, + schema="public", ) == 2 ) assert ( - df.to_sql("test_schema_other", self.conn, index=False, schema="other") == 2 + df.to_sql("test_schema_other", con=self.conn, index=False, schema="other") + == 2 ) # read dataframes back in @@ -3028,11 +3041,12 @@ def test_schema_support(self): # write dataframe with different if_exists options assert ( - df.to_sql("test_schema_other", self.conn, schema="other", index=False) 
== 2 + df.to_sql("test_schema_other", con=self.conn, schema="other", index=False) + == 2 ) df.to_sql( "test_schema_other", - self.conn, + con=self.conn, schema="other", index=False, if_exists="replace", @@ -3040,7 +3054,7 @@ def test_schema_support(self): assert ( df.to_sql( "test_schema_other", - self.conn, + con=self.conn, schema="other", index=False, if_exists="append", @@ -3159,7 +3173,7 @@ def test_execute_sql(self): def test_datetime_date(self): # test support for datetime.date df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) - assert df.to_sql("test_date", self.conn, index=False) == 2 + assert df.to_sql(name="test_date", con=self.conn, index=False) == 2 res = read_sql_query("SELECT * FROM test_date", self.conn) if self.flavor == "sqlite": # comes back as strings @@ -3178,7 +3192,7 @@ def test_datetime_time(self, tz_aware): df = DataFrame(tz_times, columns=["a"]) - assert df.to_sql("test_time", self.conn, index=False) == 2 + assert df.to_sql(name="test_time", con=self.conn, index=False) == 2 res = read_sql_query("SELECT * FROM test_time", self.conn) if self.flavor == "sqlite": # comes back as strings @@ -3216,8 +3230,8 @@ def test_dtype(self): cols = ["A", "B"] data = [(0.8, True), (0.9, None)] df = DataFrame(data, columns=cols) - assert df.to_sql("dtype_test", self.conn) == 2 - assert df.to_sql("dtype_test2", self.conn, dtype={"B": "STRING"}) == 2 + assert df.to_sql(name="dtype_test", con=self.conn) == 2 + assert df.to_sql(name="dtype_test2", con=self.conn, dtype={"B": "STRING"}) == 2 # sqlite stores Boolean values as INTEGER assert self._get_sqlite_column_type("dtype_test", "B") == "INTEGER" @@ -3225,10 +3239,10 @@ def test_dtype(self): assert self._get_sqlite_column_type("dtype_test2", "B") == "STRING" msg = r"B \(\) not a string" with pytest.raises(ValueError, match=msg): - df.to_sql("error", self.conn, dtype={"B": bool}) + df.to_sql(name="error", con=self.conn, dtype={"B": bool}) # single dtype - assert df.to_sql("single_dtype_test", self.conn, dtype="STRING") == 2 + assert df.to_sql(name="single_dtype_test", con=self.conn, dtype="STRING") == 2 assert self._get_sqlite_column_type("single_dtype_test", "A") == "STRING" assert self._get_sqlite_column_type("single_dtype_test", "B") == "STRING" @@ -3245,7 +3259,7 @@ def test_notna_dtype(self): df = DataFrame(cols) tbl = "notna_dtype_test" - assert df.to_sql(tbl, self.conn) == 2 + assert df.to_sql(name=tbl, con=self.conn) == 2 assert self._get_sqlite_column_type(tbl, "Bool") == "INTEGER" assert self._get_sqlite_column_type(tbl, "Date") == "TIMESTAMP" @@ -3258,7 +3272,7 @@ def test_illegal_names(self): msg = "Empty table or column name specified" with pytest.raises(ValueError, match=msg): - df.to_sql("", self.conn) + df.to_sql(name="", con=self.conn) for ndx, weird_name in enumerate( [ @@ -3274,12 +3288,12 @@ def test_illegal_names(self): "\xe9", ] ): - assert df.to_sql(weird_name, self.conn) == 2 + assert df.to_sql(name=weird_name, con=self.conn) == 2 sql.table_exists(weird_name, self.conn) df2 = DataFrame([[1, 2], [3, 4]], columns=["a", weird_name]) c_tbl = f"test_weird_col_name{ndx:d}" - assert df2.to_sql(c_tbl, self.conn) == 2 + assert df2.to_sql(name=c_tbl, con=self.conn) == 2 sql.table_exists(c_tbl, self.conn) From afb4cc4c5e7670990befe40ce71de6b7ea5325b3 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Mon, 7 Aug 2023 22:58:27 -0400 Subject: [PATCH 06/13] Updating documentation. 
--- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e7515d9c0b8b9..88d30d83da055 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2915,7 +2915,7 @@ def to_sql( 1 User 2 2 User 3 - >>> df.to_sql('users', con=engine) + >>> df.to_sql(name='users', con=engine) 3 >>> from sqlalchemy import text >>> with engine.connect() as conn: @@ -2926,7 +2926,7 @@ def to_sql( >>> with engine.begin() as connection: ... df1 = pd.DataFrame({'name' : ['User 4', 'User 5']}) - ... df1.to_sql('users', con=connection, if_exists='append') + ... df1.to_sql(name='users', con=connection, if_exists='append') 2 This is allowed to support operations that require that the same From a8c0baffd06baead724639a3ab02e04447338385 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 8 Aug 2023 11:03:37 -0400 Subject: [PATCH 07/13] Updating documentation and fixing failing unit tests. --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/generic.py | 10 +++++----- pandas/tests/io/test_sql.py | 12 +++++++----- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index d8ee4abfe8bf3..72664f40ee882 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -220,7 +220,6 @@ Other enhancements - :meth:`DataFrame.to_parquet` and :func:`read_parquet` will now write and read ``attrs`` respectively (:issue:`54346`) - Added support for the DataFrame Consortium Standard (:issue:`54383`) - Performance improvement in :meth:`GroupBy.quantile` (:issue:`51722`) -- Updated ``con`` parameter for :meth:`DataFrame.to_sql` to be a keyword argument. (:issue:`54229`) - .. --------------------------------------------------------------------------- @@ -562,6 +561,7 @@ Other Deprecations - Deprecated the use of non-supported datetime64 and timedelta64 resolutions with :func:`pandas.array`. Supported resolutions are: "s", "ms", "us", "ns" resolutions (:issue:`53058`) - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`) - Deprecated the behavior of :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`Series.argmax`, :meth:`Series.argmin` with either all-NAs and skipna=True or any-NAs and skipna=False returning -1; in a future version this will raise ``ValueError`` (:issue:`33941`, :issue:`33942`) +- Updated ``con`` parameter for :meth:`DataFrame.to_sql` to be a keyword argument. (:issue:`54229`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 88d30d83da055..d477c894502c1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2933,7 +2933,7 @@ def to_sql( DBAPI connection is used for the entire operation. >>> df2 = pd.DataFrame({'name' : ['User 6', 'User 7']}) - >>> df2.to_sql('users', con=engine, if_exists='append') + >>> df2.to_sql(name='users', con=engine, if_exists='append') 2 >>> with engine.connect() as conn: ... conn.execute(text("SELECT * FROM users")).fetchall() @@ -2943,7 +2943,7 @@ def to_sql( Overwrite the table with just ``df2``. - >>> df2.to_sql('users', con=engine, if_exists='replace', + >>> df2.to_sql(name='users', con=engine, if_exists='replace', ... index_label='id') 2 >>> with engine.connect() as conn: @@ -2960,7 +2960,7 @@ def to_sql( ... 
stmt = insert(table.table).values(data).on_conflict_do_nothing(index_elements=["a"]) ... result = conn.execute(stmt) ... return result.rowcount - >>> df_conflict.to_sql("conflict_table", con=conn, if_exists="append", method=insert_on_conflict_nothing) # doctest: +SKIP + >>> df_conflict.to_sql(name="conflict_table", con=conn, if_exists="append", method=insert_on_conflict_nothing) # doctest: +SKIP 0 For MySQL, a callable to update columns ``b`` and ``c`` if there's a conflict @@ -2977,7 +2977,7 @@ def to_sql( ... stmt = stmt.on_duplicate_key_update(b=stmt.inserted.b, c=stmt.inserted.c) ... result = conn.execute(stmt) ... return result.rowcount - >>> df_conflict.to_sql("conflict_table", con=conn, if_exists="append", method=insert_on_conflict_update) # doctest: +SKIP + >>> df_conflict.to_sql(name="conflict_table", con=conn, if_exists="append", method=insert_on_conflict_update) # doctest: +SKIP 2 Specify the dtype (especially useful for integers with missing values). @@ -2993,7 +2993,7 @@ def to_sql( 2 2.0 >>> from sqlalchemy.types import Integer - >>> df.to_sql('integers', con=engine, index=False, + >>> df.to_sql(name='integers', con=engine, index=False, ... dtype={"A": Integer()}) 3 diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 0df8621351715..2b12339f8fdb2 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -811,7 +811,7 @@ def psql_insert_copy(table, conn, keys, data_iter): conn = request.getfixturevalue(conn) expected = DataFrame({"col1": [1, 2], "col2": [0.1, 0.2], "col3": ["a", "n"]}) result_count = expected.to_sql( - "test_frame", con=conn, index=False, method=psql_insert_copy + name="test_frame", con=conn, index=False, method=psql_insert_copy ) # GH 46891 if expected_count is None: @@ -860,11 +860,13 @@ def insert_on_conflict(table, conn, keys, data_iter): conn.execute(create_sql) expected = DataFrame([[1, 2.1, "a"]], columns=list("abc")) - expected.to_sql("test_insert_conflict", con=conn, if_exists="append", index=False) + expected.to_sql( + name="test_insert_conflict", con=conn, if_exists="append", index=False + ) df_insert = DataFrame([[1, 3.2, "b"]], columns=list("abc")) inserted = df_insert.to_sql( - "test_insert_conflict", + name="test_insert_conflict", con=conn, index=False, if_exists="append", @@ -914,11 +916,11 @@ def insert_on_conflict(table, conn, keys, data_iter): conn.execute(create_sql) df = DataFrame([[1, 2.1, "a"]], columns=list("abc")) - df.to_sql("test_insert_conflict", con=conn, if_exists="append", index=False) + df.to_sql(name="test_insert_conflict", con=conn, if_exists="append", index=False) expected = DataFrame([[1, 3.2, "b"]], columns=list("abc")) inserted = expected.to_sql( - "test_insert_conflict", + name="test_insert_conflict", con=conn, index=False, if_exists="append", From b7bd1ed046a12ea76c6274cc24854a89074dd062 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Tue, 8 Aug 2023 14:42:56 -0400 Subject: [PATCH 08/13] Updating documentation and unit tests. --- pandas/io/sql.py | 2 +- pandas/tests/io/test_sql.py | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 2cf9d144eb91c..7669d5aa4cea5 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -621,7 +621,7 @@ def read_sql( >>> conn = connect(':memory:') >>> df = pd.DataFrame(data=[[0, '10/11/12'], [1, '12/11/10']], ... 
columns=['int_column', 'date_column']) - >>> df.to_sql('test_data', conn) + >>> df.to_sql(name='test_data', con=conn) 2 >>> pd.read_sql('SELECT int_column, date_column FROM test_data', conn) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 2b12339f8fdb2..ac6306da3aa46 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -756,7 +756,7 @@ def test_read_procedure(conn, request): from sqlalchemy.engine import Engine df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) - df.to_sql("test_frame", con=conn, index=False) + df.to_sql(name="test_frame", con=conn, index=False) proc = """DROP PROCEDURE IF EXISTS get_testdb; @@ -3004,10 +3004,10 @@ def test_schema_support(self): self.conn.exec_driver_sql("CREATE SCHEMA other;") # write dataframe to different schema's - assert df.to_sql("test_schema_public", con=self.conn, index=False) == 2 + assert df.to_sql(name="test_schema_public", con=self.conn, index=False) == 2 assert ( df.to_sql( - "test_schema_public_explicit", + name="test_schema_public_explicit", con=self.conn, index=False, schema="public", @@ -3015,7 +3015,9 @@ def test_schema_support(self): == 2 ) assert ( - df.to_sql("test_schema_other", con=self.conn, index=False, schema="other") + df.to_sql( + name="test_schema_other", con=self.conn, index=False, schema="other" + ) == 2 ) @@ -3043,11 +3045,13 @@ def test_schema_support(self): # write dataframe with different if_exists options assert ( - df.to_sql("test_schema_other", con=self.conn, schema="other", index=False) + df.to_sql( + name="test_schema_other", con=self.conn, schema="other", index=False + ) == 2 ) df.to_sql( - "test_schema_other", + name="test_schema_other", con=self.conn, schema="other", index=False, @@ -3055,7 +3059,7 @@ def test_schema_support(self): ) assert ( df.to_sql( - "test_schema_other", + name="test_schema_other", con=self.conn, schema="other", index=False, From 2907ed0bd2d2da92f4b4f3ff7638abf1b0cde98f Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 9 Aug 2023 14:22:46 -0400 Subject: [PATCH 09/13] Updating implementation based on reviewer feedback. --- doc/source/user_guide/io.rst | 6 +++--- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/tests/io/test_sql.py | 13 ++++++++++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 02d0c85327e2f..6e352c52cd60e 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5651,7 +5651,7 @@ the database using :func:`~pandas.DataFrame.to_sql`. data = pd.DataFrame(d, columns=c) data - data.to_sql(name="data", con=engine) + data.to_sql("data", con=engine) With some databases, writing large DataFrames can result in errors due to packet size limitations being exceeded. This can be avoided by setting the @@ -5660,7 +5660,7 @@ writes ``data`` to the database in batches of 1000 rows at a time: .. ipython:: python - data.to_sql(name="data_chunked", con=engine, chunksize=1000) + data.to_sql("data_chunked", con=engine, chunksize=1000) SQL data types ++++++++++++++ @@ -5680,7 +5680,7 @@ default ``Text`` type for string columns: from sqlalchemy.types import String - data.to_sql(name="data_dtype", con=engine, dtype={"Col_1": String}) + data.to_sql("data_dtype", con=engine, dtype={"Col_1": String}) .. 
note::

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 72664f40ee882..f7ab84de9fd14 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -561,7 +561,7 @@ Other Deprecations
 - Deprecated the use of non-supported datetime64 and timedelta64 resolutions with :func:`pandas.array`. Supported resolutions are: "s", "ms", "us", "ns" resolutions (:issue:`53058`)
 - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`)
 - Deprecated the behavior of :meth:`Index.argmax`, :meth:`Index.argmin`, :meth:`Series.argmax`, :meth:`Series.argmin` with either all-NAs and skipna=True or any-NAs and skipna=False returning -1; in a future version this will raise ``ValueError`` (:issue:`33941`, :issue:`33942`)
-- Updated ``con`` parameter for :meth:`DataFrame.to_sql` to be a keyword argument. (:issue:`54229`)
+- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_sql` except ``name``. (:issue:`54229`)
 -

 .. ---------------------------------------------------------------------------
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index ac6306da3aa46..9f7415b5ac771 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -1450,7 +1450,7 @@ def test_complex_raises(self):
         df = DataFrame({"a": [1 + 1j, 2j]})
         msg = "Complex datatypes not supported"
         with pytest.raises(ValueError, match=msg):
-            assert df.to_sql(name="test_complex", con=self.conn) is None
+            assert df.to_sql("test_complex", con=self.conn) is None

     @pytest.mark.parametrize(
         "index_name,index_label,expected",
@@ -2846,6 +2846,17 @@ def setup_driver(cls):
         # sqlite3 is built-in
         cls.driver = None

+    def test_keyword_deprecation(self):
+        # GH 54397
+        msg = (
+            "Starting with pandas version 3.0 all arguments of to_sql except for the "
+            "argument 'name' will be keyword-only."
+        )
+        df = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 1, "B": 2, "C": 3}])
+
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            df.to_sql("example", self.conn)
+
     def test_default_type_conversion(self):
         df = sql.read_sql_table("types", self.conn)

From 3ab46ac989a2b13fe4db8e347cf40d9f6f8c5782 Mon Sep 17 00:00:00 2001
From: Richard Howe
Date: Wed, 9 Aug 2023 14:49:37 -0400
Subject: [PATCH 10/13] Updating implementation to allow 'self' to be a positional arg.

---
 pandas/core/generic.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index d477c894502c1..106dfffa40e3b 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2795,7 +2795,9 @@ def to_hdf(
     )

     @final
-    @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["name"], name="to_sql")
+    @deprecate_nonkeyword_arguments(
+        version="3.0", allowed_args=["self", "name"], name="to_sql"
+    )
     def to_sql(
         self,
         name: str,

From 302887273490ec857376b4aeb39fc20fc367153f Mon Sep 17 00:00:00 2001
From: Richard Howe
Date: Wed, 9 Aug 2023 16:28:05 -0400
Subject: [PATCH 11/13] Deprecating con positional arg in new test case.
--- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 2a56ddecf119e..2e01b81f52353 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2949,7 +2949,7 @@ def test_read_sql_string_inference(self): pa = pytest.importorskip("pyarrow") table = "test" df = DataFrame({"a": ["x", "y"]}) - df.to_sql(table, self.conn, index=False, if_exists="replace") + df.to_sql(table, conn=self.conn, index=False, if_exists="replace") with pd.option_context("future.infer_string", True): result = read_sql_table(table, self.conn) From 56ec5d2beab45b35d0a05c13e96023b239b5c994 Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 9 Aug 2023 17:04:56 -0400 Subject: [PATCH 12/13] Fixing typo --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 2e01b81f52353..d82139d705ace 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2949,7 +2949,7 @@ def test_read_sql_string_inference(self): pa = pytest.importorskip("pyarrow") table = "test" df = DataFrame({"a": ["x", "y"]}) - df.to_sql(table, conn=self.conn, index=False, if_exists="replace") + df.to_sql(table, conn=self.con, index=False, if_exists="replace") with pd.option_context("future.infer_string", True): result = read_sql_table(table, self.conn) From 71ac556e53bc98908d9036420e36ad049c1e51aa Mon Sep 17 00:00:00 2001 From: Richard Howe Date: Wed, 9 Aug 2023 17:36:38 -0400 Subject: [PATCH 13/13] Fixing typo --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index d82139d705ace..0b98bcc4d4bec 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2949,7 +2949,7 @@ def test_read_sql_string_inference(self): pa = pytest.importorskip("pyarrow") table = "test" df = DataFrame({"a": ["x", "y"]}) - df.to_sql(table, conn=self.con, index=False, if_exists="replace") + df.to_sql(table, con=self.conn, index=False, if_exists="replace") with pd.option_context("future.infer_string", True): result = read_sql_table(table, self.conn)
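
For context on the end state of the series, here is a minimal sketch of how the deprecation added in PATCH 10 behaves for callers. It assumes pandas with these patches applied and an in-memory SQLite connection; it is an illustration only, not part of the patch series:

    # Minimal sketch (illustration, not part of the patches): behavior of
    # to_sql under deprecate_nonkeyword_arguments(version="3.0",
    # allowed_args=["self", "name"]), assuming this patch series is applied.
    import sqlite3
    import warnings

    import pandas as pd

    conn = sqlite3.connect(":memory:")
    df = pd.DataFrame({"a": [1, 2, 3]})

    # Passing `con` positionally still works but emits a FutureWarning ahead
    # of the argument becoming keyword-only in pandas 3.0.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        df.to_sql("example", conn, index=False, if_exists="replace")
    assert any(issubclass(w.category, FutureWarning) for w in caught)

    # Forward-compatible spelling: `name` stays positional, everything else
    # is passed by keyword, so no warning is raised.
    df.to_sql("example", con=conn, index=False, if_exists="replace")

Passing ``con`` by keyword, as the updated tests and doctests above do, avoids the warning and keeps call sites compatible with pandas 3.0.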