diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py index acf0ec4b9d359..2eb4c8c7f674b 100644 --- a/asv_bench/benchmarks/io/hdf.py +++ b/asv_bench/benchmarks/io/hdf.py @@ -123,12 +123,12 @@ def setup(self, format): index=date_range("20000101", periods=N, freq="h"), ) self.df["object"] = Index([f"i-{i}" for i in range(N)], dtype=object) - self.df.to_hdf(self.fname, "df", format=format) + self.df.to_hdf(self.fname, key="df", format=format) # Numeric df self.df1 = self.df.copy() self.df1 = self.df1.reset_index() - self.df1.to_hdf(self.fname, "df1", format=format) + self.df1.to_hdf(self.fname, key="df1", format=format) def time_read_hdf(self, format): read_hdf(self.fname, "df") @@ -137,7 +137,7 @@ def peakmem_read_hdf(self, format): read_hdf(self.fname, "df") def time_write_hdf(self, format): - self.df.to_hdf(self.fname, "df", format=format) + self.df.to_hdf(self.fname, key="df", format=format) from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/doc/redirects.csv b/doc/redirects.csv index 27b41da63c513..46d69f8909e8a 100644 --- a/doc/redirects.csv +++ b/doc/redirects.csv @@ -495,7 +495,6 @@ generated/pandas.DataFrame.to_csv,../reference/api/pandas.DataFrame.to_csv generated/pandas.DataFrame.to_dict,../reference/api/pandas.DataFrame.to_dict generated/pandas.DataFrame.to_excel,../reference/api/pandas.DataFrame.to_excel generated/pandas.DataFrame.to_feather,../reference/api/pandas.DataFrame.to_feather -generated/pandas.DataFrame.to_gbq,../reference/api/pandas.DataFrame.to_gbq generated/pandas.DataFrame.to_hdf,../reference/api/pandas.DataFrame.to_hdf generated/pandas.DataFrame.to,../reference/api/pandas.DataFrame.to generated/pandas.DataFrame.to_json,../reference/api/pandas.DataFrame.to_json @@ -890,7 +889,6 @@ generated/pandas.read_csv,../reference/api/pandas.read_csv generated/pandas.read_excel,../reference/api/pandas.read_excel generated/pandas.read_feather,../reference/api/pandas.read_feather generated/pandas.read_fwf,../reference/api/pandas.read_fwf -generated/pandas.read_gbq,../reference/api/pandas.read_gbq generated/pandas.read_hdf,../reference/api/pandas.read_hdf generated/pandas.read,../reference/api/pandas.read generated/pandas.read_json,../reference/api/pandas.read_json diff --git a/doc/source/conf.py b/doc/source/conf.py index cb7352341eedd..c4b1a584836f5 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -323,7 +323,6 @@ ("pandas.io.clipboard.read_clipboard", "pandas.read_clipboard"), ("pandas.io.excel.ExcelFile.parse", "pandas.ExcelFile.parse"), ("pandas.io.excel.read_excel", "pandas.read_excel"), - ("pandas.io.gbq.read_gbq", "pandas.read_gbq"), ("pandas.io.html.read_html", "pandas.read_html"), ("pandas.io.json.read_json", "pandas.read_json"), ("pandas.io.parsers.read_csv", "pandas.read_csv"), @@ -812,8 +811,6 @@ def setup(app) -> None: for link in [ "http://scatterci.github.io/pydata/pandas", "http://specs.frictionlessdata.io/json-table-schema/", - "https://cloud.google.com/bigquery/docs/access-control#roles", - "https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query", "https://crates.io/crates/calamine", "https://devguide.python.org/setup/#macos", "https://en.wikipedia.org/wiki/Imputation_statistics", @@ -829,7 +826,6 @@ def setup(app) -> None: "https://nipunbatra.github.io/blog/visualisation/2013/05/01/aggregation-timeseries.html", "https://nbviewer.ipython.org/gist/metakermit/5720498", "https://numpy.org/doc/stable/user/basics.byteswapping.html", - 
"https://pandas-gbq.readthedocs.io/en/latest/changelog.html#changelog-0-8-0", "https://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking", "https://pandas.pydata.org/pandas-docs/stable/ecosystem.html", "https://sqlalchemy.readthedocs.io/en/latest/dialects/index.html", diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index b2d137f12555e..7d0ddcc5d22d9 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -386,7 +386,6 @@ Dependency Minimum Version pip extra Notes fsspec 2022.11.0 fss, gcp, aws Handling files aside from simple local and HTTP (required dependency of s3fs, gcsfs). gcsfs 2022.11.0 gcp Google Cloud Storage access -pandas-gbq 0.19.0 gcp Google Big Query access s3fs 2022.11.0 aws Amazon S3 access ========================= ================== =============== ============================================================= diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 1ade30faa123b..8da87968cecae 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -381,7 +381,6 @@ Serialization / IO / conversion DataFrame.to_feather DataFrame.to_latex DataFrame.to_stata - DataFrame.to_gbq DataFrame.to_records DataFrame.to_string DataFrame.to_clipboard diff --git a/doc/source/reference/io.rst b/doc/source/reference/io.rst index fbd0f6bd200b9..805fb8b783459 100644 --- a/doc/source/reference/io.rst +++ b/doc/source/reference/io.rst @@ -188,13 +188,6 @@ SQL read_sql DataFrame.to_sql -Google BigQuery -~~~~~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - read_gbq - STATA ~~~~~ .. autosummary:: diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 8adbb53227586..80572de91e0c7 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -36,7 +36,6 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like binary;`SPSS `__;:ref:`read_spss`; binary;`Python Pickle Format `__;:ref:`read_pickle`;:ref:`to_pickle` SQL;`SQL `__;:ref:`read_sql`;:ref:`to_sql` - SQL;`Google BigQuery `__;:ref:`read_gbq`;:ref:`to_gbq` :ref:`Here ` is an informal performance comparison for some of these IO methods. @@ -6096,10 +6095,6 @@ Google BigQuery The ``pandas-gbq`` package provides functionality to read/write from Google BigQuery. -pandas integrates with this external package. if ``pandas-gbq`` is installed, you can -use the pandas methods ``pd.read_gbq`` and ``DataFrame.to_gbq``, which will call the -respective functions from ``pandas-gbq``. - Full documentation can be found `here `__. .. 
_io.stata: diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f068a6b275304..9a09ee5f46e98 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -102,12 +102,14 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- All arguments except the first ``path``-like argument in IO writers are now keyword-only (:issue:`54229`) - Removed :meth:`DataFrame.first` and :meth:`DataFrame.last` (:issue:`53710`) - Removed :meth:`DataFrameGroupby.fillna` and :meth:`SeriesGroupBy.fillna` (:issue:`55719`) - Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`) - Removed ``axis`` argument from :meth:`DataFrame.groupby`, :meth:`Series.groupby`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.resample`, and :meth:`Series.resample` (:issue:`51203`) - Removed ``axis`` argument from all groupby operations (:issue:`50405`) - Removed ``pandas.io.sql.execute`` (:issue:`50185`) +- Removed ``read_gbq`` and ``DataFrame.to_gbq``. Use ``pandas_gbq.read_gbq`` and ``pandas_gbq.to_gbq`` instead; see https://pandas-gbq.readthedocs.io/en/latest/api.html (:issue:`55525`) - Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`) - Removed the ``ArrayManager`` (:issue:`55043`) - Removed unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` methods (:issue:`50977`) diff --git a/pandas/__init__.py b/pandas/__init__.py index 535522253c415..95601f226a83f 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -162,7 +162,6 @@ read_parquet, read_orc, read_feather, - read_gbq, read_html, read_xml, read_json, @@ -352,7 +351,6 @@ "read_excel", "read_feather", "read_fwf", - "read_gbq", "read_hdf", "read_html", "read_json", diff --git a/pandas/_typing.py b/pandas/_typing.py index 8646b7425894d..8cc53335f6ce9 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -510,9 +510,6 @@ def closed(self) -> bool: # from_dict FromDictOrient = Literal["columns", "index", "tight"] -# to_gbc -ToGbqIfexist = Literal["fail", "replace", "append"] - # to_stata ToStataByteorder = Literal[">", "<", "little", "big"] diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 0f63d3d3002c3..bcfe1385f0528 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -37,7 +36,6 @@ "numexpr": "2.8.4", "odfpy": "1.4.1", "openpyxl": "3.1.0", - "pandas_gbq": "0.19.0", "psycopg2": "2.9.6", # (dt dec pq3 ext lo64) "pymysql": "1.0.2", "pyarrow": "10.0.1", @@ -68,7 +67,6 @@ "jinja2": "Jinja2", "lxml.etree": "lxml", "odf": "odfpy", - "pandas_gbq": "pandas-gbq", "python_calamine": "python-calamine", "sqlalchemy": "SQLAlchemy", "tables": "pytables", diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 239000bf50e01..910d7b2ab2178 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -245,7 +245,6 @@ SortKind, StorageOptions, Suffixes, - ToGbqIfexist, ToStataByteorder, ToTimestampHow, UpdateJoin, @@ -1215,6 +1214,7 @@ def _repr_html_(self) -> str | None: def to_string( self, buf: None = ..., + *, columns: Axes | None = ..., col_space: int | list[int] | dict[Hashable, int] | None = ..., header: bool | SequenceNotStr[str] = ..., @@ -1240,6 +1240,7 @@ def to_string( self, buf: FilePath | WriteBuffer[str], + *, columns: Axes | None = ..., col_space: int | list[int] | dict[Hashable, int] | None = ..., header: bool | SequenceNotStr[str] = ..., @@ -1261,9 +1262,6 @@ 
def to_string( ) -> None: ... - @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self", "buf"], name="to_string" - ) @Substitution( header_type="bool or list of str", header="Write out the column names. If a list of columns " @@ -1278,6 +1276,7 @@ def to_string( def to_string( self, buf: FilePath | WriteBuffer[str] | None = None, + *, columns: Axes | None = None, col_space: int | list[int] | dict[Hashable, int] | None = None, header: bool | SequenceNotStr[str] = True, @@ -1985,14 +1984,12 @@ def to_dict( # error: Incompatible default for argument "into" (default has type "type # [dict[Any, Any]]", argument has type "type[MutableMappingT] | MutableMappingT") - @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self", "orient"], name="to_dict" - ) def to_dict( self, orient: Literal[ "dict", "list", "series", "split", "tight", "records", "index" ] = "dict", + *, into: type[MutableMappingT] | MutableMappingT = dict, # type: ignore[assignment] index: bool = True, ) -> MutableMappingT | list[MutableMappingT]: @@ -2103,144 +2100,6 @@ def to_dict( return to_dict(self, orient, into=into, index=index) - @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self", "destination_table"], name="to_gbq" - ) - def to_gbq( - self, - destination_table: str, - project_id: str | None = None, - chunksize: int | None = None, - reauth: bool = False, - if_exists: ToGbqIfexist = "fail", - auth_local_webserver: bool = True, - table_schema: list[dict[str, str]] | None = None, - location: str | None = None, - progress_bar: bool = True, - credentials=None, - ) -> None: - """ - Write a DataFrame to a Google BigQuery table. - - .. deprecated:: 2.2.0 - - Please use ``pandas_gbq.to_gbq`` instead. - - This function requires the `pandas-gbq package - `__. - - See the `How to authenticate with Google BigQuery - `__ - guide for authentication instructions. - - Parameters - ---------- - destination_table : str - Name of table to be written, in the form ``dataset.tablename``. - project_id : str, optional - Google BigQuery Account project ID. Optional when available from - the environment. - chunksize : int, optional - Number of rows to be inserted in each chunk from the dataframe. - Set to ``None`` to load the whole dataframe at once. - reauth : bool, default False - Force Google BigQuery to re-authenticate the user. This is useful - if multiple accounts are used. - if_exists : str, default 'fail' - Behavior when the destination table exists. Value can be one of: - - ``'fail'`` - If table exists raise pandas_gbq.gbq.TableCreationError. - ``'replace'`` - If table exists, drop it, recreate it, and insert data. - ``'append'`` - If table exists, insert data. Create if does not exist. - auth_local_webserver : bool, default True - Use the `local webserver flow`_ instead of the `console flow`_ - when getting user credentials. - - .. _local webserver flow: - https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server - .. _console flow: - https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console - - *New in version 0.2.0 of pandas-gbq*. - - .. versionchanged:: 1.5.0 - Default value is changed to ``True``. Google has deprecated the - ``auth_local_webserver = False`` `"out of band" (copy-paste) - flow - `_. 
- table_schema : list of dicts, optional - List of BigQuery table fields to which according DataFrame - columns conform to, e.g. ``[{'name': 'col1', 'type': - 'STRING'},...]``. If schema is not provided, it will be - generated according to dtypes of DataFrame columns. See - BigQuery API documentation on available names of a field. - - *New in version 0.3.1 of pandas-gbq*. - location : str, optional - Location where the load job should run. See the `BigQuery locations - documentation - `__ for a - list of available locations. The location must match that of the - target dataset. - - *New in version 0.5.0 of pandas-gbq*. - progress_bar : bool, default True - Use the library `tqdm` to show the progress bar for the upload, - chunk by chunk. - - *New in version 0.5.0 of pandas-gbq*. - credentials : google.auth.credentials.Credentials, optional - Credentials for accessing Google APIs. Use this parameter to - override default credentials, such as to use Compute Engine - :class:`google.auth.compute_engine.Credentials` or Service - Account :class:`google.oauth2.service_account.Credentials` - directly. - - *New in version 0.8.0 of pandas-gbq*. - - See Also - -------- - pandas_gbq.to_gbq : This function in the pandas-gbq library. - read_gbq : Read a DataFrame from Google BigQuery. - - Examples - -------- - Example taken from `Google BigQuery documentation - `_ - - >>> project_id = "my-project" - >>> table_id = 'my_dataset.my_table' - >>> df = pd.DataFrame({ - ... "my_string": ["a", "b", "c"], - ... "my_int64": [1, 2, 3], - ... "my_float64": [4.0, 5.0, 6.0], - ... "my_bool1": [True, False, True], - ... "my_bool2": [False, True, False], - ... "my_dates": pd.date_range("now", periods=3), - ... } - ... ) - - >>> df.to_gbq(table_id, project_id=project_id) # doctest: +SKIP - """ - from pandas.io import gbq - - gbq.to_gbq( - self, - destination_table, - project_id=project_id, - chunksize=chunksize, - reauth=reauth, - if_exists=if_exists, - auth_local_webserver=auth_local_webserver, - table_schema=table_schema, - location=location, - progress_bar=progress_bar, - credentials=credentials, - ) - @classmethod def from_records( cls, @@ -2859,9 +2718,6 @@ def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None: to_feather(self, path, **kwargs) - @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self", "buf"], name="to_markdown" - ) @doc( Series.to_markdown, klass=_shared_doc_kwargs["klass"], @@ -2891,6 +2747,7 @@ def to_feather(self, path: FilePath | WriteBuffer[bytes], **kwargs) -> None: def to_markdown( self, buf: FilePath | WriteBuffer[str] | None = None, + *, mode: str = "wt", index: bool = True, storage_options: StorageOptions | None = None, @@ -2915,6 +2772,7 @@ def to_markdown( def to_parquet( self, path: None = ..., + *, engine: Literal["auto", "pyarrow", "fastparquet"] = ..., compression: str | None = ..., index: bool | None = ..., @@ -2928,6 +2786,7 @@ def to_parquet( def to_parquet( self, path: FilePath | WriteBuffer[bytes], + *, engine: Literal["auto", "pyarrow", "fastparquet"] = ..., compression: str | None = ..., index: bool | None = ..., @@ -2937,13 +2796,11 @@ def to_parquet( ) -> None: ... 
- @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self", "path"], name="to_parquet" - ) @doc(storage_options=_shared_docs["storage_options"]) def to_parquet( self, path: FilePath | WriteBuffer[bytes] | None = None, + *, engine: Literal["auto", "pyarrow", "fastparquet"] = "auto", compression: str | None = "snappy", index: bool | None = None, @@ -3135,6 +2992,7 @@ def to_orc( def to_html( self, buf: FilePath | WriteBuffer[str], + *, columns: Axes | None = ..., col_space: ColspaceArgType | None = ..., header: bool = ..., @@ -3164,6 +3022,7 @@ def to_html( def to_html( self, buf: None = ..., + *, columns: Axes | None = ..., col_space: ColspaceArgType | None = ..., header: bool = ..., @@ -3189,9 +3048,6 @@ def to_html( ) -> str: ... - @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self", "buf"], name="to_html" - ) @Substitution( header_type="bool", header="Whether to print column labels, default True", @@ -3203,6 +3059,7 @@ def to_html( def to_html( self, buf: FilePath | WriteBuffer[str] | None = None, + *, columns: Axes | None = None, col_space: ColspaceArgType | None = None, header: bool = True, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bbe499aad695f..93c2afab51d2c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -100,10 +100,7 @@ _chained_assignment_method_msg, _chained_assignment_warning_method_msg, ) -from pandas.util._decorators import ( - deprecate_nonkeyword_arguments, - doc, -) +from pandas.util._decorators import doc from pandas.util._exceptions import find_stack_level from pandas.util._validators import ( check_dtype_backend, @@ -2204,9 +2201,6 @@ def _repr_data_resource_(self): # I/O Methods @final - @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self", "excel_writer"], name="to_excel" - ) @doc( klass="object", storage_options=_shared_docs["storage_options"], @@ -2221,6 +2215,7 @@ def _repr_data_resource_(self): def to_excel( self, excel_writer: FilePath | WriteExcelBuffer | ExcelWriter, + *, sheet_name: str = "Sheet1", na_rep: str = "", float_format: str | None = None, @@ -2378,9 +2373,6 @@ def to_excel( ) @final - @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self", "path_or_buf"], name="to_json" - ) @doc( storage_options=_shared_docs["storage_options"], compression_options=_shared_docs["compression_options"] % "path_or_buf", @@ -2388,6 +2380,7 @@ def to_excel( def to_json( self, path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, + *, orient: Literal["split", "records", "index", "table", "columns", "values"] | None = None, date_format: str | None = None, @@ -2669,12 +2662,10 @@ def to_json( ) @final - @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self", "path_or_buf"], name="to_hdf" - ) def to_hdf( self, path_or_buf: FilePath | HDFStore, + *, key: str, mode: Literal["a", "w", "r+"] = "a", complevel: int | None = None, @@ -2823,13 +2814,11 @@ def to_hdf( ) @final - @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self", "name", "con"], name="to_sql" - ) def to_sql( self, name: str, con, + *, schema: str | None = None, if_exists: Literal["fail", "replace", "append"] = "fail", index: bool_t = True, @@ -3049,9 +3038,6 @@ def to_sql( ) @final - @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self", "path"], name="to_pickle" - ) @doc( storage_options=_shared_docs["storage_options"], compression_options=_shared_docs["compression_options"] % "path", @@ -3059,6 +3045,7 @@ def to_sql( def to_pickle( 
self, path: FilePath | WriteBuffer[bytes], + *, compression: CompressionOptions = "infer", protocol: int = pickle.HIGHEST_PROTOCOL, storage_options: StorageOptions | None = None, @@ -3122,11 +3109,8 @@ def to_pickle( ) @final - @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self"], name="to_clipboard" - ) def to_clipboard( - self, excel: bool_t = True, sep: str | None = None, **kwargs + self, *, excel: bool_t = True, sep: str | None = None, **kwargs ) -> None: r""" Copy object to the system clipboard. @@ -3285,6 +3269,7 @@ class (index) object 'bird' 'bird' 'mammal' 'mammal' def to_latex( self, buf: None = ..., + *, columns: Sequence[Hashable] | None = ..., header: bool_t | SequenceNotStr[str] = ..., index: bool_t = ..., @@ -3312,6 +3297,7 @@ def to_latex( def to_latex( self, buf: FilePath | WriteBuffer[str], + *, columns: Sequence[Hashable] | None = ..., header: bool_t | SequenceNotStr[str] = ..., index: bool_t = ..., @@ -3336,12 +3322,10 @@ def to_latex( ... @final - @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self", "buf"], name="to_latex" - ) def to_latex( self, buf: FilePath | WriteBuffer[str] | None = None, + *, columns: Sequence[Hashable] | None = None, header: bool_t | SequenceNotStr[str] = True, index: bool_t = True, @@ -3695,6 +3679,7 @@ def _to_latex_via_styler( def to_csv( self, path_or_buf: None = ..., + *, sep: str = ..., na_rep: str = ..., float_format: str | Callable | None = ..., @@ -3722,6 +3707,7 @@ def to_csv( def to_csv( self, path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str], + *, sep: str = ..., na_rep: str = ..., float_format: str | Callable | None = ..., @@ -3746,9 +3732,6 @@ def to_csv( ... @final - @deprecate_nonkeyword_arguments( - version="3.0", allowed_args=["self", "path_or_buf"], name="to_csv" - ) @doc( storage_options=_shared_docs["storage_options"], compression_options=_shared_docs["compression_options"] % "path_or_buf", @@ -3756,6 +3739,7 @@ def to_csv( def to_csv( self, path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, + *, sep: str = ",", na_rep: str = "", float_format: str | Callable | None = None, diff --git a/pandas/io/api.py b/pandas/io/api.py index 4e8b34a61dfc6..d4982399a604b 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -9,7 +9,6 @@ read_excel, ) from pandas.io.feather_format import read_feather -from pandas.io.gbq import read_gbq from pandas.io.html import read_html from pandas.io.json import read_json from pandas.io.orc import read_orc @@ -46,7 +45,6 @@ "read_excel", "read_feather", "read_fwf", - "read_gbq", "read_hdf", "read_html", "read_json", diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py deleted file mode 100644 index fe8702c2e16ae..0000000000000 --- a/pandas/io/gbq.py +++ /dev/null @@ -1,257 +0,0 @@ -""" Google BigQuery support """ -from __future__ import annotations - -from typing import ( - TYPE_CHECKING, - Any, -) -import warnings - -from pandas.compat._optional import import_optional_dependency -from pandas.util._exceptions import find_stack_level - -if TYPE_CHECKING: - from types import ModuleType - - import google.auth - - from pandas import DataFrame - - -def _try_import() -> ModuleType: - # since pandas is a dependency of pandas-gbq - # we need to import on first use - msg = ( - "pandas-gbq is required to load data from Google BigQuery. " - "See the docs: https://pandas-gbq.readthedocs.io." 
- ) - pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg) - return pandas_gbq - - -def read_gbq( - query: str, - project_id: str | None = None, - index_col: str | None = None, - col_order: list[str] | None = None, - reauth: bool = False, - auth_local_webserver: bool = True, - dialect: str | None = None, - location: str | None = None, - configuration: dict[str, Any] | None = None, - credentials: google.auth.credentials.Credentials | None = None, - use_bqstorage_api: bool | None = None, - max_results: int | None = None, - progress_bar_type: str | None = None, -) -> DataFrame: - """ - Load data from Google BigQuery. - - .. deprecated:: 2.2.0 - - Please use ``pandas_gbq.read_gbq`` instead. - - This function requires the `pandas-gbq package - `__. - - See the `How to authenticate with Google BigQuery - `__ - guide for authentication instructions. - - Parameters - ---------- - query : str - SQL-Like Query to return data values. - project_id : str, optional - Google BigQuery Account project ID. Optional when available from - the environment. - index_col : str, optional - Name of result column to use for index in results DataFrame. - col_order : list(str), optional - List of BigQuery column names in the desired order for results - DataFrame. - reauth : bool, default False - Force Google BigQuery to re-authenticate the user. This is useful - if multiple accounts are used. - auth_local_webserver : bool, default True - Use the `local webserver flow`_ instead of the `console flow`_ - when getting user credentials. - - .. _local webserver flow: - https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server - .. _console flow: - https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console - - *New in version 0.2.0 of pandas-gbq*. - - .. versionchanged:: 1.5.0 - Default value is changed to ``True``. Google has deprecated the - ``auth_local_webserver = False`` `"out of band" (copy-paste) - flow - `_. - dialect : str, default 'legacy' - Note: The default value is changing to 'standard' in a future version. - - SQL syntax dialect to use. Value can be one of: - - ``'legacy'`` - Use BigQuery's legacy SQL dialect. For more information see - `BigQuery Legacy SQL Reference - `__. - ``'standard'`` - Use BigQuery's standard SQL, which is - compliant with the SQL 2011 standard. For more information - see `BigQuery Standard SQL Reference - `__. - location : str, optional - Location where the query job should run. See the `BigQuery locations - documentation - `__ for a - list of available locations. The location must match that of any - datasets used in the query. - - *New in version 0.5.0 of pandas-gbq*. - configuration : dict, optional - Query config parameters for job processing. - For example: - - configuration = {'query': {'useQueryCache': False}} - - For more information see `BigQuery REST API Reference - `__. - credentials : google.auth.credentials.Credentials, optional - Credentials for accessing Google APIs. Use this parameter to override - default credentials, such as to use Compute Engine - :class:`google.auth.compute_engine.Credentials` or Service Account - :class:`google.oauth2.service_account.Credentials` directly. - - *New in version 0.8.0 of pandas-gbq*. - use_bqstorage_api : bool, default False - Use the `BigQuery Storage API - `__ to - download query results quickly, but at an increased cost. 
To use this - API, first `enable it in the Cloud Console - `__. - You must also have the `bigquery.readsessions.create - `__ - permission on the project you are billing queries to. - - This feature requires version 0.10.0 or later of the ``pandas-gbq`` - package. It also requires the ``google-cloud-bigquery-storage`` and - ``fastavro`` packages. - - max_results : int, optional - If set, limit the maximum number of rows to fetch from the query - results. - - progress_bar_type : Optional, str - If set, use the `tqdm `__ library to - display a progress bar while the data downloads. Install the - ``tqdm`` package to use this feature. - - Possible values of ``progress_bar_type`` include: - - ``None`` - No progress bar. - ``'tqdm'`` - Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. - ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a - progress bar as a Jupyter notebook widget. - ``'tqdm_gui'`` - Use the :func:`tqdm.tqdm_gui` function to display a - progress bar as a graphical dialog box. - - Returns - ------- - df: DataFrame - DataFrame representing results of query. - - See Also - -------- - pandas_gbq.read_gbq : This function in the pandas-gbq library. - DataFrame.to_gbq : Write a DataFrame to Google BigQuery. - - Examples - -------- - Example taken from `Google BigQuery documentation - `_ - - >>> sql = "SELECT name FROM table_name WHERE state = 'TX' LIMIT 100;" - >>> df = pd.read_gbq(sql, dialect="standard") # doctest: +SKIP - >>> project_id = "your-project-id" # doctest: +SKIP - >>> df = pd.read_gbq(sql, - ... project_id=project_id, - ... dialect="standard" - ... ) # doctest: +SKIP - """ - warnings.warn( - "read_gbq is deprecated and will be removed in a future version. " - "Please use pandas_gbq.read_gbq instead: " - "https://pandas-gbq.readthedocs.io/en/latest/api.html#pandas_gbq.read_gbq", - FutureWarning, - stacklevel=find_stack_level(), - ) - pandas_gbq = _try_import() - - kwargs: dict[str, str | bool | int | None] = {} - - # START: new kwargs. Don't populate unless explicitly set. - if use_bqstorage_api is not None: - kwargs["use_bqstorage_api"] = use_bqstorage_api - if max_results is not None: - kwargs["max_results"] = max_results - - kwargs["progress_bar_type"] = progress_bar_type - # END: new kwargs - - return pandas_gbq.read_gbq( - query, - project_id=project_id, - index_col=index_col, - col_order=col_order, - reauth=reauth, - auth_local_webserver=auth_local_webserver, - dialect=dialect, - location=location, - configuration=configuration, - credentials=credentials, - **kwargs, - ) - - -def to_gbq( - dataframe: DataFrame, - destination_table: str, - project_id: str | None = None, - chunksize: int | None = None, - reauth: bool = False, - if_exists: str = "fail", - auth_local_webserver: bool = True, - table_schema: list[dict[str, str]] | None = None, - location: str | None = None, - progress_bar: bool = True, - credentials: google.auth.credentials.Credentials | None = None, -) -> None: - warnings.warn( - "to_gbq is deprecated and will be removed in a future version. 
" - "Please use pandas_gbq.to_gbq instead: " - "https://pandas-gbq.readthedocs.io/en/latest/api.html#pandas_gbq.to_gbq", - FutureWarning, - stacklevel=find_stack_level(), - ) - pandas_gbq = _try_import() - pandas_gbq.to_gbq( - dataframe, - destination_table, - project_id=project_id, - chunksize=chunksize, - reauth=reauth, - if_exists=if_exists, - auth_local_webserver=auth_local_webserver, - table_schema=table_schema, - location=location, - progress_bar=progress_bar, - credentials=credentials, - ) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 61d6aaf63adf1..6ed1d07d0cc3d 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -153,7 +153,6 @@ class TestPDApi(Base): "read_csv", "read_excel", "read_fwf", - "read_gbq", "read_hdf", "read_html", "read_xml", diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index 570f85a4a31ee..b8631d95a6399 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -513,16 +513,6 @@ def test_to_dict_masked_native_python(self): result = df.to_dict(orient="records") assert isinstance(result[0]["a"], int) - def test_to_dict_pos_args_deprecation(self): - # GH-54229 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - r"Starting with pandas version 3.0 all arguments of to_dict except for the " - r"argument 'orient' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - df.to_dict("records", {}) - @pytest.mark.parametrize( "val", [Timestamp(2020, 1, 1), Timedelta(1), Period("2020"), Interval(1, 2)] diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 6ea48cd759fbc..3352164b2f980 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1486,18 +1486,6 @@ def test_excelwriter_fspath(self): with ExcelWriter(path) as writer: assert os.fspath(writer) == str(path) - def test_to_excel_pos_args_deprecation(self): - # GH-54229 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - r"Starting with pandas version 3.0 all arguments of to_excel except " - r"for the argument 'excel_writer' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - buf = BytesIO() - writer = ExcelWriter(buf) - df.to_excel(writer, "Sheet_name_1") - @pytest.mark.parametrize("klass", _writers.values()) def test_subclass_attr(klass): diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 0db49a73621ea..49776d532db1d 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -744,15 +744,3 @@ def test_to_csv_iterative_compression_buffer(compression): pd.read_csv(buffer, compression=compression, index_col=0), df ) assert not buffer.closed - - -def test_to_csv_pos_args_deprecation(): - # GH-54229 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - r"Starting with pandas version 3.0 all arguments of to_csv except for the " - r"argument 'path_or_buf' will be keyword-only." 
- ) - with tm.assert_produces_warning(FutureWarning, match=msg): - buffer = io.BytesIO() - df.to_csv(buffer, ";") diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index e85b4cb29390e..8031f67cd0567 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -15,7 +15,6 @@ get_option, option_context, ) -import pandas._testing as tm import pandas.io.formats.format as fmt @@ -1165,14 +1164,3 @@ def test_to_html_empty_complex_array(): "" ) assert result == expected - - -def test_to_html_pos_args_deprecation(): - # GH-54229 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - r"Starting with pandas version 3.0 all arguments of to_html except for the " - r"argument 'buf' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - df.to_html(None, None) diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 304aff0002209..b9d5f04cb203b 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -187,22 +187,6 @@ def test_to_latex_midrule_location(self): ) assert result == expected - def test_to_latex_pos_args_deprecation(self): - # GH-54229 - df = DataFrame( - { - "name": ["Raphael", "Donatello"], - "age": [26, 45], - "height": [181.23, 177.65], - } - ) - msg = ( - r"Starting with pandas version 3.0 all arguments of to_latex except for " - r"the argument 'buf' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - df.to_latex(None, None) - class TestToLatexLongtable: def test_to_latex_empty_longtable(self): diff --git a/pandas/tests/io/formats/test_to_markdown.py b/pandas/tests/io/formats/test_to_markdown.py index 85eca834ff0d4..437f079c5f2f9 100644 --- a/pandas/tests/io/formats/test_to_markdown.py +++ b/pandas/tests/io/formats/test_to_markdown.py @@ -1,12 +1,8 @@ -from io import ( - BytesIO, - StringIO, -) +from io import StringIO import pytest import pandas as pd -import pandas._testing as tm pytest.importorskip("tabulate") @@ -92,15 +88,3 @@ def test_showindex_disallowed_in_kwargs(): df = pd.DataFrame([1, 2, 3]) with pytest.raises(ValueError, match="Pass 'index' instead of 'showindex"): df.to_markdown(index=True, showindex=True) - - -def test_markdown_pos_args_deprecatation(): - # GH-54229 - df = pd.DataFrame({"a": [1, 2, 3]}) - msg = ( - r"Starting with pandas version 3.0 all arguments of to_markdown except for the " - r"argument 'buf' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - buffer = BytesIO() - df.to_markdown(buffer, "grid") diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 2e5a5005cb076..c1beed8b88ee0 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -764,18 +764,6 @@ def test_to_string_string_dtype(self): ) assert result == expected - def test_to_string_pos_args_deprecation(self): - # GH#54229 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - "Starting with pandas version 3.0 all arguments of to_string " - "except for the " - "argument 'buf' will be keyword-only." 
- ) - with tm.assert_produces_warning(FutureWarning, match=msg): - buf = StringIO() - df.to_string(buf, None, None, True, True) - def test_to_string_utf8_columns(self): n = "\u05d0".encode() df = DataFrame([1, 2], columns=[n]) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index a22d4666e3b2d..9a263e8bc5f44 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1,10 +1,7 @@ import datetime from datetime import timedelta from decimal import Decimal -from io import ( - BytesIO, - StringIO, -) +from io import StringIO import json import os import sys @@ -2150,18 +2147,6 @@ def test_json_roundtrip_string_inference(orient): tm.assert_frame_equal(result, expected) -def test_json_pos_args_deprecation(): - # GH-54229 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - r"Starting with pandas version 3.0 all arguments of to_json except for the " - r"argument 'path_or_buf' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - buf = BytesIO() - df.to_json(buf, "split") - - @td.skip_if_no("pyarrow") def test_to_json_ea_null(): # GH#57224 diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index de56ab614dcd4..9ab70cd95c1cf 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -354,20 +354,6 @@ def test_store_dropna(tmp_path, setup_path): tm.assert_frame_equal(df_without_missing, reloaded) -def test_keyword_deprecation(tmp_path, setup_path): - # GH 54229 - path = tmp_path / setup_path - - msg = ( - "Starting with pandas version 3.0 all arguments of to_hdf except for the " - "argument 'path_or_buf' will be keyword-only." - ) - df = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 1, "B": 2, "C": 3}]) - - with tm.assert_produces_warning(FutureWarning, match=msg): - df.to_hdf(path, "key") - - def test_to_hdf_with_min_itemsize(tmp_path, setup_path): path = tmp_path / setup_path diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 3c0208fcc74ec..5f19c15817ce7 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -411,13 +411,3 @@ def test_invalid_dtype_backend(self): ) with pytest.raises(ValueError, match=msg): read_clipboard(dtype_backend="numpy") - - def test_to_clipboard_pos_args_deprecation(self): - # GH-54229 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - r"Starting with pandas version 3.0 all arguments of to_clipboard " - r"will be keyword-only." 
- ) - with tm.assert_produces_warning(FutureWarning, match=msg): - df.to_clipboard(True, None) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py deleted file mode 100644 index b2b212ceb2c41..0000000000000 --- a/pandas/tests/io/test_gbq.py +++ /dev/null @@ -1,14 +0,0 @@ -import pandas as pd -import pandas._testing as tm - - -def test_read_gbq_deprecated(): - with tm.assert_produces_warning(FutureWarning): - with tm.external_error_raised(Exception): - pd.read_gbq("fake") - - -def test_to_gbq_deprecated(): - with tm.assert_produces_warning(FutureWarning): - with tm.external_error_raised(Exception): - pd.DataFrame(range(1)).to_gbq("fake") diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index bba53f7ff50a8..b56993829b0ae 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -349,23 +349,6 @@ def test_cross_engine_fp_pa(df_cross_compat, pa, fp): tm.assert_frame_equal(result, df[["a", "d"]]) -def test_parquet_pos_args_deprecation(engine): - # GH-54229 - df = pd.DataFrame({"a": [1, 2, 3]}) - msg = ( - r"Starting with pandas version 3.0 all arguments of to_parquet except for the " - r"argument 'path' will be keyword-only." - ) - with tm.ensure_clean() as path: - with tm.assert_produces_warning( - FutureWarning, - match=msg, - check_stacklevel=False, - raise_on_extra_warnings=False, - ): - df.to_parquet(path, engine) - - class Base: def check_error_on_write(self, df, engine, exc, err_msg): # check that we are raising the exception on writing diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 57c7829924531..38a0888909663 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -626,15 +626,3 @@ def test_pickle_frame_v124_unpickle_130(datapath): expected = DataFrame(index=[], columns=[]) tm.assert_frame_equal(df, expected) - - -def test_pickle_pos_args_deprecation(): - # GH-54229 - df = DataFrame({"a": [1, 2, 3]}) - msg = ( - r"Starting with pandas version 3.0 all arguments of to_pickle except for the " - r"argument 'path' will be keyword-only." - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - buffer = io.BytesIO() - df.to_pickle(buffer, "infer") diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 8f15467084cf3..e6595ca9b06a8 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -3726,20 +3726,6 @@ def test_read_sql_dtype(conn, request, func, dtype_backend): tm.assert_frame_equal(result, expected) -def test_keyword_deprecation(sqlite_engine): - conn = sqlite_engine - # GH 54397 - msg = ( - "Starting with pandas version 3.0 all arguments of to_sql except for the " - "arguments 'name' and 'con' will be keyword-only." 
- ) - df = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 1, "B": 2, "C": 3}]) - df.to_sql("example", conn) - - with tm.assert_produces_warning(FutureWarning, match=msg): - df.to_sql("example", conn, None, if_exists="replace") - - def test_bigint_warning(sqlite_engine): conn = sqlite_engine # test no warning for BIGINT (to support int64) is raised (GH7433) diff --git a/pyproject.toml b/pyproject.toml index a7cb87bbca4b7..bd7172ec85132 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ performance = ['bottleneck>=1.3.6', 'numba>=0.56.4', 'numexpr>=2.8.4'] computation = ['scipy>=1.10.0', 'xarray>=2022.12.0'] fss = ['fsspec>=2022.11.0'] aws = ['s3fs>=2022.11.0'] -gcp = ['gcsfs>=2022.11.0', 'pandas-gbq>=0.19.0'] +gcp = ['gcsfs>=2022.11.0'] excel = ['odfpy>=1.4.1', 'openpyxl>=3.1.0', 'python-calamine>=0.1.7', 'pyxlsb>=1.0.10', 'xlrd>=2.0.1', 'xlsxwriter>=3.0.5'] parquet = ['pyarrow>=10.0.1'] feather = ['pyarrow>=10.0.1'] @@ -104,7 +104,6 @@ all = ['adbc-driver-postgresql>=0.8.0', 'numexpr>=2.8.4', 'odfpy>=1.4.1', 'openpyxl>=3.1.0', - 'pandas-gbq>=0.19.0', 'psycopg2>=2.9.6', 'pyarrow>=10.0.1', 'pymysql>=1.0.2', diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py index f8cce255f532d..1001b00450354 100755 --- a/scripts/validate_min_versions_in_sync.py +++ b/scripts/validate_min_versions_in_sync.py @@ -36,7 +36,7 @@ SETUP_PATH = pathlib.Path("pyproject.toml").resolve() YAML_PATH = pathlib.Path("ci/deps") ENV_PATH = pathlib.Path("environment.yml") -EXCLUDE_DEPS = {"tzdata", "blosc", "pandas-gbq", "pyqt", "pyqt5"} +EXCLUDE_DEPS = {"tzdata", "blosc", "pyqt", "pyqt5"} EXCLUSION_LIST = frozenset(["python=3.8[build=*_pypy]"]) # pandas package is not available # in pre-commit environment
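
Two migration notes on the patch above; the sketches below assume a pandas build that includes these changes. First, with the `deprecate_nonkeyword_arguments` decorators deleted and a bare `*` inserted after the path-like parameter, the IO writers (`to_string`, `to_dict`, `to_markdown`, `to_parquet`, `to_html`, `to_excel`, `to_json`, `to_hdf`, `to_sql`, `to_pickle`, `to_clipboard`, `to_latex`, `to_csv`) now reject extra positional arguments with a `TypeError` rather than the 2.x `FutureWarning`; `to_sql` keeps `name` and `con` positional, and `to_clipboard` accepts no positional arguments at all.

```python
import io

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})

# pandas 2.x accepted a positional `sep` with a FutureWarning; with this
# patch the same call raises TypeError (too many positional arguments):
# df.to_csv(io.StringIO(), ";")

# Everything after the path-like first argument is now passed by keyword:
buf = io.StringIO()
df.to_csv(buf, sep=";")

# Same pattern for the other writers, e.g. `key` in to_hdf:
# df.to_hdf("store.h5", key="df", mode="w")  # needs the optional `tables` dep
```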
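Second, code that still calls the removed `pd.read_gbq` / `DataFrame.to_gbq` shims should call `pandas-gbq` directly; the deleted wrappers in `pandas/io/gbq.py` simply forwarded their arguments to `pandas_gbq.read_gbq` and `pandas_gbq.to_gbq`. A minimal sketch, assuming `pandas-gbq` is installed and authentication is configured as described at https://pandas-gbq.readthedocs.io/en/latest/api.html; the project and table names are placeholders:

```python
import pandas_gbq

# Before (removed by this patch):
#   df = pd.read_gbq(sql, project_id="my-project", dialect="standard")
#   df.to_gbq("my_dataset.my_table", project_id="my-project")

# After: the same arguments, passed straight to pandas-gbq.
sql = "SELECT name FROM my_dataset.my_table LIMIT 100"
df = pandas_gbq.read_gbq(sql, project_id="my-project", dialect="standard")
pandas_gbq.to_gbq(
    df,
    "my_dataset.my_table",
    project_id="my-project",
    if_exists="fail",  # "fail" | "replace" | "append", as with the old wrapper
)
```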