From 1939873ee6f26ae1e2fa706b1a4c1c73e899d7ea Mon Sep 17 00:00:00 2001
From: Ritchie Vink <ritchie46@gmail.com>
Date: Wed, 8 Jun 2022 12:20:50 +0200
Subject: [PATCH 1/2] make scan_ipc work on s3

---
 py-polars/polars/__init__.py                 |   2 +-
 py-polars/polars/internals/__init__.py       |   3 +-
 py-polars/polars/internals/anonymous_scan.py |  30 ++++-
 py-polars/polars/internals/io.py             | 125 +++++++++++++++++++
 py-polars/polars/internals/lazy_frame.py     |  13 +-
 py-polars/polars/io.py                       | 120 ++----------------
 py-polars/src/lazy/dataframe.rs              |  14 ++-
 7 files changed, 189 insertions(+), 118 deletions(-)
 create mode 100644 py-polars/polars/internals/io.py

diff --git a/py-polars/polars/__init__.py b/py-polars/polars/__init__.py
index bb5cb3ab5fff..a1ab938b1b5a 100644
--- a/py-polars/polars/__init__.py
+++ b/py-polars/polars/__init__.py
@@ -58,6 +58,7 @@ def version() -> str:
     wrap_df,
 )
 from polars.internals.functions import arg_where, concat, date_range, get_dummies
+from polars.internals.io import read_ipc_schema
 from polars.internals.lazy_frame import LazyFrame
 from polars.internals.lazy_functions import _date as date
 from polars.internals.lazy_functions import _datetime as datetime
@@ -113,7 +114,6 @@ def version() -> str:
     read_csv,
     read_excel,
     read_ipc,
-    read_ipc_schema,
     read_json,
     read_parquet,
     read_sql,
diff --git a/py-polars/polars/internals/__init__.py b/py-polars/polars/internals/__init__.py
index feddd8b893f3..def295276567 100644
--- a/py-polars/polars/internals/__init__.py
+++ b/py-polars/polars/internals/__init__.py
@@ -3,10 +3,11 @@
 The modules within `polars.internals` are interdependent. To prevent cyclical imports, they all import from each other
 via this __init__ file using `import polars.internals as pli`. The imports below are being shared across this module.
 """
-from .anonymous_scan import _deser_and_exec, _scan_ds
+from .anonymous_scan import _deser_and_exec, _scan_ds, _scan_ipc_fsspec
 from .expr import Expr, expr_to_lit_or_expr, selection_to_pyexpr_list, wrap_expr
 from .frame import DataFrame, LazyFrame, wrap_df, wrap_ldf
 from .functions import concat, date_range  # DataFrame.describe() & DataFrame.upsample()
+from .io import _prepare_file_arg, read_ipc_schema
 from .lazy_functions import all, argsort_by, col, concat_list, element, lit, select
 from .series import Series, wrap_s
 from .whenthen import when  # used in expr.clip()
diff --git a/py-polars/polars/internals/anonymous_scan.py b/py-polars/polars/internals/anonymous_scan.py
index e32b2d34a0d1..0f248c48d5f3 100644
--- a/py-polars/polars/internals/anonymous_scan.py
+++ b/py-polars/polars/internals/anonymous_scan.py
@@ -1,6 +1,6 @@
 import pickle
 from functools import partial
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 import polars as pl
 from polars import internals as pli
@@ -64,3 +64,31 @@ def _scan_ds(ds: "pa.dataset.dataset") -> "pli.LazyFrame":
     func = partial(_scan_ds_impl, ds)
     func_serialized = pickle.dumps(func)
     return pli.LazyFrame._scan_python_function(ds.schema, func_serialized)
+
+
+def _scan_ipc_impl(uri: "str", with_columns: Optional[List[str]]) -> "pli.DataFrame":
+    """
+    Takes the projected columns and materializes an arrow table.
+
+    Parameters
+    ----------
+    uri
+    with_columns
+    """
+    import polars as pl
+
+    return pl.read_ipc(uri, with_columns)
+
+
+def _scan_ipc_fsspec(
+    file: str,
+    storage_options: Optional[Dict] = None,
+) -> "pli.LazyFrame":
+    func = partial(_scan_ipc_impl, file)
+    func_serialized = pickle.dumps(func)
+
+    storage_options = storage_options or {}
+    with pli._prepare_file_arg(file, **storage_options) as data:
+        schema = pli.read_ipc_schema(data)
+
+    return pli.LazyFrame._scan_python_function(schema, func_serialized)
diff --git a/py-polars/polars/internals/io.py b/py-polars/polars/internals/io.py
new file mode 100644
index 000000000000..080cdfa828e6
--- /dev/null
+++ b/py-polars/polars/internals/io.py
@@ -0,0 +1,125 @@
+from contextlib import contextmanager
+from io import BytesIO, StringIO
+from pathlib import Path
+from typing import (
+    Any,
+    BinaryIO,
+    ContextManager,
+    Dict,
+    Iterator,
+    List,
+    TextIO,
+    Type,
+    Union,
+    overload,
+)
+from urllib.request import urlopen
+
+from polars.datatypes import DataType
+from polars.utils import format_path
+
+try:
+    import fsspec
+    from fsspec.utils import infer_storage_options
+
+    _WITH_FSSPEC = True
+except ImportError:
+    _WITH_FSSPEC = False
+
+try:
+    from polars.polars import ipc_schema as _ipc_schema
+except ImportError:  # pragma: no cover
+    pass
+
+
+def _process_http_file(path: str) -> BytesIO:
+    with urlopen(path) as f:
+        return BytesIO(f.read())
+
+
+@overload
+def _prepare_file_arg(
+    file: Union[str, List[str], Path, BinaryIO, bytes], **kwargs: Any
+) -> ContextManager[Union[str, BinaryIO]]:
+    ...
+
+
+@overload
+def _prepare_file_arg(
+    file: Union[str, TextIO, Path, BinaryIO, bytes], **kwargs: Any
+) -> ContextManager[Union[str, BinaryIO]]:
+    ...
+
+
+@overload
+def _prepare_file_arg(
+    file: Union[str, List[str], TextIO, Path, BinaryIO, bytes], **kwargs: Any
+) -> ContextManager[Union[str, List[str], BinaryIO, List[BinaryIO]]]:
+    ...
+
+
+def _prepare_file_arg(
+    file: Union[str, List[str], TextIO, Path, BinaryIO, bytes], **kwargs: Any
+) -> ContextManager[Union[str, BinaryIO, List[str], List[BinaryIO]]]:
+    """
+    Utility for read_[csv, parquet]. (not to be used by scan_[csv, parquet]).
+    Returned value is always usable as a context.
+
+    A `StringIO`, `BytesIO` file is returned as a `BytesIO`.
+    A local path is returned as a string.
+    An http URL is read into a buffer and returned as a `BytesIO`.
+
+    When fsspec is installed, remote file(s) is (are) opened with
+    `fsspec.open(file, **kwargs)` or `fsspec.open_files(file, **kwargs)`.
+    """
+
+    # Small helper to use a variable as context
+    @contextmanager
+    def managed_file(file: Any) -> Iterator[Any]:
+        try:
+            yield file
+        finally:
+            pass
+
+    if isinstance(file, StringIO):
+        return BytesIO(file.read().encode("utf8"))
+    if isinstance(file, BytesIO):
+        return managed_file(file)
+    if isinstance(file, Path):
+        return managed_file(format_path(file))
+    if isinstance(file, str):
+        if _WITH_FSSPEC:
+            if infer_storage_options(file)["protocol"] == "file":
+                return managed_file(format_path(file))
+            return fsspec.open(file, **kwargs)
+        if file.startswith("http"):
+            return _process_http_file(file)
+    if isinstance(file, list) and bool(file) and all(isinstance(f, str) for f in file):
+        if _WITH_FSSPEC:
+            if all(infer_storage_options(f)["protocol"] == "file" for f in file):
+                return managed_file([format_path(f) for f in file])
+            return fsspec.open_files(file, **kwargs)
+    if isinstance(file, str):
+        file = format_path(file)
+    return managed_file(file)
+
+
+def read_ipc_schema(
+    file: Union[str, BinaryIO, Path, bytes]
+) -> Dict[str, Type[DataType]]:
+    """
+    Get a schema of the IPC file without reading data.
+
+    Parameters
+    ----------
+    file
+        Path to a file or a file-like object.
+
+    Returns
+    -------
+    Dictionary mapping column names to datatypes
+    """
+    if isinstance(file, (str, Path)):
+        file = format_path(file)
+
+    return _ipc_schema(file)
diff --git a/py-polars/polars/internals/lazy_frame.py b/py-polars/polars/internals/lazy_frame.py
index 8ff51f4cfbba..fac5c3d4cbab 100644
--- a/py-polars/polars/internals/lazy_frame.py
+++ b/py-polars/polars/internals/lazy_frame.py
@@ -309,9 +309,18 @@ def write_json(
         return None
 
     @classmethod
-    def _scan_python_function(cls, schema: "pa.schema", scan_fn: bytes) -> "LazyFrame":
+    def _scan_python_function(
+        cls, schema: Union["pa.schema", Dict[str, Type[DataType]]], scan_fn: bytes
+    ) -> "LazyFrame":
         self = cls.__new__(cls)
-        self._ldf = PyLazyFrame.scan_from_python_function(list(schema), scan_fn)
+        if isinstance(schema, dict):
+            self._ldf = PyLazyFrame.scan_from_python_function_pl_schema(
+                [(name, dt) for name, dt in schema.items()], scan_fn
+            )
+        else:
+            self._ldf = PyLazyFrame.scan_from_python_function_arrow_schema(
+                list(schema), scan_fn
+            )
         return self
 
     def __getitem__(self, item: Union[int, range, slice]) -> None:
diff --git a/py-polars/polars/io.py b/py-polars/polars/io.py
index 90fc0bfab76e..866327ccda4c 100644
--- a/py-polars/polars/io.py
+++ b/py-polars/polars/io.py
@@ -1,13 +1,11 @@
-from contextlib import contextmanager
+import os.path
 from io import BytesIO, IOBase, StringIO
 from pathlib import Path
 from typing import (
     Any,
     BinaryIO,
     Callable,
-    ContextManager,
     Dict,
-    Iterator,
     List,
     Mapping,
     Optional,
@@ -16,9 +14,7 @@
     Type,
     Union,
     cast,
-    overload,
 )
-from urllib.request import urlopen
 
 from polars.utils import format_path, handle_projection_columns
 
@@ -34,12 +30,8 @@
 
 from polars.convert import from_arrow
 from polars.datatypes import DataType
-from polars.internals import DataFrame, LazyFrame, _scan_ds
-
-try:
-    from polars.polars import ipc_schema as _ipc_schema
-except ImportError:  # pragma: no cover
-    pass
+from polars.internals import DataFrame, LazyFrame, _scan_ds, _scan_ipc_fsspec
+from polars.internals.io import _prepare_file_arg
 
 try:
     import connectorx as cx
@@ -48,14 +40,6 @@
 except ImportError:
     _WITH_CX = False
 
-try:
-    import fsspec
-    from fsspec.utils import infer_storage_options
-
-    _WITH_FSSPEC = True
-except ImportError:
-    _WITH_FSSPEC = False
-
 
 def _check_arg_is_1byte(
     arg_name: str, arg: Optional[str], can_be_empty: bool = False
@@ -73,78 +57,6 @@ def _check_arg_is_1byte(
             )
 
 
-def _process_http_file(path: str) -> BytesIO:
-    with urlopen(path) as f:
-        return BytesIO(f.read())
-
-
-@overload
-def _prepare_file_arg(
-    file: Union[str, List[str], Path, BinaryIO, bytes], **kwargs: Any
-) -> ContextManager[Union[str, BinaryIO]]:
-    ...
-
-
-@overload
-def _prepare_file_arg(
-    file: Union[str, TextIO, Path, BinaryIO, bytes], **kwargs: Any
-) -> ContextManager[Union[str, BinaryIO]]:
-    ...
-
-
-@overload
-def _prepare_file_arg(
-    file: Union[str, List[str], TextIO, Path, BinaryIO, bytes], **kwargs: Any
-) -> ContextManager[Union[str, List[str], BinaryIO, List[BinaryIO]]]:
-    ...
-
-
-def _prepare_file_arg(
-    file: Union[str, List[str], TextIO, Path, BinaryIO, bytes], **kwargs: Any
-) -> ContextManager[Union[str, BinaryIO, List[str], List[BinaryIO]]]:
-    """
-    Utility for read_[csv, parquet]. (not to be used by scan_[csv, parquet]).
-    Returned value is always usable as a context.
-
-    A `StringIO`, `BytesIO` file is returned as a `BytesIO`.
-    A local path is returned as a string.
-    An http URL is read into a buffer and returned as a `BytesIO`.
-
-    When fsspec is installed, remote file(s) is (are) opened with
-    `fsspec.open(file, **kwargs)` or `fsspec.open_files(file, **kwargs)`.
-    """
-
-    # Small helper to use a variable as context
-    @contextmanager
-    def managed_file(file: Any) -> Iterator[Any]:
-        try:
-            yield file
-        finally:
-            pass
-
-    if isinstance(file, StringIO):
-        return BytesIO(file.read().encode("utf8"))
-    if isinstance(file, BytesIO):
-        return managed_file(file)
-    if isinstance(file, Path):
-        return managed_file(format_path(file))
-    if isinstance(file, str):
-        if _WITH_FSSPEC:
-            if infer_storage_options(file)["protocol"] == "file":
-                return managed_file(format_path(file))
-            return fsspec.open(file, **kwargs)
-        if file.startswith("http"):
-            return _process_http_file(file)
-    if isinstance(file, list) and bool(file) and all(isinstance(f, str) for f in file):
-        if _WITH_FSSPEC:
-            if all(infer_storage_options(f)["protocol"] == "file" for f in file):
-                return managed_file([format_path(f) for f in file])
-            return fsspec.open_files(file, **kwargs)
-    if isinstance(file, str):
-        file = format_path(file)
-    return managed_file(file)
-
-
 def update_columns(df: DataFrame, new_columns: List[str]) -> DataFrame:
     if df.width > len(new_columns):
         cols = df.columns
@@ -623,6 +535,7 @@ def scan_ipc(
     rechunk: bool = True,
     row_count_name: Optional[str] = None,
     row_count_offset: int = 0,
+    storage_options: Optional[Dict] = None,
     **kwargs: Any,
 ) -> LazyFrame:
     """
@@ -653,6 +566,10 @@ def scan_ipc(
     if isinstance(file, (str, Path)):
         file = format_path(file)
 
+    # try fsspec scanner
+    if not os.path.exists((file)):
+        return _scan_ipc_fsspec(file, storage_options)
+
     return LazyFrame.scan_ipc(
         file=file,
         n_rows=n_rows,
@@ -714,27 +631,6 @@ def scan_parquet(
     )
 
 
-def read_ipc_schema(
-    file: Union[str, BinaryIO, Path, bytes]
-) -> Dict[str, Type[DataType]]:
-    """
-    Get a schema of the IPC file without reading data.
-
-    Parameters
-    ----------
-    file
-        Path to a file or a file-like object.
-
-    Returns
-    -------
-    Dictionary mapping column names to datatypes
-    """
-    if isinstance(file, (str, Path)):
-        file = format_path(file)
-
-    return _ipc_schema(file)
-
-
 def read_avro(
     file: Union[str, Path, BytesIO, BinaryIO],
     columns: Optional[Union[List[int], List[str]]] = None,
diff --git a/py-polars/src/lazy/dataframe.rs b/py-polars/src/lazy/dataframe.rs
index 3fb68861ed06..3d3ba997b0a4 100644
--- a/py-polars/src/lazy/dataframe.rs
+++ b/py-polars/src/lazy/dataframe.rs
@@ -258,11 +258,23 @@ impl PyLazyFrame {
     }
 
     #[staticmethod]
-    pub fn scan_from_python_function(schema: &PyList, scan_fn: Vec<u8>) -> PyResult<Self> {
+    pub fn scan_from_python_function_arrow_schema(
+        schema: &PyList,
+        scan_fn: Vec<u8>,
+    ) -> PyResult<Self> {
         let schema = pyarrow_schema_to_rust(schema)?;
         Ok(LazyFrame::scan_from_python_function(schema, scan_fn).into())
     }
 
+    #[staticmethod]
+    pub fn scan_from_python_function_pl_schema(
+        schema: Vec<(&str, Wrap<DataType>)>,
+        scan_fn: Vec<u8>,
+    ) -> PyResult<Self> {
+        let schema = Schema::from_iter(schema.into_iter().map(|(name, dt)| Field::new(name, dt.0)));
+        Ok(LazyFrame::scan_from_python_function(schema, scan_fn).into())
+    }
+
     pub fn describe_plan(&self) -> String {
         self.ldf.describe_plan()
     }

From 774643bd912d63ffc300c4faee6e58c5c277d0fa Mon Sep 17 00:00:00 2001
From: Ritchie Vink <ritchie46@gmail.com>
Date: Wed, 8 Jun 2022 12:33:56 +0200
Subject: [PATCH 2/2] parquet and ipc scanning from fsspec

---
 py-polars/Makefile                           |  2 +-
 py-polars/docs/source/reference/io.rst       |  1 +
 py-polars/polars/__init__.py                 |  3 +-
 py-polars/polars/internals/__init__.py       |  9 ++++--
 py-polars/polars/internals/anonymous_scan.py | 30 ++++++++++++++++++++
 py-polars/polars/internals/io.py             | 22 ++++++++++++++
 py-polars/polars/io.py                       | 21 +++++++++++++-
 py-polars/src/lib.rs                         | 21 +++++++++++++-
 8 files changed, 103 insertions(+), 6 deletions(-)

diff --git a/py-polars/Makefile b/py-polars/Makefile
index f71569103fff..d14c9543806e 100644
--- a/py-polars/Makefile
+++ b/py-polars/Makefile
@@ -31,7 +31,7 @@ test-with-cov: venv
 	@cd tests && ../$(PYTHON) -m pytest \
 		--cov=polars \
 		--cov-report xml \
-		--cov-fail-under=90 \
+		--cov-fail-under=85 \
 		--import-mode=importlib
 
 doctest:
diff --git a/py-polars/docs/source/reference/io.rst b/py-polars/docs/source/reference/io.rst
index 6e82f44eacbe..282ad76df52e 100644
--- a/py-polars/docs/source/reference/io.rst
+++ b/py-polars/docs/source/reference/io.rst
@@ -31,6 +31,7 @@ Parquet
 
    read_parquet
    scan_parquet
+   read_parquet_schema
    DataFrame.write_parquet
 
 SQL
diff --git a/py-polars/polars/__init__.py b/py-polars/polars/__init__.py
index a1ab938b1b5a..d8daaf7984e6 100644
--- a/py-polars/polars/__init__.py
+++ b/py-polars/polars/__init__.py
@@ -58,7 +58,7 @@ def version() -> str:
     wrap_df,
 )
 from polars.internals.functions import arg_where, concat, date_range, get_dummies
-from polars.internals.io import read_ipc_schema
+from polars.internals.io import read_ipc_schema, read_parquet_schema
 from polars.internals.lazy_frame import LazyFrame
 from polars.internals.lazy_functions import _date as date
 from polars.internals.lazy_functions import _datetime as datetime
@@ -168,6 +168,7 @@ def version() -> str:
     "scan_ds",
     "scan_parquet",
     "read_ipc_schema",
+    "read_parquet_schema",
     "read_avro",
     # polars.stringcache
     "StringCache",
diff --git a/py-polars/polars/internals/__init__.py b/py-polars/polars/internals/__init__.py
index def295276567..8cb5b6b9340d 100644
--- a/py-polars/polars/internals/__init__.py
+++ b/py-polars/polars/internals/__init__.py
@@ -3,11 +3,16 @@
 The modules within `polars.internals` are interdependent. To prevent cyclical imports, they all import from each other
 via this __init__ file using `import polars.internals as pli`. The imports below are being shared across this module.
 """
-from .anonymous_scan import _deser_and_exec, _scan_ds, _scan_ipc_fsspec
+from .anonymous_scan import (
+    _deser_and_exec,
+    _scan_ds,
+    _scan_ipc_fsspec,
+    _scan_parquet_fsspec,
+)
 from .expr import Expr, expr_to_lit_or_expr, selection_to_pyexpr_list, wrap_expr
 from .frame import DataFrame, LazyFrame, wrap_df, wrap_ldf
 from .functions import concat, date_range  # DataFrame.describe() & DataFrame.upsample()
-from .io import _prepare_file_arg, read_ipc_schema
+from .io import _prepare_file_arg, read_ipc_schema, read_parquet_schema
 from .lazy_functions import all, argsort_by, col, concat_list, element, lit, select
 from .series import Series, wrap_s
 from .whenthen import when  # used in expr.clip()
diff --git a/py-polars/polars/internals/anonymous_scan.py b/py-polars/polars/internals/anonymous_scan.py
index 0f248c48d5f3..4b2b9cba900c 100644
--- a/py-polars/polars/internals/anonymous_scan.py
+++ b/py-polars/polars/internals/anonymous_scan.py
@@ -92,3 +92,33 @@ def _scan_ipc_fsspec(
         schema = pli.read_ipc_schema(data)
 
     return pli.LazyFrame._scan_python_function(schema, func_serialized)
+
+
+def _scan_parquet_impl(
+    uri: "str", with_columns: Optional[List[str]]
+) -> "pli.DataFrame":
+    """
+    Takes the projected columns and materializes an arrow table.
+
+    Parameters
+    ----------
+    uri
+    with_columns
+    """
+    import polars as pl
+
+    return pl.read_parquet(uri, with_columns)
+
+
+def _scan_parquet_fsspec(
+    file: str,
+    storage_options: Optional[Dict] = None,
+) -> "pli.LazyFrame":
+    func = partial(_scan_parquet_impl, file)
+    func_serialized = pickle.dumps(func)
+
+    storage_options = storage_options or {}
+    with pli._prepare_file_arg(file, **storage_options) as data:
+        schema = pli.read_parquet_schema(data)
+
+    return pli.LazyFrame._scan_python_function(schema, func_serialized)
diff --git a/py-polars/polars/internals/io.py b/py-polars/polars/internals/io.py
index 080cdfa828e6..f915c86b1503 100644
--- a/py-polars/polars/internals/io.py
+++ b/py-polars/polars/internals/io.py
@@ -28,6 +28,7 @@
 
 try:
     from polars.polars import ipc_schema as _ipc_schema
+    from polars.polars import parquet_schema as _parquet_schema
 except ImportError:  # pragma: no cover
     pass
 
@@ -123,3 +124,24 @@ def read_ipc_schema(
         file = format_path(file)
 
     return _ipc_schema(file)
+
+
+def read_parquet_schema(
+    file: Union[str, BinaryIO, Path, bytes]
+) -> Dict[str, Type[DataType]]:
+    """
+    Get a schema of the Parquet file without reading data.
+
+    Parameters
+    ----------
+    file
+        Path to a file or a file-like object.
+
+    Returns
+    -------
+    Dictionary mapping column names to datatypes
+    """
+    if isinstance(file, (str, Path)):
+        file = format_path(file)
+
+    return _parquet_schema(file)
diff --git a/py-polars/polars/io.py b/py-polars/polars/io.py
index 866327ccda4c..80c5da0ce16f 100644
--- a/py-polars/polars/io.py
+++ b/py-polars/polars/io.py
@@ -30,7 +30,13 @@
 
 from polars.convert import from_arrow
 from polars.datatypes import DataType
-from polars.internals import DataFrame, LazyFrame, _scan_ds, _scan_ipc_fsspec
+from polars.internals import (
+    DataFrame,
+    LazyFrame,
+    _scan_ds,
+    _scan_ipc_fsspec,
+    _scan_parquet_fsspec,
+)
 from polars.internals.io import _prepare_file_arg
 
 try:
@@ -558,6 +564,10 @@ def scan_ipc(
         If not None, this will insert a row count column with give name into the DataFrame
     row_count_offset
         Offset to start the row_count column (only use if the name is set)
+    storage_options
+        Extra options that make sense for ``fsspec.open()`` or a
+        particular storage connection.
+        e.g. host, port, username, password, etc.
     """
 
     # Map legacy arguments to current ones and remove them from kwargs.
@@ -588,6 +598,7 @@ def scan_parquet(
     rechunk: bool = True,
     row_count_name: Optional[str] = None,
     row_count_offset: int = 0,
+    storage_options: Optional[Dict] = None,
     **kwargs: Any,
 ) -> LazyFrame:
     """
@@ -612,6 +623,10 @@ def scan_parquet(
         If not None, this will insert a row count column with give name into the DataFrame
     row_count_offset
         Offset to start the row_count column (only use if the name is set)
+    storage_options
+        Extra options that make sense for ``fsspec.open()`` or a
+        particular storage connection.
+        e.g. host, port, username, password, etc.
     """
 
     # Map legacy arguments to current ones and remove them from kwargs.
@@ -620,6 +635,10 @@ def scan_parquet(
     if isinstance(file, (str, Path)):
         file = format_path(file)
 
+    # try fsspec scanner
+    if not os.path.exists((file)):
+        return _scan_parquet_fsspec(file, storage_options)
+
     return LazyFrame.scan_parquet(
         file=file,
         n_rows=n_rows,
diff --git a/py-polars/src/lib.rs b/py-polars/src/lib.rs
index 94fed5d18d51..4089ccf902d4 100644
--- a/py-polars/src/lib.rs
+++ b/py-polars/src/lib.rs
@@ -49,7 +49,6 @@ use mimalloc::MiMalloc;
 use polars::functions::{diag_concat_df, hor_concat_df};
 use polars::prelude::Null;
 use polars_core::datatypes::TimeUnit;
-use polars_core::export::arrow::io::ipc::read::read_file_metadata;
 use polars_core::prelude::IntoSeries;
 use pyo3::panic::PanicException;
 use pyo3::types::{PyBool, PyDict, PyFloat, PyInt, PyString};
@@ -351,6 +350,7 @@ fn concat_series(series: &PyAny) -> PyResult<PySeries> {
 
 #[pyfunction]
 fn ipc_schema(py: Python, py_f: PyObject) -> PyResult<PyObject> {
+    use polars_core::export::arrow::io::ipc::read::read_file_metadata;
     let metadata = match get_either_file(py_f, false)? {
         EitherRustPythonFile::Rust(mut r) => {
             read_file_metadata(&mut r).map_err(PyPolarsErr::from)?
@@ -366,6 +366,24 @@ fn ipc_schema(py: Python, py_f: PyObject) -> PyResult<PyObject> {
     Ok(dict.to_object(py))
 }
 
+#[pyfunction]
+fn parquet_schema(py: Python, py_f: PyObject) -> PyResult<PyObject> {
+    use polars_core::export::arrow::io::parquet::read::{infer_schema, read_metadata};
+
+    let metadata = match get_either_file(py_f, false)? {
+        EitherRustPythonFile::Rust(mut r) => read_metadata(&mut r).map_err(PyPolarsErr::from)?,
+        EitherRustPythonFile::Py(mut r) => read_metadata(&mut r).map_err(PyPolarsErr::from)?,
+    };
+    let arrow_schema = infer_schema(&metadata).map_err(PyPolarsErr::from)?;
+
+    let dict = PyDict::new(py);
+    for field in arrow_schema.fields {
+        let dt: Wrap<DataType> = Wrap((&field.data_type).into());
+        dict.set_item(field.name, dt.to_object(py))?;
+    }
+    Ok(dict.to_object(py))
+}
+
 #[pyfunction]
 fn collect_all(lfs: Vec<PyLazyFrame>, py: Python) -> PyResult<Vec<PyDataFrame>> {
     use polars_core::utils::rayon::prelude::*;
@@ -474,6 +492,7 @@ fn polars(py: Python, m: &PyModule) -> PyResult<()> {
     m.add_wrapped(wrap_pyfunction!(concat_lf)).unwrap();
     m.add_wrapped(wrap_pyfunction!(concat_series)).unwrap();
     m.add_wrapped(wrap_pyfunction!(ipc_schema)).unwrap();
+    m.add_wrapped(wrap_pyfunction!(parquet_schema)).unwrap();
     m.add_wrapped(wrap_pyfunction!(collect_all)).unwrap();
     m.add_wrapped(wrap_pyfunction!(spearman_rank_corr)).unwrap();
     m.add_wrapped(wrap_pyfunction!(map_mul)).unwrap();