From 72752eb754eaa45c578188cd88b47de96fdbcdb7 Mon Sep 17 00:00:00 2001 From: Tyler White <50381805+IndexSeek@users.noreply.github.com> Date: Sat, 18 Nov 2023 11:55:12 -0500 Subject: [PATCH] feat(snowflake): read_csv with https --- ibis/backends/snowflake/__init__.py | 16 ++++++++++++---- ibis/backends/snowflake/tests/test_client.py | 16 ++++++++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index e263ce30a33f..088dff1fec6f 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -17,6 +17,7 @@ import warnings from pathlib import Path from typing import TYPE_CHECKING, Any +from urllib.request import urlretrieve import pyarrow as pa import pyarrow_hotfix # noqa: F401 @@ -777,10 +778,17 @@ def read_csv( ) con.exec_driver_sql(create_infer_fmt) - # copy the local file to the stage - con.exec_driver_sql( - f"PUT 'file://{Path(path).absolute()}' @{stage} PARALLEL = {threads:d}" - ) + if path.startswith("https://"): + with tempfile.NamedTemporaryFile() as tmp: + urlretrieve(path, filename=tmp.name) + tmp.flush() + con.exec_driver_sql( + f"PUT 'file://{tmp.name}' @{stage} PARALLEL = {threads:d} AUTO_COMPRESS = TRUE" + ) + else: + con.exec_driver_sql( + f"PUT 'file://{Path(path).absolute()}' @{stage} PARALLEL = {threads:d} AUTO_COMPRESS = TRUE" + ) # handle setting up the schema in python because snowflake is # broken for csv globs: it cannot parse the result of the following diff --git a/ibis/backends/snowflake/tests/test_client.py b/ibis/backends/snowflake/tests/test_client.py index d03c21b5470b..c6bbccec1430 100644 --- a/ibis/backends/snowflake/tests/test_client.py +++ b/ibis/backends/snowflake/tests/test_client.py @@ -197,6 +197,22 @@ def test_read_csv_options(con, tmp_path): assert t.schema() == ibis.schema(dict(a="int64", b="int64")) +def test_read_csv_https(con): + t = con.read_csv( + "https://storage.googleapis.com/ibis-tutorial-data/wowah_data/locations.csv", + field_optionally_enclosed_by='"', + ) + assert t.schema() == ibis.schema( + { + "Map_ID": "int64", + "Location_Type": "string", + "Location_Name": "string", + "Game_Version": "string", + } + ) + assert t.count().execute() == 151 + + @pytest.fixture(scope="module") def json_data(): return [