Skip to content

Commit

Permalink
refactor(pyspark): remove sqlalchemy dependency from pyspark
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored and kszucs committed Feb 12, 2024
1 parent 65717f4 commit e9656fb
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 9 deletions.
24 changes: 16 additions & 8 deletions ibis/backends/pyspark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from typing import TYPE_CHECKING, Any

import pyspark
import sqlalchemy as sa
import sqlglot as sg
import sqlglot.expressions as sge
from pyspark import SparkConf
Expand Down Expand Up @@ -104,15 +103,24 @@ class Options(ibis.config.Config):

def _from_url(self, url: str, **kwargs) -> Backend:
"""Construct a PySpark backend from a URL `url`."""
url = sa.engine.make_url(url)
from urllib.parse import parse_qs, urlparse

conf = SparkConf().setAll(url.query.items())
url = urlparse(url)
query_params = parse_qs(url.query)
params = query_params.copy()

if database := url.database:
conf = conf.set(
"spark.sql.warehouse.dir",
str(Path(database).absolute()),
)
for name, value in query_params.items():
if len(value) > 1:
params[name] = value
elif len(value) == 1:
params[name] = value[0]
else:
raise com.IbisError(f"Invalid URL parameter: {name}")

conf = SparkConf().setAll(params.items())

if database := url.path[1:]:
conf = conf.set("spark.sql.warehouse.dir", str(Path(database).absolute()))

builder = SparkSession.builder.config(conf=conf)
session = builder.getOrCreate()
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ pandas = ["regex"]
polars = ["polars", "packaging"]
risingwave = ["psycopg2"]
postgres = ["psycopg2"]
pyspark = ["pyspark", "sqlalchemy", "packaging"]
pyspark = ["pyspark", "packaging"]
snowflake = ["snowflake-connector-python", "packaging"]
sqlite = ["regex", "sqlalchemy", "sqlalchemy-views"]
trino = ["trino"]
Expand Down

0 comments on commit e9656fb

Please sign in to comment.