Skip to content

Commit

Permalink
feat(sqlite): implement extract url field functions
Browse files Browse the repository at this point in the history
  • Loading branch information
mesejo authored and cpcloud committed Jul 20, 2023
1 parent 6dd04d7 commit cb1956f
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 2 deletions.
15 changes: 15 additions & 0 deletions ibis/backends/sqlite/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,14 @@ def _day_of_the_week_name(arg):
)


def _extract_query(t, op):
    """Translate a URL query-extraction operation into a SQL function call.

    The URL argument is always translated.  When ``op.key`` is ``None`` the
    one-argument ``_ibis_extract_query_no_param`` function is emitted;
    otherwise the key is translated as well and passed as the second
    argument of ``_ibis_extract_query``.
    """
    url = t.translate(op.arg)
    if op.key is None:
        return sa.func._ibis_extract_query_no_param(url)
    return sa.func._ibis_extract_query(url, t.translate(op.key))


operation_registry.update(
{
# TODO(kszucs): don't dispatch on op.arg since that should be always an
Expand Down Expand Up @@ -430,5 +438,12 @@ def _day_of_the_week_name(arg):
ops.Last: lambda t, op: t.translate(
ops.Arbitrary(op.arg, where=op.where, how="last")
),
ops.ExtractFragment: fixed_arity(sa.func._ibis_extract_fragment, 1),
ops.ExtractProtocol: fixed_arity(sa.func._ibis_extract_protocol, 1),
ops.ExtractAuthority: fixed_arity(sa.func._ibis_extract_authority, 1),
ops.ExtractPath: fixed_arity(sa.func._ibis_extract_path, 1),
ops.ExtractHost: fixed_arity(sa.func._ibis_extract_host, 1),
ops.ExtractQuery: _extract_query,
ops.ExtractUserInfo: fixed_arity(sa.func._ibis_extract_user_info, 1),
}
)
55 changes: 55 additions & 0 deletions ibis/backends/sqlite/udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import operator
from collections import defaultdict
from typing import Callable
from urllib.parse import parse_qs, urlsplit

try:
import regex as re
Expand Down Expand Up @@ -287,6 +288,60 @@ def _ibis_sqlite_e():
return math.e


@udf
def _ibis_extract_fragment(url):
    """Return the ``fragment`` attribute of ``url`` as parsed by ``urlsplit``."""
    return _extract_url_field(url, field_name="fragment")


@udf
def _ibis_extract_protocol(url):
    """Return the ``scheme`` attribute of ``url`` as parsed by ``urlsplit``."""
    return _extract_url_field(url, field_name="scheme")


@udf
def _ibis_extract_authority(url):
    """Return the ``netloc`` attribute of ``url`` as parsed by ``urlsplit``."""
    return _extract_url_field(url, field_name="netloc")


@udf
def _ibis_extract_path(url):
    """Return the ``path`` attribute of ``url`` as parsed by ``urlsplit``."""
    return _extract_url_field(url, field_name="path")


@udf
def _ibis_extract_host(url):
    """Return the ``hostname`` attribute of ``url`` as parsed by ``urlsplit``."""
    return _extract_url_field(url, field_name="hostname")


def _extract_url_field(data, field_name):
return getattr(urlsplit(data), field_name, "")


@udf
def _ibis_extract_query(url, param_name):
    """Return the query string of ``url``, or the value of one parameter.

    Parameters
    ----------
    url
        The URL to parse with ``urlsplit``.
    param_name
        Name of the query parameter to look up, or ``None`` to get the
        whole raw query string.

    Returns
    -------
    str | None
        The raw query string when ``param_name`` is ``None``.  Otherwise the
        first value of ``param_name``, or ``None`` when the parameter is
        absent (``parse_qs`` also drops parameters with blank values).
    """
    query = urlsplit(url).query
    if param_name is None:
        return query

    # A missing key must not raise: an exception here would abort the whole
    # SQL statement instead of producing NULL for this row.
    values = parse_qs(query).get(param_name)
    if not values:
        return None

    # Always return a scalar.  A repeated parameter used to be returned as a
    # Python list, which is not a value sqlite3 can hand back from a
    # user-defined function.
    return values[0]


@udf
def _ibis_extract_query_no_param(url):
    """Return the ``query`` attribute of ``url`` as parsed by ``urlsplit``."""
    return urlsplit(url).query


@udf
def _ibis_extract_user_info(url):
    """Return the user info of ``url`` formatted as ``username:password``.

    A missing (or empty) username or password is rendered as an empty
    string, so a URL without any user info yields ``":"``.
    """
    parts = urlsplit(url)
    return f"{parts.username or ''}:{parts.password or ''}"


class _ibis_sqlite_var:
def __init__(self, offset):
self.mean = 0.0
Expand Down
3 changes: 1 addition & 2 deletions ibis/backends/tests/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,7 @@ def test_substr_with_null_values(backend, alltypes, df):
id="file",
marks=[
pytest.mark.notimpl(
["pandas", "dask"], raises=com.OperationNotDefinedError
["pandas", "dask", "sqlite"], raises=com.OperationNotDefinedError
),
],
),
Expand All @@ -977,7 +977,6 @@ def test_substr_with_null_values(backend, alltypes, df):
"polars",
"postgres",
"pyspark",
"sqlite",
"druid",
"oracle",
],
Expand Down

0 comments on commit cb1956f

Please sign in to comment.