Skip to content

Commit

Permalink
fix(bigquery,impala): escape all ASCII escape sequences in string literals
Browse files Browse the repository at this point in the history

This also fixes Impala regex functions.
  • Loading branch information
tswast authored and cpcloud committed Aug 31, 2023
1 parent 507a00e commit 402f5ca
Show file tree
Hide file tree
Showing 11 changed files with 38 additions and 19 deletions.
17 changes: 16 additions & 1 deletion ibis/backends/base/sql/registry/literal.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,22 @@ def _boolean_literal_format(translator, op):


def _string_literal_format(translator, op):
return "'{}'".format(op.value.replace("'", "\\'"))
return "'{}'".format(
op.value
# Escape \ first so we don't double escape other characters.
.replace("\\", "\\\\")
# Escape ' since we're using those for the string literal.
.replace("'", "\\'")
# ASCII escape sequences that are recognized in Python:
# https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
.replace("\a", "\\a") # Bell
.replace("\b", "\\b") # Backspace
.replace("\f", "\\f") # Formfeed
.replace("\n", "\\n") # Newline / Linefeed
.replace("\r", "\\r") # Carriage return
.replace("\t", "\\t") # Tab
.replace("\v", "\\v") # Vertical tab
)


def _number_literal_format(translator, op):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT 'a\ab\bc\fd\ne\rf\tg\vh' AS `'a_x07b_x08c_x0cd_ne_rf_tg_x0bh'`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT 'a\\b\\c' AS `'a_b_c'`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT 'a\'b"c' AS `'a_'b_c'`
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT '`~!@#$%^&*()_=+-|[]{};:/?<>' AS `'_#_%_&__=+-|_:_<>'`
14 changes: 14 additions & 0 deletions ibis/backends/bigquery/tests/unit/test_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,20 @@ def test_integer_to_timestamp(case, unit, snapshot):
snapshot.assert_match(to_sql(expr), "out.sql")


@pytest.mark.parametrize(
    ("case",),
    [
        param("a\\b\\c", id="escape_backslash"),
        param("a\ab\bc\fd\ne\rf\tg\vh", id="escape_ascii_sequences"),
        param("a'b\"c", id="escape_quote"),
        param("`~!@#$%^&*()_=+-|[]{};:/?<>", id="not_escape_special_characters"),
    ],
)
def test_literal_string(case, snapshot):
    """Snapshot the SQL compiled for a string literal.

    The cases cover backslashes, ASCII control characters, quotes, and
    punctuation that is expected to pass through unescaped.
    """
    expr = ibis.literal(case)
    snapshot.assert_match(to_sql(expr), "out.sql")


@pytest.mark.parametrize(
("case", "dtype"),
[
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
regexp_extract(`string_col`, '[\d]+', 0)
regexp_extract(`string_col`, '[\\d]+', 0)
Original file line number Diff line number Diff line change
@@ -1 +1 @@
regexp_replace(`string_col`, '[\d]+', 'aaa')
regexp_replace(`string_col`, '[\\d]+', 'aaa')
Original file line number Diff line number Diff line change
@@ -1 +1 @@
regexp_like(`string_col`, '[\d]+')
regexp_like(`string_col`, '[\\d]+')
Original file line number Diff line number Diff line change
@@ -1 +1 @@
regexp_like(`string_col`, '[\d]+')
regexp_like(`string_col`, '[\\d]+')
14 changes: 0 additions & 14 deletions ibis/backends/tests/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.notimpl(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -211,7 +210,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.notimpl(["impala"], raises=AssertionError),
],
),
param(
Expand Down Expand Up @@ -240,7 +238,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -252,7 +249,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -276,7 +272,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -290,7 +285,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -304,7 +298,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -318,7 +311,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -330,7 +322,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -342,7 +333,6 @@ def test_string_col_is_unicode(alltypes, df):
["mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(["impala"], raises=AssertionError),
],
),
param(
Expand All @@ -369,10 +359,6 @@ def test_string_col_is_unicode(alltypes, df):
["mysql", "mssql", "druid", "oracle"],
raises=com.OperationNotDefinedError,
),
pytest.mark.broken(
["impala"],
raises=AssertionError,
),
],
),
param(
Expand Down

0 comments on commit 402f5ca

Please sign in to comment.