Skip to content

Commit

Permalink
fix(bigquery): correctly escape ASCII escape sequences in regex patterns
Browse files Browse the repository at this point in the history
As of the most recent fix for string literals, we were double-escaping
some characters in the BigQuery regex pattern arguments.
  • Loading branch information
tswast authored and cpcloud committed Aug 31, 2023
1 parent 402f5ca commit a455203
Showing 1 changed file with 3 additions and 8 deletions.
11 changes: 3 additions & 8 deletions ibis/backends/bigquery/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,20 +156,15 @@ def _string_find(translator, op):
)


def _translate_pattern(translator, op):
    """Translate a regex pattern operand, marking literals as raw strings.

    When ``op`` is an ``ops.Literal`` the translated text is prefixed with
    ``r`` so that BigQuery reads it as a raw string; any other operand is
    returned exactly as ``translator.translate`` produced it.

    NOTE(review): the accompanying commit message reports that patterns were
    being double-escaped after a recent string-literal fix -- presumably
    because the translator already escapes literals. Confirm before adding
    new callers of this helper.
    """
    if isinstance(op, ops.Literal):
        raw_prefix = "r"
    else:
        raw_prefix = ""
    return raw_prefix + translator.translate(op)


def _regex_search(translator, op):
    """Render a regex search as a BigQuery ``REGEXP_CONTAINS`` call.

    Parameters
    ----------
    translator
        Object whose ``translate`` method turns an operand into SQL text.
    op
        Operation node exposing ``arg`` (the string being searched) and
        ``pattern`` (the regular expression).

    Returns
    -------
    str
        ``REGEXP_CONTAINS(<arg>, <pattern>)``.
    """
    arg = translator.translate(op.arg)
    # Translate the pattern exactly once and without a raw-string prefix:
    # per the commit message, prefixing caused double-escaping (presumably
    # because the translator already escapes string literals).
    regex = translator.translate(op.pattern)
    return f"REGEXP_CONTAINS({arg}, {regex})"


def _regex_extract(translator, op):
arg = translator.translate(op.arg)
regex = _translate_pattern(translator, op.pattern)
regex = translator.translate(op.pattern)
index = translator.translate(op.index)
matches = f"REGEXP_CONTAINS({arg}, {regex})"
# non-greedily match the regex's prefix so the regex can match as much as possible
Expand All @@ -185,7 +180,7 @@ def _regex_extract(translator, op):

def _regex_replace(translator, op):
    """Render a regex replacement as a BigQuery ``REGEXP_REPLACE`` call.

    Parameters
    ----------
    translator
        Object whose ``translate`` method turns an operand into SQL text.
    op
        Operation node exposing ``arg`` (the input string), ``pattern``
        (the regular expression) and ``replacement`` (the substitute text).

    Returns
    -------
    str
        ``REGEXP_REPLACE(<arg>, <pattern>, <replacement>)``.
    """
    arg = translator.translate(op.arg)
    # Translate the pattern exactly once and without a raw-string prefix:
    # per the commit message, prefixing caused double-escaping (presumably
    # because the translator already escapes string literals).
    regex = translator.translate(op.pattern)
    replacement = translator.translate(op.replacement)
    return f"REGEXP_REPLACE({arg}, {regex}, {replacement})"

Expand Down

0 comments on commit a455203

Please sign in to comment.