Skip to content

Commit

Permalink
Minor: Add tests for StringView / character functions
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Jul 31, 2024
1 parent fa50636 commit 45117f6
Showing 1 changed file with 364 additions and 0 deletions.
364 changes: 364 additions & 0 deletions datafusion/sqllogictest/test_files/string_view.slt
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,370 @@ logical_plan
03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view]


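# Ensure string functions use native StringView implementation
# and do not fall back to Utf8 or LargeUtf8.
# Goal: no casts to Utf8 in the plans below. Tests marked "TODO file ticket"
# record plans that currently still contain such a cast (or a planning error).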

## Ensure no casts for LIKE/ILIKE
## The expected plan compares the Utf8View column directly against
## Utf8View("foo") literals; no CAST appears on either side.
query TT
EXPLAIN SELECT
column1_utf8view like 'foo' as "like",
column1_utf8view ilike 'foo' as "ilike"
FROM test;
----
logical_plan
01)Projection: test.column1_utf8view LIKE Utf8View("foo") AS like, test.column1_utf8view ILIKE Utf8View("foo") AS ilike
02)--TableScan: test projection=[column1_utf8view]



## Ensure no casts for ASCII
## TODO file ticket: the expected plan below still wraps the column in
## CAST(... AS Utf8); it records current behavior, not the goal.
query TT
EXPLAIN SELECT
ASCII(column1_utf8view) AS l
FROM test;
----
logical_plan
01)Projection: ascii(CAST(test.column1_utf8view AS Utf8)) AS l
02)--TableScan: test projection=[column1_utf8view]


## Ensure no casts for BTRIM
## TODO file ticket: the expected plan still casts the column to Utf8 and
## passes the trim characters as a Utf8 literal.
query TT
EXPLAIN SELECT
BTRIM(column1_utf8view, 'foo') AS l
FROM test;
----
logical_plan
01)Projection: btrim(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS l
02)--TableScan: test projection=[column1_utf8view]

## Ensure no casts for CHARACTER_LENGTH
## TODO file ticket: the expected plan still casts the Utf8View column to Utf8.
query TT
EXPLAIN SELECT
CHARACTER_LENGTH(column1_utf8view) AS l
FROM test;
----
logical_plan
01)Projection: character_length(CAST(test.column1_utf8view AS Utf8)) AS l
02)--TableScan: test projection=[column1_utf8view]

## Ensure no casts for CONCAT
## TODO file ticket: the expected plan still casts both Utf8View columns
## to Utf8 before concatenating.
query TT
EXPLAIN SELECT
concat(column1_utf8view, column2_utf8view) as c
FROM test;
----
logical_plan
01)Projection: concat(CAST(test.column1_utf8view AS Utf8), CAST(test.column2_utf8view AS Utf8)) AS c
02)--TableScan: test projection=[column1_utf8view, column2_utf8view]

## Ensure no casts for CONCAT_WS
## TODO file ticket: the expected plan still casts both Utf8View columns
## to Utf8; the separator is a Utf8 literal.
query TT
EXPLAIN SELECT
concat_ws(', ', column1_utf8view, column2_utf8view) as c
FROM test;
----
logical_plan
01)Projection: concat_ws(Utf8(", "), CAST(test.column1_utf8view AS Utf8), CAST(test.column2_utf8view AS Utf8)) AS c
02)--TableScan: test projection=[column1_utf8view, column2_utf8view]

## Ensure no casts for CONTAINS
## TODO file ticket: the expected plan still casts to Utf8. c1 casts
## column1 inline; for c2 the cast of column2 is computed once in an inner
## projection as __common_expr_1 and used for both arguments.
query TT
EXPLAIN SELECT
CONTAINS(column1_utf8view, 'foo') as c1,
CONTAINS(column2_utf8view, column2_utf8view) as c2
FROM test;
----
logical_plan
01)Projection: contains(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS c1, contains(__common_expr_1, __common_expr_1) AS c2
02)--Projection: CAST(test.column2_utf8view AS Utf8) AS __common_expr_1, test.column1_utf8view
03)----TableScan: test projection=[column1_utf8view, column2_utf8view]

## Ensure no casts for ENDS_WITH
## TODO file ticket: the expected plan still casts to Utf8. c1 casts
## column1 inline; for c2 the cast of column2 is computed once in an inner
## projection as __common_expr_1 and used for both arguments.
query TT
EXPLAIN SELECT
ENDS_WITH(column1_utf8view, 'foo') as c1,
ENDS_WITH(column2_utf8view, column2_utf8view) as c2
FROM test;
----
logical_plan
01)Projection: ends_with(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS c1, ends_with(__common_expr_1, __common_expr_1) AS c2
02)--Projection: CAST(test.column2_utf8view AS Utf8) AS __common_expr_1, test.column1_utf8view
03)----TableScan: test projection=[column1_utf8view, column2_utf8view]


## Ensure no casts for INITCAP
## TODO file ticket: the expected plan still casts the Utf8View column to Utf8.
query TT
EXPLAIN SELECT
INITCAP(column1_utf8view) as c
FROM test;
----
logical_plan
01)Projection: initcap(CAST(test.column1_utf8view AS Utf8)) AS c
02)--TableScan: test projection=[column1_utf8view]

## Ensure no casts for LEVENSHTEIN
## TODO file ticket: the expected plan still casts to Utf8. The cast of
## column1 is shared by c1 and c2 via __common_expr_1 in an inner
## projection; column2 is cast inline in c2.
query TT
EXPLAIN SELECT
levenshtein(column1_utf8view, 'foo') as c1,
levenshtein(column1_utf8view, column2_utf8view) as c2
FROM test;
----
logical_plan
01)Projection: levenshtein(__common_expr_1, Utf8("foo")) AS c1, levenshtein(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c2
02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
03)----TableScan: test projection=[column1_utf8view, column2_utf8view]

## Ensure no casts for LOWER
## TODO file ticket: the expected plan still casts the Utf8View column to Utf8.
query TT
EXPLAIN SELECT
LOWER(column1_utf8view) as c1
FROM test;
----
logical_plan
01)Projection: lower(CAST(test.column1_utf8view AS Utf8)) AS c1
02)--TableScan: test projection=[column1_utf8view]

## Ensure no casts for LTRIM
## TODO file ticket: the expected plan still casts the Utf8View column to Utf8.
## Only the single-argument form is exercised here.
query TT
EXPLAIN SELECT
LTRIM(column1_utf8view) as c1
FROM test;
----
logical_plan
01)Projection: ltrim(CAST(test.column1_utf8view AS Utf8)) AS c1
02)--TableScan: test projection=[column1_utf8view]

## Ensure no casts for LPAD
## TODO file ticket: the expected plan still casts the Utf8View column to
## Utf8; the length is an Int64 literal and the fill string a Utf8 literal.
query TT
EXPLAIN SELECT
LPAD(column1_utf8view, 12, ' ') as c1
FROM test;
----
logical_plan
01)Projection: lpad(CAST(test.column1_utf8view AS Utf8), Int64(12), Utf8(" ")) AS c1
02)--TableScan: test projection=[column1_utf8view]


## Ensure no casts for OCTET_LENGTH
## TODO file ticket: the expected plan still casts the Utf8View column to Utf8.
query TT
EXPLAIN SELECT
OCTET_LENGTH(column1_utf8view) as c1
FROM test;
----
logical_plan
01)Projection: octet_length(CAST(test.column1_utf8view AS Utf8)) AS c1
02)--TableScan: test projection=[column1_utf8view]

## Ensure no casts for OVERLAY
## TODO file ticket: the expected plan still casts the Utf8View column to
## Utf8. The PLACING ... FROM syntax shows up in the plan as a plain
## three-argument overlay(...) call.
query TT
EXPLAIN SELECT
OVERLAY(column1_utf8view PLACING 'foo' FROM 2 ) as c1
FROM test;
----
logical_plan
01)Projection: overlay(CAST(test.column1_utf8view AS Utf8), Utf8("foo"), Int64(2)) AS c1
02)--TableScan: test projection=[column1_utf8view]

## Ensure no casts for REGEXP_LIKE
## This record currently asserts a planning error rather than a plan: the
## expected message says regexp_like can only accept strings and got Utf8View.
query error DataFusion error: Error during planning: The regexp_like function can only accept strings\. Got Utf8View
EXPLAIN SELECT
REGEXP_LIKE(column1_utf8view, '^https?://(?:www\.)?([^/]+)/.*$') AS k
FROM test;

## Ensure no casts for REGEXP_MATCH
## This record currently asserts a planning error rather than a plan: the
## expected message says regexp_match can only accept strings and got Utf8View.
query error DataFusion error: Error during planning: The regexp_match function can only accept strings\. Got Utf8View
EXPLAIN SELECT
REGEXP_MATCH(column1_utf8view, '^https?://(?:www\.)?([^/]+)/.*$') AS k
FROM test;

## Ensure no casts for REGEXP_REPLACE
## The expected plan passes the Utf8View column without a CAST; the pattern
## and replacement appear as Utf8 literals.
query TT
EXPLAIN SELECT
REGEXP_REPLACE(column1_utf8view, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k
FROM test;
----
logical_plan
01)Projection: regexp_replace(test.column1_utf8view, Utf8("^https?://(?:www\.)?([^/]+)/.*$"), Utf8("\1")) AS k
02)--TableScan: test projection=[column1_utf8view]


## Ensure no casts for REPEAT
## TODO file ticket: the expected plan still casts the Utf8View column to Utf8.
query TT
EXPLAIN SELECT
REPEAT(column1_utf8view, 2) as c1
FROM test;
----
logical_plan
01)Projection: repeat(CAST(test.column1_utf8view AS Utf8), Int64(2)) AS c1
02)--TableScan: test projection=[column1_utf8view]

## Ensure no casts for REPLACE
## TODO file ticket: the expected plan still casts to Utf8. The cast of
## column1 is shared by c1 and c2 via __common_expr_1 in an inner
## projection; column2 is cast inline in c2.
query TT
EXPLAIN SELECT
REPLACE(column1_utf8view, 'foo', 'bar') as c1,
REPLACE(column1_utf8view, column2_utf8view, 'bar') as c2
FROM test;
----
logical_plan
01)Projection: replace(__common_expr_1, Utf8("foo"), Utf8("bar")) AS c1, replace(__common_expr_1, CAST(test.column2_utf8view AS Utf8), Utf8("bar")) AS c2
02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
03)----TableScan: test projection=[column1_utf8view, column2_utf8view]

## Ensure no casts for REVERSE
## TODO file ticket: the expected plan still casts the Utf8View column to Utf8.
query TT
EXPLAIN SELECT
REVERSE(column1_utf8view) as c1
FROM test;
----
logical_plan
01)Projection: reverse(CAST(test.column1_utf8view AS Utf8)) AS c1
02)--TableScan: test projection=[column1_utf8view]

## Ensure no casts for RTRIM (one-argument form and literal trim characters)
## TODO file ticket: the expected plan still casts the column to Utf8; the
## cast is computed once as __common_expr_1 and used by both c1 and c2.
query TT
EXPLAIN SELECT
RTRIM(column1_utf8view) as c1,
RTRIM(column1_utf8view, 'foo') as c2
FROM test;
----
logical_plan
01)Projection: rtrim(__common_expr_1) AS c1, rtrim(__common_expr_1, Utf8("foo")) AS c2
02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1
03)----TableScan: test projection=[column1_utf8view]

## Ensure no casts for RIGHT
## TODO file ticket: the expected plan still casts the Utf8View column to Utf8.
query TT
EXPLAIN SELECT
RIGHT(column1_utf8view, 3) as c2
FROM test;
----
logical_plan
01)Projection: right(CAST(test.column1_utf8view AS Utf8), Int64(3)) AS c2
02)--TableScan: test projection=[column1_utf8view]

## Ensure no casts for RPAD
## TODO file ticket: the expected plan still casts to Utf8. Covers the
## two-argument form (c1) and a fill string taken from a second Utf8View
## column (c2); the cast of column1 is shared via __common_expr_1.
query TT
EXPLAIN SELECT
RPAD(column1_utf8view, 1) as c1,
RPAD(column1_utf8view, 2, column2_utf8view) as c2
FROM test;
----
logical_plan
01)Projection: rpad(__common_expr_1, Int64(1)) AS c1, rpad(__common_expr_1, Int64(2), CAST(test.column2_utf8view AS Utf8)) AS c2
02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
03)----TableScan: test projection=[column1_utf8view, column2_utf8view]


## Ensure no casts for RTRIM (trim characters taken from a Utf8View column)
## TODO file ticket: the expected plan still casts to Utf8. The cast of
## column1 is shared by c and c1 via __common_expr_1; column2 is cast inline.
query TT
EXPLAIN SELECT
RTRIM(column1_utf8view) as c,
RTRIM(column1_utf8view, column2_utf8view) as c1
FROM test;
----
logical_plan
01)Projection: rtrim(__common_expr_1) AS c, rtrim(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c1
02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
03)----TableScan: test projection=[column1_utf8view, column2_utf8view]

## Ensure no casts for SPLIT_PART
## TODO file ticket: the expected plan still casts the Utf8View column to
## Utf8; the delimiter is a Utf8 literal and the index an Int64 literal.
query TT
EXPLAIN SELECT
SPLIT_PART(column1_utf8view, 'f', 1) as c
FROM test;
----
logical_plan
01)Projection: split_part(CAST(test.column1_utf8view AS Utf8), Utf8("f"), Int64(1)) AS c
02)--TableScan: test projection=[column1_utf8view]

## Ensure no casts for STRPOS
## TODO file ticket: the expected plan still casts to Utf8. The cast of
## column1 is shared by c and c2 via __common_expr_1; column2 is cast inline.
query TT
EXPLAIN SELECT
STRPOS(column1_utf8view, 'f') as c,
STRPOS(column1_utf8view, column2_utf8view) as c2
FROM test;
----
logical_plan
01)Projection: strpos(__common_expr_1, Utf8("f")) AS c, strpos(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c2
02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
03)----TableScan: test projection=[column1_utf8view, column2_utf8view]

## Ensure no casts for SUBSTR
## TODO file ticket: the expected plan still casts the column to Utf8.
## Covers the two- and three-argument forms; both use the single cast
## exposed as __common_expr_1.
query TT
EXPLAIN SELECT
SUBSTR(column1_utf8view, 1) as c,
SUBSTR(column1_utf8view, 1 ,2) as c2
FROM test;
----
logical_plan
01)Projection: substr(__common_expr_1, Int64(1)) AS c, substr(__common_expr_1, Int64(1), Int64(2)) AS c2
02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1
03)----TableScan: test projection=[column1_utf8view]

## Ensure no casts for STARTS_WITH
## TODO file ticket: the expected plan still casts to Utf8. The cast of
## column1 is shared by c and c2 via __common_expr_1; column2 is cast inline.
query TT
EXPLAIN SELECT
STARTS_WITH(column1_utf8view, 'foo') as c,
STARTS_WITH(column1_utf8view, column2_utf8view) as c2
FROM test;
----
logical_plan
01)Projection: starts_with(__common_expr_1, Utf8("foo")) AS c, starts_with(__common_expr_1, CAST(test.column2_utf8view AS Utf8)) AS c2
02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column2_utf8view
03)----TableScan: test projection=[column1_utf8view, column2_utf8view]

## Ensure no casts for TRANSLATE
## TODO file ticket: the expected plan still casts the Utf8View column to
## Utf8; the from/to character sets are Utf8 literals.
query TT
EXPLAIN SELECT
TRANSLATE(column1_utf8view, 'foo', 'bar') as c
FROM test;
----
logical_plan
01)Projection: translate(CAST(test.column1_utf8view AS Utf8), Utf8("foo"), Utf8("bar")) AS c
02)--TableScan: test projection=[column1_utf8view]

## Ensure no casts for FIND_IN_SET
## TODO file ticket: the expected plan still casts the Utf8View column to
## Utf8; the comma-separated list is a Utf8 literal.
query TT
EXPLAIN SELECT
FIND_IN_SET(column1_utf8view, 'a,b,c,d') as c
FROM test;
----
logical_plan
01)Projection: find_in_set(CAST(test.column1_utf8view AS Utf8), Utf8("a,b,c,d")) AS c
02)--TableScan: test projection=[column1_utf8view]




# Clean up: drop the `test` table queried by the EXPLAIN tests above.
statement ok
drop table test;

Expand Down

0 comments on commit 45117f6

Please sign in to comment.