From b61a76f7ca03d136af803ec9f34bd8dbee1fb6ab Mon Sep 17 00:00:00 2001 From: Dehowe Feng Date: Wed, 13 Dec 2023 12:57:43 +0800 Subject: [PATCH 1/2] Add grammar rules for exists subquery Add grammar rule for exists subquery. EXISTS subquery can be used with UNION and does not have to feature a RETURN. Regression tests not yet added. --- src/backend/parser/cypher_gram.y | 88 ++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index 673e06836..5b6d2283f 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -98,6 +98,9 @@ reading_clause_list updating_clause_list_0 updating_clause_list_1 %type reading_clause updating_clause +%type subquery_stmt subquery_stmt_no_return single_subquery + single_subquery_no_return subquery_part_init + /* RETURN and WITH clause */ %type return return_item sort_item skip_opt limit_opt with %type return_item_list order_by_opt sort_item_list @@ -156,6 +159,8 @@ %type expr_list expr_list_opt map_keyval_list_opt map_keyval_list %type property_value +%type expr_subquery + /* names */ %type property_key_name var_name var_name_opt label_name %type symbolic_name schema_name @@ -607,6 +612,53 @@ updating_clause: | merge ; +subquery_stmt: + single_subquery + { + $$ = $1; + } + | subquery_stmt UNION all_or_distinct subquery_stmt + { + $$ = list_make1(make_set_op(SETOP_UNION, $3, $1, $4)); + } + ; + +subquery_stmt_no_return: + single_subquery_no_return + { + $$ = $1; + } + | subquery_stmt_no_return UNION all_or_distinct subquery_stmt_no_return + { + $$ = list_make1(make_set_op(SETOP_UNION, $3, $1, $4)); + } + ; + +single_subquery: + subquery_part_init reading_clause_list return + { + $$ = list_concat($1, lappend($2, $3)); + } + ; + +single_subquery_no_return: + subquery_part_init reading_clause_list + { + $$ = list_concat($1, $2); + } + ; + +subquery_part_init: + /* empty */ + { + $$ = NIL; + } + | subquery_part_init 
reading_clause_list with + { + $$ = lappend(list_concat($1, $2), $3); + } + ; + cypher_varlen_opt: '*' cypher_range_opt { @@ -1806,6 +1858,41 @@ expr_func_subexpr: } ; +expr_subquery: + EXISTS '{' anonymous_path '}' + { + cypher_sub_pattern *sub; + SubLink *n; + + sub = make_ag_node(cypher_sub_pattern); + sub->kind = CSP_EXISTS; + sub->pattern = list_make1($3); + + n = makeNode(SubLink); + n->subLinkType = EXISTS_SUBLINK; + n->subLinkId = 0; + n->testexpr = NULL; + n->operName = NIL; + n->subselect = (Node *) sub; + n->location = @1; + $$ = (Node *)node_to_agtype((Node *)n, "boolean", @1); + } + | EXISTS '{' subquery_stmt '}' + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("EXISTS subquery not yet implemented"), + ag_scanner_errposition(@1, scanner))); + } + | EXISTS '{' subquery_stmt_no_return '}' + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("EXISTS subquery not yet implemented"), + ag_scanner_errposition(@1, scanner))); + } + ; + property_value: expr_var '.' property_key_name { @@ -1839,6 +1926,7 @@ expr_atom: | expr_case | expr_var | expr_func + | expr_subquery ; expr_literal: From d18655f2b8cefdfbbd6f9d88bbdae471931ee447 Mon Sep 17 00:00:00 2001 From: Dehowe Feng Date: Tue, 6 Feb 2024 18:03:38 +0800 Subject: [PATCH 2/2] Implement EXISTS Subquery Implements a naive version of EXISTS subquery that evaluates the underlying subquery. Add regression tests related to EXISTS TODO: Implement logic to allow the underlying subquery to constrain the queries in the outer scope. Also fixes a minor typo. 
--- Makefile | 1 + regress/expected/cypher_subquery.out | 360 +++++++++++++++++++++++++ regress/sql/cypher_subquery.sql | 201 ++++++++++++++ src/backend/nodes/ag_nodes.c | 2 + src/backend/nodes/cypher_outfuncs.c | 9 + src/backend/parser/cypher_analyze.c | 1 + src/backend/parser/cypher_clause.c | 85 +++++- src/backend/parser/cypher_gram.y | 83 ++++-- src/backend/parser/cypher_parse_node.c | 3 +- src/include/nodes/ag_nodes.h | 3 +- src/include/nodes/cypher_nodes.h | 9 +- src/include/nodes/cypher_outfuncs.h | 3 +- src/include/parser/cypher_parse_node.h | 1 + 13 files changed, 738 insertions(+), 23 deletions(-) create mode 100644 regress/expected/cypher_subquery.out create mode 100644 regress/sql/cypher_subquery.sql diff --git a/Makefile b/Makefile index 1a176af8e..ef97774bc 100644 --- a/Makefile +++ b/Makefile @@ -102,6 +102,7 @@ REGRESS = scan \ cypher_union \ cypher_call \ cypher_merge \ + cypher_subquery \ age_global_graph \ age_load \ index \ diff --git a/regress/expected/cypher_subquery.out b/regress/expected/cypher_subquery.out new file mode 100644 index 000000000..1b21319b3 --- /dev/null +++ b/regress/expected/cypher_subquery.out @@ -0,0 +1,360 @@ +LOAD 'age'; +SET search_path TO ag_catalog; +SELECT * FROM create_graph('exists_subquery'); +NOTICE: graph "exists_subquery" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('exists_subquery', $$ + CREATE (:person {name: "Briggite", age: 32})-[:knows]->(:person {name: "Takeshi", age: 28}), + (:person {name: "Faye", age: 25})-[:knows]->(:person {name: "Tony", age: 34})-[:loved]->(:person {name : "Valerie", age: 33}), + (:person {name: "Calvin", age: 6})-[:knows]->(:pet {name: "Hobbes"}), + (:person {name: "Charlie", age: 8})-[:knows]->(:pet {name : "Snoopy"}) + $$) AS (result agtype); + result +-------- +(0 rows) + +SELECT * FROM cypher('exists_subquery', $$ MATCH (a) RETURN (a) $$) AS (result agtype); + result 
+--------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "person", "properties": {"age": 32, "name": "Briggite"}}::vertex + {"id": 844424930131970, "label": "person", "properties": {"age": 28, "name": "Takeshi"}}::vertex + {"id": 844424930131971, "label": "person", "properties": {"age": 25, "name": "Faye"}}::vertex + {"id": 844424930131972, "label": "person", "properties": {"age": 34, "name": "Tony"}}::vertex + {"id": 844424930131973, "label": "person", "properties": {"age": 33, "name": "Valerie"}}::vertex + {"id": 844424930131974, "label": "person", "properties": {"age": 6, "name": "Calvin"}}::vertex + {"id": 844424930131975, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex + {"id": 1688849860263937, "label": "pet", "properties": {"name": "Hobbes"}}::vertex + {"id": 1688849860263938, "label": "pet", "properties": {"name": "Snoopy"}}::vertex +(9 rows) + +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS {(a:person)-[]->(:pet)} + RETURN (a) $$) AS (result agtype); + result +------------------------------------------------------------------------------------------------- + {"id": 844424930131974, "label": "person", "properties": {"age": 6, "name": "Calvin"}}::vertex + {"id": 844424930131975, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex +(2 rows) + +--trying to use b when not defined, should fail +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS {(a:person)-[]->(b:pet)} + RETURN (a) $$) AS (result agtype); +ERROR: variable `b` does not exist +LINE 2: WHERE EXISTS {(a:person)-[]->(b:pet)} + ^ +--query inside +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. 
the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS {MATCH (a:person)-[]->(b:pet) RETURN b} + RETURN (a) $$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "person", "properties": {"age": 32, "name": "Briggite"}}::vertex + {"id": 844424930131970, "label": "person", "properties": {"age": 28, "name": "Takeshi"}}::vertex + {"id": 844424930131971, "label": "person", "properties": {"age": 25, "name": "Faye"}}::vertex + {"id": 844424930131972, "label": "person", "properties": {"age": 34, "name": "Tony"}}::vertex + {"id": 844424930131973, "label": "person", "properties": {"age": 33, "name": "Valerie"}}::vertex + {"id": 844424930131974, "label": "person", "properties": {"age": 6, "name": "Calvin"}}::vertex + {"id": 844424930131975, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex +(7 rows) + +--repeat variable in match +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. 
the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a:person) + WHERE a.name = 'Takeshi' + RETURN a + } + RETURN (a) $$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "person", "properties": {"age": 32, "name": "Briggite"}}::vertex + {"id": 844424930131970, "label": "person", "properties": {"age": 28, "name": "Takeshi"}}::vertex + {"id": 844424930131971, "label": "person", "properties": {"age": 25, "name": "Faye"}}::vertex + {"id": 844424930131972, "label": "person", "properties": {"age": 34, "name": "Tony"}}::vertex + {"id": 844424930131973, "label": "person", "properties": {"age": 33, "name": "Valerie"}}::vertex + {"id": 844424930131974, "label": "person", "properties": {"age": 6, "name": "Calvin"}}::vertex + {"id": 844424930131975, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex +(7 rows) + +--query inside, with WHERE +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS {MATCH (a:person)-[]->(b:pet) + WHERE b.name = 'Briggite' + RETURN b} + RETURN (a) $$) AS (result agtype); + result +-------- +(0 rows) + +--no return +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. 
the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS {MATCH (a:person)-[]->(b:pet) + WHERE a.name = 'Calvin'} + RETURN (a) $$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "person", "properties": {"age": 32, "name": "Briggite"}}::vertex + {"id": 844424930131970, "label": "person", "properties": {"age": 28, "name": "Takeshi"}}::vertex + {"id": 844424930131971, "label": "person", "properties": {"age": 25, "name": "Faye"}}::vertex + {"id": 844424930131972, "label": "person", "properties": {"age": 34, "name": "Tony"}}::vertex + {"id": 844424930131973, "label": "person", "properties": {"age": 33, "name": "Valerie"}}::vertex + {"id": 844424930131974, "label": "person", "properties": {"age": 6, "name": "Calvin"}}::vertex + {"id": 844424930131975, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex +(7 rows) + +--union +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. 
the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a:person)-[]->(b:pet) + WHERE b.name = 'Hobbes' + RETURN b + UNION + MATCH (c:person)-[]->(d:person) + RETURN c + } + RETURN (a) $$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "person", "properties": {"age": 32, "name": "Briggite"}}::vertex + {"id": 844424930131970, "label": "person", "properties": {"age": 28, "name": "Takeshi"}}::vertex + {"id": 844424930131971, "label": "person", "properties": {"age": 25, "name": "Faye"}}::vertex + {"id": 844424930131972, "label": "person", "properties": {"age": 34, "name": "Tony"}}::vertex + {"id": 844424930131973, "label": "person", "properties": {"age": 33, "name": "Valerie"}}::vertex + {"id": 844424930131974, "label": "person", "properties": {"age": 6, "name": "Calvin"}}::vertex + {"id": 844424930131975, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex +(7 rows) + +-- union, mismatched var, should fail +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a:person)-[]->(b:pet) + WHERE b.name = 'Snoopy' + RETURN c + UNION + MATCH (c:person)-[]->(d:person) + RETURN c + } + RETURN (a) $$) AS (result agtype); +ERROR: could not find rte for c +LINE 5: RETURN c + ^ +--union, no returns +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a:person)-[]->(b:pet) + WHERE a.name = 'Charlie' + UNION + MATCH (c:person)-[]->(d:person) + } + RETURN (a) $$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "person", "properties": {"age": 32, "name": "Briggite"}}::vertex + {"id": 844424930131970, "label": "person", 
"properties": {"age": 28, "name": "Takeshi"}}::vertex + {"id": 844424930131971, "label": "person", "properties": {"age": 25, "name": "Faye"}}::vertex + {"id": 844424930131972, "label": "person", "properties": {"age": 34, "name": "Tony"}}::vertex + {"id": 844424930131973, "label": "person", "properties": {"age": 33, "name": "Valerie"}}::vertex + {"id": 844424930131974, "label": "person", "properties": {"age": 6, "name": "Calvin"}}::vertex + {"id": 844424930131975, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex +(7 rows) + +--union, mismatched returns, should fail +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a:person)-[]->(b:pet) + WHERE a.name = 'Faye' + RETURN a + UNION + MATCH (c:person)-[]->(d:person) + } + RETURN (a) $$) AS (result agtype); +ERROR: syntax error at or near "}" +LINE 8: } + ^ +--nesting +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (b:person) + WHERE EXISTS { + MATCH (c:person) + WHERE c.name = 'Takeshi' + RETURN c + } + } + RETURN (a) $$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "person", "properties": {"age": 32, "name": "Briggite"}}::vertex + {"id": 844424930131970, "label": "person", "properties": {"age": 28, "name": "Takeshi"}}::vertex + {"id": 844424930131971, "label": "person", "properties": {"age": 25, "name": "Faye"}}::vertex + {"id": 844424930131972, "label": "person", "properties": {"age": 34, "name": "Tony"}}::vertex + {"id": 844424930131973, "label": "person", "properties": {"age": 33, "name": "Valerie"}}::vertex + {"id": 844424930131974, "label": "person", "properties": {"age": 6, "name": 
"Calvin"}}::vertex + {"id": 844424930131975, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex +(7 rows) + +--nesting, accessing var in outer scope +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (b:person) + WHERE EXISTS { + MATCH (c:person) + WHERE b = c + RETURN c + } + } + RETURN (a) $$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "person", "properties": {"age": 32, "name": "Briggite"}}::vertex + {"id": 844424930131970, "label": "person", "properties": {"age": 28, "name": "Takeshi"}}::vertex + {"id": 844424930131971, "label": "person", "properties": {"age": 25, "name": "Faye"}}::vertex + {"id": 844424930131972, "label": "person", "properties": {"age": 34, "name": "Tony"}}::vertex + {"id": 844424930131973, "label": "person", "properties": {"age": 33, "name": "Valerie"}}::vertex + {"id": 844424930131974, "label": "person", "properties": {"age": 6, "name": "Calvin"}}::vertex + {"id": 844424930131975, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex +(7 rows) + +--nesting, accessing indirection in outer scope +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. 
the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (b:person) + WHERE EXISTS { + MATCH (c:person) + WHERE b.name = 'Takeshi' + RETURN c + } + } + RETURN (a) $$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "person", "properties": {"age": 32, "name": "Briggite"}}::vertex + {"id": 844424930131970, "label": "person", "properties": {"age": 28, "name": "Takeshi"}}::vertex + {"id": 844424930131971, "label": "person", "properties": {"age": 25, "name": "Faye"}}::vertex + {"id": 844424930131972, "label": "person", "properties": {"age": 34, "name": "Tony"}}::vertex + {"id": 844424930131973, "label": "person", "properties": {"age": 33, "name": "Valerie"}}::vertex + {"id": 844424930131974, "label": "person", "properties": {"age": 6, "name": "Calvin"}}::vertex + {"id": 844424930131975, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex +(7 rows) + +--nesting, accessing var 2+ levels up +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. 
the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (b:person) + WHERE EXISTS { + MATCH (c:person) + WHERE a.name = 'Takeshi' + RETURN c + } + } + RETURN (a) $$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "person", "properties": {"age": 32, "name": "Briggite"}}::vertex + {"id": 844424930131970, "label": "person", "properties": {"age": 28, "name": "Takeshi"}}::vertex + {"id": 844424930131971, "label": "person", "properties": {"age": 25, "name": "Faye"}}::vertex + {"id": 844424930131972, "label": "person", "properties": {"age": 34, "name": "Tony"}}::vertex + {"id": 844424930131973, "label": "person", "properties": {"age": 33, "name": "Valerie"}}::vertex + {"id": 844424930131974, "label": "person", "properties": {"age": 6, "name": "Calvin"}}::vertex + {"id": 844424930131975, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex +(7 rows) + +--nesting, accessing indirection 2+ levels up +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. 
the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (b:person) + WHERE EXISTS { + MATCH (c:person) + WHERE a = b + RETURN c + } + } + RETURN (a) $$) AS (result agtype); + result +--------------------------------------------------------------------------------------------------- + {"id": 844424930131969, "label": "person", "properties": {"age": 32, "name": "Briggite"}}::vertex + {"id": 844424930131970, "label": "person", "properties": {"age": 28, "name": "Takeshi"}}::vertex + {"id": 844424930131971, "label": "person", "properties": {"age": 25, "name": "Faye"}}::vertex + {"id": 844424930131972, "label": "person", "properties": {"age": 34, "name": "Tony"}}::vertex + {"id": 844424930131973, "label": "person", "properties": {"age": 33, "name": "Valerie"}}::vertex + {"id": 844424930131974, "label": "person", "properties": {"age": 6, "name": "Calvin"}}::vertex + {"id": 844424930131975, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex +(7 rows) + +--EXISTS outside of WHERE +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + RETURN a, EXISTS {(a:person)-[]->(:pet)} + $$) AS (a agtype, exists agtype); + a | exists +---------------------------------------------------------------------------------------------------+-------- + {"id": 844424930131969, "label": "person", "properties": {"age": 32, "name": "Briggite"}}::vertex | false + {"id": 844424930131970, "label": "person", "properties": {"age": 28, "name": "Takeshi"}}::vertex | false + {"id": 844424930131971, "label": "person", "properties": {"age": 25, "name": "Faye"}}::vertex | false + {"id": 844424930131972, "label": "person", "properties": {"age": 34, "name": "Tony"}}::vertex | false + {"id": 844424930131973, "label": "person", "properties": {"age": 33, "name": "Valerie"}}::vertex | false + {"id": 844424930131974, "label": "person", 
"properties": {"age": 6, "name": "Calvin"}}::vertex | true + {"id": 844424930131975, "label": "person", "properties": {"age": 8, "name": "Charlie"}}::vertex | true +(7 rows) + +--Var doesnt exist in outside scope, should fail +SELECT * FROM cypher('exists_subquery', $$ RETURN 1, + EXISTS { + MATCH (b:person)-[]->(:pet) + RETURN a + } + $$) AS (a agtype, exists agtype); +ERROR: could not find rte for a +LINE 4: RETURN a + ^ +-- +-- Cleanup +-- +SELECT * FROM drop_graph('exists_subquery', true); +NOTICE: drop cascades to 6 other objects +DETAIL: drop cascades to table exists_subquery._ag_label_vertex +drop cascades to table exists_subquery._ag_label_edge +drop cascades to table exists_subquery.person +drop cascades to table exists_subquery.knows +drop cascades to table exists_subquery.loved +drop cascades to table exists_subquery.pet +NOTICE: graph "exists_subquery" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- End of tests +-- diff --git a/regress/sql/cypher_subquery.sql b/regress/sql/cypher_subquery.sql new file mode 100644 index 000000000..efc90295d --- /dev/null +++ b/regress/sql/cypher_subquery.sql @@ -0,0 +1,201 @@ +LOAD 'age'; +SET search_path TO ag_catalog; + +SELECT * FROM create_graph('exists_subquery'); + +SELECT * FROM cypher('exists_subquery', $$ + CREATE (:person {name: "Briggite", age: 32})-[:knows]->(:person {name: "Takeshi", age: 28}), + (:person {name: "Faye", age: 25})-[:knows]->(:person {name: "Tony", age: 34})-[:loved]->(:person {name : "Valerie", age: 33}), + (:person {name: "Calvin", age: 6})-[:knows]->(:pet {name: "Hobbes"}), + (:person {name: "Charlie", age: 8})-[:knows]->(:pet {name : "Snoopy"}) + $$) AS (result agtype); + +SELECT * FROM cypher('exists_subquery', $$ MATCH (a) RETURN (a) $$) AS (result agtype); + +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS {(a:person)-[]->(:pet)} + RETURN (a) $$) AS (result agtype); +--trying to use b when not defined, should fail +SELECT * FROM 
cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS {(a:person)-[]->(b:pet)} + RETURN (a) $$) AS (result agtype); +--query inside +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS {MATCH (a:person)-[]->(b:pet) RETURN b} + RETURN (a) $$) AS (result agtype); + +--repeat variable in match +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a:person) + WHERE a.name = 'Takeshi' + RETURN a + } + RETURN (a) $$) AS (result agtype); +--query inside, with WHERE +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS {MATCH (a:person)-[]->(b:pet) + WHERE b.name = 'Briggite' + RETURN b} + RETURN (a) $$) AS (result agtype); + + +--no return +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS {MATCH (a:person)-[]->(b:pet) + WHERE a.name = 'Calvin'} + RETURN (a) $$) AS (result agtype); + +--union +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. 
the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a:person)-[]->(b:pet) + WHERE b.name = 'Hobbes' + RETURN b + UNION + MATCH (c:person)-[]->(d:person) + RETURN c + } + RETURN (a) $$) AS (result agtype); + +-- union, mismatched var, should fail +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a:person)-[]->(b:pet) + WHERE b.name = 'Snoopy' + RETURN c + UNION + MATCH (c:person)-[]->(d:person) + RETURN c + } + RETURN (a) $$) AS (result agtype); + +--union, no returns +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a:person)-[]->(b:pet) + WHERE a.name = 'Charlie' + UNION + MATCH (c:person)-[]->(d:person) + } + RETURN (a) $$) AS (result agtype); + +--union, mismatched returns, should fail +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (a:person)-[]->(b:pet) + WHERE a.name = 'Faye' + RETURN a + UNION + MATCH (c:person)-[]->(d:person) + } + RETURN (a) $$) AS (result agtype); + +--nesting +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (b:person) + WHERE EXISTS { + MATCH (c:person) + WHERE c.name = 'Takeshi' + RETURN c + } + } + RETURN (a) $$) AS (result agtype); + +--nesting, accessing var in outer scope +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. 
the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (b:person) + WHERE EXISTS { + MATCH (c:person) + WHERE b = c + RETURN c + } + } + RETURN (a) $$) AS (result agtype); + +--nesting, accessing indirection in outer scope +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (b:person) + WHERE EXISTS { + MATCH (c:person) + WHERE b.name = 'Takeshi' + RETURN c + } + } + RETURN (a) $$) AS (result agtype); + +--nesting, accessing var 2+ levels up +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (b:person) + WHERE EXISTS { + MATCH (c:person) + WHERE a.name = 'Takeshi' + RETURN c + } + } + RETURN (a) $$) AS (result agtype); + +--nesting, accessing indirection 2+ levels up +--EXISTS subquery is currently implemented naively, without constraints in the +--subquery. 
the results of this regression test may change upon implementation +--TODO: implement inner subquery constraints +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + WHERE EXISTS { + MATCH (b:person) + WHERE EXISTS { + MATCH (c:person) + WHERE a = b + RETURN c + } + } + RETURN (a) $$) AS (result agtype); + +--EXISTS outside of WHERE +SELECT * FROM cypher('exists_subquery', $$ MATCH (a:person) + RETURN a, EXISTS {(a:person)-[]->(:pet)} + $$) AS (a agtype, exists agtype); + +--Var doesnt exist in outside scope, should fail +SELECT * FROM cypher('exists_subquery', $$ RETURN 1, + EXISTS { + MATCH (b:person)-[]->(:pet) + RETURN a + } + $$) AS (a agtype, exists agtype); + +-- +-- Cleanup +-- +SELECT * FROM drop_graph('exists_subquery', true); + +-- +-- End of tests +-- diff --git a/src/backend/nodes/ag_nodes.c b/src/backend/nodes/ag_nodes.c index 73f6154ae..a41cdd0ba 100644 --- a/src/backend/nodes/ag_nodes.c +++ b/src/backend/nodes/ag_nodes.c @@ -52,6 +52,7 @@ const char *node_names[] = { "cypher_typecast", "cypher_integer_const", "cypher_sub_pattern", + "cypher_sub_query", "cypher_call", "cypher_create_target_nodes", "cypher_create_path", @@ -117,6 +118,7 @@ const ExtensibleNodeMethods node_methods[] = { DEFINE_NODE_METHODS(cypher_typecast), DEFINE_NODE_METHODS(cypher_integer_const), DEFINE_NODE_METHODS(cypher_sub_pattern), + DEFINE_NODE_METHODS(cypher_sub_query), DEFINE_NODE_METHODS(cypher_call), DEFINE_NODE_METHODS_EXTENDED(cypher_create_target_nodes), DEFINE_NODE_METHODS_EXTENDED(cypher_create_path), diff --git a/src/backend/nodes/cypher_outfuncs.c b/src/backend/nodes/cypher_outfuncs.c index a7547286c..2904503f4 100644 --- a/src/backend/nodes/cypher_outfuncs.c +++ b/src/backend/nodes/cypher_outfuncs.c @@ -321,6 +321,15 @@ void out_cypher_sub_pattern(StringInfo str, const ExtensibleNode *node) WRITE_NODE_FIELD(pattern); } +// serialization function for the cypher_sub_query ExtensibleNode. 
+void out_cypher_sub_query(StringInfo str, const ExtensibleNode *node) +{ + DEFINE_AG_NODE(cypher_sub_query); + + WRITE_ENUM_FIELD(kind, csp_kind); + WRITE_NODE_FIELD(query); +} + // serialization function for the cypher_call ExtensibleNode. void out_cypher_call(StringInfo str, const ExtensibleNode *node) { diff --git a/src/backend/parser/cypher_analyze.c b/src/backend/parser/cypher_analyze.c index 4882b2881..d69cc0558 100644 --- a/src/backend/parser/cypher_analyze.c +++ b/src/backend/parser/cypher_analyze.c @@ -711,6 +711,7 @@ static Query *analyze_cypher(List *stmt, ParseState *parent_pstate, cpstate->params = params; cpstate->default_alias_num = 0; cpstate->entities = NIL; + cpstate->subquery_where_flag = false; /* * install error context callback to adjust an error position since * locations in stmt are 0 based diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 6798f3d2a..a6f5b67b3 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -208,6 +208,8 @@ static TargetEntry *placeholder_target_entry(cypher_parsestate *cpstate, char *name); static Query *transform_cypher_sub_pattern(cypher_parsestate *cpstate, cypher_clause *clause); +static Query *transform_cypher_sub_query(cypher_parsestate *cpstate, + cypher_clause *clause); // set and remove clause static Query *transform_cypher_set(cypher_parsestate *cpstate, cypher_clause *clause); @@ -224,6 +226,8 @@ static List *transform_cypher_delete_item_list(cypher_parsestate *cpstate, List *delete_item_list, Query *query); //set operators +static cypher_clause *make_cypher_clause(List *stmt); + static Query *transform_cypher_union(cypher_parsestate *cpstate, cypher_clause *clause); @@ -389,6 +393,10 @@ Query *transform_cypher_clause(cypher_parsestate *cpstate, { result = transform_cypher_sub_pattern(cpstate, clause); } + else if (is_ag_node(self, cypher_sub_query)) + { + result = transform_cypher_sub_query(cpstate, clause); + } else if 
(is_ag_node(self, cypher_unwind)) { cypher_unwind *n = (cypher_unwind *) self; @@ -2846,6 +2854,55 @@ static Query *transform_cypher_sub_pattern(cypher_parsestate *cpstate, return qry; } +static Query *transform_cypher_sub_query(cypher_parsestate *cpstate, + cypher_clause *clause) +{ + cypher_clause *c; + Query *qry; + ParseState *pstate =(ParseState *)cpstate; + cypher_sub_query *sub_query = (cypher_sub_query*)clause->self; + ParseNamespaceItem *pnsi; + cypher_parsestate *child_parse_state = make_cypher_parsestate(cpstate); + ParseState *p_child_parse_state = (ParseState *) child_parse_state; + p_child_parse_state->p_expr_kind = pstate->p_expr_kind; + + c = make_cypher_clause((List *)sub_query->query); + + qry = makeNode(Query); + qry->commandType = CMD_SELECT; + + child_parse_state->subquery_where_flag = true; + + pnsi = transform_cypher_clause_as_subquery(child_parse_state, + transform_cypher_clause, + c, + NULL, true); + + qry->targetList = makeTargetListFromPNSItem(p_child_parse_state, pnsi); + + markTargetListOrigins(p_child_parse_state, qry->targetList); + + qry->rtable = p_child_parse_state->p_rtable; + qry->rteperminfos = p_child_parse_state->p_rteperminfos; + qry->jointree = makeFromExpr(p_child_parse_state->p_joinlist, NULL); + + /* the state will be destroyed so copy the data we need */ + qry->hasSubLinks = p_child_parse_state->p_hasSubLinks; + qry->hasTargetSRFs = p_child_parse_state->p_hasTargetSRFs; + qry->hasAggs = p_child_parse_state->p_hasAggs; + + if (qry->hasAggs) + { + parse_check_aggregates(p_child_parse_state, qry); + } + + assign_query_collations(p_child_parse_state, qry); + + free_cypher_parsestate(child_parse_state); + + return qry; +} + /* * Code borrowed and inspired by PG's transformFromClauseItem. This function * will transform the VLE function, depending on type. 
Currently, only @@ -4983,8 +5040,20 @@ static Expr *transform_cypher_edge(cypher_parsestate *cpstate, /* * If we are in a WHERE clause transform, we don't want to create new * variables, we want to use the existing ones. So, error if otherwise. + * If we are in a subquery transform, we are allowed to create new variables + * in the match, and all variables outside are visible to + * the subquery. Since there is no existing SQL logic that allows + * subqueries to alter variables of outer queries, we bypass the + * logic we would normally use to process WHERE clauses. + * + * Currently, the EXISTS subquery logic is naive. It returns a boolean + * result on the outer queries, but does not restrict the result set. + * + * TODO: Implement logic to alter outer scope results. + * */ - if (pstate->p_expr_kind == EXPR_KIND_WHERE) + if (pstate->p_expr_kind == EXPR_KIND_WHERE && + cpstate->subquery_where_flag == false) { cypher_parsestate *parent_cpstate = (cypher_parsestate *)pstate->parentParseState->parentParseState; @@ -5243,8 +5312,20 @@ static Expr *transform_cypher_node(cypher_parsestate *cpstate, /* * If we are in a WHERE clause transform, we don't want to create new * variables, we want to use the existing ones. So, error if otherwise. + * If we are in a subquery transform, we are allowed to create new variables + * in the match, and all variables outside are visible to + * the subquery. Since there is no existing SQL logic that allows + * subqueries to alter variables of outer queries, we bypass the + * logic we would normally use to process WHERE clauses. + * + * Currently, the EXISTS subquery logic is naive. It returns a boolean + * result on the outer queries, but does not restrict the result set. + * + * TODO: Implement logic to alter outer scope results.
+ * */ - if (pstate->p_expr_kind == EXPR_KIND_WHERE) + if (pstate->p_expr_kind == EXPR_KIND_WHERE && + cpstate->subquery_where_flag == false) { cypher_parsestate *parent_cpstate = (cypher_parsestate *)pstate->parentParseState->parentParseState; diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index 5b6d2283f..c3fcd7899 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -98,8 +98,8 @@ reading_clause_list updating_clause_list_0 updating_clause_list_1 %type reading_clause updating_clause -%type subquery_stmt subquery_stmt_no_return single_subquery - single_subquery_no_return subquery_part_init +%type subquery_stmt subquery_stmt_with_return subquery_stmt_no_return + single_subquery single_subquery_no_return subquery_part_init /* RETURN and WITH clause */ %type return return_item sort_item skip_opt limit_opt with @@ -613,11 +613,22 @@ updating_clause: ; subquery_stmt: + subquery_stmt_with_return + { + $$ = $1; + } + | subquery_stmt_no_return + { + $$ = $1; + } + ; + +subquery_stmt_with_return: single_subquery { $$ = $1; } - | subquery_stmt UNION all_or_distinct subquery_stmt + | subquery_stmt_with_return UNION all_or_distinct subquery_stmt_with_return { $$ = list_make1(make_set_op(SETOP_UNION, $3, $1, $4)); } @@ -635,16 +646,42 @@ subquery_stmt_no_return: ; single_subquery: - subquery_part_init reading_clause_list return + subquery_part_init reading_clause_list return { $$ = list_concat($1, lappend($2, $3)); } ; single_subquery_no_return: - subquery_part_init reading_clause_list + subquery_part_init reading_clause_list { - $$ = list_concat($1, $2); + ColumnRef *cr; + ResTarget *rt; + cypher_return *n; + + /* + * since subqueries allow return-less clauses, we add a + * return node manually to reflect that syntax + */ + cr = makeNode(ColumnRef); + cr->fields = list_make1(makeNode(A_Star)); + cr->location = @1; + + rt = makeNode(ResTarget); + rt->name = NULL; + rt->indirection = NIL; + rt->val = (Node 
*)cr; + rt->location = @1; + + n = make_ag_node(cypher_return); + n->distinct = false; + n->items = list_make1((Node *)rt); + n->order_by = NULL; + n->skip = NULL; + n->limit = NULL; + + $$ = list_concat($1, lappend($2, n)); + } ; @@ -1861,6 +1898,13 @@ expr_func_subexpr: expr_subquery: EXISTS '{' anonymous_path '}' { + /* + * EXISTS subquery with an anonymous path is almost + * the same as an EXISTS sub pattern, so we reuse that + * logic here to simplify more complex subquery transformations. + * TODO: Add WHERE clause support for anonymous paths in functions. + */ + cypher_sub_pattern *sub; SubLink *n; @@ -1879,17 +1923,22 @@ expr_subquery: } | EXISTS '{' subquery_stmt '}' { - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("EXISTS subquery not yet implemented"), - ag_scanner_errposition(@1, scanner))); - } - | EXISTS '{' subquery_stmt_no_return '}' - { - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("EXISTS subquery not yet implemented"), - ag_scanner_errposition(@1, scanner))); + cypher_sub_query *sub; + SubLink *n; + + sub = make_ag_node(cypher_sub_query); + sub->kind = CSP_EXISTS; + sub->query = $3; + + n = makeNode(SubLink); + + n->subLinkType = EXISTS_SUBLINK; + n->subLinkId = 0; + n->testexpr = NULL; + n->operName = NIL; + n->subselect = (Node *) sub; + n->location = @1; + $$ = (Node *)node_to_agtype((Node *)n, "boolean", @1); } ; diff --git a/src/backend/parser/cypher_parse_node.c b/src/backend/parser/cypher_parse_node.c index 09e6ad4dc..2e5378b77 100644 --- a/src/backend/parser/cypher_parse_node.c +++ b/src/backend/parser/cypher_parse_node.c @@ -59,6 +59,7 @@ cypher_parsestate *make_cypher_parsestate(cypher_parsestate *parent_cpstate) cpstate->graph_name = parent_cpstate->graph_name; cpstate->graph_oid = parent_cpstate->graph_oid; cpstate->params = parent_cpstate->params; + cpstate->subquery_where_flag = parent_cpstate->subquery_where_flag; } return cpstate; @@ -105,7 +106,7 @@ static void errpos_ecb(void *arg) } /* - * Generates a
default alias name for when a query needs on and the parse + * Generates a default alias name for when a query needs one and the parse * state does not provide one. */ char *get_next_default_alias(cypher_parsestate *cpstate) diff --git a/src/include/nodes/ag_nodes.h b/src/include/nodes/ag_nodes.h index 598871899..23d683936 100644 --- a/src/include/nodes/ag_nodes.h +++ b/src/include/nodes/ag_nodes.h @@ -57,8 +57,9 @@ typedef enum ag_node_tag cypher_typecast_t, // integer constant cypher_integer_const_t, - // sub patterns + // sub patterns/queries cypher_sub_pattern_t, + cypher_sub_query_t, // procedure calls cypher_call_t, // create data structures diff --git a/src/include/nodes/cypher_nodes.h b/src/include/nodes/cypher_nodes.h index bdb21f227..1af2390fd 100644 --- a/src/include/nodes/cypher_nodes.h +++ b/src/include/nodes/cypher_nodes.h @@ -22,7 +22,7 @@ #include "nodes/ag_nodes.h" -/* cypher sub patterns */ +/* cypher sub patterns/queries */ typedef enum csp_kind { CSP_EXISTS, @@ -37,6 +37,13 @@ typedef struct cypher_sub_pattern List *pattern; } cypher_sub_pattern; +typedef struct cypher_sub_query +{ + ExtensibleNode extensible; + csp_kind kind; + List *query; +} cypher_sub_query; + /* * clauses */ diff --git a/src/include/nodes/cypher_outfuncs.h b/src/include/nodes/cypher_outfuncs.h index 6be3c01c7..3963be0d0 100644 --- a/src/include/nodes/cypher_outfuncs.h +++ b/src/include/nodes/cypher_outfuncs.h @@ -61,8 +61,9 @@ void out_cypher_typecast(StringInfo str, const ExtensibleNode *node); // integer constant void out_cypher_integer_const(StringInfo str, const ExtensibleNode *node); -// sub pattern +// sub patterns/queries void out_cypher_sub_pattern(StringInfo str, const ExtensibleNode *node); +void out_cypher_sub_query(StringInfo str, const ExtensibleNode *node); // procedure call diff --git a/src/include/parser/cypher_parse_node.h b/src/include/parser/cypher_parse_node.h index 9f81b2d4c..75dcc1c01 100644 --- a/src/include/parser/cypher_parse_node.h +++ 
b/src/include/parser/cypher_parse_node.h @@ -40,6 +40,7 @@ typedef struct cypher_parsestate int default_alias_num; List *entities; List *property_constraint_quals; + bool subquery_where_flag; // true inside an EXISTS subquery; relaxes the WHERE new-variable check /* * To flag when an aggregate has been found in an expression during an * expression transform. This is used during the return_item list transform