From 53c9831dea7345f0c03dd1b0405855ecdbe2bc6a Mon Sep 17 00:00:00 2001 From: Bruce Williams Date: Mon, 4 Dec 2017 17:13:43 -0800 Subject: [PATCH] Block Strings (#463) * Block strings lexer changes * Block string parser, without indentation handling * Handle block string processing, add tests * Working indentation rules * Add typespecs * Use `re:split/3` vs `string:split/3` to support Erlang/OTP 19. * Add `prefix` as a light OTP 19 replacement for OTP 20 `string:prefix/2`. * Use `Access.key/2` vs `Access.key/1` * Add CHANGELOG entry --- CHANGELOG.md | 4 + src/absinthe_lexer.xrl | 7 +- src/absinthe_parser.yrl | 220 +++++++++++++++--- .../phase/parse/block_strings_test.exs | 199 ++++++++++++++++ 4 files changed, 395 insertions(+), 35 deletions(-) create mode 100644 test/lib/absinthe/phase/parse/block_strings_test.exs diff --git a/CHANGELOG.md b/CHANGELOG.md index 218364f9ef..7c4e059852 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## v1.4.5 + +- Feature: Support for `"""`-quoted block strings, as defined in the GraphQL Specification (See facebook/graphql#397). + ## v1.4.4 - Bug Fix: fix where self referential interface type would cause infinite loop when introspecting. 
diff --git a/src/absinthe_lexer.xrl b/src/absinthe_lexer.xrl
index 02eae4cd9c..04cb57977c 100644
--- a/src/absinthe_lexer.xrl
+++ b/src/absinthe_lexer.xrl
@@ -31,6 +31,11 @@ ExponentIndicator = [eE]
 ExponentPart = {ExponentIndicator}{Sign}?{Digit}+
 FloatValue = {IntegerPart}{FractionalPart}|{IntegerPart}{ExponentPart}|{IntegerPart}{FractionalPart}{ExponentPart}
 
+% Block String Value
+% A block string is delimited by `"""` on both sides. Inside it, LF, TAB and
+% CR are accepted raw; every other code point in U+0000..U+001F is excluded
+% by the character class. `\"""` is listed as an explicit alternative
+% (EscapedBlockStringQuote).
+% NOTE(review): the character class also matches a bare `"` and a bare `\`,
+% and leex is documented to pick the longest match. Confirm that a document
+% containing two block strings is not lexed as ONE token running from the
+% first opening `"""` to the last closing `"""`.
+EscapedBlockStringQuote = (\\""")
+BlockStringCharacter = (\n|\t|\r|[^\x{0000}-\x{001F}]|{EscapedBlockStringQuote})
+BlockStringValue = """{BlockStringCharacter}*"""
+
 % String Value
 HexDigit = [0-9A-Fa-f]
 EscapedUnicode = u{HexDigit}{HexDigit}{HexDigit}{HexDigit}
@@ -41,7 +46,6 @@ StringValue = "{StringCharacter}*"
 % Boolean Value
 BooleanValue = true|false
 
-
 % Reserved words
 ReservedWord = query|mutation|subscription|fragment|on|implements|interface|union|scalar|enum|input|extend|type|directive|ON|null|schema
 
@@ -52,6 +56,7 @@ Rules.
 {ReservedWord} : {token, {list_to_atom(TokenChars), TokenLine}}.
 {IntValue} : {token, {int_value, TokenLine, TokenChars}}.
 {FloatValue} : {token, {float_value, TokenLine, TokenChars}}.
+{BlockStringValue} : {token, {block_string_value, TokenLine, TokenChars}}.
 {StringValue} : {token, {string_value, TokenLine, TokenChars}}.
 {BooleanValue} : {token, {boolean_value, TokenLine, TokenChars}}.
 {Name} : {token, {name, TokenLine, TokenChars}}.
diff --git a/src/absinthe_parser.yrl b/src/absinthe_parser.yrl
index 773b8cbbf6..5f300e327d 100644
--- a/src/absinthe_parser.yrl
+++ b/src/absinthe_parser.yrl
@@ -19,7 +19,7 @@ Terminals
   '{' '}' '(' ')' '[' ']' '!' ':' '@' '$' '=' '|' '...'
   'query' 'mutation' 'subscription' 'fragment' 'on' 'directive'
   'type' 'implements' 'interface' 'union' 'scalar' 'enum' 'input' 'extend' 'schema'
-  name int_value float_value string_value boolean_value null.
+  name int_value float_value string_value block_string_value boolean_value null.
 
 Rootsymbol Document.
 
@@ -141,6 +141,7 @@ Name -> 'on' : extract_binary('$1').
 Value -> Variable : '$1'.
Value -> int_value : build_ast_node('IntValue', #{'value' => extract_integer('$1')}, #{'start_line' => extract_line('$1')}). Value -> float_value : build_ast_node('FloatValue', #{'value' => extract_float('$1')}, #{'start_line' => extract_line('$1')}). +Value -> block_string_value : build_ast_node('StringValue', #{'value' => extract_quoted_block_string_token('$1')}, #{'start_line' => extract_line('$1')}). Value -> string_value : build_ast_node('StringValue', #{'value' => extract_quoted_string_token('$1')}, #{'start_line' => extract_line('$1')}). Value -> boolean_value : build_ast_node('BooleanValue', #{'value' => extract_boolean('$1')}, #{'start_line' => extract_line('$1')}). Value -> null : build_ast_node('NullValue', #{}, #{'start_line' => extract_line('$1')}). @@ -260,46 +261,197 @@ TypeExtensionDefinition -> 'extend' ObjectTypeDefinition : Erlang code. -extract_atom({Value, _Line}) -> Value. -extract_binary(Value) when is_binary(Value) -> Value; -extract_binary({Token, _Line}) -> list_to_binary(atom_to_list(Token)); -extract_binary({_Token, _Line, Value}) -> list_to_binary(Value). -extract_quoted_string_token({_Token, _Line, Value}) -> iolist_to_binary(unescape(lists:sublist(Value, 2, length(Value) - 2))). +% Line-Level Utilities -unescape(Escaped) -> unescape(Escaped, []). - -unescape([], Acc) -> lists:reverse(Acc); -unescape([$\\, $" | T], Acc) -> unescape(T, [$" | Acc]); -unescape([$\\, $\\ | T], Acc) -> unescape(T, [$\\ | Acc]); -unescape([$\\, $/ | T], Acc) -> unescape(T, [$/ | Acc]); -unescape([$\\, $b | T], Acc) -> unescape(T, [$\b | Acc]); -unescape([$\\, $f | T], Acc) -> unescape(T, [$\f | Acc]); -unescape([$\\, $n | T], Acc) -> unescape(T, [$\n | Acc]); -unescape([$\\, $r | T], Acc) -> unescape(T, [$\r | Acc]); -unescape([$\\, $t | T], Acc) -> unescape(T, [$\t | Acc]); -unescape([$\\, $u, A, B, C, D | T], Acc) -> unescape(T, [hexlist_to_utf8_binary([A, B, C, D]) | Acc]); -unescape([H | T], Acc) -> unescape(T, [H | Acc]). 
-
- hexlist_to_utf8_binary(HexList) -> unicode:characters_to_binary([httpd_util:hexlist_to_integer(HexList)]).
-
-extract_integer({_Token, _Line, Value}) ->
-  {Int, []} = string:to_integer(Value), Int.
-extract_float({_Token, _Line, Value}) ->
-  {Float, []} = string:to_float(Value), Float.
-extract_boolean({_Token, _Line, "true"}) -> true;
-extract_boolean({_Token, _Line, "false"}) -> false.
-extract_line({_Token, Line}) -> Line;
-extract_line({_Token, Line, _Value}) -> Line;
-extract_line(_) -> nil.
+% Returns the line number stored in the second position of a 2- or
+% 3-element token tuple; anything else yields nil.
+extract_line({_Token, Line}) ->
+  Line;
+extract_line({_Token, Line, _Value}) ->
+  Line;
+extract_line(_) ->
+  nil.
 
+% Returns the 'start_line' of a node's loc map. For a list, only the first
+% element is inspected. Anything without a matching loc yields nil.
 extract_child_line([Head|_]) ->
-    extract_child_line(Head);
+  extract_child_line(Head);
 extract_child_line(#{loc := #{'start_line' := Line}}) ->
-    Line;
+  Line;
 extract_child_line(_) ->
-    nil.
+  nil.
+
+
+% Value-level Utilities
+
+% Returns the first element of a two-element token tuple unchanged.
+extract_atom({Value, _Line}) ->
+  Value.
+
+% Produces a binary from a token:
+%   - a binary is passed through as-is;
+%   - a 2-tuple token contributes its first element (an atom) as text;
+%   - a 3-tuple token contributes its third element (a character list).
+extract_binary(Value) when is_binary(Value) ->
+  Value;
+
+extract_binary({Token, _Line}) ->
+  list_to_binary(atom_to_list(Token));
+
+extract_binary({_Token, _Line, Value}) ->
+  list_to_binary(Value).
+
+
+% AST Generation
 
+% Builds the struct named 'Elixir.Absinthe.Language.<Type>' through
+% 'Elixir.Kernel':struct/2, using Node's fields plus a `loc` entry. A Loc
+% whose 'start_line' is nil is replaced by a plain nil loc.
 build_ast_node(Type, Node, #{'start_line' := nil}) ->
   build_ast_node(Type, Node, nil);
 build_ast_node(Type, Node, Loc) ->
   'Elixir.Kernel':struct(list_to_atom("Elixir.Absinthe.Language." ++ atom_to_list(Type)), Node#{loc => Loc}).
+
+
+% String
+
+% Drops the first and the last character of the token text (sublist from
+% position 2, two characters shorter), runs the rest through
+% process_string/1 and flattens the result to a binary.
+extract_quoted_string_token({_Token, _Line, Value}) ->
+  iolist_to_binary(process_string(lists:sublist(Value, 2, length(Value) - 2))).
+
+% Entry point: starts process_string/2 with an empty accumulator.
+process_string(Escaped) ->
+  process_string(Escaped, []).
+
+% Walks the character list and replaces each recognised backslash escape
+% (\" \\ \/ \b \f \n \r \t \uXXXX) with what it denotes. Every other
+% character is copied unchanged. The accumulator is built in reverse; a
+% \uXXXX escape contributes a binary, so the result is an iolist rather
+% than a flat string.
+process_string([], Acc) ->
+  lists:reverse(Acc);
+process_string([$\\, $" | T], Acc) ->
+  process_string(T, [$" | Acc]);
+process_string([$\\, $\\ | T], Acc) ->
+  process_string(T, [$\\ | Acc]);
+process_string([$\\, $/ | T], Acc) ->
+  process_string(T, [$/ | Acc]);
+process_string([$\\, $b | T], Acc) ->
+  process_string(T, [$\b | Acc]);
+process_string([$\\, $f | T], Acc) ->
+  process_string(T, [$\f | Acc]);
+process_string([$\\, $n | T], Acc) ->
+  process_string(T, [$\n | Acc]);
+process_string([$\\, $r | T], Acc) ->
+  process_string(T, [$\r | Acc]);
+process_string([$\\, $t | T], Acc) ->
+  process_string(T, [$\t | Acc]);
+process_string([$\\, $u, A, B, C, D | T], Acc) ->
+  process_string(T, [hexlist_to_utf8_binary([A, B, C, D]) | Acc]);
+process_string([H | T], Acc) ->
+  process_string(T, [H | Acc]).
+
+% Converts a list of hex digits to an integer with
+% httpd_util:hexlist_to_integer/1 and encodes that single code point with
+% unicode:characters_to_binary/1.
+hexlist_to_utf8_binary(HexList) ->
+  unicode:characters_to_binary([httpd_util:hexlist_to_integer(HexList)]).
+
+
+% Block String
+
+% Drops three characters from each end of the token text (sublist from
+% position 4, six characters shorter), processes the remainder as a block
+% string and flattens the result to a binary.
+extract_quoted_block_string_token({_Token, _Line, Value}) ->
+  iolist_to_binary(process_block_string(lists:sublist(Value, 4, length(Value) - 6))).
+
+% Entry point: starts process_block_string/2 with an empty accumulator.
+-spec process_block_string(string()) -> string().
+process_block_string(Escaped) ->
+  process_block_string(Escaped, []).
+
+% Single pass over the raw block string content:
+%   - CR LF and a lone CR both become LF, so that block_string_value/1,
+%     which splits on LF only, sees every line terminator the lexer lets
+%     through;
+%   - the escape \""" becomes three literal quotes;
+%   - everything else (including other backslash sequences) is copied.
+% Once the input is exhausted the normalised text is handed to
+% block_string_value/1 for indentation and blank-line handling.
+-spec process_block_string(string(), string()) -> string().
+process_block_string([], Acc) ->
+  block_string_value(lists:reverse(Acc));
+% The CR LF clause must stay ahead of the lone-CR clause so the pair
+% collapses to ONE line feed.
+process_block_string([$\r, $\n | T], Acc) ->
+  process_block_string(T, [$\n | Acc]);
+process_block_string([$\r | T], Acc) ->
+  process_block_string(T, [$\n | Acc]);
+process_block_string([$\\, $", $", $" | T], Acc) ->
+  process_block_string(T, [$", $", $"] ++ Acc);
+process_block_string([H | T], Acc) ->
+  process_block_string(T, [H | Acc]).
+
+% Splits the text on LF, then:
+%   1. computes the common indentation over every line EXCEPT the first;
+%   2. removes that indentation from those lines (the first line is left
+%      untouched);
+%   3. drops blank lines from the start and the end of the whole list;
+%   4. joins what remains with LF.
+-spec block_string_value(string()) -> string().
+block_string_value(Value) ->
+  [FirstLine | Rest] = re:split(Value, "\n", [{return,list}]),
+  Prefix = indentation_prefix(common_indent(Rest)),
+  UnindentedLines = unindent(Rest, Prefix),
+  Lines = trim_blank_lines([FirstLine | UnindentedLines]),
+  string:join(Lines, "\n").
+
+% Removes blank lines (see is_blank/1) from both ends of the list; blank
+% lines in the middle are kept.
+-spec trim_blank_lines([string()]) -> [string()].
+trim_blank_lines(Lines) ->
+  trim_blank_lines(trim_blank_lines(Lines, leading), trailing).
+
+% `leading` drops blank lines from the front; `trailing` reverses the list,
+% reuses the `leading` clause and reverses back.
+-spec trim_blank_lines([string()], leading | trailing) -> [string()].
+trim_blank_lines(Lines, leading) ->
+  lists:dropwhile(fun is_blank/1, Lines);
+trim_blank_lines(Lines, trailing) ->
+  lists:reverse(trim_blank_lines(lists:reverse(Lines), leading)).
+
+% Returns a string of Indent space characters (32). Callers only rely on
+% its length; see prefix/2.
+-spec indentation_prefix(non_neg_integer()) -> string().
+indentation_prefix(Indent) ->
+  lists:duplicate(Indent, 32).
+
+% Applies prefix/2 to every line, preserving order.
+-spec unindent([string()], string()) -> [string()].
+unindent(Lines, Prefix) ->
+  unindent(Lines, Prefix, []).
+
+-spec unindent([string()], string(), [string()]) -> [string()].
+unindent([], _Prefix, Result) ->
+  lists:reverse(Result);
+unindent([H | T], Prefix, Result) ->
+  Processed = prefix(H, Prefix),
+  unindent(T, Prefix, [Processed | Result]).
+
+% Removes up to length(Prefix) leading indentation characters from Line.
+%
+% The common indent is measured by leading_whitespace/1, which counts tabs
+% as well as spaces, so the removal must accept both. Comparing against a
+% literal run of spaces would leave tab-indented lines untouched. A
+% whitespace-only line shorter than the indent is emptied instead of being
+% skipped. Non-whitespace characters are never removed.
+-spec prefix(string(), string()) -> string().
+prefix(Line, Prefix) ->
+  drop_indent(Line, length(Prefix)).
+
+% Drops at most N leading space/tab characters, stopping early at the first
+% character that is neither, or at the end of the line.
+-spec drop_indent(string(), non_neg_integer()) -> string().
+drop_indent(Line, 0) ->
+  Line;
+drop_indent([32 | T], N) ->
+  drop_indent(T, N - 1);
+drop_indent([$\t | T], N) ->
+  drop_indent(T, N - 1);
+drop_indent(Line, _N) ->
+  Line.
+
+% Smallest leading-whitespace count among the lines that contain at least
+% one non-whitespace character; 0 when there is no such line.
+-spec common_indent([string()]) -> non_neg_integer().
+common_indent(Lines) ->
+  case common_indent(Lines, noindent) of
+    noindent ->
+      0;
+    Indent ->
+      Indent
+  end.
+
+% Folds over the lines keeping the minimum indent seen so far. `noindent`
+% marks "nothing seen yet". Lines made only of whitespace
+% (CurrentIndent == length) never lower the minimum.
+-spec common_indent([string()], noindent | non_neg_integer()) -> noindent | non_neg_integer().
+common_indent([], Indent) ->
+  Indent;
+common_indent([H | T], Indent) ->
+  CurrentIndent = leading_whitespace(H),
+  if
+    (CurrentIndent < length(H)) and ((Indent == noindent) or (CurrentIndent < Indent)) ->
+      common_indent(T, CurrentIndent);
+    true ->
+      common_indent(T, Indent)
+  end.
+
+% Number of leading whitespace characters of the string; delegates to
+% leading_whitespace/2 starting from 0.
+-spec leading_whitespace(string()) -> non_neg_integer().
+leading_whitespace(BlockStringValue) ->
+  leading_whitespace(BlockStringValue, 0).
+
+-spec leading_whitespace(string(), non_neg_integer()) -> non_neg_integer().
+% Counts consecutive space (32) and tab characters from the start of the
+% string, stopping at the first other character or at the end of the string.
+leading_whitespace([], N) ->
+  N;
+leading_whitespace([32 | T], N) ->
+  leading_whitespace(T, N + 1);
+leading_whitespace([$\t | T], N) ->
+  leading_whitespace(T, N + 1);
+leading_whitespace([_H | _T], N) ->
+  N.
+
+% True when every character is a space or a tab; the empty string counts
+% as blank (0 == 0).
+-spec is_blank(string()) -> boolean().
+is_blank(BlockStringValue) ->
+  leading_whitespace(BlockStringValue) == length(BlockStringValue).
+
+
+% Integer
+
+% Parses the token text with string:to_integer/1. The match on `[]` asserts
+% that nothing is left over; any remainder fails with badmatch.
+extract_integer({_Token, _Line, Value}) ->
+  {Int, []} = string:to_integer(Value), Int.
+
+
+% Float
+
+% Parses the token text with string:to_float/1. The match on `[]` asserts
+% that nothing is left over; any remainder fails with badmatch.
+extract_float({_Token, _Line, Value}) ->
+  {Float, []} = string:to_float(Value), Float.
+
+
+% Boolean
+
+% Maps the token text "true" / "false" to the corresponding atom. There is
+% no clause for any other text.
+extract_boolean({_Token, _Line, "true"}) ->
+  true;
+extract_boolean({_Token, _Line, "false"}) ->
+  false.
diff --git a/test/lib/absinthe/phase/parse/block_strings_test.exs b/test/lib/absinthe/phase/parse/block_strings_test.exs
new file mode 100644
index 0000000000..1b03545b97
--- /dev/null
+++ b/test/lib/absinthe/phase/parse/block_strings_test.exs
@@ -0,0 +1,199 @@
+defmodule Absinthe.Phase.Parse.BlockStringsTest do
+  use Absinthe.Case, async: true
+
+  it "parses a query with a block string literal and no newlines" do
+    assert {:ok, result} = run(~S<{ post(title: "single", body: """text""") { name } }>)
+    assert "text" == extract_body(result)
+  end
+
+  it "parses a query with a block string argument that contains a quote" do
+    assert {:ok, result} = run(~S<{ post(title: "single", body: """text "here""") { name } }>)
+    assert "text \"here" == extract_body(result)
+  end
+
+  it "parses a query with a block string literal that contains various escapes" do
+    assert {:ok, result} = run(~s<{ post(title: "single", body: """unescaped \\n\\r\\b\\t\\f\\u1234""") { name } }>)
+    assert "unescaped \\n\\r\\b\\t\\f\\u1234" == extract_body(result)
+  end
+
+  it "parses a query with a block string literal that contains various slashes" do
+    assert {:ok, result} = run(~s<{ post(title: "single", body: """slashes \\\\ \\/""") { name } }>)
+    assert "slashes \\\\ \\/" == extract_body(result)
+  end
+
+
+  @input [
"", + " Hello,", + " World!", + "", + " Yours,", + " GraphQL." + ] + @result [ + "Hello,", + " World!", + "", + "Yours,", + " GraphQL." + ] + it "parses a query with a block string literal, removing uniform indentation from a string" do + assert {:ok, result} = run(~s<{ post(title: "single", body: """#{lines(@input)}""") { name } }>) + assert lines(@result) == extract_body(result) + end + + @input [ + "", + "", + " Hello,", + " World!", + "", + " Yours,", + " GraphQL.", + "", + "" + ] + @result [ + "Hello,", + " World!", + "", + "Yours,", + " GraphQL." + ] + it "parses a query with a block string literal, removing empty leading and trailing lines" do + assert {:ok, result} = run(~s<{ post(title: "single", body: """#{lines(@input)}""") { name } }>) + assert lines(@result) == extract_body(result) + end + + @input [ + " ", + " ", + " Hello,", + " World!", + "", + " Yours,", + " GraphQL.", + " ", + " " + ] + @result [ + "Hello,", + " World!", + "", + "Yours,", + " GraphQL." + ] + it "parses a query with a block string literal, removing blank leading and trailing lines" do + assert {:ok, result} = run(~s<{ post(title: "single", body: """#{lines(@input)}""") { name } }>) + assert lines(@result) == extract_body(result) + end + + @input [ + " Hello,", + " World!", + "", + " Yours,", + " GraphQL." + ] + @result [ + " Hello,", + " World!", + "", + "Yours,", + " GraphQL." + ] + it "parses a query with a block string literal, retaining indentation from first line" do + assert {:ok, result} = run(~s<{ post(title: "single", body: """#{lines(@input)}""") { name } }>) + assert lines(@result) == extract_body(result) + end + + @input [ + " ", + " Hello, ", + " World! ", + " ", + " Yours, ", + " GraphQL. ", + " " + ] + @result [ + "Hello, ", + " World! ", + " ", + "Yours, ", + " GraphQL. 
" + ] + it "parses a query with a block string literal, not altering trailing spaces" do + assert {:ok, result} = run(~s<{ post(title: "single", body: """#{lines(@input)}""") { name } }>) + assert lines(@result) == extract_body(result) + end + + it "parses a query with a block string literal and carriage returns, normalizing" do + assert {:ok, result} = run(~s<{ post(title: "single", body: """text\nline\r\nanother""") { name } }>) + assert "text\nline\nanother" == extract_body(result) + end + + it "parses a query with a block string literal with escaped triple quotes and no newlines" do + assert {:ok, result} = run(~S<{ post(title: "single", body: """text\""" """) { name } }>) + assert ~S == extract_body(result) + end + + it "returns an error for a bad byte" do + assert {:error, err} = run(~s<{ post(title: "single", body: """trying to escape a \u0000 byte""") { name } }>) + assert "syntax error" <> _ = extract_error_message(err) + end + + it "parses a query with a block string literal as a variable default" do + assert {:ok, result} = run(~S) + assert "text" == get_in(result, + [ + Access.key(:definitions, []), + Access.at(0), + Access.key(:variable_definitions, %{}), + Access.at(0), + Access.key(:default_value, %{}), + Access.key(:value, nil) + ] + ) + end + + + defp extract_error_message(err) do + get_in(err, + [ + Access.key(:execution, %{}), + Access.key(:validation_errors, []), + Access.at(0), + Access.key(:message, nil) + ] + ) + end + + defp extract_body(value) do + get_in(value, + [ + Access.key(:definitions), + Access.at(0), + Access.key(:selection_set), + Access.key(:selections), + Access.at(0), + Access.key(:arguments), + Access.at(1), + Access.key(:value), + Access.key(:value) + ] + ) + end + + def run(input) do + with {:ok, %{input: input}} <- Absinthe.Phase.Parse.run(input) do + {:ok, input} + end + end + + defp lines(input) do + input + |> Enum.join("\n") + end + +end