Add support for block strings to language package #261

Merged 3 commits on Dec 18, 2017

150 changes: 146 additions & 4 deletions language/lexer/lexer.go
@@ -3,6 +3,8 @@ package lexer
import (
"bytes"
"fmt"
"regexp"
"strings"
"unicode/utf8"

"github.com/graphql-go/graphql/gqlerrors"
@@ -28,6 +30,7 @@ const (
	INT
	FLOAT
	STRING
	BLOCK_STRING
)

var TokenKind map[int]int
@@ -54,6 +57,7 @@ func init() {
	TokenKind[INT] = INT
	TokenKind[FLOAT] = FLOAT
	TokenKind[STRING] = STRING
	TokenKind[BLOCK_STRING] = BLOCK_STRING
	tokenDescription[TokenKind[EOF]] = "EOF"
	tokenDescription[TokenKind[BANG]] = "!"
	tokenDescription[TokenKind[DOLLAR]] = "$"
@@ -72,6 +76,7 @@ func init() {
	tokenDescription[TokenKind[INT]] = "Int"
	tokenDescription[TokenKind[FLOAT]] = "Float"
	tokenDescription[TokenKind[STRING]] = "String"
	tokenDescription[TokenKind[BLOCK_STRING]] = "BlockString"
}

// Token is a representation of a lexed Token. Value only appears for non-punctuation
@@ -303,6 +308,138 @@ func readString(s *source.Source, start int) (Token, error) {
	return makeToken(TokenKind[STRING], start, position+1, value), nil
}

// readBlockString reads a block string token from the source file.
//
// """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
func readBlockString(s *source.Source, start int) (Token, error) {
	body := s.Body
	position := start + 3
	runePosition := start + 3
	chunkStart := position
	var valueBuffer bytes.Buffer

	for {
		// Stop if we've reached the end of the buffer.
		if position >= len(body) {
			break
		}

		code, n := runeAt(body, position)

		// Closing Triple-Quote (""")
		if code == '"' {
			x, _ := runeAt(body, position+1)
			y, _ := runeAt(body, position+2)
			if x == '"' && y == '"' {
				valueBuffer.Write(body[chunkStart:position])
				value := blockStringValue(valueBuffer.String())
				return makeToken(TokenKind[BLOCK_STRING], start, position+3, value), nil
			}
		}

		// SourceCharacter
		if code < 0x0020 &&
			code != 0x0009 &&
			code != 0x000a &&
			code != 0x000d {
			return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code)))
		}

		// Escaped Triple-Quote (\""")
		if code == '\\' {
			x, _ := runeAt(body, position+1)
			y, _ := runeAt(body, position+2)
			z, _ := runeAt(body, position+3)
			if x == '"' && y == '"' && z == '"' {
				// Copy the chunk and the unescaped triple-quote into the value
				// buffer. Copying (rather than appending to a sub-slice of
				// body) avoids mutating the source bytes in place.
				valueBuffer.Write(body[chunkStart:position])
				valueBuffer.WriteString(`"""`)
				position += 4 // account for the `\"""` characters
				runePosition += 4
				chunkStart = position
				continue
			}
		}

		position += n
		runePosition++
	}

	return Token{}, gqlerrors.NewSyntaxError(s, runePosition, "Unterminated string.")
}

var splitLinesRegex = regexp.MustCompile("\r\n|[\n\r]")

// blockStringValue implements the GraphQL spec's BlockStringValue() static
// algorithm.
//
// It produces the value of a block string from its parsed raw value, similar
// to CoffeeScript's block string, Python's docstring trim, or Ruby's
// strip_heredoc.
//
// Spec: http://facebook.github.io/graphql/draft/#BlockStringValue()
// Heavily borrows from: https://github.com/graphql/graphql-js/blob/8e0c599ceccfa8c40d6edf3b72ee2a71490b10e0/src/language/blockStringValue.js
func blockStringValue(in string) string {
	// Expand a block string's raw value into independent lines.
	lines := splitLinesRegex.Split(in, -1)

	// Determine the common indentation of all lines but the first.
	commonIndent := -1
	for i := 1; i < len(lines); i++ {
		line := lines[i]
		indent := leadingWhitespaceLen(line)
		if indent < len(line) && (commonIndent == -1 || indent < commonIndent) {
			commonIndent = indent
			if commonIndent == 0 {
				break
			}
		}
	}
	if commonIndent > 0 {
		// Skip the first line: per the spec, its indentation is preserved.
		for i := 1; i < len(lines); i++ {
			line := lines[i]
			if commonIndent > len(line) {
				continue
			}
			lines[i] = line[commonIndent:]
		}
	}

	// Remove leading blank lines; the length guards keep an all-blank
	// block string (e.g. `""""""`) from indexing into an empty slice.
	for len(lines) > 0 && lineIsBlank(lines[0]) {
		lines = lines[1:]
	}

	// Remove trailing blank lines.
	for len(lines) > 0 && lineIsBlank(lines[len(lines)-1]) {
		lines = lines[:len(lines)-1]
	}

	// Return a string of the lines joined with U+000A.
	return strings.Join(lines, "\n")
}
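
// Illustrative example (not part of the diff): given the raw value
//
//	"\n    Hello,\n      World!\n\n    Yours,\n      GraphQL.\n"
//
// the common indent is 4, so after stripping it and trimming the leading
// and trailing blank lines, blockStringValue returns
//
//	"Hello,\n  World!\n\nYours,\n  GraphQL."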

// leadingWhitespaceLen returns the count of leading whitespace characters
// on the given line.
func leadingWhitespaceLen(in string) (n int) {
	for _, ch := range in {
		if ch == ' ' || ch == '\t' {
			n++
		} else {
			break
		}
	}
	return
}

// lineIsBlank returns true when the given line has no content.
func lineIsBlank(in string) bool {
	return leadingWhitespaceLen(in) == len(in)
}

// Converts four hexadecimal chars to the integer that the
// string represents. For example, uniCharCode('0','0','0','f')
// will return 15, and uniCharCode('0','0','f','f') returns 255.
@@ -425,11 +562,16 @@ func readToken(s *source.Source, fromPosition int) (Token, error) {
		return token, nil
	// "
	case '"':
		var token Token
		var err error
		x, _ := runeAt(body, position+1)
		y, _ := runeAt(body, position+2)
		if x == '"' && y == '"' {
			token, err = readBlockString(s, position)
		} else {
			token, err = readString(s, position)
		}
		return token, err
	}
	description := fmt.Sprintf("Unexpected character %v.", printCharCode(code))
	return Token{}, gqlerrors.NewSyntaxError(s, runePosition, description)
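
Taken together, these changes make the lexer emit a single BLOCK_STRING token whose Value has already been normalized by blockStringValue. Below is a minimal sketch of how that surfaces through the public Lex API — the import paths are assumed from this repository's layout, and the Lex(&source.Source{...})(0) call shape mirrors the tests in this PR:

package main

import (
	"fmt"

	"github.com/graphql-go/graphql/language/lexer"
	"github.com/graphql-go/graphql/language/source"
)

func main() {
	// A block string whose lines share four spaces of common indentation.
	body := "\"\"\"\n    hello\n      world\n\"\"\""

	// Lex returns a function that lexes the token at a given byte offset.
	token, err := lexer.Lex(&source.Source{Body: []byte(body)})(0)
	if err != nil {
		panic(err)
	}

	// The common indent and surrounding blank lines are already stripped
	// per the BlockStringValue() rules above.
	fmt.Printf("%q\n", token.Value) // "hello\n  world"
}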
162 changes: 162 additions & 0 deletions language/lexer/lexer_test.go
@@ -447,6 +447,168 @@ func TestLexer_ReportsUsefulStringErrors(t *testing.T) {
	}
}

func TestLexer_LexesBlockStrings(t *testing.T) {
	tests := []Test{
		{
			Body: `"""simple"""`,
			Expected: Token{
				Kind:  TokenKind[BLOCK_STRING],
				Start: 0,
				End:   12,
				Value: "simple",
			},
		},
		{
			Body: `""" white space """`,
			Expected: Token{
				Kind:  TokenKind[BLOCK_STRING],
				Start: 0,
				End:   19,
				Value: " white space ",
			},
		},
		{
			Body: `
""" white space """
""" white space """
""" white space """
`,
			Expected: Token{
				Kind:  TokenKind[BLOCK_STRING],
				Start: 5,
				End:   25,
				Value: " white space ",
			},
		},
		{
			Body: `
"""
my great description
spans multiple lines

with breaks
"""
`,
			Expected: Token{
				Kind:  TokenKind[BLOCK_STRING],
				Start: 5,
				End:   89,
				Value: "my great description\nspans multiple lines\n\nwith breaks",
			},
		},
		{
			Body: `"""contains " quote"""`,
			Expected: Token{
				Kind:  TokenKind[BLOCK_STRING],
				Start: 0,
				End:   22,
				Value: `contains " quote`,
			},
		},
		{
			Body: `"""contains \""" triplequote"""`,
			Expected: Token{
				Kind:  TokenKind[BLOCK_STRING],
				Start: 0,
				End:   31,
				Value: `contains """ triplequote`,
			},
		},
		{
			Body: "\"\"\"multi\nline\"\"\"",
			Expected: Token{
				Kind:  TokenKind[BLOCK_STRING],
				Start: 0,
				End:   16,
				Value: "multi\nline",
			},
		},
		{
			Body: "\"\"\"multi\rline\r\nnormalized\"\"\"",
			Expected: Token{
				Kind:  TokenKind[BLOCK_STRING],
				Start: 0,
				End:   28,
				Value: "multi\nline\nnormalized",
			},
		},
		{
			Body: "\"\"\"unescaped \\n\\r\\b\\t\\f\\u1234\"\"\"",
			Expected: Token{
				Kind:  TokenKind[BLOCK_STRING],
				Start: 0,
				End:   32,
				Value: "unescaped \\n\\r\\b\\t\\f\\u1234",
			},
		},
		{
			Body: "\"\"\"slashes \\\\ \\/\"\"\"",
			Expected: Token{
				Kind:  TokenKind[BLOCK_STRING],
				Start: 0,
				End:   19,
				Value: "slashes \\\\ \\/",
			},
		},
	}
	for _, test := range tests {
		token, err := Lex(&source.Source{Body: []byte(test.Body)})(0)
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
		if !reflect.DeepEqual(token, test.Expected) {
			t.Errorf("unexpected token, expected: %v, got: %v", test.Expected, token)
		}
	}
}

func TestLexer_ReportsUsefulBlockStringErrors(t *testing.T) {
	tests := []Test{
		{
			Body: `"""`,
			Expected: `Syntax Error GraphQL (1:4) Unterminated string.

1: """
      ^
`,
		},
		{
			Body: `"""no end quote`,
			Expected: `Syntax Error GraphQL (1:16) Unterminated string.

1: """no end quote
                  ^
`,
		},
		{
			Body: "\"\"\"contains unescaped \u0007 control char\"\"\"",
			Expected: `Syntax Error GraphQL (1:23) Invalid character within String: "\\u0007".

1: """contains unescaped \u0007 control char"""
                         ^
`,
		},
		{
			Body: "\"\"\"null-byte is not \u0000 end of file\"\"\"",
			Expected: `Syntax Error GraphQL (1:21) Invalid character within String: "\\u0000".

1: """null-byte is not \u0000 end of file"""
                       ^
`,
		},
	}
	for _, test := range tests {
		_, err := Lex(createSource(test.Body))(0)
		if err == nil {
			t.Errorf("unexpected nil error\nexpected:\n%v\n\ngot:\n%v", test.Expected, err)
			continue
		}
		if err.Error() != test.Expected {
			t.Errorf("unexpected error.\nexpected:\n%v\n\ngot:\n%v", test.Expected, err.Error())
		}
	}
}

func TestLexer_LexesNumbers(t *testing.T) {
	tests := []Test{
		{
2 changes: 2 additions & 0 deletions language/parser/parser.go
@@ -635,6 +635,8 @@ func parseValueLiteral(parser *Parser, isConst bool) (ast.Value, error) {
			Value: token.Value,
			Loc:   loc(parser, token.Start),
		}), nil
	case lexer.TokenKind[lexer.BLOCK_STRING]:
		fallthrough
	case lexer.TokenKind[lexer.STRING]:
		if err := advance(parser); err != nil {
			return nil, err
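
Since BLOCK_STRING falls through to the STRING case, a block string is now accepted anywhere an ordinary string literal may appear and produces the same ast.StringValue node. A small sketch, assuming the package's existing parser.Parse/ParseParams API; the field and argument names are hypothetical:

package main

import (
	"fmt"

	"github.com/graphql-go/graphql/language/parser"
)

func main() {
	// `hello` and `description` are illustrative names, not part of the PR.
	query := `{
  hello(description: """
    A multi-line
    block string argument.
  """)
}`

	doc, err := parser.Parse(parser.ParseParams{Source: query})
	if err != nil {
		panic(err)
	}
	fmt.Printf("parsed %d definition(s)\n", len(doc.Definitions))
}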