From fa7c453538de0d933c27239bcad5b69b3b7560d4 Mon Sep 17 00:00:00 2001
From: Radek Simko <radek.simko@gmail.com>
Date: Thu, 28 May 2020 10:55:33 +0100
Subject: [PATCH] hclsyntax: Introduce token-based parse methods

This change introduces new methods to allow a two-phased
approach where tokenization is done prior to parsing.
---
 hclsyntax/peeker.go      |   6 +-
 hclsyntax/public.go      |  94 ++++++++++++++++++++++++++
 hclsyntax/public_test.go | 138 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 237 insertions(+), 1 deletion(-)

diff --git a/hclsyntax/peeker.go b/hclsyntax/peeker.go
index f056f906..63dead55 100644
--- a/hclsyntax/peeker.go
+++ b/hclsyntax/peeker.go
@@ -112,7 +112,11 @@ func (p *peeker) nextToken() (Token, int) {
 	// if we fall out here then we'll return the EOF token, and leave
 	// our index pointed off the end of the array so we'll keep
 	// returning EOF in future too.
-	return p.Tokens[len(p.Tokens)-1], len(p.Tokens)
+	return p.lastToken(), len(p.Tokens)
+}
+
+func (p *peeker) lastToken() Token {
+	return p.Tokens[len(p.Tokens)-1] // final element of p.Tokens; indexing panics if the slice is empty
 }
 
 func (p *peeker) includingNewlines() bool {
diff --git a/hclsyntax/public.go b/hclsyntax/public.go
index 0b68efd6..3a3b958a 100644
--- a/hclsyntax/public.go
+++ b/hclsyntax/public.go
@@ -1,6 +1,8 @@
 package hclsyntax
 
 import (
+	"fmt"
+
 	"github.com/hashicorp/hcl/v2"
 )
 
@@ -36,6 +38,98 @@ func ParseConfig(src []byte, filename string, start hcl.Pos) (*hcl.File, hcl.Dia
 	}, diags
 }
 
+// ParseBodyFromTokens parses already-scanned tokens as a body via parser.ParseBody,
+// forwarding end to it unchanged, and returns the resulting *Body and diagnostics.
+func ParseBodyFromTokens(tokens Tokens, end TokenType) (*Body, hcl.Diagnostics) {
+	pk := newPeeker(tokens, false)
+	p := &parser{peeker: pk}
+	return p.ParseBody(end)
+}
+
+// ParseBodyItemFromTokens parses given tokens as a single body item, such as
+// an attribute or a block, returning it as a Node (nil if tokens is empty).
+func ParseBodyItemFromTokens(tokens Tokens) (Node, hcl.Diagnostics) {
+	if len(tokens) == 0 {
+		return nil, nil // nothing to parse: no item and no diagnostics
+	}
+
+	peeker := newPeeker(tokens, false)
+
+	// Sanity checks: must start with an identifier and end with a newline or EOF
+	firstToken := peeker.Peek()
+	if firstToken.Type != TokenIdent {
+		return nil, hcl.Diagnostics{
+			&hcl.Diagnostic{
+				Severity: hcl.DiagError,
+				Summary:  "Identifier not found",
+				Detail: fmt.Sprintf("Expected definition to start with an identifier, %s found",
+					firstToken.Type),
+				Subject: &firstToken.Range,
+			},
+		}
+	}
+	lastToken := peeker.lastToken()
+	if lastToken.Type != TokenEOF &&
+		lastToken.Type != TokenNewline {
+		return nil, hcl.Diagnostics{
+			&hcl.Diagnostic{
+				Severity: hcl.DiagError,
+				Summary:  "Unterminated definition",
+				Detail: fmt.Sprintf("Expected definition terminated either by a newline or EOF, %s found",
+					lastToken.Type),
+				Subject: &lastToken.Range,
+			},
+		}
+	}
+
+	parser := &parser{peeker: peeker}
+	return parser.ParseBodyItem()
+}
+
+// ParseBlockFromTokens parses given tokens as a single block. If the parsed
+// body item is not a *Block, nil and an error diagnostic are returned.
+func ParseBlockFromTokens(tokens Tokens) (*Block, hcl.Diagnostics) {
+	item, diags := ParseBodyItemFromTokens(tokens)
+	if item == nil {
+		return nil, diags
+	}
+
+	if blk, isBlock := item.(*Block); isBlock {
+		return blk, diags
+	}
+
+	// Any other kind of body item is reported as an error at its source range.
+	itemRange := item.Range()
+	return nil, append(diags, &hcl.Diagnostic{
+		Severity: hcl.DiagError,
+		Summary:  fmt.Sprintf("Unexpected definition (%T)", item),
+		Detail:   fmt.Sprintf("Expected a block definition, but found %T instead", item),
+		Subject:  &itemRange,
+	})
+}
+
+// ParseAttributeFromTokens parses given tokens as a single attribute. If the
+// parsed body item is not an *Attribute, nil and an error diagnostic are returned.
+func ParseAttributeFromTokens(tokens Tokens) (*Attribute, hcl.Diagnostics) {
+	item, diags := ParseBodyItemFromTokens(tokens)
+	if item == nil {
+		return nil, diags
+	}
+
+	if attr, isAttr := item.(*Attribute); isAttr {
+		return attr, diags
+	}
+
+	// Any other kind of body item is reported as an error at its source range.
+	itemRange := item.Range()
+	return nil, append(diags, &hcl.Diagnostic{
+		Severity: hcl.DiagError,
+		Summary:  fmt.Sprintf("Unexpected definition (%T)", item),
+		Detail:   fmt.Sprintf("Expected an attribute, but found %T instead", item),
+		Subject:  &itemRange,
+	})
+}
+
 // ParseExpression parses the given buffer as a standalone HCL expression,
 // returning it as an instance of Expression.
 func ParseExpression(src []byte, filename string, start hcl.Pos) (Expression, hcl.Diagnostics) {
diff --git a/hclsyntax/public_test.go b/hclsyntax/public_test.go
index 62809985..613c5de6 100644
--- a/hclsyntax/public_test.go
+++ b/hclsyntax/public_test.go
@@ -2,6 +2,10 @@ package hclsyntax
 
 import (
 	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
+	"github.com/zclconf/go-cty/cty"
 )
 
 func TestValidIdentifier(t *testing.T) {
@@ -44,3 +48,137 @@ func TestValidIdentifier(t *testing.T) {
 		})
 	}
 }
+
+// TestParseBlockFromTokens_withoutNewline: a block not ending in a newline or EOF yields exactly one diagnostic.
+func TestParseBlockFromTokens_withoutNewline(t *testing.T) {
+	if _, diags := ParseBlockFromTokens(testBlockTokensWithoutNewline); len(diags) != 1 {
+		t.Fatalf("Expected exactly 1 diagnostic, %d given", len(diags))
+	}
+}
+
+// TestParseBlockFromTokens_block checks that a newline-terminated block parses
+// without diagnostics into the expected *Block (source ranges are ignored).
+func TestParseBlockFromTokens_block(t *testing.T) {
+	got, diags := ParseBlockFromTokens(testBlockTokensWithNewline)
+	if len(diags) != 0 {
+		t.Fatal(diags)
+	}
+
+	wantAttr := &Attribute{
+		Name: "attr",
+		Expr: &LiteralValueExpr{Val: cty.NumberIntVal(42)},
+	}
+	want := &Block{
+		Type:   "blocktype",
+		Labels: []string{"onelabel"},
+		Body: &Body{
+			Attributes: Attributes{"attr": wantAttr},
+			Blocks:     Blocks{},
+		},
+	}
+
+	cmpOpts := append(cmp.Options{
+		cmpopts.IgnoreUnexported(Body{}),
+		cmpopts.IgnoreUnexported(cty.Value{}),
+	}, optsIgnoreRanges...)
+	if diff := cmp.Diff(want, got, cmpOpts); diff != "" {
+		t.Fatalf("Blocks don't match:\n%s", diff)
+	}
+}
+
+// TestParseBlockFromTokens_invalid: tokens that do not start with an identifier yield exactly one diagnostic.
+func TestParseBlockFromTokens_invalid(t *testing.T) {
+	if _, diags := ParseBlockFromTokens(invalidTokens); len(diags) != 1 {
+		t.Fatalf("Expected exactly 1 diagnostic, %d given", len(diags))
+	}
+}
+
+// TestParseBlockFromTokens_attr: attribute tokens are not a block, so exactly one diagnostic is expected.
+func TestParseBlockFromTokens_attr(t *testing.T) {
+	if _, diags := ParseBlockFromTokens(testAttributeTokensValid); len(diags) != 1 {
+		t.Fatalf("Expected exactly 1 diagnostic, given:\n%#v", diags)
+	}
+}
+
+// TestParseAttributeFromTokens_attr checks that valid attribute tokens parse
+// without diagnostics into the expected *Attribute.
+func TestParseAttributeFromTokens_attr(t *testing.T) {
+	attr, diags := ParseAttributeFromTokens(testAttributeTokensValid)
+	if len(diags) > 0 {
+		t.Fatal(diags)
+	}
+	expectedAttribute := &Attribute{
+		Name: "attr",
+		Expr: &LiteralValueExpr{Val: cty.NumberIntVal(79)},
+	}
+	opts := cmp.Options{
+		cmpopts.IgnoreFields(Token{}, "Range"),
+		cmpopts.IgnoreUnexported(Attribute{}),
+		cmpopts.IgnoreUnexported(cty.Value{}),
+	}
+	if diff := cmp.Diff(expectedAttribute, attr, opts); diff != "" {
+		t.Fatalf("Attributes don't match:\n%s", diff)
+	}
+}
+
+// TestParseAttributeFromTokens_invalid: tokens that do not start with an identifier yield exactly one diagnostic.
+func TestParseAttributeFromTokens_invalid(t *testing.T) {
+	if _, diags := ParseAttributeFromTokens(invalidTokens); len(diags) != 1 {
+		t.Fatalf("Expected exactly 1 diagnostic, %d given", len(diags))
+	}
+}
+
+// TestParseAttributeFromTokens_block: block tokens are not an attribute, so exactly one diagnostic is expected.
+func TestParseAttributeFromTokens_block(t *testing.T) {
+	if _, diags := ParseAttributeFromTokens(testBlockTokensWithNewline); len(diags) != 1 {
+		t.Fatalf("Expected exactly 1 diagnostic, given:\n%#v", diags)
+	}
+}
+
+var optsIgnoreRanges = []cmp.Option{ // go-cmp options that make comparisons skip the range fields named below
+	cmpopts.IgnoreFields(Token{}, "Range"),
+	cmpopts.IgnoreFields(Attribute{}, "SrcRange", "NameRange", "EqualsRange"),
+	cmpopts.IgnoreFields(Block{}, "TypeRange", "LabelRanges", "OpenBraceRange", "CloseBraceRange"),
+	cmpopts.IgnoreFields(LiteralValueExpr{}, "SrcRange"),
+	cmpopts.IgnoreFields(Body{}, "SrcRange", "EndRange"),
+}
+
+var testAttributeTokensValid = Tokens{ // tokens for: attr = 79, followed by a newline
+	{Type: TokenIdent, Bytes: []byte("attr")},
+	{Type: TokenEqual, Bytes: []byte("=")},
+	{Type: TokenNumberLit, Bytes: []byte("79")},
+	{Type: TokenNewline, Bytes: []byte("\n")},
+}
+
+var testBlockTokensWithNewline = Tokens{ // tokens for: blocktype "onelabel" { attr = 42 }, ending with a newline
+	{Type: TokenIdent, Bytes: []byte("blocktype")},
+	{Type: TokenOQuote, Bytes: []byte(`"`)},
+	{Type: TokenQuotedLit, Bytes: []byte("onelabel")},
+	{Type: TokenCQuote, Bytes: []byte(`"`)},
+	{Type: TokenOBrace, Bytes: []byte("{")},
+	{Type: TokenNewline, Bytes: []byte("\n")},
+	{Type: TokenIdent, Bytes: []byte("attr")},
+	{Type: TokenEqual, Bytes: []byte("=")},
+	{Type: TokenNumberLit, Bytes: []byte("42")},
+	{Type: TokenNewline, Bytes: []byte("\n")},
+	{Type: TokenCBrace, Bytes: []byte("}")},
+	{Type: TokenNewline, Bytes: []byte("\n")},
+}
+
+var testBlockTokensWithoutNewline = Tokens{ // tokens for: blocktype "onelabel" { attr = 42 }, with no token after the closing brace
+	{Type: TokenIdent, Bytes: []byte("blocktype")},
+	{Type: TokenOQuote, Bytes: []byte(`"`)},
+	{Type: TokenQuotedLit, Bytes: []byte("onelabel")},
+	{Type: TokenCQuote, Bytes: []byte(`"`)},
+	{Type: TokenOBrace, Bytes: []byte("{")},
+	{Type: TokenNewline, Bytes: []byte("\n")},
+	{Type: TokenIdent, Bytes: []byte("attr")},
+	{Type: TokenEqual, Bytes: []byte("=")},
+	{Type: TokenNumberLit, Bytes: []byte("42")},
+	{Type: TokenNewline, Bytes: []byte("\n")},
+	{Type: TokenCBrace, Bytes: []byte("}")},
+}
+
+var invalidTokens = Tokens{ // a lone newline token: contains no identifier
+	{Type: TokenNewline, Bytes: []byte("\n")},
+}