Skip to content

Commit

Permalink
Decouple parse & resolve steps (#148)
Browse files Browse the repository at this point in the history
This separates variable resolution (the resolver) from the parser.
It also adds an ast.Walk() function for generically walking a program's AST.
  • Loading branch information
xonixx authored Sep 14, 2022
1 parent 0aeb2c2 commit 0a84980
Show file tree
Hide file tree
Showing 17 changed files with 570 additions and 241 deletions.
103 changes: 99 additions & 4 deletions internal/ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,17 @@ import (
// Program is an entire AWK program.
type Program struct {
Begin []Stmts
Actions []Action
Actions []*Action
End []Stmts
Functions []Function
Scalars map[string]int
Arrays map[string]int
Functions []*Function
}

// ResolvedProgram is a parsed AWK program plus the additional data prepared
// by the resolve step that is needed for subsequent interpretation.
//
// It embeds Program, so all of Program's fields and methods are promoted.
type ResolvedProgram struct {
Program
// NOTE(review): presumably maps each global scalar name to its index -- confirm against the resolver.
Scalars map[string]int
// NOTE(review): presumably maps each global array name to its index -- confirm against the resolver.
Arrays map[string]int
}

// String returns an indented, pretty-printed version of the parsed
Expand Down Expand Up @@ -75,8 +81,54 @@ func (a *Action) String() string {
return strings.Join(patterns, ", ") + sep + stmtsStr
}

// Node is the interface satisfied by all AST elements. It exists so that
// the AST can be handled generically, for example by ast.Walk().
//
// The node method is unexported, so only types declared in this package
// can satisfy Node.
type Node interface {
node()
}

// All these types implement the Node interface. The node methods are empty
// markers: they carry no behavior and exist only to satisfy Node.

// Top-level program elements.
func (p *Program) node() {}
func (a *Action) node() {}
func (f *Function) node() {}

// Expressions.
func (e *FieldExpr) node() {}
func (e *NamedFieldExpr) node() {}
func (e *UnaryExpr) node() {}
func (e *BinaryExpr) node() {}
func (e *ArrayExpr) node() {}
func (e *InExpr) node() {}
func (e *CondExpr) node() {}
func (e *NumExpr) node() {}
func (e *StrExpr) node() {}
func (e *RegExpr) node() {}
func (e *VarExpr) node() {}
func (e *IndexExpr) node() {}
func (e *AssignExpr) node() {}
func (e *AugAssignExpr) node() {}
func (e *IncrExpr) node() {}
func (e *CallExpr) node() {}
func (e *UserCallExpr) node() {}
func (e *MultiExpr) node() {}
func (e *GetlineExpr) node() {}

// Statements.
func (s *PrintStmt) node() {}
func (s *PrintfStmt) node() {}
func (s *ExprStmt) node() {}
func (s *IfStmt) node() {}
func (s *ForStmt) node() {}
func (s *ForInStmt) node() {}
func (s *WhileStmt) node() {}
func (s *DoWhileStmt) node() {}
func (s *BreakStmt) node() {}
func (s *ContinueStmt) node() {}
func (s *NextStmt) node() {}
func (s *ExitStmt) node() {}
func (s *DeleteStmt) node() {}
func (s *ReturnStmt) node() {}
func (s *BlockStmt) node() {}

// Expr is the abstract syntax tree for any AWK expression.
//
// It embeds Node, so every expression can be passed to functions that
// accept a Node. The unexported expr method marks a type as an expression,
// and String returns the expression's textual form.
type Expr interface {
Node
expr()
String() string
}
Expand Down Expand Up @@ -154,6 +206,7 @@ type ArrayExpr struct {
Scope VarScope
Index int
Name string
Pos Position
}

func (e *ArrayExpr) String() string {
Expand Down Expand Up @@ -221,6 +274,9 @@ func (e *RegExpr) String() string {
return "/" + escaped + "/"
}

// resolvedLater is the placeholder that the VarRef, ArrayRef and UserCall
// constructors store in a node's scope and/or index fields, meaning the
// real value will be set during the resolve step.
const resolvedLater = -1

type VarScope int

const (
Expand All @@ -236,6 +292,7 @@ type VarExpr struct {
Scope VarScope
Index int
Name string
Pos Position
}

func (e *VarExpr) String() string {
Expand Down Expand Up @@ -315,6 +372,7 @@ type UserCallExpr struct {
Index int
Name string
Args []Expr
Pos Position
}

func (e *UserCallExpr) String() string {
Expand Down Expand Up @@ -375,6 +433,7 @@ func IsLValue(expr Expr) bool {

// Stmt is the abstract syntax tree for any AWK statement.
//
// It embeds Node, so every statement can be passed to functions that
// accept a Node. The unexported stmt method marks a type as a statement,
// and String returns the statement's textual form.
type Stmt interface {
Node
stmt()
String() string
}
Expand Down Expand Up @@ -585,6 +644,7 @@ type Function struct {
Params []string
Arrays []bool
Body Stmts
Pos Position
}

func (f *Function) String() string {
Expand All @@ -598,3 +658,38 @@ func trimParens(s string) string {
}
return s
}

// VarRef constructs a *VarExpr for the variable called name, referenced at
// source position pos. Scope and Index are set to the resolvedLater
// placeholder.
func VarRef(name string, pos Position) *VarExpr {
	return &VarExpr{
		Scope: resolvedLater,
		Index: resolvedLater,
		Name:  name,
		Pos:   pos,
	}
}

// ArrayRef constructs an *ArrayExpr for the array called name, referenced
// at source position pos. Scope and Index are set to the resolvedLater
// placeholder.
func ArrayRef(name string, pos Position) *ArrayExpr {
	return &ArrayExpr{
		Scope: resolvedLater,
		Index: resolvedLater,
		Name:  name,
		Pos:   pos,
	}
}

// UserCall constructs a *UserCallExpr for a call to the user-defined
// function name with the given args at source position pos. Index is set to
// the resolvedLater placeholder; the leading boolean field is left at its
// zero value (false).
func UserCall(name string, args []Expr, pos Position) *UserCallExpr {
	return &UserCallExpr{
		Index: resolvedLater,
		Name:  name,
		Args:  args,
		Pos:   pos,
	}
}

// PositionError represents an error bound to a specific position in the
// source. It satisfies the error interface through its Error method.
type PositionError struct {
// Source line/column position where the error occurred.
Position Position
// Error message (without the position prefix that Error adds).
Message string
}

// PosErrorf is like fmt.Errorf, but with an explicit position. The returned
// error is a *PositionError whose Message is format expanded with args.
func PosErrorf(pos Position, format string, args ...interface{}) error {
	return &PositionError{
		Position: pos,
		Message:  fmt.Sprintf(format, args...),
	}
}

// Error implements the error interface. The result has the form
// "parse error at LINE:COLUMN: MESSAGE".
func (e *PositionError) Error() string {
	pos := e.Position
	return fmt.Sprintf("parse error at %d:%d: %s", pos.Line, pos.Column, e.Message)
}
180 changes: 180 additions & 0 deletions internal/ast/walk.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
package ast

import "fmt"

// A Visitor's Visit method is invoked for each node encountered by Walk.
// If the result visitor w is not nil, Walk visits each of the children
// of node with the visitor w, followed by a call of w.Visit(nil).
// If the result is nil, Walk does not descend into the node's children.
type Visitor interface {
Visit(node Node) (w Visitor)
}

// WalkExprList calls Walk with visitor v on every expression in exprs, in
// slice order.
func WalkExprList(v Visitor, exprs []Expr) {
	for i := range exprs {
		Walk(v, exprs[i])
	}
}

// WalkStmtList calls Walk with visitor v on every statement in stmts, in
// slice order.
func WalkStmtList(v Visitor, stmts []Stmt) {
	for i := range stmts {
		Walk(v, stmts[i])
	}
}

// Walk traverses an AST in depth-first order. A nil node is ignored.
// Otherwise Walk calls v.Visit(node); if that returns a nil visitor, the
// node's children are skipped. If it returns a non-nil visitor w, Walk is
// invoked recursively with w for each child of node, followed by a call of
// w.Visit(nil).
//
// Walk panics if node has a type it does not know how to traverse.
func Walk(v Visitor, node Node) {
	if node == nil {
		return
	}
	w := v.Visit(node)
	if w == nil {
		return
	}

	// Descend into the children of node. Within each case the children are
	// visited in the order listed.
	switch n := node.(type) {

	// Leaf nodes have no children to descend into.
	case *ArrayExpr, *NumExpr, *StrExpr, *RegExpr, *VarExpr,
		*BreakStmt, *ContinueStmt, *NextStmt:

	// Expressions.
	case *FieldExpr:
		Walk(w, n.Index)

	case *NamedFieldExpr:
		Walk(w, n.Field)

	case *UnaryExpr:
		Walk(w, n.Value)

	case *BinaryExpr:
		Walk(w, n.Left)
		Walk(w, n.Right)

	case *InExpr:
		WalkExprList(w, n.Index)
		Walk(w, n.Array)

	case *CondExpr:
		Walk(w, n.Cond)
		Walk(w, n.True)
		Walk(w, n.False)

	case *IndexExpr:
		Walk(w, n.Array)
		WalkExprList(w, n.Index)

	case *AssignExpr:
		Walk(w, n.Left)
		Walk(w, n.Right)

	case *AugAssignExpr:
		Walk(w, n.Left)
		Walk(w, n.Right)

	case *IncrExpr:
		Walk(w, n.Expr)

	case *CallExpr:
		WalkExprList(w, n.Args)

	case *UserCallExpr:
		WalkExprList(w, n.Args)

	case *MultiExpr:
		WalkExprList(w, n.Exprs)

	case *GetlineExpr:
		Walk(w, n.Command)
		Walk(w, n.Target)
		Walk(w, n.File)

	// Statements.
	case *PrintStmt:
		WalkExprList(w, n.Args)
		Walk(w, n.Dest)

	case *PrintfStmt:
		WalkExprList(w, n.Args)
		Walk(w, n.Dest)

	case *ExprStmt:
		Walk(w, n.Expr)

	case *IfStmt:
		Walk(w, n.Cond)
		WalkStmtList(w, n.Body)
		WalkStmtList(w, n.Else)

	case *ForStmt:
		Walk(w, n.Pre)
		Walk(w, n.Cond)
		Walk(w, n.Post)
		WalkStmtList(w, n.Body)

	case *ForInStmt:
		Walk(w, n.Var)
		Walk(w, n.Array)
		WalkStmtList(w, n.Body)

	case *WhileStmt:
		Walk(w, n.Cond)
		WalkStmtList(w, n.Body)

	case *DoWhileStmt:
		WalkStmtList(w, n.Body)
		Walk(w, n.Cond)

	case *ExitStmt:
		Walk(w, n.Status)

	case *DeleteStmt:
		Walk(w, n.Array)
		WalkExprList(w, n.Index)

	case *ReturnStmt:
		Walk(w, n.Value)

	case *BlockStmt:
		WalkStmtList(w, n.Body)

	// Top-level program elements.
	case *Program:
		for _, begin := range n.Begin {
			WalkStmtList(w, begin)
		}
		for _, act := range n.Actions {
			Walk(w, act)
		}
		for _, fn := range n.Functions {
			Walk(w, fn)
		}
		for _, end := range n.End {
			WalkStmtList(w, end)
		}

	case *Action:
		WalkExprList(w, n.Pattern)
		WalkStmtList(w, n.Stmts)

	case *Function:
		WalkStmtList(w, n.Body)

	default:
		panic(fmt.Sprintf("ast.Walk: unexpected node type %T", n))
	}

	// Signal to the visitor that all children of node have been visited.
	w.Visit(nil)
}
2 changes: 1 addition & 1 deletion internal/compiler/compiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func (e *compileError) Error() string {
}

// Compile compiles an AST (parsed program) into virtual machine instructions.
func Compile(prog *ast.Program) (compiledProg *Program, err error) {
func Compile(prog *ast.ResolvedProgram) (compiledProg *Program, err error) {
defer func() {
// The compiler uses panic with a *compileError to signal compile
// errors internally, and they're caught here. This avoids the
Expand Down
Loading

0 comments on commit 0a84980

Please sign in to comment.