From 6d42e3d7cfa52b5b25fa35586040bec83a20dae7 Mon Sep 17 00:00:00 2001 From: xonix Date: Thu, 25 Aug 2022 17:35:01 +0300 Subject: [PATCH 01/57] decoupling parse/resolve --- decouple_parse_resolve.txt | 15 +++++++++++++++ internal/resolver/resolver.go | 20 ++++++++++++++++++++ parser/parser.go | 11 +++++++++++ 3 files changed, 46 insertions(+) create mode 100644 decouple_parse_resolve.txt create mode 100644 internal/resolver/resolver.go diff --git a/decouple_parse_resolve.txt b/decouple_parse_resolve.txt new file mode 100644 index 00000000..b7caae50 --- /dev/null +++ b/decouple_parse_resolve.txt @@ -0,0 +1,15 @@ + + +struct parser + +parser.ParseProgram() -> Program + +struct Program + +Program.state: parsed/resolved/compiled + +Program.Resolve() + +Program.Compile() + +----- \ No newline at end of file diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go new file mode 100644 index 00000000..315d2b8c --- /dev/null +++ b/internal/resolver/resolver.go @@ -0,0 +1,20 @@ +package resolver + +import "github.com/benhoyt/goawk/internal/ast" + +type resolver struct { +} + +type ResolveResult struct { +} + +type Program struct { + Begin []ast.Stmts + Actions []ast.Action + End []ast.Stmts + Functions []ast.Function +} + +func Resolve(prog *Program) (resolveResult *ResolveResult, err error) { + +} diff --git a/parser/parser.go b/parser/parser.go index 491b3462..56df6fa1 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -6,6 +6,7 @@ package parser import ( "fmt" + "github.com/benhoyt/goawk/internal/resolver" "io" "regexp" "strconv" @@ -116,6 +117,16 @@ func (p *Program) toAST() *ast.Program { } } +// toResolverProgram converts the *Program to an *resolver.Program. +func (p *Program) toResolverProgram() *resolver.Program { + return &resolver.Program{ + Begin: p.Begin, + Actions: p.Actions, + End: p.End, + Functions: p.Functions, + } +} + // Parser state type parser struct { // Lexer instance and current token values From 62c0e917b87179dc83331904638f2df3144e6dc7 Mon Sep 17 00:00:00 2001 From: xonix Date: Thu, 25 Aug 2022 20:41:05 +0300 Subject: [PATCH 02/57] decoupling parse/resolve --- {parser => internal/resolver}/resolve.go | 201 +++++++++--------- internal/resolver/resolver.go | 25 ++- {parser => internal/resolver}/toposort.go | 2 +- .../resolver}/toposort_test.go | 2 +- parser/parser.go | 16 +- 5 files changed, 134 insertions(+), 112 deletions(-) rename {parser => internal/resolver}/resolve.go (64%) rename {parser => internal/resolver}/toposort.go (98%) rename {parser => internal/resolver}/toposort_test.go (99%) diff --git a/parser/resolve.go b/internal/resolver/resolve.go similarity index 64% rename from parser/resolve.go rename to internal/resolver/resolve.go index a2ed08d0..b300fa09 100644 --- a/parser/resolve.go +++ b/internal/resolver/resolve.go @@ -1,6 +1,7 @@ // Resolve function calls and variable types +package resolver -package parser +// TODO put all into resolver.go import ( "fmt" @@ -71,30 +72,30 @@ type arrayRef struct { } // Initialize the resolver -func (p *parser) initResolve() { - p.varTypes = make(map[string]map[string]typeInfo) - p.varTypes[""] = make(map[string]typeInfo) // globals - p.functions = make(map[string]int) - p.arrayRef("ARGV", Position{1, 1}) // interpreter relies on ARGV being present - p.arrayRef("ENVIRON", Position{1, 1}) // and other built-in arrays - p.arrayRef("FIELDS", Position{1, 1}) - p.multiExprs = make(map[*ast.MultiExpr]Position, 3) +func (r *resolver) initResolve() { + r.varTypes = make(map[string]map[string]typeInfo) + r.varTypes[""] = make(map[string]typeInfo) // globals + r.functions = make(map[string]int) + r.arrayRef("ARGV", Position{1, 1}) // interpreter relies on ARGV being present + r.arrayRef("ENVIRON", Position{1, 1}) // and other built-in arrays + r.arrayRef("FIELDS", Position{1, 1}) + r.multiExprs = make(map[*ast.MultiExpr]Position, 3) } // Signal the start of a function -func (p *parser) startFunction(name string, params []string) { - p.funcName = name - p.varTypes[name] = make(map[string]typeInfo) +func (r *resolver) startFunction(name string, params []string) { + r.funcName = name + r.varTypes[name] = make(map[string]typeInfo) } // Signal the end of a function -func (p *parser) stopFunction() { - p.funcName = "" +func (r *resolver) stopFunction() { + r.funcName = "" } // Add function by name with given index -func (p *parser) addFunction(name string, index int) { - p.functions[name] = index +func (r *resolver) addFunction(name string, index int) { + r.functions[name] = index } // Records a call to a user function (for resolving indexes later) @@ -105,17 +106,17 @@ type userCall struct { } // Record a user call site -func (p *parser) recordUserCall(call *ast.UserCallExpr, pos Position) { - p.userCalls = append(p.userCalls, userCall{call, pos, p.funcName}) +func (r *resolver) recordUserCall(call *ast.UserCallExpr, pos Position) { + r.userCalls = append(r.userCalls, userCall{call, pos, r.funcName}) } // After parsing, resolve all user calls to their indexes. Also // ensures functions called have actually been defined, and that // they're not being called with too many arguments. -func (p *parser) resolveUserCalls(prog *Program) { +func (r *resolver) resolveUserCalls(prog *Program) { // Number the native funcs (order by name to get consistent order) - nativeNames := make([]string, 0, len(p.nativeFuncs)) - for name := range p.nativeFuncs { + nativeNames := make([]string, 0, len(r.nativeFuncs)) + for name := range r.nativeFuncs { nativeNames = append(nativeNames, name) } sort.Strings(nativeNames) @@ -124,17 +125,17 @@ func (p *parser) resolveUserCalls(prog *Program) { nativeIndexes[name] = i } - for _, c := range p.userCalls { + for _, c := range r.userCalls { // AWK-defined functions take precedence over native Go funcs - index, ok := p.functions[c.call.Name] + index, ok := r.functions[c.call.Name] if !ok { - f, haveNative := p.nativeFuncs[c.call.Name] + f, haveNative := r.nativeFuncs[c.call.Name] if !haveNative { - panic(p.posErrorf(c.pos, "undefined function %q", c.call.Name)) + panic(r.posErrorf(c.pos, "undefined function %q", c.call.Name)) } typ := reflect.TypeOf(f) if !typ.IsVariadic() && len(c.call.Args) > typ.NumIn() { - panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) + panic(r.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) } c.call.Native = true c.call.Index = nativeIndexes[c.call.Name] @@ -142,7 +143,7 @@ func (p *parser) resolveUserCalls(prog *Program) { } function := prog.Functions[index] if len(c.call.Args) > len(function.Params) { - panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) + panic(r.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) } c.call.Index = index } @@ -150,27 +151,27 @@ func (p *parser) resolveUserCalls(prog *Program) { // For arguments that are variable references, we don't know the // type based on context, so mark the types for these as unknown. -func (p *parser) processUserCallArg(funcName string, arg ast.Expr, index int) { +func (r *resolver) processUserCallArg(funcName string, arg ast.Expr, index int) { if varExpr, ok := arg.(*ast.VarExpr); ok { - scope, varFuncName := p.getScope(varExpr.Name) - ref := p.varTypes[varFuncName][varExpr.Name].ref + scope, varFuncName := r.getScope(varExpr.Name) + ref := r.varTypes[varFuncName][varExpr.Name].ref if ref == varExpr { // Only applies if this is the first reference to this // variable (otherwise we know the type already) - p.varTypes[varFuncName][varExpr.Name] = typeInfo{typeUnknown, ref, scope, 0, funcName, index} + r.varTypes[varFuncName][varExpr.Name] = typeInfo{typeUnknown, ref, scope, 0, funcName, index} } // Mark the last related varRef (the most recent one) as a // call argument for later error handling - p.varRefs[len(p.varRefs)-1].isArg = true + r.varRefs[len(r.varRefs)-1].isArg = true } } // Determine scope of given variable reference (and funcName if it's // a local, otherwise empty string) -func (p *parser) getScope(name string) (ast.VarScope, string) { +func (r *resolver) getScope(name string) (ast.VarScope, string) { switch { - case p.locals[name]: - return ast.ScopeLocal, p.funcName + case r.locals[name]: + return ast.ScopeLocal, r.funcName case ast.SpecialVarIndex(name) > 0: return ast.ScopeSpecial, "" default: @@ -180,69 +181,69 @@ func (p *parser) getScope(name string) (ast.VarScope, string) { // Record a variable (scalar) reference and return the *VarExpr (but // VarExpr.Index won't be set till later) -func (p *parser) varRef(name string, pos Position) *ast.VarExpr { - scope, funcName := p.getScope(name) +func (r *resolver) varRef(name string, pos Position) *ast.VarExpr { + scope, funcName := r.getScope(name) expr := &ast.VarExpr{scope, 0, name} - p.varRefs = append(p.varRefs, varRef{funcName, expr, false, pos}) - info := p.varTypes[funcName][name] + r.varRefs = append(r.varRefs, varRef{funcName, expr, false, pos}) + info := r.varTypes[funcName][name] if info.typ == typeUnknown { - p.varTypes[funcName][name] = typeInfo{typeScalar, expr, scope, 0, info.callName, 0} + r.varTypes[funcName][name] = typeInfo{typeScalar, expr, scope, 0, info.callName, 0} } return expr } // Record an array reference and return the *ArrayExpr (but // ArrayExpr.Index won't be set till later) -func (p *parser) arrayRef(name string, pos Position) *ast.ArrayExpr { - scope, funcName := p.getScope(name) +func (r *resolver) arrayRef(name string, pos Position) *ast.ArrayExpr { + scope, funcName := r.getScope(name) if scope == ast.ScopeSpecial { - panic(p.errorf("can't use scalar %q as array", name)) + panic(r.errorf("can't use scalar %q as array", name)) } expr := &ast.ArrayExpr{scope, 0, name} - p.arrayRefs = append(p.arrayRefs, arrayRef{funcName, expr, pos}) - info := p.varTypes[funcName][name] + r.arrayRefs = append(r.arrayRefs, arrayRef{funcName, expr, pos}) + info := r.varTypes[funcName][name] if info.typ == typeUnknown { - p.varTypes[funcName][name] = typeInfo{typeArray, nil, scope, 0, info.callName, 0} + r.varTypes[funcName][name] = typeInfo{typeArray, nil, scope, 0, info.callName, 0} } return expr } // Print variable type information (for debugging) on p.debugWriter -func (p *parser) printVarTypes(prog *Program) { - fmt.Fprintf(p.debugWriter, "scalars: %v\n", prog.Scalars) - fmt.Fprintf(p.debugWriter, "arrays: %v\n", prog.Arrays) +func (r *resolver) printVarTypes(prog *Program) { + fmt.Fprintf(r.debugWriter, "scalars: %v\n", prog.Scalars) + fmt.Fprintf(r.debugWriter, "arrays: %v\n", prog.Arrays) funcNames := []string{} - for funcName := range p.varTypes { + for funcName := range r.varTypes { funcNames = append(funcNames, funcName) } sort.Strings(funcNames) for _, funcName := range funcNames { if funcName != "" { - fmt.Fprintf(p.debugWriter, "function %s\n", funcName) + fmt.Fprintf(r.debugWriter, "function %s\n", funcName) } else { - fmt.Fprintf(p.debugWriter, "globals\n") + fmt.Fprintf(r.debugWriter, "globals\n") } varNames := []string{} - for name := range p.varTypes[funcName] { + for name := range r.varTypes[funcName] { varNames = append(varNames, name) } sort.Strings(varNames) for _, name := range varNames { - info := p.varTypes[funcName][name] - fmt.Fprintf(p.debugWriter, " %s: %s\n", name, info) + info := r.varTypes[funcName][name] + fmt.Fprintf(r.debugWriter, " %s: %s\n", name, info) } } } // Resolve unknown variables types and generate variable indexes and // name-to-index mappings for interpreter -func (p *parser) resolveVars(prog *Program) { +func (r *resolver) resolveVars(prog *Program) { // First go through all unknown types and try to determine the // type from the parameter type in that function definition. // Iterate through functions in topological order, for example // if f() calls g(), process g first, then f. callGraph := make(map[string]map[string]struct{}) - for _, call := range p.userCalls { + for _, call := range r.userCalls { if _, ok := callGraph[call.inFunc]; !ok { callGraph[call.inFunc] = make(map[string]struct{}) } @@ -250,13 +251,13 @@ func (p *parser) resolveVars(prog *Program) { } sortedFuncs := topoSort(callGraph) for _, funcName := range sortedFuncs { - infos := p.varTypes[funcName] + infos := r.varTypes[funcName] for name, info := range infos { if info.scope == ast.ScopeSpecial || info.typ != typeUnknown { // It's a special var or type is already known continue } - funcIndex, ok := p.functions[info.callName] + funcIndex, ok := r.functions[info.callName] if !ok { // Function being called is a native function continue @@ -264,14 +265,14 @@ func (p *parser) resolveVars(prog *Program) { // Determine var type based on type of this parameter // in the called function (if we know that) paramName := prog.Functions[funcIndex].Params[info.argIndex] - typ := p.varTypes[info.callName][paramName].typ + typ := r.varTypes[info.callName][paramName].typ if typ != typeUnknown { - if p.debugTypes { - fmt.Fprintf(p.debugWriter, "resolving %s:%s to %s\n", + if r.debugTypes { + fmt.Fprintf(r.debugWriter, "resolving %s:%s to %s\n", funcName, name, typ) } info.typ = typ - p.varTypes[funcName][name] = info + r.varTypes[funcName][name] = info } } } @@ -280,11 +281,11 @@ func (p *parser) resolveVars(prog *Program) { // assign indexes basically randomly) prog.Scalars = make(map[string]int) prog.Arrays = make(map[string]int) - for name, info := range p.varTypes[""] { - _, isFunc := p.functions[name] + for name, info := range r.varTypes[""] { + _, isFunc := r.functions[name] if isFunc { // Global var can't also be the name of a function - panic(p.errorf("global var %q can't also be a function", name)) + panic(r.errorf("global var %q can't also be a function", name)) } var index int if info.scope == ast.ScopeSpecial { @@ -297,7 +298,7 @@ func (p *parser) resolveVars(prog *Program) { prog.Scalars[name] = index } info.index = index - p.varTypes[""][name] = info + r.varTypes[""][name] = info } // Fill in unknown parameter types that are being called with arrays, @@ -305,7 +306,7 @@ func (p *parser) resolveVars(prog *Program) { // // BEGIN { arr[0]; f(arr) } // function f(a) { } - for _, c := range p.userCalls { + for _, c := range r.userCalls { if c.call.Native { continue } @@ -315,12 +316,12 @@ func (p *parser) resolveVars(prog *Program) { if !ok { continue } - funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc) - argType := p.varTypes[funcName][varExpr.Name] - paramType := p.varTypes[function.Name][function.Params[i]] + funcName := r.getVarFuncName(prog, varExpr.Name, c.inFunc) + argType := r.varTypes[funcName][varExpr.Name] + paramType := r.varTypes[function.Name][function.Params[i]] if argType.typ == typeArray && paramType.typ == typeUnknown { paramType.typ = argType.typ - p.varTypes[function.Name][function.Params[i]] = paramType + r.varTypes[function.Name][function.Params[i]] = paramType } } } @@ -328,13 +329,13 @@ func (p *parser) resolveVars(prog *Program) { // Resolve local variables (assign indexes in order of params). // Also patch up Function.Arrays (tells interpreter which args // are arrays). - for funcName, infos := range p.varTypes { + for funcName, infos := range r.varTypes { if funcName == "" { continue } scalarIndex := 0 arrayIndex := 0 - functionIndex := p.functions[funcName] + functionIndex := r.functions[funcName] function := prog.Functions[functionIndex] arrays := make([]bool, len(function.Params)) for i, name := range function.Params { @@ -352,13 +353,13 @@ func (p *parser) resolveVars(prog *Program) { scalarIndex++ } info.index = index - p.varTypes[funcName][name] = info + r.varTypes[funcName][name] = info } prog.Functions[functionIndex].Arrays = arrays } // Check that variables passed to functions are the correct type - for _, c := range p.userCalls { + for _, c := range r.userCalls { // Check native function calls if c.call.Native { for _, arg := range c.call.Args { @@ -367,10 +368,10 @@ func (p *parser) resolveVars(prog *Program) { // Non-variable expression, must be scalar continue } - funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc) - info := p.varTypes[funcName][varExpr.Name] + funcName := r.getVarFuncName(prog, varExpr.Name, c.inFunc) + info := r.varTypes[funcName][varExpr.Name] if info.typ == typeArray { - panic(p.posErrorf(c.pos, "can't pass array %q to native function", varExpr.Name)) + panic(r.posErrorf(c.pos, "can't pass array %q to native function", varExpr.Name)) } } continue @@ -382,38 +383,38 @@ func (p *parser) resolveVars(prog *Program) { varExpr, ok := arg.(*ast.VarExpr) if !ok { if function.Arrays[i] { - panic(p.posErrorf(c.pos, "can't pass scalar %s as array param", arg)) + panic(r.posErrorf(c.pos, "can't pass scalar %s as array param", arg)) } continue } - funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc) - info := p.varTypes[funcName][varExpr.Name] + funcName := r.getVarFuncName(prog, varExpr.Name, c.inFunc) + info := r.varTypes[funcName][varExpr.Name] if info.typ == typeArray && !function.Arrays[i] { - panic(p.posErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name)) + panic(r.posErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name)) } if info.typ != typeArray && function.Arrays[i] { - panic(p.posErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name)) + panic(r.posErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name)) } } } - if p.debugTypes { - p.printVarTypes(prog) + if r.debugTypes { + r.printVarTypes(prog) } // Patch up variable indexes (interpreter uses an index instead // of name for more efficient lookups) - for _, varRef := range p.varRefs { - info := p.varTypes[varRef.funcName][varRef.ref.Name] + for _, varRef := range r.varRefs { + info := r.varTypes[varRef.funcName][varRef.ref.Name] if info.typ == typeArray && !varRef.isArg { - panic(p.posErrorf(varRef.pos, "can't use array %q as scalar", varRef.ref.Name)) + panic(r.posErrorf(varRef.pos, "can't use array %q as scalar", varRef.ref.Name)) } varRef.ref.Index = info.index } - for _, arrayRef := range p.arrayRefs { - info := p.varTypes[arrayRef.funcName][arrayRef.ref.Name] + for _, arrayRef := range r.arrayRefs { + info := r.varTypes[arrayRef.funcName][arrayRef.ref.Name] if info.typ == typeScalar { - panic(p.posErrorf(arrayRef.pos, "can't use scalar %q as array", arrayRef.ref.Name)) + panic(r.posErrorf(arrayRef.pos, "can't use scalar %q as array", arrayRef.ref.Name)) } arrayRef.ref.Index = info.index } @@ -435,28 +436,28 @@ func (p *parser) getVarFuncName(prog *Program, name, inFunc string) string { // Record a "multi expression" (comma-separated pseudo-expression // used to allow commas around print/printf arguments). -func (p *parser) multiExpr(exprs []ast.Expr, pos Position) ast.Expr { +func (r *resolver) multiExpr(exprs []ast.Expr, pos Position) ast.Expr { expr := &ast.MultiExpr{exprs} - p.multiExprs[expr] = pos + r.multiExprs[expr] = pos return expr } // Mark the multi expression as used (by a print/printf statement). -func (p *parser) useMultiExpr(expr *ast.MultiExpr) { - delete(p.multiExprs, expr) +func (r *resolver) useMultiExpr(expr *ast.MultiExpr) { + delete(r.multiExprs, expr) } // Check that there are no unused multi expressions (syntax error). -func (p *parser) checkMultiExprs() { - if len(p.multiExprs) == 0 { +func (r *resolver) checkMultiExprs() { + if len(r.multiExprs) == 0 { return } // Show error on first comma-separated expression min := Position{1000000000, 1000000000} - for _, pos := range p.multiExprs { + for _, pos := range r.multiExprs { if pos.Line < min.Line || pos.Line == min.Line && pos.Column < min.Column { min = pos } } - panic(p.posErrorf(min, "unexpected comma-separated expression")) + panic(r.posErrorf(min, "unexpected comma-separated expression")) } diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 315d2b8c..88cddbee 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -1,8 +1,25 @@ package resolver -import "github.com/benhoyt/goawk/internal/ast" +import ( + "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/lexer" +) type resolver struct { + // Parsing state + // TODO this reflects the var in parser - is this needed? + funcName string // function name if parsing a func, else "" + + // Variable tracking and resolving + locals map[string]bool // current function's locals (for determining scope) + varTypes map[string]map[string]typeInfo // map of func name to var name to type + varRefs []varRef // all variable references (usually scalars) + arrayRefs []arrayRef // all array references + multiExprs map[*ast.MultiExpr]lexer.Position // tracks comma-separated expressions + + // Function tracking + functions map[string]int // map of function name to index + userCalls []userCall // record calls so we can resolve them later } type ResolveResult struct { @@ -15,6 +32,10 @@ type Program struct { Functions []ast.Function } -func Resolve(prog *Program) (resolveResult *ResolveResult, err error) { +type ResolverConfig struct { + NativeFuncs map[string]interface{} +} + +func Resolve(prog *Program, config *ResolverConfig) (resolveResult *ResolveResult, err error) { } diff --git a/parser/toposort.go b/internal/resolver/toposort.go similarity index 98% rename from parser/toposort.go rename to internal/resolver/toposort.go index 90b71fa4..745b0126 100644 --- a/parser/toposort.go +++ b/internal/resolver/toposort.go @@ -1,6 +1,6 @@ // Topological sorting -package parser +package resolver /* This algorithm is taken from: diff --git a/parser/toposort_test.go b/internal/resolver/toposort_test.go similarity index 99% rename from parser/toposort_test.go rename to internal/resolver/toposort_test.go index d8d4c4c8..cb161aa6 100644 --- a/parser/toposort_test.go +++ b/internal/resolver/toposort_test.go @@ -1,4 +1,4 @@ -package parser +package resolver import ( "strconv" diff --git a/parser/parser.go b/parser/parser.go index 56df6fa1..188b0c49 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -142,16 +142,16 @@ type parser struct { loopDepth int // current loop depth (0 if not in any loops) // Variable tracking and resolving - locals map[string]bool // current function's locals (for determining scope) - varTypes map[string]map[string]typeInfo // map of func name to var name to type - varRefs []varRef // all variable references (usually scalars) - arrayRefs []arrayRef // all array references - multiExprs map[*ast.MultiExpr]Position // tracks comma-separated expressions + //locals map[string]bool // current function's locals (for determining scope) + //varTypes map[string]map[string]typeInfo // map of func name to var name to type + //varRefs []varRef // all variable references (usually scalars) + //arrayRefs []arrayRef // all array references + //multiExprs map[*ast.MultiExpr]Position // tracks comma-separated expressions // Function tracking - functions map[string]int // map of function name to index - userCalls []userCall // record calls so we can resolve them later - nativeFuncs map[string]interface{} + //functions map[string]int // map of function name to index + //userCalls []userCall // record calls so we can resolve them later + //nativeFuncs map[string]interface{} // Configuration and debugging debugTypes bool // show variable types for debugging From 6f97aeafc376201d37ac3bd557885ae2d6e63467 Mon Sep 17 00:00:00 2001 From: xonix Date: Thu, 25 Aug 2022 20:56:05 +0300 Subject: [PATCH 03/57] decoupling parse/resolve --- internal/ast/ast.go | 2 ++ parser/parser.go | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index 8232765a..2d4187a3 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -227,6 +227,8 @@ const ( ScopeSpecial VarScope = iota ScopeGlobal ScopeLocal + + ScopeUnresolved VarScope = -1 ) // VarExpr is a variable reference (special var, global, or local). diff --git a/parser/parser.go b/parser/parser.go index 188b0c49..b692a5cc 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1057,3 +1057,11 @@ func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr { p.recordUserCall(call, pos) return call } + +func (p *parser) varRef(name string, pos Position) *ast.VarExpr { + return &ast.VarExpr{ast.ScopeUnresolved, 0, name} +} + +func (p *parser) arrayRef(name string, pos Position) *ast.ArrayExpr { + return &ast.ArrayExpr{ast.ScopeUnresolved, 0, name} +} From a2bb3025b51369d787b331644209f0cc8ca10136 Mon Sep 17 00:00:00 2001 From: xonix Date: Thu, 25 Aug 2022 21:07:48 +0300 Subject: [PATCH 04/57] decoupling parse/resolve --- internal/resolver/resolve.go | 3 ++- internal/resolver/resolver.go | 8 +++++--- parser/parser.go | 10 ++++++++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index b300fa09..6c2a84f3 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -72,7 +72,8 @@ type arrayRef struct { } // Initialize the resolver -func (r *resolver) initResolve() { +func (r *resolver) initResolve(config *ResolverConfig) { + r.nativeFuncs = config.NativeFuncs r.varTypes = make(map[string]map[string]typeInfo) r.varTypes[""] = make(map[string]typeInfo) // globals r.functions = make(map[string]int) diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 88cddbee..4e75ae78 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -18,8 +18,9 @@ type resolver struct { multiExprs map[*ast.MultiExpr]lexer.Position // tracks comma-separated expressions // Function tracking - functions map[string]int // map of function name to index - userCalls []userCall // record calls so we can resolve them later + functions map[string]int // map of function name to index + userCalls []userCall // record calls so we can resolve them later + nativeFuncs map[string]interface{} } type ResolveResult struct { @@ -37,5 +38,6 @@ type ResolverConfig struct { } func Resolve(prog *Program, config *ResolverConfig) (resolveResult *ResolveResult, err error) { - + r := &resolver{} + r.initResolve(config) } diff --git a/parser/parser.go b/parser/parser.go index b692a5cc..ea754fc2 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -62,17 +62,23 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { }() lexer := NewLexer(src) p := parser{lexer: lexer} + resolverConfig := &resolver.ResolverConfig{} if config != nil { p.debugTypes = config.DebugTypes p.debugWriter = config.DebugWriter - p.nativeFuncs = config.Funcs + resolverConfig.NativeFuncs = config.Funcs } - p.initResolve() + //p.initResolve() p.next() // initialize p.tok // Parse into abstract syntax tree prog = p.program() + result, err := resolver.Resolve(prog.toResolverProgram(), resolverConfig) + if err != nil { + return nil, err + } + // Compile to virtual machine code prog.Compiled, err = compiler.Compile(prog.toAST()) return prog, err From 4a93430dc811930eff865248baad07772658f4d8 Mon Sep 17 00:00:00 2001 From: xonix Date: Thu, 25 Aug 2022 21:51:49 +0300 Subject: [PATCH 05/57] decoupling parse/resolve --- internal/resolver/resolve.go | 4 ++-- internal/resolver/resolver.go | 7 +++++++ parser/parser.go | 6 +++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index 6c2a84f3..ac336f46 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -423,11 +423,11 @@ func (r *resolver) resolveVars(prog *Program) { // If name refers to a local (in function inFunc), return that // function's name, otherwise return "" (meaning global). -func (p *parser) getVarFuncName(prog *Program, name, inFunc string) string { +func (r *resolver) getVarFuncName(prog *Program, name, inFunc string) string { if inFunc == "" { return "" } - for _, param := range prog.Functions[p.functions[inFunc]].Params { + for _, param := range prog.Functions[r.functions[inFunc]].Params { if name == param { return inFunc } diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 4e75ae78..d479efeb 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -40,4 +40,11 @@ type ResolverConfig struct { func Resolve(prog *Program, config *ResolverConfig) (resolveResult *ResolveResult, err error) { r := &resolver{} r.initResolve(config) + + // TODO resolution step to iterate over AST + + r.resolveUserCalls(prog) + r.resolveVars(prog) + r.checkMultiExprs() + } diff --git a/parser/parser.go b/parser/parser.go index ea754fc2..73513aac 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -202,9 +202,9 @@ func (p *parser) program() *Program { p.optionalNewlines() } - p.resolveUserCalls(prog) - p.resolveVars(prog) - p.checkMultiExprs() + //p.resolveUserCalls(prog) + //p.resolveVars(prog) + //p.checkMultiExprs() return prog } From f4b7a25e80d5ea95927b8f25c265db0bbf9cab60 Mon Sep 17 00:00:00 2001 From: xonix Date: Thu, 25 Aug 2022 22:17:39 +0300 Subject: [PATCH 06/57] decoupling parse/resolve : functions --- internal/resolver/resolve.go | 2 +- internal/resolver/resolver.go | 20 ++++++++++++++++++++ parser/parser.go | 26 +++++++++++++------------- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index ac336f46..ac7a2530 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -84,7 +84,7 @@ func (r *resolver) initResolve(config *ResolverConfig) { } // Signal the start of a function -func (r *resolver) startFunction(name string, params []string) { +func (r *resolver) startFunction(name string) { r.funcName = name r.varTypes[name] = make(map[string]typeInfo) } diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index d479efeb..d492034f 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -42,6 +42,26 @@ func Resolve(prog *Program, config *ResolverConfig) (resolveResult *ResolveResul r.initResolve(config) // TODO resolution step to iterate over AST + // 1. process functions + for i, function := range prog.Functions { + name := function.Name + r.addFunction(name, i) + if _, ok := r.functions[name]; ok { + panic(r.errorf("function %q already defined", name)) + } + r.locals = make(map[string]bool, 7) + for _, param := range function.Params { + if r.locals[param] { + panic(r.errorf("duplicate parameter name %q", param)) + } + r.locals[param] = true + + } + r.startFunction(name) + // TODO process body + r.stopFunction() + r.locals = nil + } r.resolveUserCalls(prog) r.resolveVars(prog) diff --git a/parser/parser.go b/parser/parser.go index 73513aac..1f80074f 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -148,7 +148,7 @@ type parser struct { loopDepth int // current loop depth (0 if not in any loops) // Variable tracking and resolving - //locals map[string]bool // current function's locals (for determining scope) + locals map[string]bool // current function's locals (for determining scope) //varTypes map[string]map[string]typeInfo // map of func name to var name to type //varRefs []varRef // all variable references (usually scalars) //arrayRefs []arrayRef // all array references @@ -178,7 +178,7 @@ func (p *parser) program() *Program { prog.End = append(prog.End, p.stmtsBrace()) case FUNCTION: function := p.function() - p.addFunction(function.Name, len(prog.Functions)) + //p.addFunction(function.Name, len(prog.Functions)) prog.Functions = append(prog.Functions, function) default: p.inAction = true @@ -454,14 +454,14 @@ func (p *parser) function() ast.Function { } p.next() name := p.val - if _, ok := p.functions[name]; ok { - panic(p.errorf("function %q already defined", name)) - } + //if _, ok := p.functions[name]; ok { + // panic(p.errorf("function %q already defined", name)) + //} p.expect(NAME) p.expect(LPAREN) first := true params := make([]string, 0, 7) // pre-allocate some to reduce allocations - p.locals = make(map[string]bool, 7) + //p.locals = make(map[string]bool, 7) for p.tok != RPAREN { if !first { p.commaNewlines() @@ -471,21 +471,21 @@ func (p *parser) function() ast.Function { if param == name { panic(p.errorf("can't use function name as parameter name")) } - if p.locals[param] { - panic(p.errorf("duplicate parameter name %q", param)) - } + //if p.locals[param] { + // panic(p.errorf("duplicate parameter name %q", param)) + //} p.expect(NAME) params = append(params, param) - p.locals[param] = true + //p.locals[param] = true } p.expect(RPAREN) p.optionalNewlines() // Parse the body - p.startFunction(name, params) + //p.startFunction(name, params) body := p.stmtsBrace() - p.stopFunction() - p.locals = nil + //p.stopFunction() + //p.locals = nil return ast.Function{name, params, nil, body} } From fac460d8ad6b089ba438df71b33ab6acd7961bc1 Mon Sep 17 00:00:00 2001 From: xonix Date: Thu, 25 Aug 2022 23:06:37 +0300 Subject: [PATCH 07/57] decoupling parse/resolve : visitor approach from golang --- internal/ast/ast.go | 8 +++ internal/ast/walk.go | 160 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 internal/ast/walk.go diff --git a/internal/ast/ast.go b/internal/ast/ast.go index 2d4187a3..cebb0943 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -75,8 +75,15 @@ func (a *Action) String() string { return strings.Join(patterns, ", ") + sep + stmtsStr } +type Node interface { + // TODO positions below + //Pos() token.Pos // position of first character belonging to the node + //End() token.Pos // position of first character immediately after the node +} + // Expr is the abstract syntax tree for any AWK expression. type Expr interface { + Node expr() String() string } @@ -377,6 +384,7 @@ func IsLValue(expr Expr) bool { // Stmt is the abstract syntax tree for any AWK statement. type Stmt interface { + Node stmt() String() string } diff --git a/internal/ast/walk.go b/internal/ast/walk.go new file mode 100644 index 00000000..9bd2ac16 --- /dev/null +++ b/internal/ast/walk.go @@ -0,0 +1,160 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ast + +import "fmt" + +// A Visitor's Visit method is invoked for each node encountered by Walk. +// If the result visitor w is not nil, Walk visits each of the children +// of node with the visitor w, followed by a call of w.Visit(nil). +type Visitor interface { + Visit(node Node) (w Visitor) +} + +// Helper functions for common node lists. They may be empty. + +func walkExprList(v Visitor, list []Expr) { + for _, x := range list { + Walk(v, x) + } +} + +func walkStmtList(v Visitor, list []Stmt) { + for _, x := range list { + Walk(v, x) + } +} + +// Walk traverses an AST in depth-first order: It starts by calling +// v.Visit(node); node must not be nil. If the visitor w returned by +// v.Visit(node) is not nil, Walk is invoked recursively with visitor +// w for each of the non-nil children of node, followed by a call of +// w.Visit(nil). +// +func Walk(v Visitor, node Node) { + if v = v.Visit(node); v == nil { + return + } + + // walk children + // (the order of the cases matches the order + // of the corresponding node types in ast.go) + switch n := node.(type) { + + // expressions + case *FieldExpr: + Walk(v, n.Index) + + case *NamedFieldExpr: + Walk(v, n.Field) + + case *UnaryExpr: + Walk(v, n.Value) + + case *BinaryExpr: + Walk(v, n.Left) + Walk(v, n.Right) + + case *ArrayExpr: // leaf + case *InExpr: + walkExprList(v, n.Index) + Walk(v, n.Array) + + case *CondExpr: + Walk(v, n.Cond) + Walk(v, n.True) + Walk(v, n.False) + + case *NumExpr: // leaf + case *StrExpr: // leaf + case *RegExpr: // leaf + case *VarExpr: // leaf + case *IndexExpr: + Walk(v, n.Array) + walkExprList(v, n.Index) + + case *AssignExpr: + Walk(v, n.Left) + Walk(v, n.Right) + + case *AugAssignExpr: + Walk(v, n.Left) + Walk(v, n.Right) + + case *IncrExpr: + Walk(v, n.Expr) + + case *CallExpr: + walkExprList(v, n.Args) + + case *UserCallExpr: + walkExprList(v, n.Args) + + case *MultiExpr: + walkExprList(v, n.Exprs) + + case *GetlineExpr: + Walk(v, n.Command) + Walk(v, n.Target) + Walk(v, n.File) + + // statements + case *PrintStmt: + walkExprList(v, n.Args) + Walk(v, n.Dest) + + case *PrintfStmt: + walkExprList(v, n.Args) + Walk(v, n.Dest) + + case *ExprStmt: + Walk(v, n.Expr) + + case *IfStmt: + Walk(v, n.Cond) + walkStmtList(v, n.Body) + walkStmtList(v, n.Else) + + case *ForStmt: + Walk(v, n.Pre) + Walk(v, n.Cond) + Walk(v, n.Post) + walkStmtList(v, n.Body) + + case *ForInStmt: + Walk(v, n.Var) + Walk(v, n.Array) + walkStmtList(v, n.Body) + + case *WhileStmt: + Walk(v, n.Cond) + walkStmtList(v, n.Body) + + case *DoWhileStmt: + walkStmtList(v, n.Body) + Walk(v, n.Cond) + + case *BreakStmt: // leaf + case *ContinueStmt: // leaf + case *NextStmt: // leaf + case *ExitStmt: + Walk(v, n.Status) + + case *DeleteStmt: + Walk(v, n.Array) + walkExprList(v, n.Index) + + case *ReturnStmt: + Walk(v, n.Value) + + case *BlockStmt: + walkStmtList(v, n.Body) + + default: + panic(fmt.Sprintf("ast.Walk: unexpected node type %T", n)) + } + + v.Visit(nil) +} From 55b69ad1d1ef26ecbd1be6a63be3b62c006bfb84 Mon Sep 17 00:00:00 2001 From: xonix Date: Thu, 25 Aug 2022 23:26:07 +0300 Subject: [PATCH 08/57] decoupling parse/resolve --- internal/ast/ast.go | 2 -- internal/ast/walk.go | 4 ---- internal/compiler/compiler.go | 15 +++++++++++---- internal/compiler/disassembler.go | 4 ++-- internal/compiler/disassembler_test.go | 2 +- internal/resolver/resolver.go | 5 +---- parser/parser.go | 26 +++++++++++++++----------- 7 files changed, 30 insertions(+), 28 deletions(-) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index cebb0943..b9dd4d48 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -16,8 +16,6 @@ type Program struct { Actions []Action End []Stmts Functions []Function - Scalars map[string]int - Arrays map[string]int } // String returns an indented, pretty-printed version of the parsed diff --git a/internal/ast/walk.go b/internal/ast/walk.go index 9bd2ac16..dc105f6f 100644 --- a/internal/ast/walk.go +++ b/internal/ast/walk.go @@ -1,7 +1,3 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - package ast import "fmt" diff --git a/internal/compiler/compiler.go b/internal/compiler/compiler.go index 40b2f89f..9fa8b681 100644 --- a/internal/compiler/compiler.go +++ b/internal/compiler/compiler.go @@ -10,8 +10,15 @@ import ( "github.com/benhoyt/goawk/lexer" ) -// Program holds an entire compiled program. +// Program is an entire AWK program used as an input for compiler. type Program struct { + ast.Program + Scalars map[string]int + Arrays map[string]int +} + +// CompiledProgram holds an entire compiled program. +type CompiledProgram struct { Begin []Opcode Actions []Action End []Opcode @@ -54,7 +61,7 @@ func (e *compileError) Error() string { } // Compile compiles an AST (parsed program) into virtual machine instructions. -func Compile(prog *ast.Program) (compiledProg *Program, err error) { +func Compile(prog *Program) (compiledProg *CompiledProgram, err error) { defer func() { // The compiler uses panic with a *compileError to signal compile // errors internally, and they're caught here. This avoids the @@ -65,7 +72,7 @@ func Compile(prog *ast.Program) (compiledProg *Program, err error) { } }() - p := &Program{} + p := &CompiledProgram{} // Reuse identical constants across entire program. indexes := constantIndexes{ @@ -166,7 +173,7 @@ type constantIndexes struct { // Holds the compilation state. type compiler struct { - program *Program + program *CompiledProgram indexes constantIndexes code []Opcode breaks [][]int diff --git a/internal/compiler/disassembler.go b/internal/compiler/disassembler.go index d5dc9594..e1975550 100644 --- a/internal/compiler/disassembler.go +++ b/internal/compiler/disassembler.go @@ -13,7 +13,7 @@ import ( // Disassemble writes a human-readable form of the program's virtual machine // instructions to writer. -func (p *Program) Disassemble(writer io.Writer) error { +func (p *CompiledProgram) Disassemble(writer io.Writer) error { if p.Begin != nil { d := &disassembler{ program: p, @@ -110,7 +110,7 @@ func (p *Program) Disassemble(writer io.Writer) error { // Disassembles a single block of opcodes. type disassembler struct { - program *Program + program *CompiledProgram writer io.Writer code []Opcode nativeFuncNames []string diff --git a/internal/compiler/disassembler_test.go b/internal/compiler/disassembler_test.go index 297224d5..db9b9770 100644 --- a/internal/compiler/disassembler_test.go +++ b/internal/compiler/disassembler_test.go @@ -12,7 +12,7 @@ func TestDisassembler(t *testing.T) { // disassembly includes the opcode name, to help catch silly typos. for op := Nop; op < EndOpcode; op++ { t.Run(op.String(), func(t *testing.T) { - p := Program{ + p := CompiledProgram{ Begin: []Opcode{op, 0, 0, 0, 0, 0, 0, 0}, Functions: []Function{ { diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index d492034f..74946662 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -27,10 +27,7 @@ type ResolveResult struct { } type Program struct { - Begin []ast.Stmts - Actions []ast.Action - End []ast.Stmts - Functions []ast.Function + ast.Program } type ResolverConfig struct { diff --git a/parser/parser.go b/parser/parser.go index 1f80074f..3d1e505c 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -74,13 +74,13 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { // Parse into abstract syntax tree prog = p.program() - result, err := resolver.Resolve(prog.toResolverProgram(), resolverConfig) + result, err := resolver.Resolve(prog.asResolverSource(), resolverConfig) if err != nil { return nil, err } // Compile to virtual machine code - prog.Compiled, err = compiler.Compile(prog.toAST()) + prog.Compiled, err = compiler.Compile(prog.asCompilerSource()) return prog, err } @@ -96,7 +96,7 @@ type Program struct { Functions []ast.Function Scalars map[string]int Arrays map[string]int - Compiled *compiler.Program + Compiled *compiler.CompiledProgram } // String returns an indented, pretty-printed version of the parsed @@ -118,18 +118,22 @@ func (p *Program) toAST() *ast.Program { Actions: p.Actions, End: p.End, Functions: p.Functions, - Scalars: p.Scalars, - Arrays: p.Arrays, } } -// toResolverProgram converts the *Program to an *resolver.Program. -func (p *Program) toResolverProgram() *resolver.Program { +// asCompilerSource converts the *Program to an *compiler.Program. +func (p *Program) asCompilerSource() *compiler.Program { + return &compiler.Program{ + Program: *p.toAST(), + Scalars: p.Scalars, + Arrays: p.Arrays, + } +} + +// asResolverSource converts the *Program to an *resolver.Program. +func (p *Program) asResolverSource() *resolver.Program { return &resolver.Program{ - Begin: p.Begin, - Actions: p.Actions, - End: p.End, - Functions: p.Functions, + Program: *p.toAST(), } } From bab57baa805f15a77a2a911c64297d0b89ca2d3b Mon Sep 17 00:00:00 2001 From: xonix Date: Thu, 25 Aug 2022 23:39:58 +0300 Subject: [PATCH 09/57] decoupling parse/resolve : visitor approach from golang --- internal/ast/walk.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/internal/ast/walk.go b/internal/ast/walk.go index dc105f6f..acf12ce1 100644 --- a/internal/ast/walk.go +++ b/internal/ast/walk.go @@ -148,6 +148,27 @@ func Walk(v Visitor, node Node) { case *BlockStmt: walkStmtList(v, n.Body) + case Program: + for _, stmts := range n.Begin { + walkStmtList(v, stmts) + } + for _, action := range n.Actions { + Walk(v, action) + } + for _, function := range n.Functions { + Walk(v, function) + } + for _, stmts := range n.End { + walkStmtList(v, stmts) + } + + case Action: + walkExprList(v, n.Pattern) + walkStmtList(v, n.Stmts) + + case Function: + walkStmtList(v, n.Body) + default: panic(fmt.Sprintf("ast.Walk: unexpected node type %T", n)) } From da3f570f2576708abb4b5adf1183d5e88de6aa29 Mon Sep 17 00:00:00 2001 From: xonix Date: Fri, 26 Aug 2022 00:05:12 +0300 Subject: [PATCH 10/57] decoupling parse/resolve --- internal/ast/walk.go | 44 +++++++++++++++++------------------ internal/resolver/resolve.go | 5 ++-- internal/resolver/resolver.go | 41 +++++++++++++++++++++++++------- parser/parser.go | 12 +++++----- 4 files changed, 63 insertions(+), 39 deletions(-) diff --git a/internal/ast/walk.go b/internal/ast/walk.go index acf12ce1..631cd191 100644 --- a/internal/ast/walk.go +++ b/internal/ast/walk.go @@ -11,13 +11,13 @@ type Visitor interface { // Helper functions for common node lists. They may be empty. -func walkExprList(v Visitor, list []Expr) { +func WalkExprList(v Visitor, list []Expr) { for _, x := range list { Walk(v, x) } } -func walkStmtList(v Visitor, list []Stmt) { +func WalkStmtList(v Visitor, list []Stmt) { for _, x := range list { Walk(v, x) } @@ -55,7 +55,7 @@ func Walk(v Visitor, node Node) { case *ArrayExpr: // leaf case *InExpr: - walkExprList(v, n.Index) + WalkExprList(v, n.Index) Walk(v, n.Array) case *CondExpr: @@ -69,7 +69,7 @@ func Walk(v Visitor, node Node) { case *VarExpr: // leaf case *IndexExpr: Walk(v, n.Array) - walkExprList(v, n.Index) + WalkExprList(v, n.Index) case *AssignExpr: Walk(v, n.Left) @@ -83,13 +83,13 @@ func Walk(v Visitor, node Node) { Walk(v, n.Expr) case *CallExpr: - walkExprList(v, n.Args) + WalkExprList(v, n.Args) case *UserCallExpr: - walkExprList(v, n.Args) + WalkExprList(v, n.Args) case *MultiExpr: - walkExprList(v, n.Exprs) + WalkExprList(v, n.Exprs) case *GetlineExpr: Walk(v, n.Command) @@ -98,11 +98,11 @@ func Walk(v Visitor, node Node) { // statements case *PrintStmt: - walkExprList(v, n.Args) + WalkExprList(v, n.Args) Walk(v, n.Dest) case *PrintfStmt: - walkExprList(v, n.Args) + WalkExprList(v, n.Args) Walk(v, n.Dest) case *ExprStmt: @@ -110,26 +110,26 @@ func Walk(v Visitor, node Node) { case *IfStmt: Walk(v, n.Cond) - walkStmtList(v, n.Body) - walkStmtList(v, n.Else) + WalkStmtList(v, n.Body) + WalkStmtList(v, n.Else) case *ForStmt: Walk(v, n.Pre) Walk(v, n.Cond) Walk(v, n.Post) - walkStmtList(v, n.Body) + WalkStmtList(v, n.Body) case *ForInStmt: Walk(v, n.Var) Walk(v, n.Array) - walkStmtList(v, n.Body) + WalkStmtList(v, n.Body) case *WhileStmt: Walk(v, n.Cond) - walkStmtList(v, n.Body) + WalkStmtList(v, n.Body) case *DoWhileStmt: - walkStmtList(v, n.Body) + WalkStmtList(v, n.Body) Walk(v, n.Cond) case *BreakStmt: // leaf @@ -140,17 +140,17 @@ func Walk(v Visitor, node Node) { case *DeleteStmt: Walk(v, n.Array) - walkExprList(v, n.Index) + WalkExprList(v, n.Index) case *ReturnStmt: Walk(v, n.Value) case *BlockStmt: - walkStmtList(v, n.Body) + WalkStmtList(v, n.Body) case Program: for _, stmts := range n.Begin { - walkStmtList(v, stmts) + WalkStmtList(v, stmts) } for _, action := range n.Actions { Walk(v, action) @@ -159,15 +159,15 @@ func Walk(v Visitor, node Node) { Walk(v, function) } for _, stmts := range n.End { - walkStmtList(v, stmts) + WalkStmtList(v, stmts) } case Action: - walkExprList(v, n.Pattern) - walkStmtList(v, n.Stmts) + WalkExprList(v, n.Pattern) + WalkStmtList(v, n.Stmts) case Function: - walkStmtList(v, n.Body) + WalkStmtList(v, n.Body) default: panic(fmt.Sprintf("ast.Walk: unexpected node type %T", n)) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index ac7a2530..febd2f40 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -95,8 +95,9 @@ func (r *resolver) stopFunction() { } // Add function by name with given index -func (r *resolver) addFunction(name string, index int) { - r.functions[name] = index +func (r *resolver) addFunction(name string) { + r.functions[name] = r.funcIdx + r.funcIdx++ } // Records a call to a user function (for resolving indexes later) diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 74946662..05ab004a 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -21,6 +21,8 @@ type resolver struct { functions map[string]int // map of function name to index userCalls []userCall // record calls so we can resolve them later nativeFuncs map[string]interface{} + + funcIdx int } type ResolveResult struct { @@ -38,14 +40,24 @@ func Resolve(prog *Program, config *ResolverConfig) (resolveResult *ResolveResul r := &resolver{} r.initResolve(config) - // TODO resolution step to iterate over AST - // 1. process functions - for i, function := range prog.Functions { + ast.Walk(r, prog) + + r.resolveUserCalls(prog) + r.resolveVars(prog) + r.checkMultiExprs() + +} + +func (r *resolver) Visit(node ast.Node) ast.Visitor { + switch n := node.(type) { + + case ast.Function: + function := n name := function.Name - r.addFunction(name, i) if _, ok := r.functions[name]; ok { panic(r.errorf("function %q already defined", name)) } + r.addFunction(name) r.locals = make(map[string]bool, 7) for _, param := range function.Params { if r.locals[param] { @@ -55,13 +67,24 @@ func Resolve(prog *Program, config *ResolverConfig) (resolveResult *ResolveResul } r.startFunction(name) - // TODO process body + + ast.WalkStmtList(r, function.Body) + r.stopFunction() r.locals = nil - } - r.resolveUserCalls(prog) - r.resolveVars(prog) - r.checkMultiExprs() + case *ast.UserCallExpr: + name := n.Name + if r.locals[name] { + panic(p.errorf("can't call local variable %q as function", name)) + } + for i, arg := range n.Args { + r.processUserCallArg(name, arg, i) + } + r.recordUserCall(n, pos) + default: + return r + } + return nil } diff --git a/parser/parser.go b/parser/parser.go index 3d1e505c..50661372 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -152,7 +152,7 @@ type parser struct { loopDepth int // current loop depth (0 if not in any loops) // Variable tracking and resolving - locals map[string]bool // current function's locals (for determining scope) + //locals map[string]bool // current function's locals (for determining scope) //varTypes map[string]map[string]typeInfo // map of func name to var name to type //varRefs []varRef // all variable references (usually scalars) //arrayRefs []arrayRef // all array references @@ -749,9 +749,9 @@ func (p *parser) primary() ast.Expr { p.expect(RBRACKET) return &ast.IndexExpr{p.arrayRef(name, namePos), index} } else if p.tok == LPAREN && !p.lexer.HadSpace() { - if p.locals[name] { - panic(p.errorf("can't call local variable %q as function", name)) - } + //if p.locals[name] { + // panic(p.errorf("can't call local variable %q as function", name)) + //} // Grammar requires no space between function name and // left paren for user function calls, hence the funky // lexer.HadSpace() method. @@ -1058,13 +1058,13 @@ func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr { p.commaNewlines() } arg := p.expr() - p.processUserCallArg(name, arg, i) + //p.processUserCallArg(name, arg, i) args = append(args, arg) i++ } p.expect(RPAREN) call := &ast.UserCallExpr{false, -1, name, args} // index is resolved later - p.recordUserCall(call, pos) + //p.recordUserCall(call, pos) return call } From bf2a0ea5843d6ed343a85ff213591d0f7dac1102 Mon Sep 17 00:00:00 2001 From: xonix Date: Fri, 26 Aug 2022 00:32:17 +0300 Subject: [PATCH 11/57] decoupling parse/resolve : multiExpr --- internal/resolver/resolve.go | 28 ---------------------------- internal/resolver/resolver.go | 2 +- parser/parser.go | 32 ++++++++++++++++++++++++++++++-- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index febd2f40..08c1112c 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -435,31 +435,3 @@ func (r *resolver) getVarFuncName(prog *Program, name, inFunc string) string { } return "" } - -// Record a "multi expression" (comma-separated pseudo-expression -// used to allow commas around print/printf arguments). -func (r *resolver) multiExpr(exprs []ast.Expr, pos Position) ast.Expr { - expr := &ast.MultiExpr{exprs} - r.multiExprs[expr] = pos - return expr -} - -// Mark the multi expression as used (by a print/printf statement). -func (r *resolver) useMultiExpr(expr *ast.MultiExpr) { - delete(r.multiExprs, expr) -} - -// Check that there are no unused multi expressions (syntax error). -func (r *resolver) checkMultiExprs() { - if len(r.multiExprs) == 0 { - return - } - // Show error on first comma-separated expression - min := Position{1000000000, 1000000000} - for _, pos := range r.multiExprs { - if pos.Line < min.Line || pos.Line == min.Line && pos.Column < min.Column { - min = pos - } - } - panic(r.posErrorf(min, "unexpected comma-separated expression")) -} diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 05ab004a..878551f8 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -44,7 +44,7 @@ func Resolve(prog *Program, config *ResolverConfig) (resolveResult *ResolveResul r.resolveUserCalls(prog) r.resolveVars(prog) - r.checkMultiExprs() + //r.checkMultiExprs() } diff --git a/parser/parser.go b/parser/parser.go index 50661372..45c77e6d 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -156,7 +156,7 @@ type parser struct { //varTypes map[string]map[string]typeInfo // map of func name to var name to type //varRefs []varRef // all variable references (usually scalars) //arrayRefs []arrayRef // all array references - //multiExprs map[*ast.MultiExpr]Position // tracks comma-separated expressions + multiExprs map[*ast.MultiExpr]Position // tracks comma-separated expressions // Function tracking //functions map[string]int // map of function name to index @@ -208,7 +208,7 @@ func (p *parser) program() *Program { //p.resolveUserCalls(prog) //p.resolveVars(prog) - //p.checkMultiExprs() + p.checkMultiExprs() return prog } @@ -1075,3 +1075,31 @@ func (p *parser) varRef(name string, pos Position) *ast.VarExpr { func (p *parser) arrayRef(name string, pos Position) *ast.ArrayExpr { return &ast.ArrayExpr{ast.ScopeUnresolved, 0, name} } + +// Record a "multi expression" (comma-separated pseudo-expression +// used to allow commas around print/printf arguments). +func (p *parser) multiExpr(exprs []ast.Expr, pos Position) ast.Expr { + expr := &ast.MultiExpr{exprs} + p.multiExprs[expr] = pos + return expr +} + +// Mark the multi expression as used (by a print/printf statement). +func (p *parser) useMultiExpr(expr *ast.MultiExpr) { + delete(p.multiExprs, expr) +} + +// Check that there are no unused multi expressions (syntax error). +func (p *parser) checkMultiExprs() { + if len(p.multiExprs) == 0 { + return + } + // Show error on first comma-separated expression + min := Position{1000000000, 1000000000} + for _, pos := range p.multiExprs { + if pos.Line < min.Line || pos.Line == min.Line && pos.Column < min.Column { + min = pos + } + } + panic(p.posErrorf(min, "unexpected comma-separated expression")) +} From 421478360f02726665ce95dfb25e856ab3ed41b4 Mon Sep 17 00:00:00 2001 From: xonix Date: Fri, 26 Aug 2022 01:01:37 +0300 Subject: [PATCH 12/57] decoupling parse/resolve : rfct --- internal/resolver/resolve.go | 9 +++++---- internal/resolver/resolver.go | 18 ++++++++---------- parser/parser.go | 26 +++++--------------------- 3 files changed, 18 insertions(+), 35 deletions(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index 08c1112c..cda40133 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -5,6 +5,7 @@ package resolver import ( "fmt" + "github.com/benhoyt/goawk/internal/compiler" "reflect" "sort" @@ -115,7 +116,7 @@ func (r *resolver) recordUserCall(call *ast.UserCallExpr, pos Position) { // After parsing, resolve all user calls to their indexes. Also // ensures functions called have actually been defined, and that // they're not being called with too many arguments. -func (r *resolver) resolveUserCalls(prog *Program) { +func (r *resolver) resolveUserCalls(prog *ast.Program) { // Number the native funcs (order by name to get consistent order) nativeNames := make([]string, 0, len(r.nativeFuncs)) for name := range r.nativeFuncs { @@ -211,7 +212,7 @@ func (r *resolver) arrayRef(name string, pos Position) *ast.ArrayExpr { } // Print variable type information (for debugging) on p.debugWriter -func (r *resolver) printVarTypes(prog *Program) { +func (r *resolver) printVarTypes(prog *compiler.Program) { fmt.Fprintf(r.debugWriter, "scalars: %v\n", prog.Scalars) fmt.Fprintf(r.debugWriter, "arrays: %v\n", prog.Arrays) funcNames := []string{} @@ -239,7 +240,7 @@ func (r *resolver) printVarTypes(prog *Program) { // Resolve unknown variables types and generate variable indexes and // name-to-index mappings for interpreter -func (r *resolver) resolveVars(prog *Program) { +func (r *resolver) resolveVars(prog *compiler.Program) { // First go through all unknown types and try to determine the // type from the parameter type in that function definition. // Iterate through functions in topological order, for example @@ -424,7 +425,7 @@ func (r *resolver) resolveVars(prog *Program) { // If name refers to a local (in function inFunc), return that // function's name, otherwise return "" (meaning global). -func (r *resolver) getVarFuncName(prog *Program, name, inFunc string) string { +func (r *resolver) getVarFuncName(prog *compiler.Program, name, inFunc string) string { if inFunc == "" { return "" } diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 878551f8..8b9e4fc1 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -2,6 +2,7 @@ package resolver import ( "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/internal/compiler" "github.com/benhoyt/goawk/lexer" ) @@ -25,27 +26,24 @@ type resolver struct { funcIdx int } -type ResolveResult struct { -} - -type Program struct { - ast.Program -} - type ResolverConfig struct { NativeFuncs map[string]interface{} } -func Resolve(prog *Program, config *ResolverConfig) (resolveResult *ResolveResult, err error) { +func Resolve(prog *ast.Program, config *ResolverConfig) (resolvedProg *compiler.Program, err error) { r := &resolver{} + resolvedProg = &compiler.Program{ + Program: *prog, + } r.initResolve(config) ast.Walk(r, prog) r.resolveUserCalls(prog) - r.resolveVars(prog) + r.resolveVars(resolvedProg) //r.checkMultiExprs() + return resolvedProg, nil } func (r *resolver) Visit(node ast.Node) ast.Visitor { @@ -76,7 +74,7 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { case *ast.UserCallExpr: name := n.Name if r.locals[name] { - panic(p.errorf("can't call local variable %q as function", name)) + panic(r.errorf("can't call local variable %q as function", name)) } for i, arg := range n.Args { r.processUserCallArg(name, arg, i) diff --git a/parser/parser.go b/parser/parser.go index 45c77e6d..c7d898bd 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -72,15 +72,15 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { p.next() // initialize p.tok // Parse into abstract syntax tree - prog = p.program() + astProg := p.program() - result, err := resolver.Resolve(prog.asResolverSource(), resolverConfig) + resolvedProgram, err := resolver.Resolve(astProg, resolverConfig) if err != nil { return nil, err } // Compile to virtual machine code - prog.Compiled, err = compiler.Compile(prog.asCompilerSource()) + prog.Compiled, err = compiler.Compile(resolvedProgram) return prog, err } @@ -121,22 +121,6 @@ func (p *Program) toAST() *ast.Program { } } -// asCompilerSource converts the *Program to an *compiler.Program. -func (p *Program) asCompilerSource() *compiler.Program { - return &compiler.Program{ - Program: *p.toAST(), - Scalars: p.Scalars, - Arrays: p.Arrays, - } -} - -// asResolverSource converts the *Program to an *resolver.Program. -func (p *Program) asResolverSource() *resolver.Program { - return &resolver.Program{ - Program: *p.toAST(), - } -} - // Parser state type parser struct { // Lexer instance and current token values @@ -169,8 +153,8 @@ type parser struct { } // Parse an entire AWK program. -func (p *parser) program() *Program { - prog := &Program{} +func (p *parser) program() *ast.Program { + prog := &ast.Program{} p.optionalNewlines() for p.tok != EOF { switch p.tok { From 04e70d7db295d3c89750b5936a0de1e53bcbb7ea Mon Sep 17 00:00:00 2001 From: xonix Date: Fri, 26 Aug 2022 01:11:53 +0300 Subject: [PATCH 13/57] decoupling parse/resolve : rfct --- internal/compiler/compiler.go | 16 +++++----------- internal/compiler/disassembler.go | 4 ++-- internal/compiler/disassembler_test.go | 2 +- internal/resolver/resolve.go | 7 +++---- internal/resolver/resolver.go | 12 +++++++++--- parser/parser.go | 9 ++------- 6 files changed, 22 insertions(+), 28 deletions(-) diff --git a/internal/compiler/compiler.go b/internal/compiler/compiler.go index 9fa8b681..6ede41a9 100644 --- a/internal/compiler/compiler.go +++ b/internal/compiler/compiler.go @@ -3,6 +3,7 @@ package compiler import ( "fmt" + "github.com/benhoyt/goawk/internal/resolver" "math" "regexp" @@ -10,15 +11,8 @@ import ( "github.com/benhoyt/goawk/lexer" ) -// Program is an entire AWK program used as an input for compiler. +// Program holds an entire compiled program. type Program struct { - ast.Program - Scalars map[string]int - Arrays map[string]int -} - -// CompiledProgram holds an entire compiled program. -type CompiledProgram struct { Begin []Opcode Actions []Action End []Opcode @@ -61,7 +55,7 @@ func (e *compileError) Error() string { } // Compile compiles an AST (parsed program) into virtual machine instructions. -func Compile(prog *Program) (compiledProg *CompiledProgram, err error) { +func Compile(prog *resolver.Program) (compiledProg *Program, err error) { defer func() { // The compiler uses panic with a *compileError to signal compile // errors internally, and they're caught here. This avoids the @@ -72,7 +66,7 @@ func Compile(prog *Program) (compiledProg *CompiledProgram, err error) { } }() - p := &CompiledProgram{} + p := &Program{} // Reuse identical constants across entire program. indexes := constantIndexes{ @@ -173,7 +167,7 @@ type constantIndexes struct { // Holds the compilation state. type compiler struct { - program *CompiledProgram + program *Program indexes constantIndexes code []Opcode breaks [][]int diff --git a/internal/compiler/disassembler.go b/internal/compiler/disassembler.go index e1975550..d5dc9594 100644 --- a/internal/compiler/disassembler.go +++ b/internal/compiler/disassembler.go @@ -13,7 +13,7 @@ import ( // Disassemble writes a human-readable form of the program's virtual machine // instructions to writer. -func (p *CompiledProgram) Disassemble(writer io.Writer) error { +func (p *Program) Disassemble(writer io.Writer) error { if p.Begin != nil { d := &disassembler{ program: p, @@ -110,7 +110,7 @@ func (p *CompiledProgram) Disassemble(writer io.Writer) error { // Disassembles a single block of opcodes. type disassembler struct { - program *CompiledProgram + program *Program writer io.Writer code []Opcode nativeFuncNames []string diff --git a/internal/compiler/disassembler_test.go b/internal/compiler/disassembler_test.go index db9b9770..297224d5 100644 --- a/internal/compiler/disassembler_test.go +++ b/internal/compiler/disassembler_test.go @@ -12,7 +12,7 @@ func TestDisassembler(t *testing.T) { // disassembly includes the opcode name, to help catch silly typos. for op := Nop; op < EndOpcode; op++ { t.Run(op.String(), func(t *testing.T) { - p := CompiledProgram{ + p := Program{ Begin: []Opcode{op, 0, 0, 0, 0, 0, 0, 0}, Functions: []Function{ { diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index cda40133..9a48527b 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -5,7 +5,6 @@ package resolver import ( "fmt" - "github.com/benhoyt/goawk/internal/compiler" "reflect" "sort" @@ -212,7 +211,7 @@ func (r *resolver) arrayRef(name string, pos Position) *ast.ArrayExpr { } // Print variable type information (for debugging) on p.debugWriter -func (r *resolver) printVarTypes(prog *compiler.Program) { +func (r *resolver) printVarTypes(prog *Program) { fmt.Fprintf(r.debugWriter, "scalars: %v\n", prog.Scalars) fmt.Fprintf(r.debugWriter, "arrays: %v\n", prog.Arrays) funcNames := []string{} @@ -240,7 +239,7 @@ func (r *resolver) printVarTypes(prog *compiler.Program) { // Resolve unknown variables types and generate variable indexes and // name-to-index mappings for interpreter -func (r *resolver) resolveVars(prog *compiler.Program) { +func (r *resolver) resolveVars(prog *Program) { // First go through all unknown types and try to determine the // type from the parameter type in that function definition. // Iterate through functions in topological order, for example @@ -425,7 +424,7 @@ func (r *resolver) resolveVars(prog *compiler.Program) { // If name refers to a local (in function inFunc), return that // function's name, otherwise return "" (meaning global). -func (r *resolver) getVarFuncName(prog *compiler.Program, name, inFunc string) string { +func (r *resolver) getVarFuncName(prog *Program, name, inFunc string) string { if inFunc == "" { return "" } diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 8b9e4fc1..23caabe6 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -2,7 +2,6 @@ package resolver import ( "github.com/benhoyt/goawk/internal/ast" - "github.com/benhoyt/goawk/internal/compiler" "github.com/benhoyt/goawk/lexer" ) @@ -30,9 +29,16 @@ type ResolverConfig struct { NativeFuncs map[string]interface{} } -func Resolve(prog *ast.Program, config *ResolverConfig) (resolvedProg *compiler.Program, err error) { +// Program represents the resolved program. +type Program struct { + ast.Program + Scalars map[string]int + Arrays map[string]int +} + +func Resolve(prog *ast.Program, config *ResolverConfig) (resolvedProg *Program, err error) { r := &resolver{} - resolvedProg = &compiler.Program{ + resolvedProg = &Program{ Program: *prog, } r.initResolve(config) diff --git a/parser/parser.go b/parser/parser.go index c7d898bd..33704905 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -90,13 +90,8 @@ type Program struct { // but are exported for the interpreter (Program itself needs to // be exported in package "parser", otherwise these could live in // "internal/ast".) - Begin []ast.Stmts - Actions []ast.Action - End []ast.Stmts - Functions []ast.Function - Scalars map[string]int - Arrays map[string]int - Compiled *compiler.CompiledProgram + resolver.Program + Compiled *compiler.Program } // String returns an indented, pretty-printed version of the parsed From 6ff111c4462007aa61614b683b173a9c85524794 Mon Sep 17 00:00:00 2001 From: xonix Date: Fri, 26 Aug 2022 01:22:38 +0300 Subject: [PATCH 14/57] decoupling parse/resolve : rfct --- internal/resolver/resolver.go | 1 + parser/parser.go | 14 +++----------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 23caabe6..844e5649 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -37,6 +37,7 @@ type Program struct { } func Resolve(prog *ast.Program, config *ResolverConfig) (resolvedProg *Program, err error) { + // TODO errors handling via panic recover r := &resolver{} resolvedProg = &Program{ Program: *prog, diff --git a/parser/parser.go b/parser/parser.go index 33704905..dc1fedce 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -75,12 +75,14 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { astProg := p.program() resolvedProgram, err := resolver.Resolve(astProg, resolverConfig) + prog.Program = *resolvedProgram if err != nil { return nil, err } // Compile to virtual machine code prog.Compiled, err = compiler.Compile(resolvedProgram) + return prog, err } @@ -97,7 +99,7 @@ type Program struct { // String returns an indented, pretty-printed version of the parsed // program. func (p *Program) String() string { - return p.toAST().String() + return p.Program.Program.String() } // Disassemble writes a human-readable form of the program's virtual machine @@ -106,16 +108,6 @@ func (p *Program) Disassemble(writer io.Writer) error { return p.Compiled.Disassemble(writer) } -// toAST converts the *Program to an *ast.Program. -func (p *Program) toAST() *ast.Program { - return &ast.Program{ - Begin: p.Begin, - Actions: p.Actions, - End: p.End, - Functions: p.Functions, - } -} - // Parser state type parser struct { // Lexer instance and current token values From d1d5edf841985165045aea0f815d781fa3e9a3d4 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 00:37:10 +0300 Subject: [PATCH 15/57] decoupling parse/resolve : work on sufficient position wiring for resolve --- internal/ast/ast.go | 2 ++ internal/resolver/resolve.go | 19 ++++++++++--------- internal/resolver/resolver.go | 13 +++++++------ parser/parser.go | 28 +++++++++++++++------------- 4 files changed, 34 insertions(+), 28 deletions(-) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index b9dd4d48..2a27fb6f 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -322,6 +322,7 @@ type UserCallExpr struct { Index int Name string Args []Expr + Pos Position } func (e *UserCallExpr) String() string { @@ -593,6 +594,7 @@ type Function struct { Params []string Arrays []bool Body Stmts + Pos Position } func (f *Function) String() string { diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index 9a48527b..5c5d8e67 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -10,6 +10,7 @@ import ( "github.com/benhoyt/goawk/internal/ast" . "github.com/benhoyt/goawk/lexer" + "github.com/benhoyt/goawk/parser" ) type varType int @@ -133,11 +134,11 @@ func (r *resolver) resolveUserCalls(prog *ast.Program) { if !ok { f, haveNative := r.nativeFuncs[c.call.Name] if !haveNative { - panic(r.posErrorf(c.pos, "undefined function %q", c.call.Name)) + panic(parser.PosErrorf(c.pos, "undefined function %q", c.call.Name)) } typ := reflect.TypeOf(f) if !typ.IsVariadic() && len(c.call.Args) > typ.NumIn() { - panic(r.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) + panic(parser.PosErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) } c.call.Native = true c.call.Index = nativeIndexes[c.call.Name] @@ -145,7 +146,7 @@ func (r *resolver) resolveUserCalls(prog *ast.Program) { } function := prog.Functions[index] if len(c.call.Args) > len(function.Params) { - panic(r.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) + panic(parser.PosErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) } c.call.Index = index } @@ -373,7 +374,7 @@ func (r *resolver) resolveVars(prog *Program) { funcName := r.getVarFuncName(prog, varExpr.Name, c.inFunc) info := r.varTypes[funcName][varExpr.Name] if info.typ == typeArray { - panic(r.posErrorf(c.pos, "can't pass array %q to native function", varExpr.Name)) + panic(parser.PosErrorf(c.pos, "can't pass array %q to native function", varExpr.Name)) } } continue @@ -385,17 +386,17 @@ func (r *resolver) resolveVars(prog *Program) { varExpr, ok := arg.(*ast.VarExpr) if !ok { if function.Arrays[i] { - panic(r.posErrorf(c.pos, "can't pass scalar %s as array param", arg)) + panic(parser.PosErrorf(c.pos, "can't pass scalar %s as array param", arg)) } continue } funcName := r.getVarFuncName(prog, varExpr.Name, c.inFunc) info := r.varTypes[funcName][varExpr.Name] if info.typ == typeArray && !function.Arrays[i] { - panic(r.posErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name)) + panic(parser.PosErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name)) } if info.typ != typeArray && function.Arrays[i] { - panic(r.posErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name)) + panic(parser.PosErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name)) } } } @@ -409,14 +410,14 @@ func (r *resolver) resolveVars(prog *Program) { for _, varRef := range r.varRefs { info := r.varTypes[varRef.funcName][varRef.ref.Name] if info.typ == typeArray && !varRef.isArg { - panic(r.posErrorf(varRef.pos, "can't use array %q as scalar", varRef.ref.Name)) + panic(parser.PosErrorf(varRef.pos, "can't use array %q as scalar", varRef.ref.Name)) } varRef.ref.Index = info.index } for _, arrayRef := range r.arrayRefs { info := r.varTypes[arrayRef.funcName][arrayRef.ref.Name] if info.typ == typeScalar { - panic(r.posErrorf(arrayRef.pos, "can't use scalar %q as array", arrayRef.ref.Name)) + panic(parser.PosErrorf(arrayRef.pos, "can't use scalar %q as array", arrayRef.ref.Name)) } arrayRef.ref.Index = info.index } diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 844e5649..419d12a2 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -3,6 +3,7 @@ package resolver import ( "github.com/benhoyt/goawk/internal/ast" "github.com/benhoyt/goawk/lexer" + "github.com/benhoyt/goawk/parser" ) type resolver struct { @@ -60,14 +61,14 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { function := n name := function.Name if _, ok := r.functions[name]; ok { - panic(r.errorf("function %q already defined", name)) + panic(parser.PosErrorf(function.Pos, "function %q already defined", name)) } r.addFunction(name) r.locals = make(map[string]bool, 7) for _, param := range function.Params { - if r.locals[param] { - panic(r.errorf("duplicate parameter name %q", param)) - } + //if r.locals[param] { + // panic(parser.PosErrorf(function.ParamsPos[i], "duplicate parameter name %q", param)) + //} r.locals[param] = true } @@ -81,12 +82,12 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { case *ast.UserCallExpr: name := n.Name if r.locals[name] { - panic(r.errorf("can't call local variable %q as function", name)) + panic(parser.PosErrorf(n.Pos, "can't call local variable %q as function", name)) } for i, arg := range n.Args { r.processUserCallArg(name, arg, i) } - r.recordUserCall(n, pos) + r.recordUserCall(n, n.Pos) default: return r } diff --git a/parser/parser.go b/parser/parser.go index dc1fedce..2ce41c87 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -75,11 +75,12 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { astProg := p.program() resolvedProgram, err := resolver.Resolve(astProg, resolverConfig) - prog.Program = *resolvedProgram if err != nil { return nil, err } + prog.Program = *resolvedProgram + // Compile to virtual machine code prog.Compiled, err = compiler.Compile(resolvedProgram) @@ -432,11 +433,12 @@ func (p *parser) function() ast.Function { //if _, ok := p.functions[name]; ok { // panic(p.errorf("function %q already defined", name)) //} + funcNamePos := p.pos p.expect(NAME) p.expect(LPAREN) first := true params := make([]string, 0, 7) // pre-allocate some to reduce allocations - //p.locals = make(map[string]bool, 7) + locals := make(map[string]bool, 7) for p.tok != RPAREN { if !first { p.commaNewlines() @@ -446,12 +448,12 @@ func (p *parser) function() ast.Function { if param == name { panic(p.errorf("can't use function name as parameter name")) } - //if p.locals[param] { - // panic(p.errorf("duplicate parameter name %q", param)) - //} + if locals[param] { + panic(p.errorf("duplicate parameter name %q", param)) + } p.expect(NAME) params = append(params, param) - //p.locals[param] = true + locals[param] = true } p.expect(RPAREN) p.optionalNewlines() @@ -462,7 +464,7 @@ func (p *parser) function() ast.Function { //p.stopFunction() //p.locals = nil - return ast.Function{name, params, nil, body} + return ast.Function{name, params, nil, body, funcNamePos} } // Parse expressions separated by commas: args to print[f] or user @@ -662,7 +664,7 @@ func (p *parser) preIncr() ast.Expr { exprPos := p.pos expr := p.preIncr() if !ast.IsLValue(expr) { - panic(p.posErrorf(exprPos, "expected lvalue after ++ or --")) + panic(PosErrorf(exprPos, "expected lvalue after ++ or --")) } return &ast.IncrExpr{expr, op, true} } @@ -777,7 +779,7 @@ func (p *parser) primary() ast.Expr { inPos := p.pos in := p.expr() if !ast.IsLValue(in) { - panic(p.posErrorf(inPos, "3rd arg to sub/gsub must be lvalue")) + panic(PosErrorf(inPos, "3rd arg to sub/gsub must be lvalue")) } args = append(args, in) } @@ -1009,11 +1011,11 @@ func (p *parser) matches(operators ...Token) bool { // Format given string and args with Sprintf and return *ParseError // with that message and the current position. func (p *parser) errorf(format string, args ...interface{}) error { - return p.posErrorf(p.pos, format, args...) + return PosErrorf(p.pos, format, args...) } // Like errorf, but with an explicit position. -func (p *parser) posErrorf(pos Position, format string, args ...interface{}) error { +func PosErrorf(pos Position, format string, args ...interface{}) error { message := fmt.Sprintf(format, args...) return &ParseError{pos, message} } @@ -1034,7 +1036,7 @@ func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr { i++ } p.expect(RPAREN) - call := &ast.UserCallExpr{false, -1, name, args} // index is resolved later + call := &ast.UserCallExpr{false, -1, name, args, pos} // index is resolved later //p.recordUserCall(call, pos) return call } @@ -1072,5 +1074,5 @@ func (p *parser) checkMultiExprs() { min = pos } } - panic(p.posErrorf(min, "unexpected comma-separated expression")) + panic(PosErrorf(min, "unexpected comma-separated expression")) } From 945e48ea778f4ff2870cca8ad3dc4f74094cb997 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 01:13:40 +0300 Subject: [PATCH 16/57] decoupling parse/resolve : varRef/arrayRef --- internal/ast/ast.go | 15 +++++++++++++-- internal/resolver/resolve.go | 26 ++++++++++++-------------- parser/parser.go | 19 ++++++------------- 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index 2a27fb6f..b1dcaa17 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -159,6 +159,7 @@ type ArrayExpr struct { Scope VarScope Index int Name string + Pos Position } func (e *ArrayExpr) String() string { @@ -226,14 +227,15 @@ func (e *RegExpr) String() string { return "/" + escaped + "/" } +// meaning it will be set during resolve step +const WillBeResolvedLater = -1 + type VarScope int const ( ScopeSpecial VarScope = iota ScopeGlobal ScopeLocal - - ScopeUnresolved VarScope = -1 ) // VarExpr is a variable reference (special var, global, or local). @@ -243,6 +245,7 @@ type VarExpr struct { Scope VarScope Index int Name string + Pos Position } func (e *VarExpr) String() string { @@ -608,3 +611,11 @@ func trimParens(s string) string { } return s } + +func VarRef(name string, pos Position) *VarExpr { + return &VarExpr{WillBeResolvedLater, WillBeResolvedLater, name, pos} +} + +func ArrayRef(name string, pos Position) *ArrayExpr { + return &ArrayExpr{WillBeResolvedLater, WillBeResolvedLater, name, pos} +} diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index 5c5d8e67..0b2e0758 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -62,14 +62,12 @@ type varRef struct { funcName string ref *ast.VarExpr isArg bool - pos Position } // A single array reference type arrayRef struct { funcName string ref *ast.ArrayExpr - pos Position } // Initialize the resolver @@ -78,9 +76,9 @@ func (r *resolver) initResolve(config *ResolverConfig) { r.varTypes = make(map[string]map[string]typeInfo) r.varTypes[""] = make(map[string]typeInfo) // globals r.functions = make(map[string]int) - r.arrayRef("ARGV", Position{1, 1}) // interpreter relies on ARGV being present - r.arrayRef("ENVIRON", Position{1, 1}) // and other built-in arrays - r.arrayRef("FIELDS", Position{1, 1}) + r.recordArrayRef(ast.ArrayRef("ARGV", Position{1, 1})) // interpreter relies on ARGV being present + r.recordArrayRef(ast.ArrayRef("ENVIRON", Position{1, 1})) // and other built-in arrays + r.recordArrayRef(ast.ArrayRef("FIELDS", Position{1, 1})) r.multiExprs = make(map[*ast.MultiExpr]Position, 3) } @@ -184,31 +182,31 @@ func (r *resolver) getScope(name string) (ast.VarScope, string) { // Record a variable (scalar) reference and return the *VarExpr (but // VarExpr.Index won't be set till later) -func (r *resolver) varRef(name string, pos Position) *ast.VarExpr { +func (r *resolver) recordVarRef(expr *ast.VarExpr) { + name := expr.Name scope, funcName := r.getScope(name) - expr := &ast.VarExpr{scope, 0, name} - r.varRefs = append(r.varRefs, varRef{funcName, expr, false, pos}) + r.varRefs = append(r.varRefs, varRef{funcName, expr, false}) info := r.varTypes[funcName][name] if info.typ == typeUnknown { r.varTypes[funcName][name] = typeInfo{typeScalar, expr, scope, 0, info.callName, 0} } - return expr } // Record an array reference and return the *ArrayExpr (but // ArrayExpr.Index won't be set till later) -func (r *resolver) arrayRef(name string, pos Position) *ast.ArrayExpr { +func (r *resolver) recordArrayRef(expr *ast.ArrayExpr) { + name := expr.Name scope, funcName := r.getScope(name) if scope == ast.ScopeSpecial { - panic(r.errorf("can't use scalar %q as array", name)) + panic(parser.PosErrorf(expr.Pos, "can't use scalar %q as array", name)) } - expr := &ast.ArrayExpr{scope, 0, name} - r.arrayRefs = append(r.arrayRefs, arrayRef{funcName, expr, pos}) + expr.Scope = scope + //expr := &ast.ArrayExpr{scope, 0, name} + r.arrayRefs = append(r.arrayRefs, arrayRef{funcName, expr}) info := r.varTypes[funcName][name] if info.typ == typeUnknown { r.varTypes[funcName][name] = typeInfo{typeArray, nil, scope, 0, info.callName, 0} } - return expr } // Print variable type information (for debugging) on p.debugWriter diff --git a/parser/parser.go b/parser/parser.go index 2ce41c87..5098d2b4 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -246,7 +246,7 @@ func (p *parser) simpleStmt() ast.Stmt { } case DELETE: p.next() - ref := p.arrayRef(p.val, p.pos) + ref := ast.ArrayRef(p.val, p.pos) p.expect(NAME) var index []ast.Expr if p.tok == LBRACKET { @@ -587,7 +587,7 @@ func (p *parser) _in(higher func() ast.Expr) ast.Expr { expr := higher() for p.tok == IN { p.next() - ref := p.arrayRef(p.val, p.pos) + ref := ast.ArrayRef(p.val, p.pos) p.expect(NAME) expr = &ast.InExpr{[]ast.Expr{expr}, ref} } @@ -907,9 +907,9 @@ func (p *parser) optionalLValue() ast.Expr { panic(p.errorf("expected expression instead of ]")) } p.expect(RBRACKET) - return &ast.IndexExpr{p.arrayRef(name, namePos), index} + return &ast.IndexExpr{ast.ArrayRef(name, namePos), index} } - return p.varRef(name, namePos) + return ast.VarRef(name, namePos) case DOLLAR: p.next() return &ast.FieldExpr{p.primary()} @@ -1015,6 +1015,7 @@ func (p *parser) errorf(format string, args ...interface{}) error { } // Like errorf, but with an explicit position. +// TODO this should be internal or not exported func PosErrorf(pos Position, format string, args ...interface{}) error { message := fmt.Sprintf(format, args...) return &ParseError{pos, message} @@ -1036,19 +1037,11 @@ func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr { i++ } p.expect(RPAREN) - call := &ast.UserCallExpr{false, -1, name, args, pos} // index is resolved later + call := &ast.UserCallExpr{false, ast.WillBeResolvedLater, name, args, pos} //p.recordUserCall(call, pos) return call } -func (p *parser) varRef(name string, pos Position) *ast.VarExpr { - return &ast.VarExpr{ast.ScopeUnresolved, 0, name} -} - -func (p *parser) arrayRef(name string, pos Position) *ast.ArrayExpr { - return &ast.ArrayExpr{ast.ScopeUnresolved, 0, name} -} - // Record a "multi expression" (comma-separated pseudo-expression // used to allow commas around print/printf arguments). func (p *parser) multiExpr(exprs []ast.Expr, pos Position) ast.Expr { From 6727e86494e7bafd510f8dd8ccfd8bcc593d4c05 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 01:18:28 +0300 Subject: [PATCH 17/57] decoupling parse/resolve : varRef/arrayRef --- internal/resolver/resolver.go | 6 ++++++ parser/parser.go | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 419d12a2..404989cc 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -79,6 +79,12 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { r.stopFunction() r.locals = nil + case *ast.VarExpr: + r.recordVarRef(n) + + case *ast.ArrayExpr: + r.recordArrayRef(n) + case *ast.UserCallExpr: name := n.Name if r.locals[name] { diff --git a/parser/parser.go b/parser/parser.go index 5098d2b4..5276fce5 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -720,7 +720,7 @@ func (p *parser) primary() ast.Expr { panic(p.errorf("expected expression instead of ]")) } p.expect(RBRACKET) - return &ast.IndexExpr{p.arrayRef(name, namePos), index} + return &ast.IndexExpr{ast.ArrayRef(name, namePos), index} } else if p.tok == LPAREN && !p.lexer.HadSpace() { //if p.locals[name] { // panic(p.errorf("can't call local variable %q as function", name)) @@ -730,7 +730,7 @@ func (p *parser) primary() ast.Expr { // lexer.HadSpace() method. return p.userCall(name, namePos) } - return p.varRef(name, namePos) + return ast.VarRef(name, namePos) case LPAREN: parenPos := p.pos p.next() @@ -746,7 +746,7 @@ func (p *parser) primary() ast.Expr { p.expect(RPAREN) if p.tok == IN { p.next() - ref := p.arrayRef(p.val, p.pos) + ref := ast.ArrayRef(p.val, p.pos) p.expect(NAME) return &ast.InExpr{exprs, ref} } @@ -790,7 +790,7 @@ func (p *parser) primary() ast.Expr { p.expect(LPAREN) str := p.expr() p.commaNewlines() - ref := p.arrayRef(p.val, p.pos) + ref := ast.ArrayRef(p.val, p.pos) p.expect(NAME) args := []ast.Expr{str, ref} if p.tok == COMMA { From d6685479b1ed4f9cf43fb767d14be1f081a197b3 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 01:29:05 +0300 Subject: [PATCH 18/57] decoupling parse/resolve --- internal/resolver/resolver.go | 5 ++--- parser/parser.go | 10 +++------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 404989cc..4fdb8afb 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -37,8 +37,7 @@ type Program struct { Arrays map[string]int } -func Resolve(prog *ast.Program, config *ResolverConfig) (resolvedProg *Program, err error) { - // TODO errors handling via panic recover +func Resolve(prog *ast.Program, config *ResolverConfig) (resolvedProg *Program) { r := &resolver{} resolvedProg = &Program{ Program: *prog, @@ -51,7 +50,7 @@ func Resolve(prog *ast.Program, config *ResolverConfig) (resolvedProg *Program, r.resolveVars(resolvedProg) //r.checkMultiExprs() - return resolvedProg, nil + return resolvedProg } func (r *resolver) Visit(node ast.Node) ast.Visitor { diff --git a/parser/parser.go b/parser/parser.go index 5276fce5..8034c016 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -74,15 +74,11 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { // Parse into abstract syntax tree astProg := p.program() - resolvedProgram, err := resolver.Resolve(astProg, resolverConfig) - if err != nil { - return nil, err - } - - prog.Program = *resolvedProgram + // Resolve step + prog.Program = *resolver.Resolve(astProg, resolverConfig) // Compile to virtual machine code - prog.Compiled, err = compiler.Compile(resolvedProgram) + prog.Compiled, err = compiler.Compile(&prog.Program) return prog, err } From 2b6f2e6ce24c740a313fa3c41fc6ce34c1ce2176 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 01:42:25 +0300 Subject: [PATCH 19/57] decoupling parse/resolve --- internal/ast/ast.go | 1 + internal/resolver/resolve.go | 14 +++++++++----- internal/resolver/resolver.go | 9 +++++---- parser/parser.go | 5 ++--- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index b1dcaa17..a73d6264 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -16,6 +16,7 @@ type Program struct { Actions []Action End []Stmts Functions []Function + EndPos Position } // String returns an indented, pretty-printed version of the parsed diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index 0b2e0758..81215373 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -71,8 +71,12 @@ type arrayRef struct { } // Initialize the resolver -func (r *resolver) initResolve(config *ResolverConfig) { - r.nativeFuncs = config.NativeFuncs +func (r *resolver) initResolve(config *parser.ParserConfig) { + if config != nil { + r.nativeFuncs = config.Funcs + r.debugTypes = config.DebugTypes + r.debugWriter = config.DebugWriter + } r.varTypes = make(map[string]map[string]typeInfo) r.varTypes[""] = make(map[string]typeInfo) // globals r.functions = make(map[string]int) @@ -286,7 +290,7 @@ func (r *resolver) resolveVars(prog *Program) { _, isFunc := r.functions[name] if isFunc { // Global var can't also be the name of a function - panic(r.errorf("global var %q can't also be a function", name)) + panic(parser.PosErrorf(prog.EndPos, "global var %q can't also be a function", name)) } var index int if info.scope == ast.ScopeSpecial { @@ -408,14 +412,14 @@ func (r *resolver) resolveVars(prog *Program) { for _, varRef := range r.varRefs { info := r.varTypes[varRef.funcName][varRef.ref.Name] if info.typ == typeArray && !varRef.isArg { - panic(parser.PosErrorf(varRef.pos, "can't use array %q as scalar", varRef.ref.Name)) + panic(parser.PosErrorf(varRef.ref.Pos, "can't use array %q as scalar", varRef.ref.Name)) } varRef.ref.Index = info.index } for _, arrayRef := range r.arrayRefs { info := r.varTypes[arrayRef.funcName][arrayRef.ref.Name] if info.typ == typeScalar { - panic(parser.PosErrorf(arrayRef.pos, "can't use scalar %q as array", arrayRef.ref.Name)) + panic(parser.PosErrorf(arrayRef.ref.Pos, "can't use scalar %q as array", arrayRef.ref.Name)) } arrayRef.ref.Index = info.index } diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 4fdb8afb..e8123fea 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -4,6 +4,7 @@ import ( "github.com/benhoyt/goawk/internal/ast" "github.com/benhoyt/goawk/lexer" "github.com/benhoyt/goawk/parser" + "io" ) type resolver struct { @@ -24,10 +25,10 @@ type resolver struct { nativeFuncs map[string]interface{} funcIdx int -} -type ResolverConfig struct { - NativeFuncs map[string]interface{} + // Configuration and debugging + debugTypes bool // show variable types for debugging + debugWriter io.Writer // where the debug output goes } // Program represents the resolved program. @@ -37,7 +38,7 @@ type Program struct { Arrays map[string]int } -func Resolve(prog *ast.Program, config *ResolverConfig) (resolvedProg *Program) { +func Resolve(prog *ast.Program, config *parser.ParserConfig) (resolvedProg *Program) { r := &resolver{} resolvedProg = &Program{ Program: *prog, diff --git a/parser/parser.go b/parser/parser.go index 8034c016..cf33274a 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -62,11 +62,9 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { }() lexer := NewLexer(src) p := parser{lexer: lexer} - resolverConfig := &resolver.ResolverConfig{} if config != nil { p.debugTypes = config.DebugTypes p.debugWriter = config.DebugWriter - resolverConfig.NativeFuncs = config.Funcs } //p.initResolve() p.next() // initialize p.tok @@ -75,7 +73,7 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { astProg := p.program() // Resolve step - prog.Program = *resolver.Resolve(astProg, resolverConfig) + prog.Program = *resolver.Resolve(astProg, config) // Compile to virtual machine code prog.Compiled, err = compiler.Compile(&prog.Program) @@ -177,6 +175,7 @@ func (p *parser) program() *ast.Program { //p.resolveUserCalls(prog) //p.resolveVars(prog) p.checkMultiExprs() + prog.EndPos = p.pos return prog } From 2e26c0e5474e722e42a07d271974650d4dff48aa Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 01:56:22 +0300 Subject: [PATCH 20/57] decoupling parse/resolve : fight with import cycles --- internal/ast/ast.go | 6 ++++++ internal/compiler/compiler.go | 3 +-- internal/resolver/resolver.go | 11 ++--------- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index a73d6264..ad28be2b 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -19,6 +19,12 @@ type Program struct { EndPos Position } +type ResolvedProgram struct { + Program + Scalars map[string]int + Arrays map[string]int +} + // String returns an indented, pretty-printed version of the parsed // program. func (p *Program) String() string { diff --git a/internal/compiler/compiler.go b/internal/compiler/compiler.go index 6ede41a9..46872167 100644 --- a/internal/compiler/compiler.go +++ b/internal/compiler/compiler.go @@ -3,7 +3,6 @@ package compiler import ( "fmt" - "github.com/benhoyt/goawk/internal/resolver" "math" "regexp" @@ -55,7 +54,7 @@ func (e *compileError) Error() string { } // Compile compiles an AST (parsed program) into virtual machine instructions. -func Compile(prog *resolver.Program) (compiledProg *Program, err error) { +func Compile(prog *ast.ResolvedProgram) (compiledProg *Program, err error) { defer func() { // The compiler uses panic with a *compileError to signal compile // errors internally, and they're caught here. This avoids the diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index e8123fea..4f7f6f1a 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -31,16 +31,9 @@ type resolver struct { debugWriter io.Writer // where the debug output goes } -// Program represents the resolved program. -type Program struct { - ast.Program - Scalars map[string]int - Arrays map[string]int -} - -func Resolve(prog *ast.Program, config *parser.ParserConfig) (resolvedProg *Program) { +func Resolve(prog *ast.Program, config *parser.ParserConfig) (resolvedProg *ast.ResolvedProgram) { r := &resolver{} - resolvedProg = &Program{ + resolvedProg = &ast.ResolvedProgram{ Program: *prog, } r.initResolve(config) From b63e992a60059be8c8693ee355fb32a51680e809 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 02:17:26 +0300 Subject: [PATCH 21/57] decoupling parse/resolve : fight with import cycles --- internal/resolver/resolve.go | 30 +++++++++++++-------------- internal/resolver/resolver.go | 4 ++-- lexer/lexer.go | 20 ++++++++++++++++++ parser/parser.go | 38 ++++++++++------------------------- 4 files changed, 48 insertions(+), 44 deletions(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index 81215373..4bd25fc8 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -5,12 +5,12 @@ package resolver import ( "fmt" + "github.com/benhoyt/goawk/parser" "reflect" "sort" "github.com/benhoyt/goawk/internal/ast" . "github.com/benhoyt/goawk/lexer" - "github.com/benhoyt/goawk/parser" ) type varType int @@ -136,11 +136,11 @@ func (r *resolver) resolveUserCalls(prog *ast.Program) { if !ok { f, haveNative := r.nativeFuncs[c.call.Name] if !haveNative { - panic(parser.PosErrorf(c.pos, "undefined function %q", c.call.Name)) + panic(c.pos.Errorf("undefined function %q", c.call.Name)) } typ := reflect.TypeOf(f) if !typ.IsVariadic() && len(c.call.Args) > typ.NumIn() { - panic(parser.PosErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) + panic(c.pos.Errorf("%q called with more arguments than declared", c.call.Name)) } c.call.Native = true c.call.Index = nativeIndexes[c.call.Name] @@ -148,7 +148,7 @@ func (r *resolver) resolveUserCalls(prog *ast.Program) { } function := prog.Functions[index] if len(c.call.Args) > len(function.Params) { - panic(parser.PosErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) + panic(c.pos.Errorf("%q called with more arguments than declared", c.call.Name)) } c.call.Index = index } @@ -202,7 +202,7 @@ func (r *resolver) recordArrayRef(expr *ast.ArrayExpr) { name := expr.Name scope, funcName := r.getScope(name) if scope == ast.ScopeSpecial { - panic(parser.PosErrorf(expr.Pos, "can't use scalar %q as array", name)) + panic(expr.Pos.Errorf("can't use scalar %q as array", name)) } expr.Scope = scope //expr := &ast.ArrayExpr{scope, 0, name} @@ -214,7 +214,7 @@ func (r *resolver) recordArrayRef(expr *ast.ArrayExpr) { } // Print variable type information (for debugging) on p.debugWriter -func (r *resolver) printVarTypes(prog *Program) { +func (r *resolver) printVarTypes(prog *ast.ResolvedProgram) { fmt.Fprintf(r.debugWriter, "scalars: %v\n", prog.Scalars) fmt.Fprintf(r.debugWriter, "arrays: %v\n", prog.Arrays) funcNames := []string{} @@ -242,7 +242,7 @@ func (r *resolver) printVarTypes(prog *Program) { // Resolve unknown variables types and generate variable indexes and // name-to-index mappings for interpreter -func (r *resolver) resolveVars(prog *Program) { +func (r *resolver) resolveVars(prog *ast.ResolvedProgram) { // First go through all unknown types and try to determine the // type from the parameter type in that function definition. // Iterate through functions in topological order, for example @@ -290,7 +290,7 @@ func (r *resolver) resolveVars(prog *Program) { _, isFunc := r.functions[name] if isFunc { // Global var can't also be the name of a function - panic(parser.PosErrorf(prog.EndPos, "global var %q can't also be a function", name)) + panic(prog.EndPos.Errorf("global var %q can't also be a function", name)) } var index int if info.scope == ast.ScopeSpecial { @@ -376,7 +376,7 @@ func (r *resolver) resolveVars(prog *Program) { funcName := r.getVarFuncName(prog, varExpr.Name, c.inFunc) info := r.varTypes[funcName][varExpr.Name] if info.typ == typeArray { - panic(parser.PosErrorf(c.pos, "can't pass array %q to native function", varExpr.Name)) + panic(c.pos.Errorf("can't pass array %q to native function", varExpr.Name)) } } continue @@ -388,17 +388,17 @@ func (r *resolver) resolveVars(prog *Program) { varExpr, ok := arg.(*ast.VarExpr) if !ok { if function.Arrays[i] { - panic(parser.PosErrorf(c.pos, "can't pass scalar %s as array param", arg)) + panic(c.pos.Errorf("can't pass scalar %s as array param", arg)) } continue } funcName := r.getVarFuncName(prog, varExpr.Name, c.inFunc) info := r.varTypes[funcName][varExpr.Name] if info.typ == typeArray && !function.Arrays[i] { - panic(parser.PosErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name)) + panic(c.pos.Errorf("can't pass array %q as scalar param", varExpr.Name)) } if info.typ != typeArray && function.Arrays[i] { - panic(parser.PosErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name)) + panic(c.pos.Errorf("can't pass scalar %q as array param", varExpr.Name)) } } } @@ -412,14 +412,14 @@ func (r *resolver) resolveVars(prog *Program) { for _, varRef := range r.varRefs { info := r.varTypes[varRef.funcName][varRef.ref.Name] if info.typ == typeArray && !varRef.isArg { - panic(parser.PosErrorf(varRef.ref.Pos, "can't use array %q as scalar", varRef.ref.Name)) + panic(varRef.ref.Pos.Errorf("can't use array %q as scalar", varRef.ref.Name)) } varRef.ref.Index = info.index } for _, arrayRef := range r.arrayRefs { info := r.varTypes[arrayRef.funcName][arrayRef.ref.Name] if info.typ == typeScalar { - panic(parser.PosErrorf(arrayRef.ref.Pos, "can't use scalar %q as array", arrayRef.ref.Name)) + panic(arrayRef.ref.Pos.Errorf("can't use scalar %q as array", arrayRef.ref.Name)) } arrayRef.ref.Index = info.index } @@ -427,7 +427,7 @@ func (r *resolver) resolveVars(prog *Program) { // If name refers to a local (in function inFunc), return that // function's name, otherwise return "" (meaning global). -func (r *resolver) getVarFuncName(prog *Program, name, inFunc string) string { +func (r *resolver) getVarFuncName(prog *ast.ResolvedProgram, name, inFunc string) string { if inFunc == "" { return "" } diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 4f7f6f1a..28a36946 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -31,9 +31,9 @@ type resolver struct { debugWriter io.Writer // where the debug output goes } -func Resolve(prog *ast.Program, config *parser.ParserConfig) (resolvedProg *ast.ResolvedProgram) { +func Resolve(prog *ast.Program, config *parser.ParserConfig) *ast.ResolvedProgram { r := &resolver{} - resolvedProg = &ast.ResolvedProgram{ + resolvedProg := &ast.ResolvedProgram{ Program: *prog, } r.initResolve(config) diff --git a/lexer/lexer.go b/lexer/lexer.go index dc3a48dd..d30fb461 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -9,6 +9,7 @@ package lexer import ( "errors" + "fmt" ) // Lexer tokenizes a byte string of AWK source code. Use NewLexer to @@ -32,6 +33,25 @@ type Position struct { Column int } +type PositionError struct { + // Source line/column position where the error occurred. + Position Position + // Error message. + Message string +} + +// Like errorf, but with an explicit position. +func (pos Position) Errorf(format string, args ...interface{}) error { + message := fmt.Sprintf(format, args...) + return &PositionError{pos, message} +} + +// Error returns a formatted version of the error, including the line +// and column numbers. +func (e *PositionError) Error() string { + return fmt.Sprintf("parse error at %d:%d: %s", e.Position.Line, e.Position.Column, e.Message) +} + // NewLexer creates a new lexer that will tokenize the given source // code. See the module-level example for a working example. func NewLexer(src []byte) *Lexer { diff --git a/parser/parser.go b/parser/parser.go index cf33274a..8988e5b0 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -5,7 +5,6 @@ package parser import ( - "fmt" "github.com/benhoyt/goawk/internal/resolver" "io" "regexp" @@ -20,16 +19,7 @@ import ( // ParseError (actually *ParseError) is the type of error returned by // ParseProgram. type ParseError struct { - // Source line/column position where the error occurred. - Position Position - // Error message. - Message string -} - -// Error returns a formatted version of the error, including the line -// and column numbers. -func (e *ParseError) Error() string { - return fmt.Sprintf("parse error at %d:%d: %s", e.Position.Line, e.Position.Column, e.Message) + PositionError } // ParserConfig lets you specify configuration for the parsing @@ -51,13 +41,14 @@ type ParserConfig struct { // the parser configuration (and is allowed to be nil). func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { defer func() { + // TODO adjust description // The parser uses panic with a *ParseError to signal parsing // errors internally, and they're caught here. This // significantly simplifies the recursive descent calls as // we don't have to check errors everywhere. if r := recover(); r != nil { // Convert to ParseError or re-panic - err = r.(*ParseError) + err = &ParseError{*r.(*PositionError)} } }() lexer := NewLexer(src) @@ -73,10 +64,10 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { astProg := p.program() // Resolve step - prog.Program = *resolver.Resolve(astProg, config) + prog.ResolvedProgram = *resolver.Resolve(astProg, config) // Compile to virtual machine code - prog.Compiled, err = compiler.Compile(&prog.Program) + prog.Compiled, err = compiler.Compile(&prog.ResolvedProgram) return prog, err } @@ -87,14 +78,14 @@ type Program struct { // but are exported for the interpreter (Program itself needs to // be exported in package "parser", otherwise these could live in // "internal/ast".) - resolver.Program + ast.ResolvedProgram Compiled *compiler.Program } // String returns an indented, pretty-printed version of the parsed // program. func (p *Program) String() string { - return p.Program.Program.String() + return p.ResolvedProgram.Program.String() } // Disassemble writes a human-readable form of the program's virtual machine @@ -659,7 +650,7 @@ func (p *parser) preIncr() ast.Expr { exprPos := p.pos expr := p.preIncr() if !ast.IsLValue(expr) { - panic(PosErrorf(exprPos, "expected lvalue after ++ or --")) + panic(exprPos.Errorf("expected lvalue after ++ or --")) } return &ast.IncrExpr{expr, op, true} } @@ -774,7 +765,7 @@ func (p *parser) primary() ast.Expr { inPos := p.pos in := p.expr() if !ast.IsLValue(in) { - panic(PosErrorf(inPos, "3rd arg to sub/gsub must be lvalue")) + panic(inPos.Errorf("3rd arg to sub/gsub must be lvalue")) } args = append(args, in) } @@ -1006,14 +997,7 @@ func (p *parser) matches(operators ...Token) bool { // Format given string and args with Sprintf and return *ParseError // with that message and the current position. func (p *parser) errorf(format string, args ...interface{}) error { - return PosErrorf(p.pos, format, args...) -} - -// Like errorf, but with an explicit position. -// TODO this should be internal or not exported -func PosErrorf(pos Position, format string, args ...interface{}) error { - message := fmt.Sprintf(format, args...) - return &ParseError{pos, message} + return p.pos.Errorf(format, args...) } // Parse call to a user-defined function (and record call site for @@ -1062,5 +1046,5 @@ func (p *parser) checkMultiExprs() { min = pos } } - panic(PosErrorf(min, "unexpected comma-separated expression")) + panic(min.Errorf("unexpected comma-separated expression")) } From 3219855d14789a108fd59814f987af6fb9ba486a Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 02:24:02 +0300 Subject: [PATCH 22/57] decoupling parse/resolve : fight with import cycles --- internal/resolver/resolve.go | 3 +-- internal/resolver/resolver.go | 21 ++++++++++++++++----- parser/parser.go | 13 ++++++++++++- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index 4bd25fc8..ce1ec48d 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -5,7 +5,6 @@ package resolver import ( "fmt" - "github.com/benhoyt/goawk/parser" "reflect" "sort" @@ -71,7 +70,7 @@ type arrayRef struct { } // Initialize the resolver -func (r *resolver) initResolve(config *parser.ParserConfig) { +func (r *resolver) initResolve(config *Config) { if config != nil { r.nativeFuncs = config.Funcs r.debugTypes = config.DebugTypes diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 28a36946..4c2702db 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -3,7 +3,6 @@ package resolver import ( "github.com/benhoyt/goawk/internal/ast" "github.com/benhoyt/goawk/lexer" - "github.com/benhoyt/goawk/parser" "io" ) @@ -31,7 +30,19 @@ type resolver struct { debugWriter io.Writer // where the debug output goes } -func Resolve(prog *ast.Program, config *parser.ParserConfig) *ast.ResolvedProgram { +type Config struct { + // Enable printing of type information + DebugTypes bool + + // io.Writer to print type information on (for example, os.Stderr) + DebugWriter io.Writer + + // Map of named Go functions to allow calling from AWK. See docs + // on interp.Config.Funcs for details. + Funcs map[string]interface{} +} + +func Resolve(prog *ast.Program, config *Config) *ast.ResolvedProgram { r := &resolver{} resolvedProg := &ast.ResolvedProgram{ Program: *prog, @@ -54,13 +65,13 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { function := n name := function.Name if _, ok := r.functions[name]; ok { - panic(parser.PosErrorf(function.Pos, "function %q already defined", name)) + panic(function.Pos.Errorf("function %q already defined", name)) } r.addFunction(name) r.locals = make(map[string]bool, 7) for _, param := range function.Params { //if r.locals[param] { - // panic(parser.PosErrorf(function.ParamsPos[i], "duplicate parameter name %q", param)) + // panic(function.ParamsPos[i].Errorf( "duplicate parameter name %q", param)) //} r.locals[param] = true @@ -81,7 +92,7 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { case *ast.UserCallExpr: name := n.Name if r.locals[name] { - panic(parser.PosErrorf(n.Pos, "can't call local variable %q as function", name)) + panic(n.Pos.Errorf("can't call local variable %q as function", name)) } for i, arg := range n.Args { r.processUserCallArg(name, arg, i) diff --git a/parser/parser.go b/parser/parser.go index 8988e5b0..b351a747 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -36,6 +36,17 @@ type ParserConfig struct { Funcs map[string]interface{} } +func (c *ParserConfig) toResolverConfig() *resolver.Config { + if c == nil { + return nil + } + return &resolver.Config{ + DebugTypes: c.DebugTypes, + DebugWriter: c.DebugWriter, + Funcs: c.Funcs, + } +} + // ParseProgram parses an entire AWK program, returning the *Program // abstract syntax tree or a *ParseError on error. "config" describes // the parser configuration (and is allowed to be nil). @@ -64,7 +75,7 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { astProg := p.program() // Resolve step - prog.ResolvedProgram = *resolver.Resolve(astProg, config) + prog.ResolvedProgram = *resolver.Resolve(astProg, config.toResolverConfig()) // Compile to virtual machine code prog.Compiled, err = compiler.Compile(&prog.ResolvedProgram) From 25d2860c73bfe00238770336bd51c722248e4932 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 20:55:07 +0300 Subject: [PATCH 23/57] decoupling parse/resolve : fixing failing tests --- internal/ast/walk.go | 5 ++++- parser/parser.go | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/ast/walk.go b/internal/ast/walk.go index 631cd191..7345e49e 100644 --- a/internal/ast/walk.go +++ b/internal/ast/walk.go @@ -30,6 +30,9 @@ func WalkStmtList(v Visitor, list []Stmt) { // w.Visit(nil). // func Walk(v Visitor, node Node) { + if node == nil { + return + } if v = v.Visit(node); v == nil { return } @@ -148,7 +151,7 @@ func Walk(v Visitor, node Node) { case *BlockStmt: WalkStmtList(v, n.Body) - case Program: + case *Program: for _, stmts := range n.Begin { WalkStmtList(v, stmts) } diff --git a/parser/parser.go b/parser/parser.go index b351a747..542e8679 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -74,6 +74,8 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { // Parse into abstract syntax tree astProg := p.program() + prog = &Program{} + // Resolve step prog.ResolvedProgram = *resolver.Resolve(astProg, config.toResolverConfig()) From fdd96be7a6a7d44d1dc0db2401a7663e1e830fe2 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 21:03:46 +0300 Subject: [PATCH 24/57] decoupling parse/resolve : fixing failing tests --- internal/resolver/resolve.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index ce1ec48d..fb1bc79a 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -188,6 +188,7 @@ func (r *resolver) getScope(name string) (ast.VarScope, string) { func (r *resolver) recordVarRef(expr *ast.VarExpr) { name := expr.Name scope, funcName := r.getScope(name) + expr.Scope = scope r.varRefs = append(r.varRefs, varRef{funcName, expr, false}) info := r.varTypes[funcName][name] if info.typ == typeUnknown { From 9399ab1ade943cd9b47aa4ac6b4c699357690f1d Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 21:07:26 +0300 Subject: [PATCH 25/57] decoupling parse/resolve : fixing failing tests --- internal/resolver/resolve.go | 2 +- internal/resolver/resolver.go | 11 +++++------ parser/parser.go | 2 ++ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index fb1bc79a..e117538c 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -82,7 +82,7 @@ func (r *resolver) initResolve(config *Config) { r.recordArrayRef(ast.ArrayRef("ARGV", Position{1, 1})) // interpreter relies on ARGV being present r.recordArrayRef(ast.ArrayRef("ENVIRON", Position{1, 1})) // and other built-in arrays r.recordArrayRef(ast.ArrayRef("FIELDS", Position{1, 1})) - r.multiExprs = make(map[*ast.MultiExpr]Position, 3) + //r.multiExprs = make(map[*ast.MultiExpr]Position, 3) } // Signal the start of a function diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 4c2702db..7788afec 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -2,7 +2,6 @@ package resolver import ( "github.com/benhoyt/goawk/internal/ast" - "github.com/benhoyt/goawk/lexer" "io" ) @@ -12,11 +11,11 @@ type resolver struct { funcName string // function name if parsing a func, else "" // Variable tracking and resolving - locals map[string]bool // current function's locals (for determining scope) - varTypes map[string]map[string]typeInfo // map of func name to var name to type - varRefs []varRef // all variable references (usually scalars) - arrayRefs []arrayRef // all array references - multiExprs map[*ast.MultiExpr]lexer.Position // tracks comma-separated expressions + locals map[string]bool // current function's locals (for determining scope) + varTypes map[string]map[string]typeInfo // map of func name to var name to type + varRefs []varRef // all variable references (usually scalars) + arrayRefs []arrayRef // all array references + //multiExprs map[*ast.MultiExpr]lexer.Position // tracks comma-separated expressions // Function tracking functions map[string]int // map of function name to index diff --git a/parser/parser.go b/parser/parser.go index 542e8679..939232d3 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -69,6 +69,8 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { p.debugWriter = config.DebugWriter } //p.initResolve() + p.multiExprs = make(map[*ast.MultiExpr]Position, 3) + p.next() // initialize p.tok // Parse into abstract syntax tree From d801d4bc04a5e5371f7cc6dc88fc8845c6c3442c Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 21:14:53 +0300 Subject: [PATCH 26/57] decoupling parse/resolve : fixing failing tests --- parser/parser.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 939232d3..3a543390 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -460,14 +460,24 @@ func (p *parser) function() ast.Function { p.optionalNewlines() // Parse the body - //p.startFunction(name, params) + p.startFunction(name) body := p.stmtsBrace() - //p.stopFunction() + p.stopFunction() //p.locals = nil return ast.Function{name, params, nil, body, funcNamePos} } +// Signal the start of a function +func (p *parser) startFunction(name string) { + p.funcName = name +} + +// Signal the end of a function +func (p *parser) stopFunction() { + p.funcName = "" +} + // Parse expressions separated by commas: args to print[f] or user // function call, or multi-dimensional index. func (p *parser) exprList(parse func() ast.Expr) []ast.Expr { From 2fc47493cd706351b93b23e7eb78ecad93bc83be Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 21:30:02 +0300 Subject: [PATCH 27/57] decoupling parse/resolve : fixing failing tests --- internal/resolver/resolver.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 7788afec..eaf66054 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -89,6 +89,7 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { r.recordArrayRef(n) case *ast.UserCallExpr: + ast.WalkExprList(r, n.Args) name := n.Name if r.locals[name] { panic(n.Pos.Errorf("can't call local variable %q as function", name)) From e7279c41a799557a4448ebfd80aadd1e8648292b Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 30 Aug 2022 21:37:39 +0300 Subject: [PATCH 28/57] decoupling parse/resolve : fixing failing tests --- internal/ast/ast.go | 1 + internal/resolver/resolver.go | 2 +- parser/parser.go | 6 +++--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index ad28be2b..200b5e9c 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -333,6 +333,7 @@ type UserCallExpr struct { Name string Args []Expr Pos Position + EndPos Position } func (e *UserCallExpr) String() string { diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index eaf66054..db70f4c0 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -92,7 +92,7 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { ast.WalkExprList(r, n.Args) name := n.Name if r.locals[name] { - panic(n.Pos.Errorf("can't call local variable %q as function", name)) + panic(n.EndPos.Errorf("can't call local variable %q as function", name)) } for i, arg := range n.Args { r.processUserCallArg(name, arg, i) diff --git a/parser/parser.go b/parser/parser.go index 3a543390..25b71a8c 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -739,7 +739,7 @@ func (p *parser) primary() ast.Expr { // Grammar requires no space between function name and // left paren for user function calls, hence the funky // lexer.HadSpace() method. - return p.userCall(name, namePos) + return p.userCall(name, namePos, p.pos) } return ast.VarRef(name, namePos) case LPAREN: @@ -1027,7 +1027,7 @@ func (p *parser) errorf(format string, args ...interface{}) error { // Parse call to a user-defined function (and record call site for // resolving later). -func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr { +func (p *parser) userCall(name string, pos Position, endPos Position) *ast.UserCallExpr { p.expect(LPAREN) args := []ast.Expr{} i := 0 @@ -1041,7 +1041,7 @@ func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr { i++ } p.expect(RPAREN) - call := &ast.UserCallExpr{false, ast.WillBeResolvedLater, name, args, pos} + call := &ast.UserCallExpr{false, ast.WillBeResolvedLater, name, args, pos, endPos} //p.recordUserCall(call, pos) return call } From ecbdeeab07524b40d2fca78e5501cd8cd26657d1 Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 31 Aug 2022 00:20:24 +0300 Subject: [PATCH 29/57] decoupling parse/resolve : fixing failing tests --- internal/resolver/resolver.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index db70f4c0..5fbf8501 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -89,12 +89,12 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { r.recordArrayRef(n) case *ast.UserCallExpr: - ast.WalkExprList(r, n.Args) name := n.Name if r.locals[name] { panic(n.EndPos.Errorf("can't call local variable %q as function", name)) } for i, arg := range n.Args { + ast.Walk(r, arg) r.processUserCallArg(name, arg, i) } r.recordUserCall(n, n.Pos) From 6078dd50e847786f4db6cb5ef666ba7c1bec8044 Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 31 Aug 2022 00:22:59 +0300 Subject: [PATCH 30/57] decoupling parse/resolve : rfct --- internal/resolver/resolve.go | 3 +-- internal/resolver/resolver.go | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index e117538c..2d2162a5 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -98,8 +98,7 @@ func (r *resolver) stopFunction() { // Add function by name with given index func (r *resolver) addFunction(name string) { - r.functions[name] = r.funcIdx - r.funcIdx++ + r.functions[name] = len(r.functions) } // Records a call to a user function (for resolving indexes later) diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 5fbf8501..037a38e1 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -22,8 +22,6 @@ type resolver struct { userCalls []userCall // record calls so we can resolve them later nativeFuncs map[string]interface{} - funcIdx int - // Configuration and debugging debugTypes bool // show variable types for debugging debugWriter io.Writer // where the debug output goes From 3e33f1d3d1828a52426e1891223ce94cae91e152 Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 31 Aug 2022 00:29:04 +0300 Subject: [PATCH 31/57] decoupling parse/resolve : rfct --- internal/resolver/resolver.go | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 037a38e1..46d19944 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -6,8 +6,7 @@ import ( ) type resolver struct { - // Parsing state - // TODO this reflects the var in parser - is this needed? + // Resolving state funcName string // function name if parsing a func, else "" // Variable tracking and resolving @@ -15,7 +14,6 @@ type resolver struct { varTypes map[string]map[string]typeInfo // map of func name to var name to type varRefs []varRef // all variable references (usually scalars) arrayRefs []arrayRef // all array references - //multiExprs map[*ast.MultiExpr]lexer.Position // tracks comma-separated expressions // Function tracking functions map[string]int // map of function name to index @@ -41,16 +39,14 @@ type Config struct { func Resolve(prog *ast.Program, config *Config) *ast.ResolvedProgram { r := &resolver{} - resolvedProg := &ast.ResolvedProgram{ - Program: *prog, - } r.initResolve(config) + resolvedProg := &ast.ResolvedProgram{Program: *prog} + ast.Walk(r, prog) r.resolveUserCalls(prog) r.resolveVars(resolvedProg) - //r.checkMultiExprs() return resolvedProg } @@ -67,17 +63,14 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { r.addFunction(name) r.locals = make(map[string]bool, 7) for _, param := range function.Params { - //if r.locals[param] { - // panic(function.ParamsPos[i].Errorf( "duplicate parameter name %q", param)) - //} r.locals[param] = true - } - r.startFunction(name) + r.funcName = name + r.varTypes[name] = make(map[string]typeInfo) ast.WalkStmtList(r, function.Body) - r.stopFunction() + r.funcName = "" r.locals = nil case *ast.VarExpr: @@ -95,7 +88,7 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { ast.Walk(r, arg) r.processUserCallArg(name, arg, i) } - r.recordUserCall(n, n.Pos) + r.userCalls = append(r.userCalls, userCall{n, n.Pos, r.funcName}) default: return r } From 67558900c05e0b7c314e5acfb795c01cd3d281fe Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 31 Aug 2022 00:35:13 +0300 Subject: [PATCH 32/57] decoupling parse/resolve : rfct --- internal/resolver/resolve.go | 111 ++++++++++++++++++++++++++++------ internal/resolver/resolver.go | 97 ----------------------------- 2 files changed, 92 insertions(+), 116 deletions(-) delete mode 100644 internal/resolver/resolver.go diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index 2d2162a5..18b8c1ca 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -1,10 +1,9 @@ // Resolve function calls and variable types package resolver -// TODO put all into resolver.go - import ( "fmt" + "io" "reflect" "sort" @@ -12,6 +11,97 @@ import ( . "github.com/benhoyt/goawk/lexer" ) +type resolver struct { + // Resolving state + funcName string // function name if parsing a func, else "" + + // Variable tracking and resolving + locals map[string]bool // current function's locals (for determining scope) + varTypes map[string]map[string]typeInfo // map of func name to var name to type + varRefs []varRef // all variable references (usually scalars) + arrayRefs []arrayRef // all array references + + // Function tracking + functions map[string]int // map of function name to index + userCalls []userCall // record calls so we can resolve them later + nativeFuncs map[string]interface{} + + // Configuration and debugging + debugTypes bool // show variable types for debugging + debugWriter io.Writer // where the debug output goes +} + +type Config struct { + // Enable printing of type information + DebugTypes bool + + // io.Writer to print type information on (for example, os.Stderr) + DebugWriter io.Writer + + // Map of named Go functions to allow calling from AWK. See docs + // on interp.Config.Funcs for details. + Funcs map[string]interface{} +} + +func Resolve(prog *ast.Program, config *Config) *ast.ResolvedProgram { + r := &resolver{} + r.initResolve(config) + + resolvedProg := &ast.ResolvedProgram{Program: *prog} + + ast.Walk(r, prog) + + r.resolveUserCalls(prog) + r.resolveVars(resolvedProg) + + return resolvedProg +} + +func (r *resolver) Visit(node ast.Node) ast.Visitor { + switch n := node.(type) { + + case ast.Function: + function := n + name := function.Name + if _, ok := r.functions[name]; ok { + panic(function.Pos.Errorf("function %q already defined", name)) + } + r.addFunction(name) + r.locals = make(map[string]bool, 7) + for _, param := range function.Params { + r.locals[param] = true + } + r.funcName = name + r.varTypes[name] = make(map[string]typeInfo) + + ast.WalkStmtList(r, function.Body) + + r.funcName = "" + r.locals = nil + + case *ast.VarExpr: + r.recordVarRef(n) + + case *ast.ArrayExpr: + r.recordArrayRef(n) + + case *ast.UserCallExpr: + name := n.Name + if r.locals[name] { + panic(n.EndPos.Errorf("can't call local variable %q as function", name)) + } + for i, arg := range n.Args { + ast.Walk(r, arg) + r.processUserCallArg(name, arg, i) + } + r.userCalls = append(r.userCalls, userCall{n, n.Pos, r.funcName}) + default: + return r + } + + return nil +} + type varType int const ( @@ -82,18 +172,6 @@ func (r *resolver) initResolve(config *Config) { r.recordArrayRef(ast.ArrayRef("ARGV", Position{1, 1})) // interpreter relies on ARGV being present r.recordArrayRef(ast.ArrayRef("ENVIRON", Position{1, 1})) // and other built-in arrays r.recordArrayRef(ast.ArrayRef("FIELDS", Position{1, 1})) - //r.multiExprs = make(map[*ast.MultiExpr]Position, 3) -} - -// Signal the start of a function -func (r *resolver) startFunction(name string) { - r.funcName = name - r.varTypes[name] = make(map[string]typeInfo) -} - -// Signal the end of a function -func (r *resolver) stopFunction() { - r.funcName = "" } // Add function by name with given index @@ -108,11 +186,6 @@ type userCall struct { inFunc string } -// Record a user call site -func (r *resolver) recordUserCall(call *ast.UserCallExpr, pos Position) { - r.userCalls = append(r.userCalls, userCall{call, pos, r.funcName}) -} - // After parsing, resolve all user calls to their indexes. Also // ensures functions called have actually been defined, and that // they're not being called with too many arguments. diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go deleted file mode 100644 index 46d19944..00000000 --- a/internal/resolver/resolver.go +++ /dev/null @@ -1,97 +0,0 @@ -package resolver - -import ( - "github.com/benhoyt/goawk/internal/ast" - "io" -) - -type resolver struct { - // Resolving state - funcName string // function name if parsing a func, else "" - - // Variable tracking and resolving - locals map[string]bool // current function's locals (for determining scope) - varTypes map[string]map[string]typeInfo // map of func name to var name to type - varRefs []varRef // all variable references (usually scalars) - arrayRefs []arrayRef // all array references - - // Function tracking - functions map[string]int // map of function name to index - userCalls []userCall // record calls so we can resolve them later - nativeFuncs map[string]interface{} - - // Configuration and debugging - debugTypes bool // show variable types for debugging - debugWriter io.Writer // where the debug output goes -} - -type Config struct { - // Enable printing of type information - DebugTypes bool - - // io.Writer to print type information on (for example, os.Stderr) - DebugWriter io.Writer - - // Map of named Go functions to allow calling from AWK. See docs - // on interp.Config.Funcs for details. - Funcs map[string]interface{} -} - -func Resolve(prog *ast.Program, config *Config) *ast.ResolvedProgram { - r := &resolver{} - r.initResolve(config) - - resolvedProg := &ast.ResolvedProgram{Program: *prog} - - ast.Walk(r, prog) - - r.resolveUserCalls(prog) - r.resolveVars(resolvedProg) - - return resolvedProg -} - -func (r *resolver) Visit(node ast.Node) ast.Visitor { - switch n := node.(type) { - - case ast.Function: - function := n - name := function.Name - if _, ok := r.functions[name]; ok { - panic(function.Pos.Errorf("function %q already defined", name)) - } - r.addFunction(name) - r.locals = make(map[string]bool, 7) - for _, param := range function.Params { - r.locals[param] = true - } - r.funcName = name - r.varTypes[name] = make(map[string]typeInfo) - - ast.WalkStmtList(r, function.Body) - - r.funcName = "" - r.locals = nil - - case *ast.VarExpr: - r.recordVarRef(n) - - case *ast.ArrayExpr: - r.recordArrayRef(n) - - case *ast.UserCallExpr: - name := n.Name - if r.locals[name] { - panic(n.EndPos.Errorf("can't call local variable %q as function", name)) - } - for i, arg := range n.Args { - ast.Walk(r, arg) - r.processUserCallArg(name, arg, i) - } - r.userCalls = append(r.userCalls, userCall{n, n.Pos, r.funcName}) - default: - return r - } - - return nil -} From 4d1a2f66ffaba387c7dd16935ef2a6d86492ba2c Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 31 Aug 2022 00:38:55 +0300 Subject: [PATCH 33/57] decoupling parse/resolve : rfct --- internal/resolver/resolve.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index 18b8c1ca..c8b02513 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -66,7 +66,7 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { if _, ok := r.functions[name]; ok { panic(function.Pos.Errorf("function %q already defined", name)) } - r.addFunction(name) + r.functions[name] = len(r.functions) r.locals = make(map[string]bool, 7) for _, param := range function.Params { r.locals[param] = true @@ -174,11 +174,6 @@ func (r *resolver) initResolve(config *Config) { r.recordArrayRef(ast.ArrayRef("FIELDS", Position{1, 1})) } -// Add function by name with given index -func (r *resolver) addFunction(name string) { - r.functions[name] = len(r.functions) -} - // Records a call to a user function (for resolving indexes later) type userCall struct { call *ast.UserCallExpr From 2c4470717881fd6da1999a97a32eef0ffac40802 Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 31 Aug 2022 00:53:42 +0300 Subject: [PATCH 34/57] decoupling parse/resolve : cleanup --- parser/parser.go | 41 +++++------------------------------------ 1 file changed, 5 insertions(+), 36 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 25b71a8c..cba0c62a 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -68,7 +68,6 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { p.debugTypes = config.DebugTypes p.debugWriter = config.DebugWriter } - //p.initResolve() p.multiExprs = make(map[*ast.MultiExpr]Position, 3) p.next() // initialize p.tok @@ -124,17 +123,8 @@ type parser struct { loopDepth int // current loop depth (0 if not in any loops) // Variable tracking and resolving - //locals map[string]bool // current function's locals (for determining scope) - //varTypes map[string]map[string]typeInfo // map of func name to var name to type - //varRefs []varRef // all variable references (usually scalars) - //arrayRefs []arrayRef // all array references multiExprs map[*ast.MultiExpr]Position // tracks comma-separated expressions - // Function tracking - //functions map[string]int // map of function name to index - //userCalls []userCall // record calls so we can resolve them later - //nativeFuncs map[string]interface{} - // Configuration and debugging debugTypes bool // show variable types for debugging debugWriter io.Writer // where the debug output goes @@ -154,7 +144,6 @@ func (p *parser) program() *ast.Program { prog.End = append(prog.End, p.stmtsBrace()) case FUNCTION: function := p.function() - //p.addFunction(function.Name, len(prog.Functions)) prog.Functions = append(prog.Functions, function) default: p.inAction = true @@ -178,8 +167,6 @@ func (p *parser) program() *ast.Program { p.optionalNewlines() } - //p.resolveUserCalls(prog) - //p.resolveVars(prog) p.checkMultiExprs() prog.EndPos = p.pos @@ -431,9 +418,6 @@ func (p *parser) function() ast.Function { } p.next() name := p.val - //if _, ok := p.functions[name]; ok { - // panic(p.errorf("function %q already defined", name)) - //} funcNamePos := p.pos p.expect(NAME) p.expect(LPAREN) @@ -460,22 +444,13 @@ func (p *parser) function() ast.Function { p.optionalNewlines() // Parse the body - p.startFunction(name) - body := p.stmtsBrace() - p.stopFunction() - //p.locals = nil - - return ast.Function{name, params, nil, body, funcNamePos} -} - -// Signal the start of a function -func (p *parser) startFunction(name string) { p.funcName = name -} -// Signal the end of a function -func (p *parser) stopFunction() { + body := p.stmtsBrace() + p.funcName = "" + + return ast.Function{name, params, nil, body, funcNamePos} } // Parse expressions separated by commas: args to print[f] or user @@ -733,9 +708,6 @@ func (p *parser) primary() ast.Expr { p.expect(RBRACKET) return &ast.IndexExpr{ast.ArrayRef(name, namePos), index} } else if p.tok == LPAREN && !p.lexer.HadSpace() { - //if p.locals[name] { - // panic(p.errorf("can't call local variable %q as function", name)) - //} // Grammar requires no space between function name and // left paren for user function calls, hence the funky // lexer.HadSpace() method. @@ -1036,14 +1008,11 @@ func (p *parser) userCall(name string, pos Position, endPos Position) *ast.UserC p.commaNewlines() } arg := p.expr() - //p.processUserCallArg(name, arg, i) args = append(args, arg) i++ } p.expect(RPAREN) - call := &ast.UserCallExpr{false, ast.WillBeResolvedLater, name, args, pos, endPos} - //p.recordUserCall(call, pos) - return call + return &ast.UserCallExpr{false, ast.WillBeResolvedLater, name, args, pos, endPos} } // Record a "multi expression" (comma-separated pseudo-expression From f8e6e70f0176b49120a0ecda70a3371441b233af Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 31 Aug 2022 00:54:40 +0300 Subject: [PATCH 35/57] decoupling parse/resolve : cleanup --- decouple_parse_resolve.txt | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 decouple_parse_resolve.txt diff --git a/decouple_parse_resolve.txt b/decouple_parse_resolve.txt deleted file mode 100644 index b7caae50..00000000 --- a/decouple_parse_resolve.txt +++ /dev/null @@ -1,15 +0,0 @@ - - -struct parser - -parser.ParseProgram() -> Program - -struct Program - -Program.state: parsed/resolved/compiled - -Program.Resolve() - -Program.Compile() - ------ \ No newline at end of file From 7c1029e1e614f99d17ce746121fe0d4ec2fc72ef Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 31 Aug 2022 01:01:35 +0300 Subject: [PATCH 36/57] decoupling parse/resolve : rfct --- internal/ast/ast.go | 5 +---- internal/resolver/resolve.go | 2 +- lexer/lexer.go | 7 +++++++ parser/parser.go | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index 200b5e9c..bdf91f06 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -81,9 +81,7 @@ func (a *Action) String() string { } type Node interface { - // TODO positions below - //Pos() token.Pos // position of first character belonging to the node - //End() token.Pos // position of first character immediately after the node + // In the future we may want to place positions information here } // Expr is the abstract syntax tree for any AWK expression. @@ -333,7 +331,6 @@ type UserCallExpr struct { Name string Args []Expr Pos Position - EndPos Position } func (e *UserCallExpr) String() string { diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index c8b02513..fbc9caee 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -88,7 +88,7 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { case *ast.UserCallExpr: name := n.Name if r.locals[name] { - panic(n.EndPos.Errorf("can't call local variable %q as function", name)) + panic(n.Pos.Add(0, len(name)).Errorf("can't call local variable %q as function", name)) } for i, arg := range n.Args { ast.Walk(r, arg) diff --git a/lexer/lexer.go b/lexer/lexer.go index d30fb461..4b9f2a95 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -33,6 +33,13 @@ type Position struct { Column int } +func (pos Position) Add(lines int, cols int) Position { + return Position{ + Line: pos.Line + lines, + Column: pos.Column + cols, + } +} + type PositionError struct { // Source line/column position where the error occurred. Position Position diff --git a/parser/parser.go b/parser/parser.go index cba0c62a..ee2241f6 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1012,7 +1012,7 @@ func (p *parser) userCall(name string, pos Position, endPos Position) *ast.UserC i++ } p.expect(RPAREN) - return &ast.UserCallExpr{false, ast.WillBeResolvedLater, name, args, pos, endPos} + return &ast.UserCallExpr{false, ast.WillBeResolvedLater, name, args, pos} } // Record a "multi expression" (comma-separated pseudo-expression From 05aaace89b56d502282b752a804beea1a50d5148 Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 31 Aug 2022 01:08:07 +0300 Subject: [PATCH 37/57] decoupling parse/resolve : cleanup --- parser/parser.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index ee2241f6..6d9f4bae 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -52,11 +52,9 @@ func (c *ParserConfig) toResolverConfig() *resolver.Config { // the parser configuration (and is allowed to be nil). func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { defer func() { - // TODO adjust description - // The parser uses panic with a *ParseError to signal parsing - // errors internally, and they're caught here. This - // significantly simplifies the recursive descent calls as - // we don't have to check errors everywhere. + // The parser and resolver use panic with a *PositionError to signal parsing + // errors internally, and they're caught here. This significantly simplifies + // the recursive descent calls as we don't have to check errors everywhere. if r := recover(); r != nil { // Convert to ParseError or re-panic err = &ParseError{*r.(*PositionError)} From c858f36a29a8e04580671acc66d4800405e9ca0d Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 31 Aug 2022 01:17:12 +0300 Subject: [PATCH 38/57] decoupling parse/resolve : rfct --- internal/resolver/resolve.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index fbc9caee..ad96a99f 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -169,9 +169,10 @@ func (r *resolver) initResolve(config *Config) { r.varTypes = make(map[string]map[string]typeInfo) r.varTypes[""] = make(map[string]typeInfo) // globals r.functions = make(map[string]int) - r.recordArrayRef(ast.ArrayRef("ARGV", Position{1, 1})) // interpreter relies on ARGV being present - r.recordArrayRef(ast.ArrayRef("ENVIRON", Position{1, 1})) // and other built-in arrays - r.recordArrayRef(ast.ArrayRef("FIELDS", Position{1, 1})) + initialPos := Position{1, 1} + r.recordArrayRef(ast.ArrayRef("ARGV", initialPos)) // interpreter relies on ARGV being present + r.recordArrayRef(ast.ArrayRef("ENVIRON", initialPos)) // and other built-in arrays + r.recordArrayRef(ast.ArrayRef("FIELDS", initialPos)) } // Records a call to a user function (for resolving indexes later) From b55f6f4d7a378eacbccb85b0af380f83875cae4a Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 31 Aug 2022 01:41:46 +0300 Subject: [PATCH 39/57] decoupling parse/resolve : rfct --- parser/parser.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 6d9f4bae..995b7733 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -709,7 +709,7 @@ func (p *parser) primary() ast.Expr { // Grammar requires no space between function name and // left paren for user function calls, hence the funky // lexer.HadSpace() method. - return p.userCall(name, namePos, p.pos) + return p.userCall(name, namePos) } return ast.VarRef(name, namePos) case LPAREN: @@ -997,7 +997,7 @@ func (p *parser) errorf(format string, args ...interface{}) error { // Parse call to a user-defined function (and record call site for // resolving later). -func (p *parser) userCall(name string, pos Position, endPos Position) *ast.UserCallExpr { +func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr { p.expect(LPAREN) args := []ast.Expr{} i := 0 From 787ffd3f2698ba1ad7517239bbedad785d5515bd Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 13 Sep 2022 00:06:30 +0300 Subject: [PATCH 40/57] decoupling parse/resolve : CR : adding `node()` to `Node` interface --- internal/ast/ast.go | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index bdf91f06..29ec3916 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -81,8 +81,47 @@ func (a *Action) String() string { } type Node interface { - // In the future we may want to place positions information here -} + node() +} + +// All these types implement the Node interface. +func (p *Program) node() {} +func (a Action) node() {} +func (f Function) node() {} +func (e *FieldExpr) node() {} +func (e *NamedFieldExpr) node() {} +func (e *UnaryExpr) node() {} +func (e *BinaryExpr) node() {} +func (e *ArrayExpr) node() {} +func (e *InExpr) node() {} +func (e *CondExpr) node() {} +func (e *NumExpr) node() {} +func (e *StrExpr) node() {} +func (e *RegExpr) node() {} +func (e *VarExpr) node() {} +func (e *IndexExpr) node() {} +func (e *AssignExpr) node() {} +func (e *AugAssignExpr) node() {} +func (e *IncrExpr) node() {} +func (e *CallExpr) node() {} +func (e *UserCallExpr) node() {} +func (e *MultiExpr) node() {} +func (e *GetlineExpr) node() {} +func (s *PrintStmt) node() {} +func (s *PrintfStmt) node() {} +func (s *ExprStmt) node() {} +func (s *IfStmt) node() {} +func (s *ForStmt) node() {} +func (s *ForInStmt) node() {} +func (s *WhileStmt) node() {} +func (s *DoWhileStmt) node() {} +func (s *BreakStmt) node() {} +func (s *ContinueStmt) node() {} +func (s *NextStmt) node() {} +func (s *ExitStmt) node() {} +func (s *DeleteStmt) node() {} +func (s *ReturnStmt) node() {} +func (s *BlockStmt) node() {} // Expr is the abstract syntax tree for any AWK expression. type Expr interface { From 7faa3a895c14dd5fba360ad4c61bc718d2c92e81 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 13 Sep 2022 00:15:14 +0300 Subject: [PATCH 41/57] decoupling parse/resolve : CR : renaming --- internal/ast/walk.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/ast/walk.go b/internal/ast/walk.go index 7345e49e..a18af166 100644 --- a/internal/ast/walk.go +++ b/internal/ast/walk.go @@ -11,15 +11,15 @@ type Visitor interface { // Helper functions for common node lists. They may be empty. -func WalkExprList(v Visitor, list []Expr) { - for _, x := range list { - Walk(v, x) +func WalkExprList(v Visitor, exprs []Expr) { + for _, expr := range exprs { + Walk(v, expr) } } -func WalkStmtList(v Visitor, list []Stmt) { - for _, x := range list { - Walk(v, x) +func WalkStmtList(v Visitor, stmts []Stmt) { + for _, stmt := range stmts { + Walk(v, stmt) } } From 478b6451ff5660ab553513e5f14dfbcf374c1741 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 13 Sep 2022 00:17:14 +0300 Subject: [PATCH 42/57] decoupling parse/resolve : CR : adjust comment for Walk function --- internal/ast/walk.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/ast/walk.go b/internal/ast/walk.go index a18af166..c50aeaf5 100644 --- a/internal/ast/walk.go +++ b/internal/ast/walk.go @@ -24,7 +24,7 @@ func WalkStmtList(v Visitor, stmts []Stmt) { } // Walk traverses an AST in depth-first order: It starts by calling -// v.Visit(node); node must not be nil. If the visitor w returned by +// v.Visit(node); if node is nil, it does nothing. If the visitor w returned by // v.Visit(node) is not nil, Walk is invoked recursively with visitor // w for each of the non-nil children of node, followed by a call of // w.Visit(nil). From 5a58be1cd1b9217b35d2cc157debcab8f39d0917 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 13 Sep 2022 00:25:13 +0300 Subject: [PATCH 43/57] decoupling parse/resolve : CR : `WillBeResolvedLater` -> `resolvedLater` --- internal/ast/ast.go | 10 +++++++--- parser/parser.go | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index 29ec3916..9ffdf65d 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -272,7 +272,7 @@ func (e *RegExpr) String() string { } // meaning it will be set during resolve step -const WillBeResolvedLater = -1 +const resolvedLater = -1 type VarScope int @@ -657,9 +657,13 @@ func trimParens(s string) string { } func VarRef(name string, pos Position) *VarExpr { - return &VarExpr{WillBeResolvedLater, WillBeResolvedLater, name, pos} + return &VarExpr{resolvedLater, resolvedLater, name, pos} } func ArrayRef(name string, pos Position) *ArrayExpr { - return &ArrayExpr{WillBeResolvedLater, WillBeResolvedLater, name, pos} + return &ArrayExpr{resolvedLater, resolvedLater, name, pos} +} + +func UserCall(name string, args []Expr, pos Position) *UserCallExpr { + return &UserCallExpr{false, resolvedLater, name, args, pos} } diff --git a/parser/parser.go b/parser/parser.go index 0fb63db1..3b8c13bb 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1032,7 +1032,7 @@ func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr { i++ } p.expect(RPAREN) - return &ast.UserCallExpr{false, ast.WillBeResolvedLater, name, args, pos} + return ast.UserCall(name, args, pos) } // Record a "multi expression" (comma-separated pseudo-expression From 2c07a7aa7b2f55fa473a61a3e69e236aa33d4ddd Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 13 Sep 2022 00:28:45 +0300 Subject: [PATCH 44/57] decoupling parse/resolve : CR : cleanup comment --- internal/resolver/resolve.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index ad96a99f..cbef12ee 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -273,7 +273,6 @@ func (r *resolver) recordArrayRef(expr *ast.ArrayExpr) { panic(expr.Pos.Errorf("can't use scalar %q as array", name)) } expr.Scope = scope - //expr := &ast.ArrayExpr{scope, 0, name} r.arrayRefs = append(r.arrayRefs, arrayRef{funcName, expr}) info := r.varTypes[funcName][name] if info.typ == typeUnknown { From b039569c3338b9f50c0bd751b365786ef8b3a68d Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 13 Sep 2022 00:45:46 +0300 Subject: [PATCH 45/57] decoupling parse/resolve : CR : Let's change `parser.function()` to return a `*ast.Function` instead, to make things consistent. The same for `Action` --- internal/ast/ast.go | 8 ++++---- internal/ast/walk.go | 4 ++-- internal/resolver/resolve.go | 2 +- parser/parser.go | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index 9ffdf65d..ddbed871 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -13,9 +13,9 @@ import ( // Program is an entire AWK program. type Program struct { Begin []Stmts - Actions []Action + Actions []*Action End []Stmts - Functions []Function + Functions []*Function EndPos Position } @@ -86,8 +86,8 @@ type Node interface { // All these types implement the Node interface. func (p *Program) node() {} -func (a Action) node() {} -func (f Function) node() {} +func (a *Action) node() {} +func (f *Function) node() {} func (e *FieldExpr) node() {} func (e *NamedFieldExpr) node() {} func (e *UnaryExpr) node() {} diff --git a/internal/ast/walk.go b/internal/ast/walk.go index c50aeaf5..6d72f11f 100644 --- a/internal/ast/walk.go +++ b/internal/ast/walk.go @@ -165,11 +165,11 @@ func Walk(v Visitor, node Node) { WalkStmtList(v, stmts) } - case Action: + case *Action: WalkExprList(v, n.Pattern) WalkStmtList(v, n.Stmts) - case Function: + case *Function: WalkStmtList(v, n.Body) default: diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index cbef12ee..071cd5eb 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -60,7 +60,7 @@ func Resolve(prog *ast.Program, config *Config) *ast.ResolvedProgram { func (r *resolver) Visit(node ast.Node) ast.Visitor { switch n := node.(type) { - case ast.Function: + case *ast.Function: function := n name := function.Name if _, ok := r.functions[name]; ok { diff --git a/parser/parser.go b/parser/parser.go index 3b8c13bb..e7d04f36 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -176,7 +176,7 @@ func (p *parser) program() *ast.Program { pattern = append(pattern, p.expr()) } // Or an empty action (equivalent to { print $0 }) - action := ast.Action{pattern, nil} + action := &ast.Action{pattern, nil} if p.tok == LBRACE { action.Stmts = p.stmtsBrace() } else { @@ -430,7 +430,7 @@ func (p *parser) loopStmts() ast.Stmts { // Parse a function definition and body. As it goes, this resolves // the local variable indexes and tracks which parameters are array // parameters. -func (p *parser) function() ast.Function { +func (p *parser) function() *ast.Function { if p.funcName != "" { // Should never actually get here (FUNCTION token is only // handled at the top level), but just in case. @@ -470,7 +470,7 @@ func (p *parser) function() ast.Function { p.funcName = "" - return ast.Function{name, params, nil, body, funcNamePos} + return &ast.Function{name, params, nil, body, funcNamePos} } // Parse expressions separated by commas: args to print[f] or user From ee75278098d2098b518a05ff20af87390c5bab4f Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 13 Sep 2022 00:54:44 +0300 Subject: [PATCH 46/57] decoupling parse/resolve : CR : removing `EndPos` since was used in single place that is not that important --- internal/ast/ast.go | 1 - internal/resolver/resolve.go | 3 ++- interp/interp_test.go | 2 +- parser/parser.go | 1 - testdata/gawk/delfunc.ok | 2 +- testdata/gawk/fnamedat.ok | 2 +- testdata/gawk/fnarray.ok | 2 +- testdata/gawk/fnarray2.ok | 2 +- testdata/gawk/fnasgnm.ok | 2 +- testdata/gawk/gsubasgn.ok | 2 +- 10 files changed, 9 insertions(+), 10 deletions(-) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index ddbed871..0508e6ad 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -16,7 +16,6 @@ type Program struct { Actions []*Action End []Stmts Functions []*Function - EndPos Position } type ResolvedProgram struct { diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index 071cd5eb..56cbbf84 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -357,7 +357,8 @@ func (r *resolver) resolveVars(prog *ast.ResolvedProgram) { _, isFunc := r.functions[name] if isFunc { // Global var can't also be the name of a function - panic(prog.EndPos.Errorf("global var %q can't also be a function", name)) + pos := Position{1, 1} // Ideally it'd be the position of the global var. + panic(pos.Errorf("global var %q can't also be a function", name)) } var index int if info.scope == ast.ScopeSpecial { diff --git a/interp/interp_test.go b/interp/interp_test.go index 13de6ff8..19ce90a8 100644 --- a/interp/interp_test.go +++ b/interp/interp_test.go @@ -718,7 +718,7 @@ function bar(foo) { print "bar", foo } BEGIN { foo(5); bar(10) } `, "", "", `parse error at 2:14: can't use function name as parameter name`, "function name"}, {`function foo() { print foo } BEGIN { foo() }`, - "", "", `parse error at 1:46: global var "foo" can't also be a function`, "function"}, + "", "", `parse error at 1:1: global var "foo" can't also be a function`, "function"}, {`function f(x) { print x, x(); } BEGIN { f() }`, "", "", `parse error at 1:27: can't call local variable "x" as function`, "function"}, // Redirected I/O diff --git a/parser/parser.go b/parser/parser.go index e7d04f36..b021f0f1 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -188,7 +188,6 @@ func (p *parser) program() *ast.Program { } p.checkMultiExprs() - prog.EndPos = p.pos return prog } diff --git a/testdata/gawk/delfunc.ok b/testdata/gawk/delfunc.ok index 3f53761c..b83f6237 100644 --- a/testdata/gawk/delfunc.ok +++ b/testdata/gawk/delfunc.ok @@ -1 +1 @@ -parse error at 8:1: global var "f" can't also be a function \ No newline at end of file +parse error at 1:1: global var "f" can't also be a function \ No newline at end of file diff --git a/testdata/gawk/fnamedat.ok b/testdata/gawk/fnamedat.ok index 29ef4c54..8449c0a4 100644 --- a/testdata/gawk/fnamedat.ok +++ b/testdata/gawk/fnamedat.ok @@ -1 +1 @@ -parse error at 2:1: global var "foo" can't also be a function \ No newline at end of file +parse error at 1:1: global var "foo" can't also be a function \ No newline at end of file diff --git a/testdata/gawk/fnarray.ok b/testdata/gawk/fnarray.ok index 0037a9b7..8449c0a4 100644 --- a/testdata/gawk/fnarray.ok +++ b/testdata/gawk/fnarray.ok @@ -1 +1 @@ -parse error at 8:1: global var "foo" can't also be a function \ No newline at end of file +parse error at 1:1: global var "foo" can't also be a function \ No newline at end of file diff --git a/testdata/gawk/fnarray2.ok b/testdata/gawk/fnarray2.ok index 5e562562..cfee62cd 100644 --- a/testdata/gawk/fnarray2.ok +++ b/testdata/gawk/fnarray2.ok @@ -1 +1 @@ -parse error at 6:1: global var "pile" can't also be a function \ No newline at end of file +parse error at 1:1: global var "pile" can't also be a function \ No newline at end of file diff --git a/testdata/gawk/fnasgnm.ok b/testdata/gawk/fnasgnm.ok index 2a45aeb1..957db6c9 100644 --- a/testdata/gawk/fnasgnm.ok +++ b/testdata/gawk/fnasgnm.ok @@ -1 +1 @@ -parse error at 15:1: global var "ShowMe" can't also be a function \ No newline at end of file +parse error at 1:1: global var "ShowMe" can't also be a function \ No newline at end of file diff --git a/testdata/gawk/gsubasgn.ok b/testdata/gawk/gsubasgn.ok index 99be4d8b..264371fe 100644 --- a/testdata/gawk/gsubasgn.ok +++ b/testdata/gawk/gsubasgn.ok @@ -1 +1 @@ -parse error at 6:1: global var "test1" can't also be a function \ No newline at end of file +parse error at 1:1: global var "test1" can't also be a function \ No newline at end of file From 7cd38ccd12768a1a2b2ad2747c04daab98ce8fca Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 13 Sep 2022 01:00:17 +0300 Subject: [PATCH 47/57] decoupling parse/resolve : CR : remove `Position.Add()` and adjust error msgs in tests --- internal/resolver/resolve.go | 2 +- interp/interp_test.go | 2 +- lexer/lexer.go | 7 ------- testdata/gawk/callparam.ok | 2 +- testdata/gawk/paramasfunc1.ok | 2 +- testdata/gawk/paramasfunc2.ok | 2 +- 6 files changed, 5 insertions(+), 12 deletions(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index 56cbbf84..d20fed18 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -88,7 +88,7 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { case *ast.UserCallExpr: name := n.Name if r.locals[name] { - panic(n.Pos.Add(0, len(name)).Errorf("can't call local variable %q as function", name)) + panic(n.Pos.Errorf("can't call local variable %q as function", name)) } for i, arg := range n.Args { ast.Walk(r, arg) diff --git a/interp/interp_test.go b/interp/interp_test.go index 19ce90a8..b359f8d8 100644 --- a/interp/interp_test.go +++ b/interp/interp_test.go @@ -719,7 +719,7 @@ BEGIN { foo(5); bar(10) } `, "", "", `parse error at 2:14: can't use function name as parameter name`, "function name"}, {`function foo() { print foo } BEGIN { foo() }`, "", "", `parse error at 1:1: global var "foo" can't also be a function`, "function"}, - {`function f(x) { print x, x(); } BEGIN { f() }`, "", "", `parse error at 1:27: can't call local variable "x" as function`, "function"}, + {`function f(x) { print x, x(); } BEGIN { f() }`, "", "", `parse error at 1:26: can't call local variable "x" as function`, "function"}, // Redirected I/O {`BEGIN { getline x; print x }`, "foo", "foo\n", "", ""}, diff --git a/lexer/lexer.go b/lexer/lexer.go index 4b9f2a95..d30fb461 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -33,13 +33,6 @@ type Position struct { Column int } -func (pos Position) Add(lines int, cols int) Position { - return Position{ - Line: pos.Line + lines, - Column: pos.Column + cols, - } -} - type PositionError struct { // Source line/column position where the error occurred. Position Position diff --git a/testdata/gawk/callparam.ok b/testdata/gawk/callparam.ok index 8b074db9..9590f8b2 100644 --- a/testdata/gawk/callparam.ok +++ b/testdata/gawk/callparam.ok @@ -1 +1 @@ -parse error at 5:7: can't call local variable "b" as function \ No newline at end of file +parse error at 5:6: can't call local variable "b" as function \ No newline at end of file diff --git a/testdata/gawk/paramasfunc1.ok b/testdata/gawk/paramasfunc1.ok index c88f020e..3a5a1a47 100644 --- a/testdata/gawk/paramasfunc1.ok +++ b/testdata/gawk/paramasfunc1.ok @@ -1 +1 @@ -parse error at 6:15: can't call local variable "abc" as function \ No newline at end of file +parse error at 6:12: can't call local variable "abc" as function \ No newline at end of file diff --git a/testdata/gawk/paramasfunc2.ok b/testdata/gawk/paramasfunc2.ok index 543388c7..5efab748 100644 --- a/testdata/gawk/paramasfunc2.ok +++ b/testdata/gawk/paramasfunc2.ok @@ -1 +1 @@ -parse error at 8:15: can't call local variable "abc" as function \ No newline at end of file +parse error at 8:12: can't call local variable "abc" as function \ No newline at end of file From 8b65e3271712731a94a4de7c891707984b41be88 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 13 Sep 2022 16:59:36 +0300 Subject: [PATCH 48/57] decoupling parse/resolve : CR : move `PositionError` to `ast` --- internal/ast/ast.go | 19 +++++++++++++++++++ internal/resolver/resolve.go | 26 +++++++++++++------------- lexer/lexer.go | 20 -------------------- parser/parser.go | 12 ++++++------ 4 files changed, 38 insertions(+), 39 deletions(-) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index 0508e6ad..2418e2cc 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -666,3 +666,22 @@ func ArrayRef(name string, pos Position) *ArrayExpr { func UserCall(name string, args []Expr, pos Position) *UserCallExpr { return &UserCallExpr{false, resolvedLater, name, args, pos} } + +type PositionError struct { + // Source line/column position where the error occurred. + Position Position + // Error message. + Message string +} + +// PosErrorf like errorf, but with an explicit position. +func PosErrorf(pos Position, format string, args ...interface{}) error { + message := fmt.Sprintf(format, args...) + return &PositionError{pos, message} +} + +// Error returns a formatted version of the error, including the line +// and column numbers. +func (e *PositionError) Error() string { + return fmt.Sprintf("parse error at %d:%d: %s", e.Position.Line, e.Position.Column, e.Message) +} diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index d20fed18..58b1c735 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -64,7 +64,7 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { function := n name := function.Name if _, ok := r.functions[name]; ok { - panic(function.Pos.Errorf("function %q already defined", name)) + panic(ast.PosErrorf(function.Pos, "function %q already defined", name)) } r.functions[name] = len(r.functions) r.locals = make(map[string]bool, 7) @@ -88,7 +88,7 @@ func (r *resolver) Visit(node ast.Node) ast.Visitor { case *ast.UserCallExpr: name := n.Name if r.locals[name] { - panic(n.Pos.Errorf("can't call local variable %q as function", name)) + panic(ast.PosErrorf(n.Pos, "can't call local variable %q as function", name)) } for i, arg := range n.Args { ast.Walk(r, arg) @@ -203,11 +203,11 @@ func (r *resolver) resolveUserCalls(prog *ast.Program) { if !ok { f, haveNative := r.nativeFuncs[c.call.Name] if !haveNative { - panic(c.pos.Errorf("undefined function %q", c.call.Name)) + panic(ast.PosErrorf(c.pos, "undefined function %q", c.call.Name)) } typ := reflect.TypeOf(f) if !typ.IsVariadic() && len(c.call.Args) > typ.NumIn() { - panic(c.pos.Errorf("%q called with more arguments than declared", c.call.Name)) + panic(ast.PosErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) } c.call.Native = true c.call.Index = nativeIndexes[c.call.Name] @@ -215,7 +215,7 @@ func (r *resolver) resolveUserCalls(prog *ast.Program) { } function := prog.Functions[index] if len(c.call.Args) > len(function.Params) { - panic(c.pos.Errorf("%q called with more arguments than declared", c.call.Name)) + panic(ast.PosErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) } c.call.Index = index } @@ -270,7 +270,7 @@ func (r *resolver) recordArrayRef(expr *ast.ArrayExpr) { name := expr.Name scope, funcName := r.getScope(name) if scope == ast.ScopeSpecial { - panic(expr.Pos.Errorf("can't use scalar %q as array", name)) + panic(ast.PosErrorf(expr.Pos, "can't use scalar %q as array", name)) } expr.Scope = scope r.arrayRefs = append(r.arrayRefs, arrayRef{funcName, expr}) @@ -358,7 +358,7 @@ func (r *resolver) resolveVars(prog *ast.ResolvedProgram) { if isFunc { // Global var can't also be the name of a function pos := Position{1, 1} // Ideally it'd be the position of the global var. - panic(pos.Errorf("global var %q can't also be a function", name)) + panic(ast.PosErrorf(pos, "global var %q can't also be a function", name)) } var index int if info.scope == ast.ScopeSpecial { @@ -444,7 +444,7 @@ func (r *resolver) resolveVars(prog *ast.ResolvedProgram) { funcName := r.getVarFuncName(prog, varExpr.Name, c.inFunc) info := r.varTypes[funcName][varExpr.Name] if info.typ == typeArray { - panic(c.pos.Errorf("can't pass array %q to native function", varExpr.Name)) + panic(ast.PosErrorf(c.pos, "can't pass array %q to native function", varExpr.Name)) } } continue @@ -456,17 +456,17 @@ func (r *resolver) resolveVars(prog *ast.ResolvedProgram) { varExpr, ok := arg.(*ast.VarExpr) if !ok { if function.Arrays[i] { - panic(c.pos.Errorf("can't pass scalar %s as array param", arg)) + panic(ast.PosErrorf(c.pos, "can't pass scalar %s as array param", arg)) } continue } funcName := r.getVarFuncName(prog, varExpr.Name, c.inFunc) info := r.varTypes[funcName][varExpr.Name] if info.typ == typeArray && !function.Arrays[i] { - panic(c.pos.Errorf("can't pass array %q as scalar param", varExpr.Name)) + panic(ast.PosErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name)) } if info.typ != typeArray && function.Arrays[i] { - panic(c.pos.Errorf("can't pass scalar %q as array param", varExpr.Name)) + panic(ast.PosErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name)) } } } @@ -480,14 +480,14 @@ func (r *resolver) resolveVars(prog *ast.ResolvedProgram) { for _, varRef := range r.varRefs { info := r.varTypes[varRef.funcName][varRef.ref.Name] if info.typ == typeArray && !varRef.isArg { - panic(varRef.ref.Pos.Errorf("can't use array %q as scalar", varRef.ref.Name)) + panic(ast.PosErrorf(varRef.ref.Pos, "can't use array %q as scalar", varRef.ref.Name)) } varRef.ref.Index = info.index } for _, arrayRef := range r.arrayRefs { info := r.varTypes[arrayRef.funcName][arrayRef.ref.Name] if info.typ == typeScalar { - panic(arrayRef.ref.Pos.Errorf("can't use scalar %q as array", arrayRef.ref.Name)) + panic(ast.PosErrorf(arrayRef.ref.Pos, "can't use scalar %q as array", arrayRef.ref.Name)) } arrayRef.ref.Index = info.index } diff --git a/lexer/lexer.go b/lexer/lexer.go index d30fb461..dc3a48dd 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -9,7 +9,6 @@ package lexer import ( "errors" - "fmt" ) // Lexer tokenizes a byte string of AWK source code. Use NewLexer to @@ -33,25 +32,6 @@ type Position struct { Column int } -type PositionError struct { - // Source line/column position where the error occurred. - Position Position - // Error message. - Message string -} - -// Like errorf, but with an explicit position. -func (pos Position) Errorf(format string, args ...interface{}) error { - message := fmt.Sprintf(format, args...) - return &PositionError{pos, message} -} - -// Error returns a formatted version of the error, including the line -// and column numbers. -func (e *PositionError) Error() string { - return fmt.Sprintf("parse error at %d:%d: %s", e.Position.Line, e.Position.Column, e.Message) -} - // NewLexer creates a new lexer that will tokenize the given source // code. See the module-level example for a working example. func NewLexer(src []byte) *Lexer { diff --git a/parser/parser.go b/parser/parser.go index b021f0f1..1c145d9f 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -19,7 +19,7 @@ import ( // ParseError (actually *ParseError) is the type of error returned by // ParseProgram. type ParseError struct { - PositionError + ast.PositionError } // ParserConfig lets you specify configuration for the parsing @@ -57,7 +57,7 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { // the recursive descent calls as we don't have to check errors everywhere. if r := recover(); r != nil { // Convert to ParseError or re-panic - err = &ParseError{*r.(*PositionError)} + err = &ParseError{*r.(*ast.PositionError)} } }() lexer := NewLexer(src) @@ -669,7 +669,7 @@ func (p *parser) preIncr() ast.Expr { exprPos := p.pos expr := p.preIncr() if !ast.IsLValue(expr) { - panic(exprPos.Errorf("expected lvalue after ++ or --")) + panic(ast.PosErrorf(exprPos, "expected lvalue after ++ or --")) } return &ast.IncrExpr{expr, op, true} } @@ -781,7 +781,7 @@ func (p *parser) primary() ast.Expr { inPos := p.pos in := p.expr() if !ast.IsLValue(in) { - panic(inPos.Errorf("3rd arg to sub/gsub must be lvalue")) + panic(ast.PosErrorf(inPos, "3rd arg to sub/gsub must be lvalue")) } args = append(args, in) } @@ -1013,7 +1013,7 @@ func (p *parser) matches(operators ...Token) bool { // Format given string and args with Sprintf and return *ParseError // with that message and the current position. func (p *parser) errorf(format string, args ...interface{}) error { - return p.pos.Errorf(format, args...) + return ast.PosErrorf(p.pos, format, args...) } // Parse call to a user-defined function (and record call site for @@ -1059,5 +1059,5 @@ func (p *parser) checkMultiExprs() { min = pos } } - panic(min.Errorf("unexpected comma-separated expression")) + panic(ast.PosErrorf(min, "unexpected comma-separated expression")) } From 7b6e450d06d8241b3dfb3362351093e7019bef42 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 13 Sep 2022 17:03:33 +0300 Subject: [PATCH 49/57] decoupling parse/resolve : CR : rfct --- parser/parser.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser/parser.go b/parser/parser.go index 1c145d9f..81c1a4a0 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -5,7 +5,6 @@ package parser import ( - "github.com/benhoyt/goawk/internal/resolver" "io" "regexp" "strconv" @@ -13,6 +12,7 @@ import ( "github.com/benhoyt/goawk/internal/ast" "github.com/benhoyt/goawk/internal/compiler" + "github.com/benhoyt/goawk/internal/resolver" . "github.com/benhoyt/goawk/lexer" ) From 8794caa6259265950110f736e6d99f083eabd8f0 Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 13 Sep 2022 17:09:20 +0300 Subject: [PATCH 50/57] decoupling parse/resolve : CR : restore fields back to `ParseError` --- parser/parser.go | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 81c1a4a0..1ccd44f4 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -5,6 +5,7 @@ package parser import ( + "fmt" "io" "regexp" "strconv" @@ -19,7 +20,16 @@ import ( // ParseError (actually *ParseError) is the type of error returned by // ParseProgram. type ParseError struct { - ast.PositionError + // Source line/column position where the error occurred. + Position Position + // Error message. + Message string +} + +// Error returns a formatted version of the error, including the line +// and column numbers. +func (e *ParseError) Error() string { + return fmt.Sprintf("parse error at %d:%d: %s", e.Position.Line, e.Position.Column, e.Message) } // ParserConfig lets you specify configuration for the parsing @@ -52,12 +62,16 @@ func (c *ParserConfig) toResolverConfig() *resolver.Config { // the parser configuration (and is allowed to be nil). func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { defer func() { - // The parser and resolver use panic with a *PositionError to signal parsing + // The parser and resolver use panic with an *ast.PositionError to signal parsing // errors internally, and they're caught here. This significantly simplifies // the recursive descent calls as we don't have to check errors everywhere. if r := recover(); r != nil { // Convert to ParseError or re-panic - err = &ParseError{*r.(*ast.PositionError)} + positionError := *r.(*ast.PositionError) + err = &ParseError{ + Position: positionError.Position, + Message: positionError.Message, + } } }() lexer := NewLexer(src) From 3be851f9fa71d74c1f0c19953c4d1a700274d2dd Mon Sep 17 00:00:00 2001 From: xonix Date: Tue, 13 Sep 2022 17:10:44 +0300 Subject: [PATCH 51/57] decoupling parse/resolve : CR : restore fields back to `ParseError` --- parser/parser.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parser/parser.go b/parser/parser.go index 1ccd44f4..6cd11876 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -67,10 +67,10 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { // the recursive descent calls as we don't have to check errors everywhere. if r := recover(); r != nil { // Convert to ParseError or re-panic - positionError := *r.(*ast.PositionError) + posError := *r.(*ast.PositionError) err = &ParseError{ - Position: positionError.Position, - Message: positionError.Message, + Position: posError.Position, + Message: posError.Message, } } }() From 198768e167bc789112262118e9427ec50cabb3db Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 14 Sep 2022 00:27:02 +0300 Subject: [PATCH 52/57] decoupling parse/resolve : CR : add missing comment --- internal/ast/ast.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index 2418e2cc..583a44ef 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -18,6 +18,8 @@ type Program struct { Functions []*Function } +// ResolvedProgram is a parsed AWK program + additional data prepared by resolve step +// needed for subsequent interpretation type ResolvedProgram struct { Program Scalars map[string]int From 40bb5ffcc03eb2b2407f0d132a963fc4e14f104d Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 14 Sep 2022 00:31:28 +0300 Subject: [PATCH 53/57] decoupling parse/resolve : CR : add missing comment --- internal/ast/ast.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index 583a44ef..8817ae9f 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -81,6 +81,8 @@ func (a *Action) String() string { return strings.Join(patterns, ", ") + sep + stmtsStr } +// Node is an interface to be satisfied by all AST elements. +// We need it to be able to work with AST in a generic way, like in ast.Walk(). type Node interface { node() } From f641fbfabc037f674f4a95d64303f8014bb9e147 Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 14 Sep 2022 00:36:54 +0300 Subject: [PATCH 54/57] decoupling parse/resolve : CR : add missing comment --- internal/ast/ast.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/ast/ast.go b/internal/ast/ast.go index 8817ae9f..de19ce32 100644 --- a/internal/ast/ast.go +++ b/internal/ast/ast.go @@ -659,18 +659,22 @@ func trimParens(s string) string { return s } +// VarRef is a constructor for *VarExpr func VarRef(name string, pos Position) *VarExpr { return &VarExpr{resolvedLater, resolvedLater, name, pos} } +// ArrayRef is a constructor for *ArrayExpr func ArrayRef(name string, pos Position) *ArrayExpr { return &ArrayExpr{resolvedLater, resolvedLater, name, pos} } +// UserCall is a constructor for *UserCallExpr func UserCall(name string, args []Expr, pos Position) *UserCallExpr { return &UserCallExpr{false, resolvedLater, name, args, pos} } +// PositionError represents an error bound to specific position in source. type PositionError struct { // Source line/column position where the error occurred. Position Position @@ -678,7 +682,7 @@ type PositionError struct { Message string } -// PosErrorf like errorf, but with an explicit position. +// PosErrorf like fmt.Errorf, but with an explicit position. func PosErrorf(pos Position, format string, args ...interface{}) error { message := fmt.Sprintf(format, args...) return &PositionError{pos, message} From b590a5c6cdf8e83f00a5bfba2080d9df3b8c6426 Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 14 Sep 2022 00:42:19 +0300 Subject: [PATCH 55/57] decoupling parse/resolve : CR : add missing comment --- internal/ast/walk.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/ast/walk.go b/internal/ast/walk.go index 6d72f11f..a209ac88 100644 --- a/internal/ast/walk.go +++ b/internal/ast/walk.go @@ -9,14 +9,14 @@ type Visitor interface { Visit(node Node) (w Visitor) } -// Helper functions for common node lists. They may be empty. - +// WalkExprList walks a visitor over a list of expression AST nodes func WalkExprList(v Visitor, exprs []Expr) { for _, expr := range exprs { Walk(v, expr) } } +// WalkStmtList walks a visitor over a list of statement AST nodes func WalkStmtList(v Visitor, stmts []Stmt) { for _, stmt := range stmts { Walk(v, stmt) From 3cf49056ee550fab0549333906320cf668e296c5 Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 14 Sep 2022 00:46:49 +0300 Subject: [PATCH 56/57] decoupling parse/resolve : CR : `newResolver` --- internal/resolver/resolve.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/internal/resolver/resolve.go b/internal/resolver/resolve.go index 58b1c735..155bd537 100644 --- a/internal/resolver/resolve.go +++ b/internal/resolver/resolve.go @@ -44,8 +44,7 @@ type Config struct { } func Resolve(prog *ast.Program, config *Config) *ast.ResolvedProgram { - r := &resolver{} - r.initResolve(config) + r := newResolver(config) resolvedProg := &ast.ResolvedProgram{Program: *prog} @@ -159,8 +158,9 @@ type arrayRef struct { ref *ast.ArrayExpr } -// Initialize the resolver -func (r *resolver) initResolve(config *Config) { +// Constructs the resolver +func newResolver(config *Config) *resolver { + r := &resolver{} if config != nil { r.nativeFuncs = config.Funcs r.debugTypes = config.DebugTypes @@ -173,6 +173,7 @@ func (r *resolver) initResolve(config *Config) { r.recordArrayRef(ast.ArrayRef("ARGV", initialPos)) // interpreter relies on ARGV being present r.recordArrayRef(ast.ArrayRef("ENVIRON", initialPos)) // and other built-in arrays r.recordArrayRef(ast.ArrayRef("FIELDS", initialPos)) + return r } // Records a call to a user function (for resolving indexes later) From 5f10a7d4f324648e8345701e77e91fecefc94e4a Mon Sep 17 00:00:00 2001 From: xonix Date: Wed, 14 Sep 2022 00:48:37 +0300 Subject: [PATCH 57/57] decoupling parse/resolve : CR : fix comment --- parser/parser.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser/parser.go b/parser/parser.go index 6cd11876..1a2475d4 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -66,7 +66,7 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { // errors internally, and they're caught here. This significantly simplifies // the recursive descent calls as we don't have to check errors everywhere. if r := recover(); r != nil { - // Convert to ParseError or re-panic + // Convert to PositionError or re-panic posError := *r.(*ast.PositionError) err = &ParseError{ Position: posError.Position,