Reduce scope of keywords during lexing (#404)
This means keywords in one section (e.g. "name") may be used as ids in other sections.
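
In outline: the lexer now tracks which ~~ section ~~ it is in and checks identifiers against a per-section keyword list instead of one global set. A minimal, self-contained Go sketch of that idea (simplified to two sections; the names here are illustrative, not gactar's actual API):

package main

import (
	"fmt"
	"slices" // in the standard library as of Go 1.21
)

type sectionType int

const (
	sectionModel sectionType = iota
	sectionConfig
)

// Each section gets its own keyword list.
var (
	keywordsModel  = []string{"authors", "description", "examples", "name"}
	keywordsConfig = []string{"chunks", "gactar", "modules"}
)

// isKeyword reports whether id is a keyword in the given section.
func isKeyword(section sectionType, id string) bool {
	switch section {
	case sectionModel:
		return slices.Contains(keywordsModel, id)
	case sectionConfig:
		return slices.Contains(keywordsConfig, id)
	}
	return false
}

func main() {
	fmt.Println(isKeyword(sectionModel, "name"))  // true: keyword in the model section
	fmt.Println(isKeyword(sectionConfig, "name")) // false: plain identifier in config
}

slices.Contains is a linear scan, but the lists are tiny and it keeps them declarative; the keyword map the old code built at startup is no longer needed.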
asmaloney authored Jan 20, 2024
1 parent 411d148 commit 51b4659
Showing 4 changed files with 129 additions and 47 deletions.
13 changes: 13 additions & 0 deletions amod/amod_config_test.go
@@ -457,3 +457,16 @@ func Example_proceduralFieldUnrecognized() {
// Output:
// ERROR: unrecognized option "foo" in procedural config (line 6, col 15)
}

// Tests that we can use a keyword from one section as an id in another
func Example_keywordInDifferentSection() {
generateToStdout(`
~~ model ~~
name: Test
~~ config ~~
chunks { [name: first last] }
~~ init ~~
~~ productions ~~`)

// Output:
}
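
As a reminder of how Go example tests work: the test passes when the function's printed output matches its // Output: comment, so the empty // Output: above asserts that this model produces no error output at all. Assuming the repository's usual layout, something like this runs just that test:

go test ./amod -run Example_keywordInDifferentSection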
116 changes: 88 additions & 28 deletions amod/lex.go
@@ -8,6 +8,7 @@ package amod
import (
"fmt"
"io"
"slices"
"strings"
"unicode"
"unicode/utf8"
@@ -99,18 +100,31 @@ type lexeme struct {
pos int // position within the line
}

// sectionType is used to keep track of what section we are lexing
// We use this to limit the scope of keywords.
type sectionType int

const (
sectionModel sectionType = iota
sectionConfig
sectionInit
sectionProduction
)

// lexer_amod tracks our lexing and provides a channel to emit lexemes
type lexer_amod struct {
name string // used only for error reports
input string // the string being scanned.
line int // the line number
lastNewlinePos int
start int // start position of this lexeme (offset from beginning of file)
pos int // current position in the input (offset from beginning of file)
width int // width of last rune read from input
lexemes chan lexeme // channel of scanned lexemes
keywords map[string]bool // used to lookup identifier to see if they are keywords
inPattern bool // state: a pattern - delimited by [] is lexed specially
start int // start position of this lexeme (offset from beginning of file)
pos int // current position in the input (offset from beginning of file)
width int // width of last rune read from input
lexemes chan lexeme // channel of scanned lexemes

inSectionHeader bool // state: switch currentSection based on ~~ section headers
currentSection sectionType // which section are we lexing? used to switch out keywords
inPattern bool // state: a pattern - delimited by [] is lexed specially
}

// stateFn is used to move through the lexing states
@@ -122,26 +136,40 @@ const (
commentDelim = "//"
)

var keywords []string = []string{
// keywordsModel are only keywords for the model section
var keywordsModel []string = []string{
"authors",
"description",
"examples",
"name",
}

// keywordsConfig are only keywords for the config section
var keywordsConfig []string = []string{
"chunks",
"gactar",
"modules",
}

// keywordsInit are only keywords for the init section
var keywordsInit []string = []string{
"similar",
}

// keywordsProductions are only keywords for the productions section
var keywordsProductions []string = []string{
"and",
"any",
"authors",
"buffer_state",
"chunks",
"clear",
"description",
"do",
"examples",
"gactar",
"match",
"module_state",
"modules",
"name",
"nil",
"print",
"recall",
"set",
"similar",
"stop",
"to",
"when",
@@ -185,17 +213,14 @@ func lex(filename string, data string) *lexer_amod {
cleanData(&data)

l := &lexer_amod{
name: filename,
input: data,
line: 1,
lastNewlinePos: 1, // start @ 1 so first line gets 0 (see emit())
lexemes: make(chan lexeme),
keywords: make(map[string]bool),
inPattern: false,
}

for _, v := range keywords {
l.keywords[v] = true
name: filename,
input: data,
line: 1,
lastNewlinePos: 1, // start @ 1 so first line gets 0 (see emit())
lexemes: make(chan lexeme),
currentSection: sectionModel,
inSectionHeader: false,
inPattern: false,
}

go l.run()
@@ -252,9 +277,20 @@ func (l *lexer_amod) next() rune {
return r
}

// lookupKeyword looks up "id" to see if it is a keyword based on which section we are lexing
func (l *lexer_amod) lookupKeyword(id string) bool {
v, ok := l.keywords[id]
return v && ok
switch l.currentSection {
case sectionModel:
return slices.Contains(keywordsModel, id)
case sectionConfig:
return slices.Contains(keywordsConfig, id)
case sectionInit:
return slices.Contains(keywordsInit, id)
case sectionProduction:
return slices.Contains(keywordsProductions, id)
}

return false
}

// skip over the pending input before this point
@@ -428,6 +464,7 @@ func lexStart(l *lexer_amod) stateFn {
if l.nextIs('~') {
l.next()
l.emit(lexemeSectionDelim)
l.inSectionHeader = !l.inSectionHeader
} else {
l.emit(lexemeChar)
}
@@ -495,9 +532,32 @@ func lexIdentifier(l *lexer_amod) stateFn {
l.next()
}

id := l.input[l.start:l.pos]
isKeyword := false

// If we are in a section header, then change our current section
if l.inSectionHeader {
switch id {
case "model":
l.currentSection = sectionModel
case "config":
l.currentSection = sectionConfig
case "init":
l.currentSection = sectionInit
case "productions":
l.currentSection = sectionProduction
default:
return l.errorf("unrecognized section")
}

// these are keywords in this context
isKeyword = true
} else {
isKeyword = l.lookupKeyword(id)
}

// Perhaps not the best way to do this.
// I'm sure there's a char-by-char way we could implement which would be faster.
isKeyword := l.lookupKeyword(l.input[l.start:l.pos])
switch {
case isKeyword:
l.emit(lexemeKeyword)
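
The two new state fields cooperate: lexStart toggles inSectionHeader whenever it emits a ~~ delimiter, and lexIdentifier, on seeing the identifier between the delimiters, switches currentSection (or errors on an unknown section name). A rough standalone sketch of that flow, using a toy token stream instead of a real lexer:

package main

import "fmt"

type sectionType int

const (
	sectionModel sectionType = iota
	sectionConfig
	sectionInit
	sectionProduction
)

var sectionNames = []string{"model", "config", "init", "productions"}

func main() {
	current := sectionModel
	inHeader := false

	// "~~" toggles the header state; an identifier seen while inside a
	// header selects the current section, mirroring lexStart/lexIdentifier.
	for _, tok := range []string{"~~", "config", "~~", "chunks"} {
		if tok == "~~" {
			inHeader = !inHeader
			continue
		}
		if inHeader {
			switch tok {
			case "model":
				current = sectionModel
			case "config":
				current = sectionConfig
			case "init":
				current = sectionInit
			case "productions":
				current = sectionProduction
			}
			continue
		}
		fmt.Printf("identifier %q lexed in section %q\n", tok, sectionNames[current])
	}
	// Output: identifier "chunks" lexed in section "config"
}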
9 changes: 9 additions & 0 deletions amod/lex_test.go
@@ -45,6 +45,15 @@ func TestInvalidSection(t *testing.T) {
if token.Type != lexer.TokenType(lexemeSectionDelim) {
t.Errorf("expected to lex '%s' as section delimiter (%d) - got type %d", token.Value, lexemeSectionDelim, token.Type)
}

expected := "ERROR on line 1 at position 9: unrecognized section"

token, err = l.Next()
if err == nil {
t.Errorf("expected error: %q", expected)
} else if err.Error() != expected {
t.Errorf("expected error: %q but got %q", expected, err.Error())
}
}

func TestUnterminatedQuote(t *testing.T) {
38 changes: 19 additions & 19 deletions amod/parse.go
@@ -25,26 +25,26 @@ import (
// paste in the generated EBNF above, click "Convert" and then click "View Diagram"

type amodFile struct {
ModelHeader string `parser:"'~~':SectionDelim 'model' '~~':SectionDelim"`
ModelHeader string `parser:"'~~':SectionDelim 'model':Keyword '~~':SectionDelim"`
Model *modelSection `parser:"@@"`

ConfigHeader string `parser:"'~~':SectionDelim 'config' '~~':SectionDelim"`
ConfigHeader string `parser:"'~~':SectionDelim 'config':Keyword '~~':SectionDelim"`
Config *configSection `parser:"(@@)?"`

InitHeader string `parser:"'~~':SectionDelim 'init' '~~':SectionDelim"`
InitHeader string `parser:"'~~':SectionDelim 'init':Keyword '~~':SectionDelim"`
Init *initSection `parser:"(@@)?"`

ProductionsHeader string `parser:"'~~':SectionDelim 'productions' '~~':SectionDelim"`
ProductionsHeader string `parser:"'~~':SectionDelim 'productions':Keyword '~~':SectionDelim"`
Productions *productionSection `parser:"(@@)?"`

Tokens []lexer.Token
}

type modelSection struct {
Name string `parser:"'name' ':' (@String|@Ident)"`
Description string `parser:"('description' ':' @String)?"`
Authors []string `parser:"('authors' '{' @String* '}')?"`
Examples []*pattern `parser:"('examples' '{' @@* '}')?"`
Name string `parser:"'name':Keyword ':' (@String|@Ident)"`
Description string `parser:"('description':Keyword ':' @String)?"`
Authors []string `parser:"('authors':Keyword '{' @String* '}')?"`
Examples []*pattern `parser:"('examples':Keyword '{' @@* '}')?"`

Tokens []lexer.Token
}
@@ -165,7 +165,7 @@ type field struct {
}

type gactarConfig struct {
GactarFields []*field `parser:"'gactar' '{' @@* '}'"`
GactarFields []*field `parser:"'gactar':Keyword '{' @@* '}'"`

Tokens []lexer.Token
}
@@ -178,7 +178,7 @@ type module struct {
}

type moduleConfig struct {
Modules []*module `parser:"'modules' '{' @@* '}'"`
Modules []*module `parser:"'modules':Keyword '{' @@* '}'"`

Tokens []lexer.Token
}
@@ -193,7 +193,7 @@ type chunkDecl struct {
}

type chunkConfig struct {
ChunkDecls []*chunkDecl `parser:"'chunks' '{' @@* '}'"`
ChunkDecls []*chunkDecl `parser:"'chunks':Keyword '{' @@* '}'"`

Tokens []lexer.Token
}
@@ -347,19 +347,19 @@ type matchItem struct {
}

type match struct {
Items []*matchItem `parser:"'match' '{' @@+ '}'"`
Items []*matchItem `parser:"'match':Keyword '{' @@+ '}'"`

Tokens []lexer.Token
}

type clearStatement struct {
BufferNames []string `parser:"'clear' ( @Ident ','? )+"`
BufferNames []string `parser:"'clear':Keyword ( @Ident ','? )+"`

Tokens []lexer.Token
}

type printStatement struct {
Args []*printArg `parser:"'print' ( @@ ','? )*"`
Args []*printArg `parser:"'print':Keyword ( @@ ','? )*"`

Tokens []lexer.Token
}
@@ -381,17 +381,17 @@ type withClause struct {
}

type recallStatement struct {
Pattern *pattern `parser:"'recall' @@"`
Pattern *pattern `parser:"'recall':Keyword @@"`
With *withClause `parser:"@@?"`

Tokens []lexer.Token
}

type setStatement struct {
Set string `parser:"'set'"` // not used, but must be visible for parse to work
Set string `parser:"'set':Keyword"` // not used, but must be visible for parse to work
BufferRef bufferRef `parser:"@@"`

To string `parser:"'to'"` // not used, but must be visible for parse to work
To string `parser:"'to':Keyword"` // not used, but must be visible for parse to work
Value *setArg `parser:"( @@"`
Pattern *pattern `parser:"| @@)"`

@@ -415,15 +415,15 @@ type statement struct {
}

type do struct {
Do string `parser:"'do'"` // not used, but must be visible for parse to work
Do string `parser:"'do':Keyword"` // not used, but must be visible for parse to work
Statements *[]*statement `parser:"'{' @@+ '}'"`

Tokens []lexer.Token
}

type production struct {
Name string `parser:"@Ident '{'"`
Description *string `parser:"('description' ':' @String)?"`
Description *string `parser:"('description':Keyword ':' @String)?"`
Match *match `parser:"@@"`
Do *do `parser:"@@"`
End string `parser:"'}'"` // not used, but must be visible for parse to work
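
For context on the :Keyword annotations above: in participle's struct-tag grammar, 'name':Keyword matches the literal name only when the lexer emitted it as a Keyword token, not merely when the token's text is "name". Since keywords are now section-scoped, pinning each literal to the Keyword type keeps the grammar and the lexer in agreement. A hedged, self-contained sketch of the tag mechanics (the toy lexer and type names are mine, not gactar's):

package main

import (
	"fmt"

	"github.com/alecthomas/participle/v2"
	"github.com/alecthomas/participle/v2/lexer"
)

// Toy lexer: "name" lexes as a Keyword; other words lex as Ident.
var toyLexer = lexer.MustSimple([]lexer.SimpleRule{
	{Name: "Keyword", Pattern: `name\b`},
	{Name: "Ident", Pattern: `[A-Za-z_]\w*`},
	{Name: "Colon", Pattern: `:`},
	{Name: "Whitespace", Pattern: `\s+`},
})

// 'name':Keyword requires both the text "name" and the token type Keyword.
type modelName struct {
	Value string `parser:"'name':Keyword ':' @Ident"`
}

var parser = participle.MustBuild[modelName](
	participle.Lexer(toyLexer),
	participle.Elide("Whitespace"),
)

func main() {
	m, err := parser.ParseString("", "name: Test")
	if err != nil {
		panic(err)
	}
	fmt.Println(m.Value) // Test
}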
