Skip to content

Commit

Permalink
update bracket parsing code
Browse files: browse the repository at this point in the history
  • Loading branch information
rhaeguard committed Sep 19, 2023
1 parent 9d7a642 commit 41e543b
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 22 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ a very simple regex engine written in go.
- [ ] bracket notation
- [x] `[ ]` bracket notation/ranges
- [x] `[^ ]` bracket negation notation
- [ ] better handling of the bracket expressions: e.g., `[ab-exy12]`
- [ ] special characters in the bracket
- [x] better handling of the bracket expressions: e.g., `[ab-exy12]`
- [x] special characters in the bracket
- [ ] support escape character
- [x] quantifiers
- [x] `*` none or more times
- [x] `+` one or more times
Expand Down
14 changes: 7 additions & 7 deletions lib_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,12 @@ func TestCheck(t *testing.T) {
{"(ha$|^hi)", "ahaa", false},
{"(ha$|^hi)", "ahii", false},
// capturing groups, numeric groups and named groups
{"([0-9])\\1?hi", "h2hi", true},
{"([0-9])([a-d](hello))\\1", "bazoo23", false},
{"(dog)-(cat)-\\2-\\1", "nonsensedog-cat-cat-dognonsense", true},
{"(?<anim>cat)-\\k<anim>", "nonsensedog-cat-cat-dognonsense", true},
{"(?<letter>[cxv])-[a-z]+-\\k<letter>", "c-abcd-c", true},
{"(?<letter>[cxv])-[a-z]+-\\k<letter>", "c-abcd-d", false},
{`([0-9])\1?hi`, "h2hi", true},
{`([0-9])([a-d](hello))\1`, "bazoo23", false},
{`(dog)-(cat)-\2-\1`, "nonsensedog-cat-cat-dognonsense", true},
{`(?<anim>cat)-\k<anim>`, "nonsensedog-cat-cat-dognonsense", true},
{`(?<letter>[cxv])-[a-z]+-\k<letter>`, "c-abcd-c", true},
{`(?<letter>[cxv])-[a-z]+-\k<letter>`, "c-abcd-d", false},
// quantifiers
{"(hi){2,3}", "hi hihi hihi", true},
{`ab{0,}bc`, `abbbbc`, true},
Expand Down Expand Up @@ -138,7 +138,7 @@ func TestCheckForDev(t *testing.T) {
regexString, input string
expected bool
}{
{"he(ya)*o", "heo", true},
{"[0-c-^[_$hello]", "heo", true},
}

for _, test := range data {
Expand Down
8 changes: 4 additions & 4 deletions nfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ type State struct {
}

const (
StartOfText = 1 // ascii: null char
EndOfText = 2 // ascii: end of text
AnyChar = 3 // ascii: substitute
EpsilonChar = 0 // ascii: null char
EpsilonChar = 0
StartOfText = 1
EndOfText = 2
AnyChar = 3
)

func toNfa(memory *parsingContext) *State {
Expand Down
29 changes: 20 additions & 9 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,22 +146,33 @@ func parseBracket(regexString string, memory *parsingContext) {
ch := regexString[memory.loc()]

if ch == '-' {
prevChar := pieces[len(pieces)-1][0] // TODO: maybe do smth better?
nextChar := regexString[memory.adv()]
bothNumeric := isNumeric(prevChar) && isNumeric(nextChar)
bothLowercase := isAlphabetLowercase(prevChar) && isAlphabetLowercase(nextChar)
bothUppercase := isAlphabetUppercase(prevChar) && isAlphabetUppercase(nextChar)
if bothNumeric || bothLowercase || bothUppercase {
pieces[len(pieces)-1] = fmt.Sprintf("%c%c", prevChar, nextChar)
nextChar := regexString[memory.adv()] // TODO: this might fail if we are at the end of the string
// if - is the first character OR is the last character, it's a literal
if len(pieces) == 0 || nextChar == ']' {
pieces = append(pieces, fmt.Sprintf("%c", ch))
} else {
panic(fmt.Sprintf("'%c-%c' range is invalid", prevChar, nextChar))
piece := pieces[len(pieces)-1]
if len(piece) == 1 {
prevChar := piece[0]
if prevChar <= nextChar {
pieces[len(pieces)-1] = fmt.Sprintf("%c%c", prevChar, nextChar)
} else {
panic(fmt.Sprintf("'%c-%c' range is invalid", prevChar, nextChar))
}
} else {
pieces = append(pieces, fmt.Sprintf("%c", ch))
}
}
} else {
pieces = append(pieces, fmt.Sprintf("%c", ch))
}

memory.adv()
}

if len(pieces) == 0 {
panic(fmt.Sprintf("bracket should not be empty"))
}

var uniqueCharacterPieces []string
for _, piece := range pieces {
if !sliceContains(uniqueCharacterPieces, piece) {
Expand Down

0 comments on commit 41e543b

Please sign in to comment.