Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

regex: bug fix for issue #19789 #19793

Merged
merged 2 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions vlib/regex/regex.v
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ pub type FnValidator = fn (u8) bool

struct Token {
mut:
ist rune
ist u32
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, isn't rune the same? Is there a bug with it?

// char
ch rune // char of the token if any
ch_len u8 // char len
Expand Down Expand Up @@ -553,7 +553,7 @@ fn (re RE) check_char_class(pc int, ch rune) bool {
}

// parse_char_class returns (index, str_len, cc_type) of a char class [abcm-p]; the char class starts after the [ char
fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, rune) {
fn (mut re RE) parse_char_class(in_txt string, in_i int) (int, int, u32) {
mut status := CharClass_parse_state.start
mut i := in_i

Expand Down Expand Up @@ -1259,7 +1259,7 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
if re.prog[pc2].ist == regex.ist_dot_char {
return regex.err_syntax_error, 0
}
if re.prog[pc2].ist !in [rune(regex.ist_prog_end), regex.ist_group_end,
if re.prog[pc2].ist !in [u32(regex.ist_prog_end), regex.ist_group_end,
regex.ist_group_start] {
// println("Next dot char check is PC: ${pc2}")
re.prog[pc1].dot_check_pc = pc2
Expand All @@ -1276,7 +1276,7 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
pc1 = last_dot_char_pc + 1
mut is_last_dot := true
for pc1 < pc {
if re.prog[pc1].ist !in [rune(regex.ist_prog_end), regex.ist_group_end] {
if re.prog[pc1].ist !in [u32(regex.ist_prog_end), regex.ist_group_end] {
is_last_dot = false
break
}
Expand All @@ -1302,7 +1302,7 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
bsls_char_count++
mut pc2 := pc1 + 1
for pc2 < pc {
if re.prog[pc2].ist !in [rune(regex.ist_prog_end), regex.ist_group_end,
if re.prog[pc2].ist !in [u32(regex.ist_prog_end), regex.ist_group_end,
regex.ist_group_start] {
// println("Next bsls check is PC: ${pc2}")
re.prog[pc1].bsls_check_pc = pc2
Expand All @@ -1319,7 +1319,7 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
pc1 = last_bsls_char_pc + 1
mut is_last_bsls := true
for pc1 < pc {
if re.prog[pc1].ist !in [rune(regex.ist_prog_end), regex.ist_group_end] {
if re.prog[pc1].ist !in [u32(regex.ist_prog_end), regex.ist_group_end] {
is_last_bsls = false
break
}
Expand All @@ -1337,12 +1337,12 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
mut cc_char_count := 0
mut last_cc_char_pc := -1
for pc1 < pc {
if re.prog[pc1].ist in [rune(regex.ist_char_class_pos), regex.ist_char_class_neg] {
if re.prog[pc1].ist in [u32(regex.ist_char_class_pos), regex.ist_char_class_neg] {
last_cc_char_pc = pc1
cc_char_count++
mut pc2 := pc1 + 1
for pc2 < pc {
if re.prog[pc2].ist !in [rune(regex.ist_prog_end), regex.ist_group_end,
if re.prog[pc2].ist !in [u32(regex.ist_prog_end), regex.ist_group_end,
regex.ist_group_start] {
// println("Next CC check is PC: ${pc2}")
re.prog[pc1].cc_check_pc = pc2
Expand All @@ -1359,7 +1359,7 @@ fn (mut re RE) impl_compile(in_txt string) (int, int) {
pc1 = last_cc_char_pc + 1
mut is_last_cc := true
for pc1 < pc {
if re.prog[pc1].ist !in [rune(regex.ist_prog_end), regex.ist_group_end] {
if re.prog[pc1].ist !in [u32(regex.ist_prog_end), regex.ist_group_end] {
is_last_cc = false
break
}
Expand Down Expand Up @@ -1727,8 +1727,8 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
mut last_fnd_pc := -1

mut state := StateObj{} // actual state
mut ist := rune(0) // actual instruction
mut l_ist := rune(0) // last matched instruction
mut ist := u32(0) // actual instruction
mut l_ist := u32(0) // last matched instruction

mut step_count := 0 // stats for debug
mut dbg_line := 0 // count debug line printed
Expand Down Expand Up @@ -1904,7 +1904,7 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
}

if l_ist in [
rune(regex.ist_char_class_neg),
u32(regex.ist_char_class_neg),
regex.ist_char_class_pos,
regex.ist_bsls_char,
regex.ist_dot_char,
Expand Down Expand Up @@ -2273,6 +2273,8 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
continue
}

// println("HERE WE MUST STAY! ${state.i} >= ${in_txt_len}")

state.match_flag = false
mut cc_neg := false

Expand All @@ -2281,6 +2283,12 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
}
mut cc_res := re.check_char_class(state.pc, ch)

// handle reaching the end of the text while matching a negated char class
if state.i >= (in_txt_len - 1) && cc_neg && re.prog[state.pc].last_dot_flag {
m_state = .ist_quant_n
continue
}

if cc_neg {
cc_res = !cc_res
}
Expand Down Expand Up @@ -2606,7 +2614,7 @@ pub fn (mut re RE) match_base(in_txt &u8, in_txt_len int) (int, int) {
}

rep := re.prog[state.pc].rep
// println(rep)
// println("ist_quant_p rep: ${rep} rep_min: ${re.prog[state.pc].rep_min}")

// under range
if rep > 0 && rep < re.prog[state.pc].rep_min {
Expand Down
4 changes: 4 additions & 0 deletions vlib/regex/regex_test.v
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,10 @@ match_test_suite = [
TestItem{"ab.c", r"[^\s]*\.",0,3},
TestItem{"ab c", r"[\S]+\s",0,3},
TestItem{"ab c", r"[^\s]+\s",0,3},

// test the last char class being a negated class
TestItem{"/a/", r"^/a/[^/]+$", -1,3},
TestItem{"/a/b",r"^/a/[^/]+$", 0,4},
]
)

Expand Down
Loading