Skip to content

Commit

Permalink
Merge branch 'exp'
Browse files Browse the repository at this point in the history
  • Loading branch information
ShinobuAmasaki committed Aug 24, 2024
2 parents 223d62d + e826881 commit 497c281
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 225 deletions.
43 changes: 28 additions & 15 deletions src/api_internal_m.f90
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ pure subroutine do_matching_including (automaton, string, from, to, prefix, suff

do_brute_force = .false.
runs_engine = .false.
str = string

str = char(0)//string//char(0)

from = 0
to = 0
do_brute_force = prefix == ''
Expand All @@ -63,7 +65,7 @@ pure subroutine do_matching_including (automaton, string, from, to, prefix, suff
error stop "DFA have not been initialized."
end if

if (string == char(0)//char(0)) then
if (len(string) <= 1 .and. string == '') then
if (automaton%dfa%nodes(cur_i)%accepted) then
from = ACCEPTED_EMPTY
to = ACCEPTED_EMPTY
Expand All @@ -72,7 +74,7 @@ pure subroutine do_matching_including (automaton, string, from, to, prefix, suff
end if

if (.not. do_brute_force) then
call get_index_list_forward(string, prefix, suffix, index_list)
call get_index_list_forward(str, prefix, suffix, index_list)
if (.not. allocated(index_list)) return
if (index_list(1) == INVALID_CHAR_INDEX) then
do_brute_force = .true.
Expand Down Expand Up @@ -123,16 +125,21 @@ pure subroutine do_matching_including (automaton, string, from, to, prefix, suff

next_ci = idxutf8(str, ci) + 1

call automaton%construct(cur_i, dst_i, string(ci:next_ci-1))
call automaton%construct(cur_i, dst_i, str(ci:next_ci-1))

cur_i = dst_i
ci = next_ci
end do

! Update match position if a match is found.
if (max_match > 1) then
from = start
to = max_match - 1
if (max_match > 0) then
from = start-1
if (from == 0) from = 1 ! handle leading NULL character.
if (max_match >= len(str)) then
to = len(string)
else
to = max_match-2
end if
return
end if

Expand Down Expand Up @@ -168,6 +175,7 @@ pure subroutine do_matching_exactly(automaton, string, res, prefix, suffix, runs
integer :: ci ! character index
integer :: next_ci ! next character index
integer :: max_match !
character(:), allocatable :: str

integer :: len_pre, len_post, n
logical :: empty_pre, empty_post, matches_pre, matches_post
Expand All @@ -186,7 +194,7 @@ pure subroutine do_matching_exactly(automaton, string, res, prefix, suffix, runs
n = len(string)
matches_pre = .true.
matches_post = .true.

! Returns true immediately if the given prefix exactly matches the string.
if (len(string) > 0 .and. len(prefix) >0 ) then
if (prefix == string .and. len_pre == n) then
Expand All @@ -206,8 +214,6 @@ pure subroutine do_matching_exactly(automaton, string, res, prefix, suffix, runs
(empty_pre .and. empty_post), matches_pre])




if (.not. runs_engine) then
res = .false.
return
Expand All @@ -231,6 +237,7 @@ pure subroutine do_matching_exactly(automaton, string, res, prefix, suffix, runs
! Initialize counter variables.
max_match = 0
ci = 1
str = char(0)//string//char(0)

! Loop and proceed with matching unless the current index is DFA_INVALID_INDEX.
do while (cur_i /= DFA_INVALID_INDEX)
Expand All @@ -240,24 +247,30 @@ pure subroutine do_matching_exactly(automaton, string, res, prefix, suffix, runs
max_match = ci
end if

if (ci > len(string)) exit
if (ci > len(str)) exit

! Get the index of the next character and assign it to `next_ci`.
next_ci = idxutf8(string, ci) + 1
next_ci = idxutf8(str, ci) + 1

! Lazy evaluation is performed by calling this procedure here.
! The index of destination DFA node is stored in the `dst_i` variable.
call automaton%construct(cur_i, dst_i, string(ci:next_ci-1))
call automaton%construct(cur_i, dst_i, str(ci:next_ci-1))

! If the leading NULL padding byte of `str` is rejected by the DFA, retry the transition starting from the second byte.
if (dst_i == DFA_INVALID_INDEX .and. ci == 1) then
ci = 2
next_ci = idxutf8(str, ci) + 1
call automaton%construct(cur_i, dst_i, str(ci:next_ci-1))
end if

! update counters
cur_i = dst_i
ci = next_ci

end do

! If the longest match reaches the end of the input string,
! this subroutine sets `res` to true; otherwise it sets `res` to false.
if (max_match == len(string)+1) then
if (max_match >= len(string)+2) then
res = .true.
else
res = .false.
Expand Down
50 changes: 25 additions & 25 deletions src/forgex.F90
Original file line number Diff line number Diff line change
Expand Up @@ -90,32 +90,32 @@ pure elemental function operator__in(pattern, str) result(res)

prefix = get_prefix_literal(tree)
suffix = get_suffix_literal(tree)

call automaton%preprocess(tree)

! Initialize automaton with tree and root.
call automaton%init()

! Call the internal procedure to match string, and store the result in logical `res`.
call do_matching_including(automaton, char(0)//str//char(0), from, to, prefix, suffix, unused)
call do_matching_including(automaton, str, from, to, prefix, suffix, unused)
! To support the caret and dollar anchors, newline characters are added before and after str.

if (from == ACCEPTED_EMPTY .and. to == ACCEPTED_EMPTY) then
res = .true.
return
end if

if (is_there_caret_at_the_top(pattern)) then
from = from
else
from = from -1
end if
! if (is_there_caret_at_the_top(pattern)) then
! from = from
! else
! from = from -1
! end if

if (is_there_dollar_at_the_end(pattern)) then
to = to - 2
else
to = to - 1
end if
! if (is_there_dollar_at_the_end(pattern)) then
! to = to - 2
! else
! to = to - 1
! end if

if (from > 0 .and. to > 0) then
res = .true.
Expand Down Expand Up @@ -204,7 +204,7 @@ pure subroutine subroutine__regex(pattern, text, res, length, from, to)
entirely_fixed_string = ''
from_l = INVALID_CHAR_INDEX
to_l = INVALID_CHAR_INDEX

buff = trim(pattern)

! call build_syntax_tree(buff, tape, tree, root)
Expand Down Expand Up @@ -234,7 +234,7 @@ pure subroutine subroutine__regex(pattern, text, res, length, from, to)
call automaton%preprocess(tree)
call automaton%init()

call do_matching_including(automaton, char(0)//text//char(0), from_l, to_l, prefix, suffix, unused)
call do_matching_including(automaton, text, from_l, to_l, prefix, suffix, unused)

if (from_l == ACCEPTED_EMPTY .and. to_l == ACCEPTED_EMPTY) then
res = ''
Expand All @@ -244,17 +244,17 @@ pure subroutine subroutine__regex(pattern, text, res, length, from, to)
return
end if

if (is_there_caret_at_the_top(pattern)) then
from_l = from_l
else
from_l = from_l - 1
end if

if (is_there_dollar_at_the_end(pattern)) then
to_l = to_l - 2
else
to_l = to_l - 1
end if
! if (is_there_caret_at_the_top(pattern)) then
! from_l = from_l
! else
! from_l = from_l - 1
! end if

! if (is_there_dollar_at_the_end(pattern)) then
! to_l = to_l - 2
! else
! to_l = to_l - 1
! end if


if (from_l > 0 .and. to_l > 0) then
Expand Down
47 changes: 31 additions & 16 deletions src/forgex_cli/cli_api_internal_no_opts_m.f90
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,24 @@ subroutine do_matching_including_no_literal_opts (automaton, string, from, to)

str = string
from = 0
to = 0
to = 0

str = char(0)//string//char(0)

cur_i = automaton%initial_index

if (cur_i == DFA_NOT_INIT) then
error stop "DFA have not been initialized."
end if

if (string == char(10)//char(10)) then
if (len(string) <= 1 .and. string == '') then
if (automaton%dfa%nodes(cur_i)%accepted) then
from = 1
to = 1
from = ACCEPTED_EMPTY
to = ACCEPTED_EMPTY
end if
return
end if


loop_init: block
i = 1
start = i
Expand All @@ -71,25 +72,30 @@ subroutine do_matching_including_no_literal_opts (automaton, string, from, to)

next_ci = idxutf8(str, ci) + 1

call automaton%construct(cur_i, dst_i, string(ci:next_ci-1))
call automaton%construct(cur_i, dst_i, str(ci:next_ci-1))

cur_i = dst_i
ci = next_ci
end do

! Update match position if a match is found.
if (max_match > 1) then
from = start
to = max_match - 1
if (max_match > 0) then
from = start-1
if (from == 0) from = 1 ! handle leading NULL character.
if (max_match >= len(str)) then
to = len(string)
else
to = max_match-2
end if
return
end if

start = idxutf8(str, start) + 1 ! Bruteforce searching
start = idxutf8(str, start) + 1 ! Bruteforce searching

end do
end subroutine do_matching_including_no_literal_opts


!> This subroutine is intended to be called from the `forgex_cli_find_m` module.
subroutine do_matching_exactly_no_literal_opts(automaton, string, res)
implicit none
Expand All @@ -101,6 +107,7 @@ subroutine do_matching_exactly_no_literal_opts(automaton, string, res)
integer :: ci ! character index
integer :: next_ci ! next character index
integer :: max_match !
character(:), allocatable :: str

! Initialize `cur_i` with automaton's initial index.
cur_i = automaton%initial_index
Expand All @@ -120,6 +127,7 @@ subroutine do_matching_exactly_no_literal_opts(automaton, string, res)
! Initialize counter variables.
max_match = 0
ci = 1
str = char(0)//string//char(0)

! Loop and proceed with matching unless the current index is DFA_INVALID_INDEX.
do while (cur_i /= DFA_INVALID_INDEX)
Expand All @@ -129,14 +137,21 @@ subroutine do_matching_exactly_no_literal_opts(automaton, string, res)
max_match = ci
end if

if (ci > len(string)) exit
if (ci > len(str)) exit

! Get the index of the next character and assign it to `next_ci`.
next_ci = idxutf8(string, ci) + 1
next_ci = idxutf8(str, ci) + 1

! Lazy evaluation is performed by calling this procedure here.
! The index of destination DFA node is stored in the `dst_i` variable.
call automaton%construct(cur_i, dst_i, string(ci:next_ci-1))
call automaton%construct(cur_i, dst_i, str(ci:next_ci-1))

! If the leading NULL padding byte of `str` is rejected by the DFA, retry the transition starting from the second byte.
if (dst_i == DFA_INVALID_INDEX .and. ci == 1) then
ci = 2
next_ci = idxutf8(str, ci) + 1
call automaton%construct(cur_i, dst_i, str(ci:next_ci-1))
end if

! update counters
cur_i = dst_i
Expand All @@ -146,12 +161,12 @@ subroutine do_matching_exactly_no_literal_opts(automaton, string, res)

! If the longest match reaches the end of the input string,
! this subroutine sets `res` to true; otherwise it sets `res` to false.
if (max_match == len(string)+1) then
if (max_match >= len(string)+2) then
res = .true.
else
res = .false.
end if
end subroutine do_matching_exactly_no_literal_opts


end module forgex_cli_api_internal_no_opts_m
Loading

0 comments on commit 497c281

Please sign in to comment.