Skip to content

Commit

Permalink
Remove experimental regex syntax, see PR #722.
Browse files Browse the repository at this point in the history
The regex code to find words in the current document is actually much slower than
a plain find. The removal makes it possible to use an external regex engine.
  • Loading branch information
zufuliu committed Oct 6, 2023
1 parent 4c0f58d commit 631662f
Show file tree
Hide file tree
Showing 17 changed files with 38 additions and 170 deletions.
7 changes: 1 addition & 6 deletions locale/de/Notepad2.rc
Original file line number Diff line number Diff line change
Expand Up @@ -1764,12 +1764,7 @@ $\tEnd of a line\n\
*? or +?\tNon-greedy matching of quantifiers ""?"" and ""+""\n\
(\tStart of a region\n\
)\tEnd of a region\n\
\\n\tRefers to a region when replacing (n is 1-9)\n\n\
Experimental Syntax:\n\
\\h\tStart of a word, according to IsWordStartAt()\n\
\\H\tEnd of a word, according to IsWordEndAt()\n\
\\i\tMatches 1 or more characters to the end of a word\n\
\\i?\tMatches 0 or more characters to the end of a word"
\\n\tRefers to a region when replacing (n is 1-9)"

IDS_WILDCARDHELP "Wildcard Search\n\n\
*\tMatches zero or more characters.\n\
Expand Down
7 changes: 1 addition & 6 deletions locale/fr-FR/Notepad2.rc
Original file line number Diff line number Diff line change
Expand Up @@ -1764,12 +1764,7 @@ $\tfind e ligne\n\
*? or +?\tNon-greedy matching of quantifiers ""?"" and ""+""\n\
(\tDébut d'une région\n\
)\tFin d'une région\n\
\\n\tSe réfère à une région lors du remplacement (avec n étant compris entre 1 et 9)\n\n\
Syntaxe expérimentale :\n\
\\h\tDébut d'un mot, en accord avec IsWordStartAt()\n\
\\H\tFin d'un mot, en accord avec IsWordEndAt()\n\
\\i\tTrouvera 1 caractère ou plus jusqu'à la fin du mot\n\
\\i?\tTrouvera 0 caractère ou plus jusqu'à la fin du mot"
\\n\tSe réfère à une région lors du remplacement (avec n étant compris entre 1 et 9)"

IDS_WILDCARDHELP "Jocker de recherche\n\n\
*\tTrouvera zéro ou plus de caractères.\n\
Expand Down
7 changes: 1 addition & 6 deletions locale/it/Notepad2.rc
Original file line number Diff line number Diff line change
Expand Up @@ -1764,12 +1764,7 @@ $\tFine di una linea\n\
*? o +?\tCorrispondenza inaccurata di quantificatori ""?"" e ""+""\n\
(\tInizio di un'area\n\
)\tFine di un'area\n\
\\n\tRiferimento a un'area quando sostituita (n è 1-9)\n\n\
Sintassi sperimentale:\n\
\\h\tInizio di una parola, secondo IsWordStartAt()\n\
\\H\tFine di una parola, secondo IsWordEndAt()\n\
\\i\tCorrisponde a 1 o più caratteri alla fine di una \t\tparola\n\
\\i?\tCorrisponde a 0 o più caratteri alla fine di una \t\tparola"
\\n\tRiferimento a un'area quando sostituita (n è 1-9)"

IDS_WILDCARDHELP "Ricerca con caratteri Jolly\n\n\
*\tCorrisponde a zero o più caratteri.\n\
Expand Down
7 changes: 1 addition & 6 deletions locale/ja/Notepad2.rc
Original file line number Diff line number Diff line change
Expand Up @@ -1764,12 +1764,7 @@ $\t行の末尾\n\
*? か +?\t上記記号の ""?"" と ""+"" を最長合致ではなくする\n\
(\t範囲指定の括弧開始\n\
)\t範囲指定の閉じ括弧\n\
\\n\t置換時に対応括弧内の文字列を呼出 (n は 1-9)\n\n\
試験段階の構文:\n\
\\h\tIsWordStartAt() による単語の開始\n\
\\H\tIsWordEndAt() による単語の終わり\n\
\\i\t単語の終わりまで1文字以上に合致\n\
\\i?\t同 0文字以上に合致"
\\n\t置換時に対応括弧内の文字列を呼出 (n は 1-9)"

IDS_WILDCARDHELP "ワイルドカード検索\n\n\
*\t0文字以上の文字列に合致\n\
Expand Down
7 changes: 1 addition & 6 deletions locale/ko/Notepad2.rc
Original file line number Diff line number Diff line change
Expand Up @@ -1764,12 +1764,7 @@ $\t줄의 끝\n\
*? or +?\t""?"" 및 ""+""의 게으른 수량자\n\
(\t그룹의 시작\n\
)\t그룹의 끝\n\
\\n\t교체시 영역을 나타냄 (n은 1-9)\n\n\
시험적 구문:\n\
\\h\tIsWordStartAt()에 따른 단어 시작\n\
\\H\tIsWordEndAt()에 따른 단어의 끝\n\
\\i\t단어의 끝까지 1개 이상의 문자를 찾습니다\n\
\\i?\t단어의 끝까지 0개 이상의 문자를 찾습니다"
\\n\t교체시 영역을 나타냄 (n은 1-9)"

IDS_WILDCARDHELP "와일드카드 검색\n\n\
*\t0개 이상의 문자와 일치합니다.\n\
Expand Down
7 changes: 1 addition & 6 deletions locale/pt-BR/Notepad2.rc
Original file line number Diff line number Diff line change
Expand Up @@ -1764,12 +1764,7 @@ $\tEnd of a line\n\
*? or +?\tNon-greedy matching of quantifiers ""?"" and ""+""\n\
(\tStart of a region\n\
)\tEnd of a region\n\
\\n\tRefers to a region when replacing (n is 1-9)\n\n\
Experimental Syntax:\n\
\\h\tStart of a word, according to IsWordStartAt()\n\
\\H\tEnd of a word, according to IsWordEndAt()\n\
\\i\tMatches 1 or more characters to the end of a word\n\
\\i?\tMatches 0 or more characters to the end of a word"
\\n\tRefers to a region when replacing (n is 1-9)"

IDS_WILDCARDHELP "Wildcard Search\n\n\
*\tMatches zero or more characters.\n\
Expand Down
7 changes: 1 addition & 6 deletions locale/zh-Hans/Notepad2.rc
Original file line number Diff line number Diff line change
Expand Up @@ -1764,12 +1764,7 @@ $\t行结束\n\
*? 或 +?\t对限量词 ""?"" 和 ""+"" 做非贪婪匹配\n\
(\t区域开始\n\
)\t区域结束\n\
\\n\t替换时引用的区域(n 为 1-9)\n\n\
实验性语法:\n\
\\h\t单词开始,根据 IsWordStartAt()\n\
\\H\t单词结束,根据 IsWordEndAt()\n\
\\i\t匹配 1 个或多个字符到单词结束\n\
\\i?\t匹配 0 个或多个字符到单词结束"
\\n\t替换时引用的区域(n 为 1-9)"

IDS_WILDCARDHELP "通配符搜索\n\n\
*\t匹配 0 个或多个字符\n\
Expand Down
7 changes: 1 addition & 6 deletions locale/zh-Hant/Notepad2.rc
Original file line number Diff line number Diff line change
Expand Up @@ -1764,12 +1764,7 @@ $\t行結束。\n\
*? 或 +?\t對限量詞 ""?"" 和 ""+"" 做非貪婪比對。\n\
(\t區域開始。\n\
)\t區域結束。\n\
\\n\t取代時參考的區域(n 為 1-9)。\n\n\
實驗性語法:\n\
\\h\t單詞開始,根據 IsWordStartAt()。\n\
\\H\t單詞結束,根據 IsWordEndAt()。\n\
\\i\t符合 1 個或多個字元到單詞結束。\n\
\\i?\t符合 0 個或多個字元到單詞結束。"
\\n\t取代時參考的區域(n 為 1-9)。"

IDS_WILDCARDHELP "通配符搜尋\n\n\
*\t符合 0 個或多個字元。\n\
Expand Down
1 change: 1 addition & 0 deletions scintilla/include/Scintilla.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,7 @@ typedef sptr_t (*SciFnDirectStatus)(sptr_t ptr, unsigned int iMessage, uptr_t wP
#define SCFIND_NONE 0x0
#define SCFIND_WHOLEWORD 0x2
#define SCFIND_MATCHCASE 0x4
#define SCFIND_MATCH_TO_WORD_END 0x8
#define SCFIND_WORDSTART 0x00100000
#define SCFIND_REGEXP 0x00200000
#define SCFIND_POSIX 0x00400000
Expand Down
1 change: 1 addition & 0 deletions scintilla/include/Scintilla.iface
Original file line number Diff line number Diff line change
Expand Up @@ -1259,6 +1259,7 @@ enu FindOption=SCFIND_
val SCFIND_NONE=0x0
val SCFIND_WHOLEWORD=0x2
val SCFIND_MATCHCASE=0x4
val SCFIND_MATCH_TO_WORD_END=0x8
val SCFIND_WORDSTART=0x00100000
val SCFIND_REGEXP=0x00200000
val SCFIND_POSIX=0x00400000
Expand Down
1 change: 1 addition & 0 deletions scintilla/include/ScintillaTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ enum class FindOption {
None = 0x0,
WholeWord = 0x2,
MatchCase = 0x4,
MatchToWordEnd = 0x8,
WordStart = 0x00100000,
RegExp = 0x00200000,
Posix = 0x00400000,
Expand Down
29 changes: 5 additions & 24 deletions scintilla/src/Document.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -2951,16 +2951,12 @@ class BuiltinRegex final : public RegexSearchBase {

const char *SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) override;

#ifdef NO_CXX11_REGEX
void ClearCache() noexcept override {
search.ClearCache();
}
#endif

private:
#ifdef NO_CXX11_REGEX
RESearch search;
#endif
std::string substituted;
};

Expand Down Expand Up @@ -3020,25 +3016,9 @@ class DocumentIndexer final : public CharacterIndexer {
return '\0';
}

bool IsWordStartAt(Sci::Position pos) const noexcept override {
return pdoc->IsWordStartAt(pos);
}

bool IsWordEndAt(Sci::Position pos) const noexcept override {
return pdoc->IsWordEndAt(pos);
}

Sci::Position MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir) const noexcept override {
return pdoc->MovePositionOutsideChar(pos, moveDir, true);
}

Sci::Position NextPosition(Sci::Position pos, int moveDir) const noexcept override {
return pdoc->NextPosition(pos, moveDir);
}

Sci::Position ExtendWordSelect(Sci::Position pos, int delta) const noexcept override {
return pdoc->ExtendWordSelect(pos, delta, true);
}
};

#ifndef NO_CXX11_REGEX
Expand Down Expand Up @@ -3365,11 +3345,13 @@ Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci:
#endif // NO_CXX11_REGEX

Sci::Position BuiltinRegex::FindText(const Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
bool caseSensitive, [[maybe_unused]] FindOption flags, Sci::Position *length) {
bool caseSensitive, FindOption flags, Sci::Position *length) {

#ifndef NO_CXX11_REGEX
return Cxx11RegexFindText(doc, minPos, maxPos, s, caseSensitive, length, search);
#else
if (FlagSet(flags, FindOption::Cxx11RegEx)) {
return Cxx11RegexFindText(doc, minPos, maxPos, s, caseSensitive, length, search);
}
#endif

const RESearchRange resr(doc, minPos, maxPos);

Expand Down Expand Up @@ -3442,7 +3424,6 @@ Sci::Position BuiltinRegex::FindText(const Document *doc, Sci::Position minPos,
}
*length = lenRet;
return pos;
#endif // NO_CXX11_REGEX
}

const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) {
Expand Down
6 changes: 5 additions & 1 deletion scintilla/src/Editor.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -4181,8 +4181,12 @@ Sci::Position Editor::FindTextFull(
static_cast<FindOption>(wParam),
&lengthFound);
if (pos >= 0) {
Sci::Position endPos = pos + lengthFound;
if (wParam & static_cast<int>(FindOption::MatchToWordEnd)) {
endPos = pdoc->ExtendWordSelect(endPos, 1, true);
}
ft->chrgText.cpMin = pos;
ft->chrgText.cpMax = pos + lengthFound;
ft->chrgText.cpMax = endPos;
}
return pos;
} catch (const RegexError &) {
Expand Down
69 changes: 6 additions & 63 deletions scintilla/src/RESearch.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
/** @file RESearch.cxx
** Regular expression search library.
**/
#if !defined(SCI_OWNREGEX) && defined(NO_CXX11_REGEX)
#ifndef SCI_OWNREGEX

/*
* regex - Regular expression pattern matching and replacement
Expand Down Expand Up @@ -238,12 +238,6 @@ using namespace Scintilla::Internal;
#define CLQ 12 /* 0 to 1 closure */
#define LCLO 13 /* lazy closure */

// experimental
#define EXP_MATCH_WORD_START 14
#define EXP_MATCH_WORD_END 15
#define EXP_MATCH_TO_WORD_END 16
#define EXP_MATCH_TO_WORD_END_OPT 17

#define END 0

/*
Expand Down Expand Up @@ -640,11 +634,6 @@ const char *RESearch::DoCompile(const char *pattern, Sci::Position length, bool
break;
}

if (*p == '?' && *lp == EXP_MATCH_TO_WORD_END) {
*lp = EXP_MATCH_TO_WORD_END_OPT;
break;
}

if (*p == '+') {
for (sp = mp; lp < sp; lp++) {
*mp++ = *lp;
Expand Down Expand Up @@ -675,17 +664,6 @@ const char *RESearch::DoCompile(const char *pattern, Sci::Position length, bool
return badpat("Null pattern inside \\<\\>");
*mp++ = EOW;
break;
case 'h':
*mp++ = EXP_MATCH_WORD_START;
break;
case 'H':
if (*sp == EXP_MATCH_WORD_START)
return badpat("Null pattern inside \\h\\H");
*mp++ = EXP_MATCH_WORD_END;
break;
case 'i':
*mp++ = EXP_MATCH_TO_WORD_END;
break;
case '1':
case '2':
case '3':
Expand Down Expand Up @@ -841,11 +819,10 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
}
default: /* regular matching all the way. */
while (lp < endp) {
Sci::Position offset = 1;
ep = PMatch(ci, lp, endp, ap, 1, &offset);
ep = PMatch(ci, lp, endp, ap);
if (ep != NOTFOUND)
break;
lp += offset;
lp++;
}
break;
case END: /* munged automaton. fail always */
Expand Down Expand Up @@ -898,7 +875,7 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
#define CHRSKIP 3 /* [CLO] CHR chr END */
#define CCLSKIP 34 /* [CLO] CCL 32 bytes END */

Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, char *ap, int moveDir, Sci::Position *offset) {
Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, char *ap) {
uint8_t op;

while ((op = *ap++) != END) {
Expand Down Expand Up @@ -943,39 +920,6 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
if (lp == bol || !iswordc(ci.CharAt(lp - 1)) || iswordc(ci.CharAt(lp)))
return NOTFOUND;
break;
case EXP_MATCH_WORD_START:
if (!ci.IsWordStartAt(lp)) {
if (offset) {
Sci::Position e = ci.MovePositionOutsideChar(lp, moveDir);
e = (e == lp) ? ci.NextPosition(lp, moveDir) - lp : e - lp;
*offset = (e == 0) ? moveDir : e;
}
return NOTFOUND;
}
break;
case EXP_MATCH_WORD_END:
if (lp == bol || !ci.IsWordEndAt(lp)) {
if (offset) {
Sci::Position e = ci.MovePositionOutsideChar(lp, moveDir);
e = (e == lp) ? ci.NextPosition(lp, moveDir) - lp : e - lp;
*offset = (e == 0) ? moveDir : e;
}
return NOTFOUND;
}
break;
case EXP_MATCH_TO_WORD_END:
case EXP_MATCH_TO_WORD_END_OPT: {
Sci::Position e = ci.ExtendWordSelect(lp, moveDir);
if ((e == lp && op != EXP_MATCH_TO_WORD_END_OPT) || !ci.IsWordEndAt(e)) {
if (offset) {
e = (e == lp) ? ci.NextPosition(lp, moveDir) - lp : e - lp;
*offset = (e == 0) ? moveDir : e;
}
return NOTFOUND;
}
lp = e;
}
break;
case REF: {
const int n = static_cast<uint8_t>(*ap++);
Sci::Position bp = bopat[n]; /* beginning of subpat... */
Expand Down Expand Up @@ -1029,15 +973,14 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
Sci::Position llp = lp; /* lazy lp for LCLO */
Sci::Position e = NOTFOUND; /* extra pointer for CLO */
while (llp >= are) {
Sci::Position qoff = -1;
const Sci::Position q = PMatch(ci, llp, endp, ap, -1, &qoff);
const Sci::Position q = PMatch(ci, llp, endp, ap);
if (q != NOTFOUND) {
e = q;
lp = llp;
if (op != LCLO) return e;
}
if (*ap == END) return e;
llp += qoff;
--llp;
}
if (*ap == EOT)
PMatch(ci, lp, endp, ap);
Expand Down
8 changes: 2 additions & 6 deletions scintilla/src/RESearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,14 @@
// Based on the work of Ozan S. Yigit.
// This file is in the public domain.
#pragma once
#if !defined(SCI_OWNREGEX) && defined(NO_CXX11_REGEX)
#ifndef SCI_OWNREGEX

namespace Scintilla::Internal {

class CharacterIndexer {
public:
virtual char CharAt(Sci::Position index) const noexcept = 0;
virtual bool IsWordStartAt(Sci::Position pos) const noexcept = 0;
virtual bool IsWordEndAt(Sci::Position pos) const noexcept = 0;
virtual Sci::Position MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir) const noexcept = 0;
virtual Sci::Position NextPosition(Sci::Position pos, int moveDir) const noexcept = 0;
virtual Sci::Position ExtendWordSelect(Sci::Position pos, int delta) const noexcept = 0;
};

class RESearch {
Expand Down Expand Up @@ -49,7 +45,7 @@ class RESearch {
int GetBackslashExpression(const char *pattern, int &incr) noexcept;

const char *DoCompile(const char *pattern, Sci::Position length, bool caseSensitive, bool posix) noexcept;
Sci::Position PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, char *ap, int moveDir = 1, Sci::Position *offset = nullptr);
Sci::Position PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, char *ap);

Sci::Position bol;
Sci::Position tagstk[MAXTAG]; /* subpat tag stack */
Expand Down
Loading

0 comments on commit 631662f

Please sign in to comment.