From 86f0be1e57f09c9a00af4424ffe93336d1dc1528 Mon Sep 17 00:00:00 2001
From: "Denis Kuzmin [ GitHub/3F ]" <x-3F@outlook.com>
Date: Sat, 8 Aug 2020 15:54:14 +0300
Subject: [PATCH] Second-order Quantifiers. Added support for `++`

Updated tests.

Our subset of the regex quantifiers is now standardized as follows:

regex    | regXwild
---------|----------
.*       | *
.+       | +
.?       | ?
.{1}     | #
.{2}     | ##
.{2, }   | ++
.{0, 2}  | ??
---
 Readme.md                          | 44 +++++++++-----
 regXwild/core/ESS/AlgorithmEss.cpp | 27 ++++++++-
 regXwild/core/ESS/AlgorithmEss.h   |  6 +-
 regXwildTest/AlgorithmEssTest.cpp  | 25 +++++++-
 regXwildTest/EssRangesTest.cpp     | 92 ++++++++++++++++++++++++++++++
 5 files changed, 172 insertions(+), 22 deletions(-)

diff --git a/Readme.md b/Readme.md
index ecbdd9b..72738d0 100644
--- a/Readme.md
+++ b/Readme.md
@@ -1,15 +1,15 @@
 
 # [regXwild](https://github.com/3F/regXwild) 
 
-Small and super Fast advanced wildcards! `*`,`|`,`?`,`^`,`$`,`+`,`#`,`>` in addition to slow regex engine and more.
+Small and super Fast Advanced wildcards! `*`,`|`,`?`,`^`,`$`,`+`,`#`,`>` in addition to slow regex engines and more.
 
 Unique algorithms that was implemented on native unmanaged C++ but easily accessible also in .NET through **[Conari](https://github.com/3F/Conari)** (recommended due to caching of 0x29 opcodes and other related optimization).
 
 [![Build status](https://ci.appveyor.com/api/projects/status/8knio1ggle0o8ugh/branch/master?svg=true)](https://ci.appveyor.com/project/3Fs/regxwild-github/branch/master)
-[![release](https://img.shields.io/github/release/3F/regXwild.svg)](https://github.com/3F/regXwild/releases/latest)
-[![License](https://img.shields.io/badge/License-MIT-74A5C2.svg)](https://github.com/3F/regXwild/blob/master/LICENSE)
-[![NuGet package](https://img.shields.io/nuget/v/regXwild.svg)](https://www.nuget.org/packages/regXwild/)
-[![Tests](https://img.shields.io/appveyor/tests/3Fs/regxwild-github/master.svg)](https://ci.appveyor.com/project/3Fs/regxwild-github/build/tests)
+[![release](https://img.shields.io/github/v/release/3F/regXwild)](https://github.com/3F/regXwild/releases/latest)
+[![License](https://img.shields.io/badge/License-MIT-74A5C2)](https://github.com/3F/regXwild/blob/master/LICENSE)
+[![NuGet package](https://img.shields.io/nuget/v/regXwild)](https://www.nuget.org/packages/regXwild/)
+[![Tests](https://img.shields.io/appveyor/tests/3Fs/regxwild-github/master)](https://ci.appveyor.com/project/3Fs/regxwild-github/build/tests)
 
 [![Build history](https://buildstats.info/appveyor/chart/3Fs/regxwild-github?buildCount=20&includeBuildsFromPullRequest=true&showStats=true)](https://ci.appveyor.com/project/3Fs/regxwild-github/history)
 
@@ -30,7 +30,7 @@ Unique algorithms that was implemented on native unmanaged C++ but easily access
 
 It was designed to be faster than just fast, when using more features that usually go beyond the typical wildcards.
 
-🔍 Easy to start:
+### 🔍 Easy to start
 
 Unmanaged native C++ or managed .NET project. It doesn't matter, just use it:
 
@@ -44,7 +44,7 @@ if(searchEssC(_T("regXwild"), _T("reg?wild"), true)) {
 }
 ```
 
-C# if you're using [ [Conari](https://github.com/3F/Conari) ]
+C# if [ [Conari](https://github.com/3F/Conari) ]
 ```csharp
 using(var l = new ConariL("regXwild.dll")) 
 {
@@ -54,7 +54,7 @@ using(var l = new ConariL("regXwild.dll"))
 }
 ```
 
-🏄 Amazing meta symbols:
+### 🏄 Amazing meta symbols
 
 ESS version (advanced EXT version)
 
@@ -63,11 +63,11 @@ enum MetaSymbols
 {
     MS_ANY      = _T('*'), // {0, ~}
     MS_SPLIT    = _T('|'), // str1 or str2 or ...
-    MS_ONE      = _T('?'), // {0, 1}, ??? - {0, 3}, ...
+    MS_ONE      = _T('?'), // {0, 1}, ??? {0, 3}, ...
     MS_BEGIN    = _T('^'), // [str... or [str1... |[str2...
     MS_END      = _T('$'), // ...str] or ...str1]| ...str2]
-    MS_MORE     = _T('+'), // {1, ~}
-    MS_SINGLE   = _T('#'), // {1}
+    MS_MORE     = _T('+'), // {1, ~}, +++ {3, ~}, ...
+    MS_SINGLE   = _T('#'), // {1}, ## {2}, ### {3}, ...
     MS_ANYSP    = _T('>'), // as [^/]*
 };
 ```
@@ -84,14 +84,28 @@ enum MetaSymbols
 };
 ```
 
-Check it with our actual **Unit-Tests**.
+### 🧮 Quantifiers
 
-🚀 Awesome speed: 
+regex    | regXwild
+---------|----------
+.*       | *
+.+       | +
+.?       | ?
+.{1}     | #
+.{2}     | ##
+.{2, }   | ++
+.{0, 2}  | ??
+
+and similar ...
+
+Play with our actual **Unit-Tests**.
+
+### 🚀 Awesome speed
 
 * [~2000 times faster when C++](#speed).
 * For .NET (including modern .NET Core), [Conari](https://github.com/3F/Conari) provides optional caching of 0x29 opcodes (Calli) and more to get a similar result as possible.
 
-🍰 Open and Free:
+### 🍰 Open and Free
 
 Open Source project; MIT License, Enjoy 🎉
 
@@ -114,7 +128,7 @@ We're waiting for your awesome contributions!
 Please note:
 
 * **+icase** means ignore case sensitivity when matching the filter(pattern) within the searched string, i.e. `ignoreCase = true`. **Without** this, everything **will be much faster** of course. *That is, icase always adds complexity.*
-* Commonly **MultiByte** will be faster than **Unicode** (for the same platform and the same way of module use) but it depends on specific architecture and can be about ~2 times faster when native C++, and about ~4 times faster when .NET + Conari and related.
+* Below, **MultiByte** can be faster than **Unicode** (for the same platform and the same way of module use), but this depends on the specific architecture: it can be about 2 times faster in native C++, and about 4 times faster with .NET + Conari and related.
 * The results below can be different on different machines. You need only look at the difference (in milliseconds) between algorithms for a specific target.
 * To calculate the data, as in the table below, you need execute `algo.exe`
 
diff --git a/regXwild/core/ESS/AlgorithmEss.cpp b/regXwild/core/ESS/AlgorithmEss.cpp
index e88a709..52474e7 100644
--- a/regXwild/core/ESS/AlgorithmEss.cpp
+++ b/regXwild/core/ESS/AlgorithmEss.cpp
@@ -117,9 +117,11 @@ bool AlgorithmEss::search(const tstring& text, const tstring& filter, bool ignor
                 if(rewindToNextBlock(it)){ continue; } return false;
             }
 
-            // Sequential combinations of characters SINGLE & ONE
-            if((item.mask.curr & SINGLE && item.mask.prev & SINGLE) || 
-                (item.mask.curr & ONE && item.mask.prev & ONE)){
+            // Sequential combinations of #, ?, +
+            if((item.mask.curr & SINGLE && item.mask.prev & SINGLE) 
+                || (item.mask.curr & ONE && item.mask.prev & ONE)
+                || (item.mask.curr & MORE && item.mask.prev & MORE))
+            {
                     ++item.overlay;
             }
             else{ item.overlay = 0; }
@@ -261,6 +263,25 @@ udiff_t AlgorithmEss::interval()
         return words.found;
     }
 
+    // "+"
+    if(item.mask.prev & MORE)
+    {
+        udiff_t len     = item.prev.length();
+        diff_t lPosMax  = words.found - len;
+        diff_t plim     = words.found - words.left;
+        diff_t lPos     = lPosMax - plim - 1;
+
+        if(item.overlay > plim) { // When filter ++++ (4 or more) is more than origin data.
+            return tstring::npos;
+        }
+
+        if(_text.substr(lPos, len).compare(item.prev) == 0) {
+            return words.found;
+        }
+
+        return tstring::npos;
+    }
+
     // "?"
     if(item.mask.prev & ONE && (words.found - words.left) > 1)
     {
diff --git a/regXwild/core/ESS/AlgorithmEss.h b/regXwild/core/ESS/AlgorithmEss.h
index fad0c8f..cf1d34a 100644
--- a/regXwild/core/ESS/AlgorithmEss.h
+++ b/regXwild/core/ESS/AlgorithmEss.h
@@ -53,11 +53,11 @@ namespace net { namespace r_eg { namespace regXwild { namespace core { namespace
         {
             MS_ANY      = _T('*'), // {0, ~}
             MS_SPLIT    = _T('|'), // str1 or str2 or ...
-            MS_ONE      = _T('?'), // {0, 1}, ??? - {0, 3}, ...
+            MS_ONE      = _T('?'), // {0, 1}, ??? {0, 3}, ...
             MS_BEGIN    = _T('^'), // [str... or [str1... |[str2...
             MS_END      = _T('$'), // ...str] or ...str1]| ...str2]
-            MS_MORE     = _T('+'), // {1, ~}
-            MS_SINGLE   = _T('#'), // {1}
+            MS_MORE     = _T('+'), // {1, ~}, +++ {3, ~}, ...
+            MS_SINGLE   = _T('#'), // {1}, ## {2}, ### {3}, ...
             MS_ANYSP    = _T('>'), // as [^/]*  //TODO: >\>/ i.e. '>' + {symbol}
         };
 
diff --git a/regXwildTest/AlgorithmEssTest.cpp b/regXwildTest/AlgorithmEssTest.cpp
index 72819de..01bca25 100644
--- a/regXwildTest/AlgorithmEssTest.cpp
+++ b/regXwildTest/AlgorithmEssTest.cpp
@@ -351,7 +351,7 @@ namespace regXwildTest
             Assert::AreEqual(true, searchEss(data, _T("new++systems")));
             Assert::AreEqual(true, searchEss(data, _T("+systems")));
             Assert::AreEqual(true, searchEss(data, _T("project+12")));
-            Assert::AreEqual(true, searchEss(data, _T("project++12")));
+            Assert::AreEqual(false, searchEss(data, _T("project++12")));
             Assert::AreEqual(true, searchEss(data, _T("75+*systems")));
             Assert::AreEqual(true, searchEss(data, _T("75*+*systems")));
             Assert::AreEqual(true, searchEss(data, _T("new+7+system")));
@@ -412,6 +412,26 @@ namespace regXwildTest
             }
         }
 
+        TEST_METHOD(filterMoreTest6)
+        {
+            tstring data = _T("new project20+ 10-pro data");
+
+            Assert::AreEqual(true, searchEss(data, _T("++")));
+            Assert::AreEqual(true, searchEss(data, _T("+++")));
+            Assert::AreEqual(true, searchEss(data, _T("++++")));
+            Assert::AreEqual(true, searchEss(data, _T("++++proj")));
+
+            Assert::AreEqual(false, searchEss(data, _T("+++++proj")));
+
+            Assert::AreEqual(false, searchEss(data, _T("project+20")));
+            Assert::AreEqual(true, searchEss(data, _T("project+10")));
+            Assert::AreEqual(true, searchEss(data, _T("project++10")));
+            Assert::AreEqual(true, searchEss(data, _T("project+++10")));
+            Assert::AreEqual(true, searchEss(data, _T("project++++10")));
+            Assert::AreEqual(false, searchEss(data, _T("project+++++10")));
+            Assert::AreEqual(false, searchEss(data, _T("project++++++10")));
+        }
+
         TEST_METHOD(filterBeginTest1)
         {
             tstring data = _T("new tes;ted project-12, and 75_protection of various systems");
@@ -1007,6 +1027,9 @@ namespace regXwildTest
             Assert::IsTrue(searchEss(data, _T("1_of")));
             Assert::IsTrue(searchEss(data, _T("[1??_of")));
             Assert::IsTrue(searchEss(data, _T("[1???_of")));
+            Assert::IsTrue(searchEss(data, _T("[1+_of")));
+            Assert::IsFalse(searchEss(data, _T("[1++_of")));
+            Assert::IsFalse(searchEss(data, _T("[1+++_of")));
         }
 
         TEST_METHOD(underscoreTest2)
diff --git a/regXwildTest/EssRangesTest.cpp b/regXwildTest/EssRangesTest.cpp
index 269e4ed..2770852 100644
--- a/regXwildTest/EssRangesTest.cpp
+++ b/regXwildTest/EssRangesTest.cpp
@@ -113,6 +113,52 @@ namespace regXwildTest
             Assert::IsFalse(searchEss(_T("number = '12345';"), filter));
         }
 
+        TEST_METHOD(rangeAtMoreTest1)
+        {
+            tstring data = _T("number = '123';");
+
+            Assert::IsTrue(searchEss(data, _T("number = '+++';")));
+            Assert::IsTrue(searchEss(data, _T("number = '++';")));
+            Assert::IsTrue(searchEss(data, _T("number = '+';")));
+            Assert::IsFalse(searchEss(data, _T("number = '++++';")));
+            Assert::IsFalse(searchEss(data, _T("number = '+++++';")));
+        }
+
+        TEST_METHOD(rangeAtMoreTest2)
+        {
+            tstring filter = _T("number = '+++';");
+
+            // +++ means 3 or more
+            Assert::IsFalse(searchEss(_T("number = '';"), filter));
+            Assert::IsFalse(searchEss(_T("number = '1';"), filter));
+            Assert::IsFalse(searchEss(_T("number = '12';"), filter));
+            Assert::IsTrue(searchEss(_T("number = '123';"), filter));
+            Assert::IsTrue(searchEss(_T("number = '1234';"), filter));
+        }
+
+        TEST_METHOD(rangeAtMoreTest3)
+        {
+            tstring data = _T("number = '123';");
+
+            Assert::IsTrue(searchEss(data, _T("ber = '+++';")));
+            Assert::IsTrue(searchEss(data, _T("ber = '++';")));
+            Assert::IsTrue(searchEss(data, _T("ber = '+';")));
+            Assert::IsFalse(searchEss(data, _T("ber = '++++';")));
+            Assert::IsFalse(searchEss(data, _T("ber = '+++++';")));
+        }
+
+        TEST_METHOD(rangeAtMoreTest4)
+        {
+            tstring filter = _T("ber = '+++';");
+
+            // +++ means 3 or more
+            Assert::IsFalse(searchEss(_T("number = '';"), filter));
+            Assert::IsFalse(searchEss(_T("number = '1';"), filter));
+            Assert::IsFalse(searchEss(_T("number = '12';"), filter));
+            Assert::IsTrue(searchEss(_T("number = '123';"), filter));
+            Assert::IsTrue(searchEss(_T("number = '1234';"), filter));
+        }
+
         TEST_METHOD(rangeAtAnyTest1)
         {
             tstring data = _T("number = '123';");
@@ -451,6 +497,52 @@ namespace regXwildTest
             Assert::IsFalse(searchEss(_T("number = '12345';"), filter));
         }
 
+        TEST_METHOD(limRangeAtMoreTest1)
+        {
+            tstring data = _T("number = '123';");
+
+            Assert::IsTrue(searchEss(data, _T("number = '+++'")));
+            Assert::IsTrue(searchEss(data, _T("number = '++'")));
+            Assert::IsTrue(searchEss(data, _T("number = '+'")));
+            Assert::IsFalse(searchEss(data, _T("number = '++++'")));
+            Assert::IsFalse(searchEss(data, _T("number = '+++++'")));
+        }
+
+        TEST_METHOD(limRangeAtMoreTest2)
+        {
+            tstring filter = _T("number = '+++'");
+
+            // +++ means 3 or more
+            Assert::IsFalse(searchEss(_T("number = '';"), filter));
+            Assert::IsFalse(searchEss(_T("number = '1';"), filter));
+            Assert::IsFalse(searchEss(_T("number = '12';"), filter));
+            Assert::IsTrue(searchEss(_T("number = '123';"), filter));
+            Assert::IsTrue(searchEss(_T("number = '1234';"), filter));
+        }
+
+        TEST_METHOD(limRangeAtMoreTest3)
+        {
+            tstring data = _T("number = '123';");
+
+            Assert::IsTrue(searchEss(data, _T("ber = '+++'")));
+            Assert::IsTrue(searchEss(data, _T("ber = '++'")));
+            Assert::IsTrue(searchEss(data, _T("ber = '+'")));
+            Assert::IsFalse(searchEss(data, _T("ber = '++++'")));
+            Assert::IsFalse(searchEss(data, _T("ber = '+++++'")));
+        }
+
+        TEST_METHOD(limRangeAtMoreTest4)
+        {
+            tstring filter = _T("ber = '+++'");
+
+            // +++ means 3 or more
+            Assert::IsFalse(searchEss(_T("number = '';"), filter));
+            Assert::IsFalse(searchEss(_T("number = '1';"), filter));
+            Assert::IsFalse(searchEss(_T("number = '12';"), filter));
+            Assert::IsTrue(searchEss(_T("number = '123';"), filter));
+            Assert::IsTrue(searchEss(_T("number = '1234';"), filter));
+        }
+
         TEST_METHOD(limRangeAtAnyTest1)
         {
             tstring data = _T("number = '123';");