diff --git a/.gitignore b/.gitignore
index 32682da8..77c55740 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
# Idea related directories and files
.idea
target/
-*.iml
\ No newline at end of file
+*.iml
+src/test/java/com/github/curiousoddman/rgxgen/CustomTests.java
diff --git a/README.md b/README.md
index a485058e..5e4ba878 100644
--- a/README.md
+++ b/README.md
@@ -15,13 +15,10 @@ Build status:
Follow the link to Online IDE with already created simple project: [JDoodle](https://www.jdoodle.com/a/1NCw)
-Note: 1.0 version is imported there - unfortunately currently I can't change it to latest (1.1).
-
-Luckly there is no big difference between 1.0 and 1.1
+> Note: version 1.0 of the library is imported there.
Enter your pattern and see the results.
-
## Usage
### Maven dependency
@@ -56,24 +53,36 @@ Enter your pattern and see the results.
Changes:
-- Fixed: Top level alternatives without group now working properly: https://github.com/curious-odd-man/RgxGen/issues/31
+- Fixed: Top level alternatives without group now working properly: [#31](https://github.com/curious-odd-man/RgxGen/issues/31)
+- Fixed: Empty alternatives did not always work: [#35](https://github.com/curious-odd-man/RgxGen/issues/35)
+- Feature: Now it is possible to generate strings that do not match a pattern (see usage below) [#36](https://github.com/curious-odd-man/RgxGen/issues/36)
### Code:
```java
-RgxGen rgxGen = new RgxGen("[^0-9]*[12]?[0-9]{1,2}[^0-9]*"); // Create generator
-String s = rgxGen.generate(); // Generate new random value
-BigInteger estimation = rgxGen.numUnique(); // The estimation (not accurate, see Limitations) how much unique values can be generated with that pattern.
-StringIterator uniqueStrings = rgxGen.iterateUnique(); // Iterate over unique values (not accurate, see Limitations)
+public class Main {
+ public static void main(String[] args){
+ RgxGen rgxGen = new RgxGen("[^0-9]*[12]?[0-9]{1,2}[^0-9]*"); // Create generator
+ String s = rgxGen.generate(); // Generate new random value
+ BigInteger estimation = rgxGen.numUnique(); // The estimation (not accurate, see Limitations) of how many unique values can be generated with that pattern.
+ StringIterator uniqueStrings = rgxGen.iterateUnique(); // Iterate over unique values (not accurate, see Limitations)
+ String notMatching = rgxGen.generateNotMatching(); // Generate not matching string
+ }
+}
```
```java
-RgxGen rgxGen = new RgxGen("[^0-9]*[12]?[0-9]{1,2}[^0-9]*"); // Create generator
-Random rnd = new Random(1234)
-String s = rgxGen.generate(rnd); // Generate first value
-String s1 = rgxGen.generate(rnd); // Generate second value
-String s2 = rgxGen.generate(rnd); // Generate third value
-// On each launch s, s1 and s2 will be the same
+public class Main {
+ public static void main(String[] args){
+ RgxGen rgxGen = new RgxGen("[^0-9]*[12]?[0-9]{1,2}[^0-9]*"); // Create generator
+ Random rnd = new Random(1234);
+ String s = rgxGen.generate(rnd); // Generate first value
+ String s1 = rgxGen.generate(rnd); // Generate second value
+ String s2 = rgxGen.generate(rnd); // Generate third value
+ String notMatching = rgxGen.generateNotMatching(rnd); // Generate not matching string
+ // On each launch s, s1 and s2 will be the same
+ }
+}
```
## Supported syntax
@@ -131,6 +140,13 @@ On the contrast, when generating **unique values** - the number of maximum repet
Use `a{n,m}` if you require some specific number of repetitions.
It is suggested to avoid using such infinite patterns to generate data based on regex.
+### Not matching values generation
+
+The general rule is - I try to generate not matching strings of the same length as matching strings would be, though it is not always possible.
+For example, the pattern `.` (any symbol) would yield an empty string as the not matching string.
+Another example is `a{0,2}`: this pattern could yield an empty string, but the not matching strings would only be 1 or 2 symbols long.
+I chose these approaches because they seem predictable and easier to implement.
+
## Other tools to generate values by regex and why this might be better
There are 2 more libraries available to achieve same goal:
diff --git a/pom.xml b/pom.xml
index 4196ae44..c54586aa 100644
--- a/pom.xml
+++ b/pom.xml
@@ -21,8 +21,6 @@
4.12
1.21
- 1.7.29
- 2.13.3
3.8.1
@@ -257,13 +255,6 @@
-
-
- org.slf4j
- slf4j-api
- ${slf4j.version}
-
-
junit
@@ -271,24 +262,6 @@
${junit.version}
test
-
- org.apache.logging.log4j
- log4j-api
- ${log4j.version}
- test
-
-
- org.apache.logging.log4j
- log4j-core
- ${log4j.version}
- test
-
-
- org.apache.logging.log4j
- log4j-slf4j-impl
- ${log4j.version}
- test
-
org.openjdk.jmh
jmh-core
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/RgxGen.java b/src/main/java/com/github/curiousoddman/rgxgen/RgxGen.java
index fbcd8ed6..80b4b4bb 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/RgxGen.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/RgxGen.java
@@ -18,6 +18,7 @@
import com.github.curiousoddman.rgxgen.generator.nodes.Node;
import com.github.curiousoddman.rgxgen.generator.visitors.GenerationVisitor;
+import com.github.curiousoddman.rgxgen.generator.visitors.NotMatchingGenerationVisitor;
import com.github.curiousoddman.rgxgen.generator.visitors.UniqueGenerationVisitor;
import com.github.curiousoddman.rgxgen.generator.visitors.UniqueValuesCountingVisitor;
import com.github.curiousoddman.rgxgen.iterators.StringIterator;
@@ -89,16 +90,15 @@ public StringIterator iterateUnique() {
/**
* Generate random string from the pattern.
*
- * @return generated string.
+ * @return matching random string
*/
public String generate() {
- GenerationVisitor gv = new GenerationVisitor();
- aNode.visit(gv);
- return gv.getString();
+ return generate(new Random());
}
/**
* Generate random string from the pattern.
+ * A Random initialized with the same seed will produce the same results.
*
* @param random random to use for the generation.
* @return generated string.
@@ -108,4 +108,26 @@ public String generate(Random random) {
aNode.visit(gv);
return gv.getString();
}
+
+ /**
+ * Generate random string that does not match a pattern.
+ *
+ * @return not matching random string.
+ */
+ public String generateNotMatching() {
+ return generateNotMatching(new Random());
+ }
+
+ /**
+ * Generate random string that does not match a pattern.
+ * A Random initialized with the same seed will produce the same results.
+ *
+ * @param random random to use for the generation.
+ * @return not matching random string.
+ */
+ public String generateNotMatching(Random random) {
+ NotMatchingGenerationVisitor nmgv = new NotMatchingGenerationVisitor(random);
+ aNode.visit(nmgv);
+ return nmgv.getString();
+ }
}
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Choice.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Choice.java
index 2c40f946..89718108 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Choice.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Choice.java
@@ -17,21 +17,15 @@
/* **************************************************************************/
import com.github.curiousoddman.rgxgen.generator.visitors.NodeVisitor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.util.Arrays;
-public class Choice implements Node {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(Choice.class);
+public class Choice extends Node {
private final Node[] aNodes;
- public Choice(Node... nodes) {
- if (LOGGER.isTraceEnabled()) {
- LOGGER.trace("Creating from {} ", Arrays.asList(nodes));
- }
+ public Choice(String pattern, Node... nodes) {
+ super(pattern);
aNodes = nodes;
}
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/FinalSymbol.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/FinalSymbol.java
index 4d877bc7..9914ff7a 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/FinalSymbol.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/FinalSymbol.java
@@ -17,17 +17,12 @@
/* **************************************************************************/
import com.github.curiousoddman.rgxgen.generator.visitors.NodeVisitor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class FinalSymbol implements Node {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(FinalSymbol.class);
+public class FinalSymbol extends Node {
private final String aValue;
public FinalSymbol(String value) {
- LOGGER.trace("Creating '{}'", value);
+ super(value);
aValue = value;
}
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Group.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Group.java
index 13510b21..b520fc57 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Group.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Group.java
@@ -17,17 +17,13 @@
/* **************************************************************************/
import com.github.curiousoddman.rgxgen.generator.visitors.NodeVisitor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class Group implements Node {
- private static final Logger LOGGER = LoggerFactory.getLogger(Group.class);
+public class Group extends Node {
private final Node aNode;
private final int aGroupIndex;
- public Group(int index, Node node) {
- LOGGER.trace("Crating idx = '{}' from '{}'", index, node);
+ public Group(String pattern, int index, Node node) {
+ super(pattern);
aNode = node;
aGroupIndex = index;
}
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/GroupRef.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/GroupRef.java
index 3db4e3d1..26c08dbe 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/GroupRef.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/GroupRef.java
@@ -17,17 +17,12 @@
/* **************************************************************************/
import com.github.curiousoddman.rgxgen.generator.visitors.NodeVisitor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class GroupRef implements Node {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(GroupRef.class);
+public class GroupRef extends Node {
private final int aIndex;
- public GroupRef(int index) {
- LOGGER.trace("Crating idx = '{}'", index);
+ public GroupRef(String pattern, int index) {
+ super(pattern);
aIndex = index;
}
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Node.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Node.java
index 364aaa46..1611d69c 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Node.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Node.java
@@ -18,6 +18,16 @@
import com.github.curiousoddman.rgxgen.generator.visitors.NodeVisitor;
-public interface Node {
- void visit(NodeVisitor visitor);
+public abstract class Node {
+ private final String aPattern;
+
+ protected Node(String pattern) {
+ aPattern = pattern;
+ }
+
+ public abstract void visit(NodeVisitor visitor);
+
+ public String getPattern() {
+ return aPattern;
+ }
}
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/NotSymbol.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/NotSymbol.java
index 06eda6fb..74c26eab 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/NotSymbol.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/NotSymbol.java
@@ -17,19 +17,15 @@
/* **************************************************************************/
import com.github.curiousoddman.rgxgen.generator.visitors.NodeVisitor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.util.regex.Pattern;
-public class NotSymbol implements Node {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(NotSymbol.class);
-
+public class NotSymbol extends Node {
private final Pattern aSubPattern;
+ // TODO: Parse this pattern always!!
public NotSymbol(String pattern) {
- LOGGER.trace("Crating '{}'", pattern);
+ super(pattern);
aSubPattern = Pattern.compile(pattern);
}
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Repeat.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Repeat.java
index 6061db45..8272c796 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Repeat.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Repeat.java
@@ -17,27 +17,22 @@
/* **************************************************************************/
import com.github.curiousoddman.rgxgen.generator.visitors.NodeVisitor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class Repeat implements Node {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(Repeat.class);
+public class Repeat extends Node {
private final Node aNode;
private final int aMin;
private final int aMax;
- public static Repeat minimum(Node node, int times) {
- return new Repeat(node, times, -1);
+ public static Repeat minimum(String pattern, Node node, int times) {
+ return new Repeat(pattern, node, times, -1);
}
- public Repeat(Node node, int times) {
- this(node, times, times);
+ public Repeat(String pattern, Node node, int times) {
+ this(pattern, node, times, times);
}
- public Repeat(Node node, int min, int max) {
- LOGGER.trace("Crating ({} to {}) '{}'", min, max, node);
+ public Repeat(String pattern, Node node, int min, int max) {
+ super(pattern);
aNode = node;
aMin = min;
aMax = max;
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Sequence.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Sequence.java
index ce6d3150..145ddf7f 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Sequence.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/Sequence.java
@@ -17,21 +17,14 @@
/* **************************************************************************/
import com.github.curiousoddman.rgxgen.generator.visitors.NodeVisitor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.util.Arrays;
-public class Sequence implements Node {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(Sequence.class);
-
+public class Sequence extends Node {
private final Node[] aNodes;
- public Sequence(Node... nodes) {
- if (LOGGER.isTraceEnabled()) {
- LOGGER.trace("Creating from {} ", Arrays.asList(nodes));
- }
+ public Sequence(String pattern, Node... nodes) {
+ super(pattern);
aNodes = nodes;
}
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/SymbolSet.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/SymbolSet.java
index 63f5edc0..5c150c5f 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/SymbolSet.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/nodes/SymbolSet.java
@@ -18,8 +18,6 @@
import com.github.curiousoddman.rgxgen.generator.visitors.NodeVisitor;
import com.github.curiousoddman.rgxgen.util.Util;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.stream.Collectors;
@@ -28,10 +26,7 @@
/**
* Generate Any printable character.
*/
-public class SymbolSet implements Node {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(SymbolSet.class);
-
+public class SymbolSet extends Node {
private static final String[] ALL_SYMBOLS = new String[127 - 32];
public static String[] getAllSymbols() {
@@ -42,13 +37,11 @@ public static String[] getAllSymbols() {
private static final int DEL_ASCII_CODE = 127; // Bound for printable characters in ASCII table
static {
+ StringBuilder sb = new StringBuilder("[");
for (int i = SPACE_ASCII_CODE; i < DEL_ASCII_CODE; ++i) {
- ALL_SYMBOLS[i - SPACE_ASCII_CODE] = Character.valueOf((char) i)
- .toString();
- }
-
- if (LOGGER.isTraceEnabled()) {
- LOGGER.trace("All symbols {} ", Arrays.asList(ALL_SYMBOLS));
+ Character character = (char) i;
+ ALL_SYMBOLS[i - SPACE_ASCII_CODE] = character.toString();
+ sb.append(character);
}
}
@@ -107,15 +100,15 @@ public String toString() {
* Symbol set containing all symbols
*/
public SymbolSet() {
- this(ALL_SYMBOLS.clone(), TYPE.POSITIVE);
+ this(".", ALL_SYMBOLS.clone(), TYPE.POSITIVE);
}
- public SymbolSet(String[] symbols, TYPE type) {
- this(Collections.emptyList(), symbols, type);
+ public SymbolSet(String pattern, String[] symbols, TYPE type) {
+ this(pattern, Collections.emptyList(), symbols, type);
}
- public SymbolSet(List symbolRanges, TYPE type) {
- this(symbolRanges, Util.ZERO_LENGTH_STRING_ARRAY, type);
+ public SymbolSet(String pattern, Collection symbolRanges, TYPE type) {
+ this(pattern, symbolRanges, Util.ZERO_LENGTH_STRING_ARRAY, type);
}
/**
@@ -125,11 +118,8 @@ public SymbolSet(List symbolRanges, TYPE type) {
* @param symbols symbols to include/exclude
* @param type POSITIVE - include, NEGATIVE - exclude
*/
- public SymbolSet(List symbolRanges, String[] symbols, TYPE type) {
- if (LOGGER.isTraceEnabled()) {
- LOGGER.trace("Creating [positive = {}] from {} and {}", type, symbolRanges, Arrays.asList(symbols));
- }
-
+ public SymbolSet(String pattern, Collection symbolRanges, String[] symbols, TYPE type) {
+ super(pattern);
List initial = type == TYPE.NEGATIVE
? new ArrayList<>(Arrays.asList(ALL_SYMBOLS)) // First we need to add all, later we remove unnecessary
: new ArrayList<>(ALL_SYMBOLS.length); // Most probably it will be enough.
@@ -172,4 +162,8 @@ public String[] getSymbols() {
public String toString() {
return "SymbolSet{" + Arrays.toString(aSymbols) + '}';
}
+
+ public boolean isEmpty() {
+ return aSymbols.length == 0;
+ }
}
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/GenerationVisitor.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/GenerationVisitor.java
index 73241268..21cc42ae 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/GenerationVisitor.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/GenerationVisitor.java
@@ -24,9 +24,9 @@
import java.util.Random;
public class GenerationVisitor implements NodeVisitor {
- private final StringBuilder aStringBuilder = new StringBuilder();
- private final Map aGroupValues = new HashMap<>();
- private final Random aRandom;
+ protected final StringBuilder aStringBuilder = new StringBuilder();
+ protected final Map aGroupValues = new HashMap<>();
+ protected final Random aRandom;
public GenerationVisitor() {
this(new Random());
@@ -76,13 +76,13 @@ public void visit(Sequence node) {
}
@Override
- public void visit(NotSymbol notSymbol) {
- String value = notSymbol.getSubPattern()
- .pattern();
+ public void visit(NotSymbol node) {
+ String value = node.getSubPattern()
+ .pattern();
String result = Util.randomString(aRandom, value);
- while (!notSymbol.getSubPattern()
- .matcher(value)
- .matches()) {
+ while (!node.getSubPattern()
+ .matcher(value)
+ .matches()) {
result = Util.randomString(aRandom, result);
}
@@ -90,16 +90,16 @@ public void visit(NotSymbol notSymbol) {
}
@Override
- public void visit(GroupRef groupRef) {
- aStringBuilder.append(aGroupValues.get(groupRef.getIndex()));
+ public void visit(GroupRef node) {
+ aStringBuilder.append(aGroupValues.get(node.getIndex()));
}
@Override
- public void visit(Group group) {
+ public void visit(Group node) {
int start = aStringBuilder.length();
- group.getNode()
- .visit(this);
- aGroupValues.put(group.getIndex(), aStringBuilder.substring(start));
+ node.getNode()
+ .visit(this);
+ aGroupValues.put(node.getIndex(), aStringBuilder.substring(start));
}
public String getString() {
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/NodeVisitor.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/NodeVisitor.java
index 4d5e6500..911c413a 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/NodeVisitor.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/NodeVisitor.java
@@ -29,9 +29,9 @@ public interface NodeVisitor {
void visit(Sequence node);
- void visit(NotSymbol notSymbol);
+ void visit(NotSymbol node);
- void visit(GroupRef groupRef);
+ void visit(GroupRef node);
- void visit(Group group);
+ void visit(Group node);
}
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/NotMatchingGenerationVisitor.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/NotMatchingGenerationVisitor.java
new file mode 100644
index 00000000..12e323b7
--- /dev/null
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/NotMatchingGenerationVisitor.java
@@ -0,0 +1,122 @@
+package com.github.curiousoddman.rgxgen.generator.visitors;
+
+/* **************************************************************************
+ Copyright 2019 Vladislavs Varslavans
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+/* **************************************************************************/
+
+import com.github.curiousoddman.rgxgen.generator.nodes.*;
+import com.github.curiousoddman.rgxgen.parsing.NodeTreeBuilder;
+import com.github.curiousoddman.rgxgen.parsing.dflt.DefaultTreeBuilder;
+
+import java.util.Random;
+import java.util.regex.Pattern;
+
+/**
+ * Generation visitor that produces text intended NOT to match the pattern of the visited nodes.
+ */
+public class NotMatchingGenerationVisitor extends GenerationVisitor {
+    private static final String[] allSymbols = SymbolSet.getAllSymbols();
+
+    public NotMatchingGenerationVisitor() {
+    }
+
+    public NotMatchingGenerationVisitor(Random random) {
+        super(random);
+    }
+
+    /** Visits the inverted (NEGATIVE) symbol set instead; an empty inverse means nothing is generated. */
+    @Override
+    public void visit(SymbolSet node) {
+        String pattern = node.getPattern();
+        SymbolSet symbolSet = new SymbolSet("[^" + pattern.substring(1), node.getSymbols(), SymbolSet.TYPE.NEGATIVE);
+        if (!symbolSet.isEmpty()) {
+            super.visit(symbolSet);
+        }
+    }
+
+    /**
+     * Visits random alternatives until the text generated for this choice no longer matches its pattern.
+     * Known group values are put in front of both the pattern (quoted, one capturing group each) and the
+     * tested text, in case group references are used inside the choice pattern.
+     */
+    @Override
+    public void visit(Choice node) {
+        Node[] nodes = node.getNodes();
+        int pos = aStringBuilder.length();
+        StringBuilder groupsBuilder = new StringBuilder();
+        StringBuilder valuePrefixBuilder = new StringBuilder();
+        int groupValuesUsed = 0;
+        for (int i = 1; groupValuesUsed < aGroupValues.size(); i++) {
+            String s = aGroupValues.get(i);
+            groupsBuilder.append('(');
+            // Some group indexes may be skipped (inlined choices/groups/whatever); those become empty groups.
+            if (s != null) {
+                groupsBuilder.append(Pattern.quote(s));
+                // Only real values go into the prefix: appending null would add the text "null" and break the test.
+                valuePrefixBuilder.append(s);
+                ++groupValuesUsed;
+            }
+            groupsBuilder.append(')');
+        }
+        Pattern pattern = Pattern.compile(groupsBuilder + node.getPattern());
+        do {
+            aStringBuilder.delete(pos, Integer.MAX_VALUE);
+            int i = aRandom.nextInt(nodes.length);
+            nodes[i].visit(this);
+        } while (pattern.matcher(valuePrefixBuilder + aStringBuilder.substring(pos)).matches());
+    }
+
+    /** Appends random symbols, as many as the value has but not equal to it; one symbol for an empty value. */
+    @Override
+    public void visit(FinalSymbol node) {
+        String nodeValue = node.getValue();
+        if (nodeValue.isEmpty()) {
+            aStringBuilder.append(allSymbols[aRandom.nextInt(allSymbols.length)].charAt(0));
+        } else {
+            StringBuilder builder = new StringBuilder(nodeValue.length());
+            do {
+                builder.delete(0, Integer.MAX_VALUE);
+                nodeValue.chars()
+                         .map(c -> allSymbols[aRandom.nextInt(allSymbols.length)].charAt(0))
+                         .forEachOrdered(c -> builder.append((char) c));
+            } while (nodeValue.equals(builder.toString()));
+            aStringBuilder.append(builder);
+        }
+    }
+
+    /** A zero length repeat matches the pattern whatever node is repeated, so a minimum of 0 is raised to 1. */
+    @Override
+    public void visit(Repeat node) {
+        Repeat atLeastOnce = node.getMin() == 0 ? new Repeat(node.getPattern(), node.getNode(), 1, node.getMax()) : node;
+        super.visit(atLeastOnce);
+    }
+
+    /** Builds a node tree from the sub-pattern and appends the text a plain GenerationVisitor generates for it. */
+    @Override
+    public void visit(NotSymbol node) {
+        NodeTreeBuilder builder = new DefaultTreeBuilder(node.getSubPattern().pattern());
+        Node subNode = builder.get();
+        GenerationVisitor generationVisitor = new GenerationVisitor(aRandom);
+        subNode.visit(generationVisitor);
+        aStringBuilder.append(generationVisitor.getString());
+    }
+
+    /** Wraps the stored value of the referenced group in a FinalSymbol and visits it with this visitor. */
+    @Override
+    public void visit(GroupRef node) {
+        // Note: How will this work if we will change only some of the nodes???
+        visit(new FinalSymbol(aGroupValues.get(node.getIndex())));
+    }
+}
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/UniqueGenerationVisitor.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/UniqueGenerationVisitor.java
index d94428ca..a2c612f6 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/UniqueGenerationVisitor.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/UniqueGenerationVisitor.java
@@ -20,8 +20,6 @@
import com.github.curiousoddman.rgxgen.iterators.ReferenceIterator;
import com.github.curiousoddman.rgxgen.iterators.StringIterator;
import com.github.curiousoddman.rgxgen.iterators.suppliers.*;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.HashMap;
@@ -30,9 +28,6 @@
import java.util.function.Supplier;
public class UniqueGenerationVisitor implements NodeVisitor {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(UniqueGenerationVisitor.class);
-
private final List> aIterators = new ArrayList<>();
private final Map> aReferenceIteratorMap;
private final Map aGroupIterators;
@@ -42,7 +37,6 @@ public UniqueGenerationVisitor() {
}
public UniqueGenerationVisitor(Map> referenceIteratorMap, Map groupIterators) {
- LOGGER.trace("Creating\n\trefs: {}\n\tgrps: {}", referenceIteratorMap, groupIterators);
aReferenceIteratorMap = referenceIteratorMap;
aGroupIterators = groupIterators;
}
@@ -86,22 +80,22 @@ public void visit(Sequence node) {
}
@Override
- public void visit(NotSymbol notSymbol) {
- aIterators.add(new NegativeIteratorSupplier(notSymbol.getSubPattern(), new IncrementalLengthIteratorSupplier(new ArrayIteratorSupplier(SymbolSet.getAllSymbols()), 0, -1)));
+ public void visit(NotSymbol node) {
+ aIterators.add(new NegativeIteratorSupplier(node.getSubPattern(), new IncrementalLengthIteratorSupplier(new ArrayIteratorSupplier(SymbolSet.getAllSymbols()), 0, -1)));
}
@Override
- public void visit(GroupRef groupRef) {
- aIterators.add(new ReferenceIteratorSupplier(aReferenceIteratorMap, aGroupIterators, groupRef.getIndex()));
+ public void visit(GroupRef node) {
+ aIterators.add(new ReferenceIteratorSupplier(aReferenceIteratorMap, aGroupIterators, node.getIndex()));
}
@Override
- public void visit(Group group) {
+ public void visit(Group node) {
UniqueGenerationVisitor v = new UniqueGenerationVisitor(aReferenceIteratorMap, aGroupIterators);
- group.getNode()
- .visit(v);
+ node.getNode()
+ .visit(v);
- aIterators.add(new GroupIteratorSupplier(new PermutationsIteratorSupplier(v.aIterators), aReferenceIteratorMap, aGroupIterators, group.getIndex()));
+ aIterators.add(new GroupIteratorSupplier(new PermutationsIteratorSupplier(v.aIterators), aReferenceIteratorMap, aGroupIterators, node.getIndex()));
}
public StringIterator getUniqueStrings() {
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/UniqueValuesCountingVisitor.java b/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/UniqueValuesCountingVisitor.java
index 92a53a89..a443277c 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/UniqueValuesCountingVisitor.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/generator/visitors/UniqueValuesCountingVisitor.java
@@ -48,9 +48,15 @@ public void visit(SymbolSet node) {
@Override
public void visit(Choice node) {
- for (Node n : node.getNodes()) {
- // Just sum up all the choices
- n.visit(this);
+ for (Node vnode : node.getNodes()) {
+ BigInteger count = countSeparately(node, vnode);
+ applyOrSkip(v -> {
+ if (count == null) {
+ return null;
+ }
+
+ return v.add(count);
+ });
}
}
@@ -77,37 +83,43 @@ public void visit(Repeat node) {
@Override
public void visit(Sequence node) {
for (Node vnode : node.getNodes()) {
- UniqueValuesCountingVisitor countingVisitor = new UniqueValuesCountingVisitor(node);
- vnode.visit(countingVisitor);
+ BigInteger count = countSeparately(node, vnode);
applyOrSkip(v -> {
- if (countingVisitor.aCount == null) {
+ if (count == null) {
return null;
}
if (v.equals(BigInteger.ZERO)) {
- return countingVisitor.aCount;
+ return count;
}
- return countingVisitor.aCount.equals(BigInteger.ZERO) ? v : v.multiply(countingVisitor.aCount);
+ return count.equals(BigInteger.ZERO) ? v : v.multiply(count);
});
}
}
+ private BigInteger countSeparately(Node parentNode, Node vnode) {
+ UniqueValuesCountingVisitor countingVisitor = new UniqueValuesCountingVisitor(parentNode);
+ vnode.visit(countingVisitor);
+ return countingVisitor.aCount;
+ }
+
@Override
- public void visit(NotSymbol notSymbol) {
+ public void visit(NotSymbol node) {
aCount = null;
}
@Override
public void visit(GroupRef groupRef) {
- if (aParentNode == null
- || !(aParentNode instanceof Repeat)) {
- // Do nothing. It does not add new unique values.
- } else {
+ if (aParentNode != null
+ && (aParentNode instanceof Repeat || aParentNode instanceof Choice)
+ ) {
// When repeated multiple times - it adds as much unique values as it is repeated. So we should add 1 (it will be used in Repeat for calculation).
// E.g. (a|b)\1{2,3} - captured value of group is repeated either 2 or 3 times - it gives 2 unique values.
aCount = aCount.add(BigInteger.ONE);
}
+ //else
+ // Do nothing. It does not add new unique values apart from above mentioned cases
}
@Override
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/iterators/ReferenceIterator.java b/src/main/java/com/github/curiousoddman/rgxgen/iterators/ReferenceIterator.java
index e2eeb236..e8b799a3 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/iterators/ReferenceIterator.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/iterators/ReferenceIterator.java
@@ -16,13 +16,7 @@
limitations under the License.
/* **************************************************************************/
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
public class ReferenceIterator extends StringIterator {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(ReferenceIterator.class);
-
private StringIterator aOther;
private boolean hasNext = true;
private String aLast;
@@ -50,7 +44,6 @@ public void reset() {
@Override
public boolean hasNext() {
- LOGGER.trace("hasNext = {}, aOther.current() = {}, aLast = {}", hasNext, aOther.current(), aLast);
return hasNext || !aOther.current()
.equals(aLast);
}
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/iterators/StringIterator.java b/src/main/java/com/github/curiousoddman/rgxgen/iterators/StringIterator.java
index cb9117e3..2590c0e5 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/iterators/StringIterator.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/iterators/StringIterator.java
@@ -16,20 +16,13 @@
limitations under the License.
/* **************************************************************************/
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import java.util.Iterator;
public abstract class StringIterator implements Iterator {
- private static final Logger LOGGER = LoggerFactory.getLogger(StringIterator.class);
-
@SuppressWarnings("IteratorNextCanNotThrowNoSuchElementException")
@Override
public String next() {
- String current = nextImpl();
- LOGGER.trace("Produced value: '{}' using '{}'", current, this);
- return current;
+ return nextImpl();
}
/**
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/iterators/suppliers/ChoiceIteratorSupplier.java b/src/main/java/com/github/curiousoddman/rgxgen/iterators/suppliers/ChoiceIteratorSupplier.java
index 80a1563b..21d2ad92 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/iterators/suppliers/ChoiceIteratorSupplier.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/iterators/suppliers/ChoiceIteratorSupplier.java
@@ -33,7 +33,7 @@ public ChoiceIteratorSupplier(List>> suppliers) {
@Override
public StringIterator get() {
- final StringIterator[] stringIterators = aStringIteratorsSuppliers.stream()
+ StringIterator[] stringIterators = aStringIteratorsSuppliers.stream()
.flatMap(Collection::stream)
.map(Supplier::get)
.toArray(StringIterator[]::new);
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/iterators/suppliers/GroupIteratorSupplier.java b/src/main/java/com/github/curiousoddman/rgxgen/iterators/suppliers/GroupIteratorSupplier.java
index 7361e5d8..3794fdbe 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/iterators/suppliers/GroupIteratorSupplier.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/iterators/suppliers/GroupIteratorSupplier.java
@@ -18,8 +18,6 @@
import com.github.curiousoddman.rgxgen.iterators.ReferenceIterator;
import com.github.curiousoddman.rgxgen.iterators.StringIterator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.util.Collections;
import java.util.List;
@@ -27,9 +25,6 @@
import java.util.function.Supplier;
public class GroupIteratorSupplier implements Supplier {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(GroupIteratorSupplier.class);
-
private final Supplier aIteratorSupplier;
private final Map> aReferenceIteratorMap;
private final Map aGroupIteratorsMap;
@@ -40,18 +35,14 @@ public GroupIteratorSupplier(Supplier iteratorSupplier, Map orDefault = aReferenceIteratorMap.getOrDefault(aIndex, Collections.emptyList());
- LOGGER.debug("ReferenceIterators to connect: {}", orDefault);
+ List orDefault = aReferenceIteratorMap.getOrDefault(aIndex, Collections.emptyList());
for (ReferenceIterator referenceIterator : orDefault) {
- LOGGER.debug("GroupRef[{}] connecting to group {} ", aIndex, stringIterator);
referenceIterator.setOther(stringIterator);
}
return stringIterator;
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/iterators/suppliers/ReferenceIteratorSupplier.java b/src/main/java/com/github/curiousoddman/rgxgen/iterators/suppliers/ReferenceIteratorSupplier.java
index 25469ef3..d06e2dbb 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/iterators/suppliers/ReferenceIteratorSupplier.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/iterators/suppliers/ReferenceIteratorSupplier.java
@@ -18,8 +18,6 @@
import com.github.curiousoddman.rgxgen.iterators.ReferenceIterator;
import com.github.curiousoddman.rgxgen.iterators.StringIterator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
@@ -27,9 +25,6 @@
import java.util.function.Supplier;
public class ReferenceIteratorSupplier implements Supplier {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(ReferenceIteratorSupplier.class);
-
private final Map> aReferenceIteratorMap;
private final Map aGroupIteratorsMap;
private final int aIndex;
@@ -38,20 +33,16 @@ public ReferenceIteratorSupplier(Map> reference
aReferenceIteratorMap = referenceIteratorMap;
aGroupIteratorsMap = groupIteratorsMap;
aIndex = index;
- LOGGER.trace("Creating idx {}\n\trefs: {}\n\tgrps: {}", index, referenceIteratorMap, groupIteratorsMap);
}
@Override
public StringIterator get() {
- LOGGER.trace("Getting idx {}\n\trefs: {}\n\tgrps: {}", aIndex, aReferenceIteratorMap, aGroupIteratorsMap);
ReferenceIterator referenceIterator = new ReferenceIterator();
- final StringIterator stringIterator = aGroupIteratorsMap.get(aIndex);
+ StringIterator stringIterator = aGroupIteratorsMap.get(aIndex);
if (stringIterator != null) {
- LOGGER.debug("GroupRef[{}] connecting to group {} ", aIndex, stringIterator);
referenceIterator.setOther(stringIterator);
}
- LOGGER.debug("GroupRef[{}] adding to connection queue group ", aIndex);
aReferenceIteratorMap.computeIfAbsent(aIndex, i -> new ArrayList<>())
.add(referenceIterator);
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/parsing/dflt/CharIterator.java b/src/main/java/com/github/curiousoddman/rgxgen/parsing/dflt/CharIterator.java
index d9a9ccc1..ff4f0bff 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/parsing/dflt/CharIterator.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/parsing/dflt/CharIterator.java
@@ -86,7 +86,7 @@ public Character next() {
try {
return aValue.charAt(aCurrentIndex++);
} catch (StringIndexOutOfBoundsException e) {
- final NoSuchElementException noSuchElementException = new NoSuchElementException(e.getMessage());
+ NoSuchElementException noSuchElementException = new NoSuchElementException(e.getMessage());
noSuchElementException.initCause(e);
throw noSuchElementException;
}
@@ -99,7 +99,7 @@ public Character next() {
* @return substring start from next of {@code length} characters
*/
public String next(int length) {
- final String substring = aValue.substring(aCurrentIndex, aCurrentIndex + length);
+ String substring = aValue.substring(aCurrentIndex, aCurrentIndex + length);
aCurrentIndex += length;
return substring;
}
@@ -220,4 +220,8 @@ public void setBound(int offset) {
public int pos() {
return aCurrentIndex - 1;
}
+
+ public String substringToCurrPos(int pos) {
+ return aValue.substring(pos, aCurrentIndex);
+ }
}
diff --git a/src/main/java/com/github/curiousoddman/rgxgen/parsing/dflt/DefaultTreeBuilder.java b/src/main/java/com/github/curiousoddman/rgxgen/parsing/dflt/DefaultTreeBuilder.java
index 240ae771..cf6c51fb 100644
--- a/src/main/java/com/github/curiousoddman/rgxgen/parsing/dflt/DefaultTreeBuilder.java
+++ b/src/main/java/com/github/curiousoddman/rgxgen/parsing/dflt/DefaultTreeBuilder.java
@@ -43,8 +43,12 @@ public boolean isNegative() {
}
private static final String[] SINGLETON_UNDERSCORE_ARRAY = {"_"};
+ private static final int HEX_RADIX = 16;
+ private static final String[] EMPTY_STRINGS_ARR = new String[0];
+ private static final Node[] EMPTY_NODES_ARR = new Node[0];
- private final CharIterator aCharIterator;
+ private final CharIterator aCharIterator;
+ private final Map aNodesStartPos = new IdentityHashMap<>();
/**
* Helper class for lazy initialization and reuse of some constants that are re-used.
@@ -97,13 +101,29 @@ public DefaultTreeBuilder(String expr) {
aCharIterator = new CharIterator(expr);
}
- private static void sbToFinal(StringBuilder sb, List nodes) {
+ /**
+ * Converts all text aggregated in the StringBuilder into a FinalSymbol node.
+ * Does nothing if sb is empty.
+ *
+ * @param sb StringBuilder that is read and emptied
+ * @param nodes collection of nodes to add the created node to
+ */
+ private void sbToFinal(StringBuilder sb, List nodes) {
if (sb.length() != 0) {
- nodes.add(new FinalSymbol(sb.toString()));
- sb.delete(0, sb.length());
+ FinalSymbol finalSymbol = new FinalSymbol(sb.toString());
+ aNodesStartPos.put(finalSymbol, aCharIterator.pos() - finalSymbol.getValue()
+ .length());
+ nodes.add(finalSymbol);
+ sb.delete(0, Integer.MAX_VALUE);
}
}
+ /**
+ * Determines the GroupType starting from the current position.
+ * After execution the cursor is on the first unprocessed character.
+ *
+ * @return type of the group (see {@link GroupType})
+ */
private GroupType processGroupType() {
switch (aCharIterator.next(2)) {
case "?=": // Positive Lookahead does not affect generation.
@@ -117,7 +137,7 @@ private GroupType processGroupType() {
case "?<":
GroupType res = GroupType.POSITIVE_LOOKBEHIND;
- final char next = aCharIterator.next();
+ char next = aCharIterator.next();
if (next == '!') {
res = GroupType.NEGATIVE_LOOKBEHIND;
} else if (next != '=') { // Positive Lookbehind does not affect generation.
@@ -131,22 +151,24 @@ private GroupType processGroupType() {
}
}
- private static Node handleGroupEnd(StringBuilder sb, List nodes, boolean isChoice, List choices, Integer captureGroupIndex) {
+ private Node handleGroupEnd(int startPos, StringBuilder sb, List nodes, boolean isChoice, List choices, Integer captureGroupIndex) {
if (sb.length() == 0 && nodes.isEmpty()) {
// Special case when '(a|)' is used - like empty
- nodes.add(new FinalSymbol(""));
+ FinalSymbol finalSymbol = new FinalSymbol("");
+ aNodesStartPos.put(finalSymbol, startPos);
+ nodes.add(finalSymbol);
} else {
sbToFinal(sb, nodes);
}
if (isChoice) {
- choices.add(sequenceOrNot(nodes, choices, false, null));
+ choices.add(sequenceOrNot(startPos, nodes, choices, false, null));
nodes.clear();
}
- return sequenceOrNot(nodes, choices, isChoice, captureGroupIndex);
+ return sequenceOrNot(startPos, nodes, choices, isChoice, captureGroupIndex);
}
- private Node parseGroup(GroupType currentGroupType) {
+ private Node parseGroup(int groupStartPos, GroupType currentGroupType) {
Integer captureGroupIndex = null;
if (currentGroupType == GroupType.CAPTURE_GROUP) {
captureGroupIndex = aNextGroupIndex++;
@@ -155,6 +177,7 @@ private Node parseGroup(GroupType currentGroupType) {
List nodes = new ArrayList<>();
StringBuilder sb = new StringBuilder(aCharIterator.remaining());
boolean isChoice = false;
+ int choicesStartPos = groupStartPos;
while (aCharIterator.hasNext()) {
char c = aCharIterator.next();
@@ -166,30 +189,37 @@ private Node parseGroup(GroupType currentGroupType) {
case '(':
sbToFinal(sb, nodes);
+ int intGroupStartPos = aCharIterator.pos();
GroupType groupType = processGroupType();
if (groupType.isNegative()) {
+ // FIXME: Does this work for nested groups? E.g. for (x(asd)) the first ')' found is the one closing the inner group, not the one matching the first '('.
String subPattern = aCharIterator.nextUntil(')');
- nodes.add(new NotSymbol(subPattern));
+ NotSymbol notSymbol = new NotSymbol(subPattern);
+ aNodesStartPos.put(notSymbol, intGroupStartPos);
+ nodes.add(notSymbol);
aCharIterator.next(); // Past the closing ')'
} else {
- nodes.add(parseGroup(groupType));
+ nodes.add(parseGroup(intGroupStartPos, groupType));
}
break;
case '|':
if (sb.length() == 0 && nodes.isEmpty()) {
// Special case when '(|a)' is used - like empty or something
- choices.add(new FinalSymbol(""));
+ FinalSymbol finalSymbol = new FinalSymbol("");
+ aNodesStartPos.put(finalSymbol, aCharIterator.pos() + 1);
+ choices.add(finalSymbol);
} else {
sbToFinal(sb, nodes);
- choices.add(sequenceOrNot(nodes, choices, false, null));
+ choices.add(sequenceOrNot(choicesStartPos, nodes, choices, false, null));
+ choicesStartPos = aCharIterator.pos() + 1;
nodes.clear();
}
isChoice = true;
break;
case ')':
- return handleGroupEnd(sb, nodes, isChoice, choices, captureGroupIndex);
+ return handleGroupEnd(groupStartPos, sb, nodes, isChoice, choices, captureGroupIndex);
case '{':
case '*':
@@ -206,13 +236,16 @@ private Node parseGroup(GroupType currentGroupType) {
sb.deleteCharAt(sb.length() - 1);
sbToFinal(sb, nodes);
repeatNode = new FinalSymbol(String.valueOf(charToRepeat));
+ aNodesStartPos.put(repeatNode, aCharIterator.pos() - 1);
}
nodes.add(handleRepeat(c, repeatNode));
break;
case '.':
sbToFinal(sb, nodes);
- nodes.add(new SymbolSet());
+ SymbolSet symbolSet = new SymbolSet();
+ aNodesStartPos.put(symbolSet, aCharIterator.pos());
+ nodes.add(symbolSet);
break;
case '\\':
@@ -225,7 +258,7 @@ private Node parseGroup(GroupType currentGroupType) {
}
}
- return handleGroupEnd(sb, nodes, isChoice, choices, captureGroupIndex);
+ return handleGroupEnd(groupStartPos, sb, nodes, isChoice, choices, captureGroupIndex);
}
/**
@@ -244,13 +277,27 @@ private int parseHexadecimal() {
} else {
hexValue = aCharIterator.next(2);
}
- return Integer.parseInt(hexValue, 16);
+ return Integer.parseInt(hexValue, HEX_RADIX);
}
+ /**
+ * Creates a group reference node.
+ * Parsing starts after the escape character AND the first digit of the group index.
+ * After execution aCharIterator is positioned right after the group index digits.
+ *
+ * @param groupRefAllowed whether a group reference is allowed at this position
+ * @param nodes collection to which the created group reference node is added
+ * @param firstCharacter first digit of the group index, already consumed by the caller
+ * @throws RgxGenParseException if groupRefAllowed is false
+ */
private void handleGroupReference(boolean groupRefAllowed, Collection nodes, char firstCharacter) {
if (groupRefAllowed) {
+ int startPos = aCharIterator.pos() - 1;
String digitsSubstring = aCharIterator.takeWhile(Character::isDigit);
- nodes.add(new GroupRef(Integer.parseInt(firstCharacter + digitsSubstring)));
+ String groupNumber = firstCharacter + digitsSubstring;
+ GroupRef groupRef = new GroupRef("\\" + groupNumber, Integer.parseInt(groupNumber));
+ aNodesStartPos.put(groupRef, startPos);
+ nodes.add(groupRef);
} else {
throw new RgxGenParseException("Group ref is not expected here. " + aCharIterator.context());
}
@@ -267,23 +314,24 @@ private void handleGroupReference(boolean groupRefAllowed, Collection node
*/
private void handleEscapedCharacter(StringBuilder sb, List nodes, boolean groupRefAllowed) {
char c = aCharIterator.next();
+ SymbolSet symbolSet = null;
switch (c) {
case 'd': // Any decimal digit
case 'D': // Any non-decimal digit
sbToFinal(sb, nodes);
- nodes.add(new SymbolSet(CONST_PROVIDER.getDigits(), c == 'd' ? SymbolSet.TYPE.POSITIVE : SymbolSet.TYPE.NEGATIVE));
+ symbolSet = new SymbolSet("\\" + c, CONST_PROVIDER.getDigits(), c == 'd' ? SymbolSet.TYPE.POSITIVE : SymbolSet.TYPE.NEGATIVE);
break;
case 's': // Any white space
case 'S': // Any non-white space
sbToFinal(sb, nodes);
- nodes.add(new SymbolSet(CONST_PROVIDER.getWhitespaces(), c == 's' ? SymbolSet.TYPE.POSITIVE : SymbolSet.TYPE.NEGATIVE));
+ symbolSet = new SymbolSet("\\" + c, CONST_PROVIDER.getWhitespaces(), c == 's' ? SymbolSet.TYPE.POSITIVE : SymbolSet.TYPE.NEGATIVE);
break;
case 'w': // Any word characters
case 'W': // Any non-word characters
sbToFinal(sb, nodes);
- nodes.add(new SymbolSet(CONST_PROVIDER.getWordCharRanges(), SINGLETON_UNDERSCORE_ARRAY, c == 'w' ? SymbolSet.TYPE.POSITIVE : SymbolSet.TYPE.NEGATIVE));
+ symbolSet = new SymbolSet("\\" + c, CONST_PROVIDER.getWordCharRanges(), SINGLETON_UNDERSCORE_ARRAY, c == 'w' ? SymbolSet.TYPE.POSITIVE : SymbolSet.TYPE.NEGATIVE);
break;
// Hex character:
@@ -311,6 +359,10 @@ private void handleEscapedCharacter(StringBuilder sb, List nodes, boolean
break;
}
+ if (symbolSet != null) {
+ aNodesStartPos.put(symbolSet, aCharIterator.pos() - 1);
+ nodes.add(symbolSet);
+ }
}
/**
@@ -319,7 +371,7 @@ private void handleEscapedCharacter(StringBuilder sb, List nodes, boolean
* @param repeatNode node that should be repeated
* @return Repeat node
*/
- private Repeat handleRepeatInCurvyBraces(Node repeatNode) {
+ private Repeat handleRepeatInCurvyBraces(int startPos, Node repeatNode) {
StringBuilder sb = new StringBuilder(10);
int min = -1;
int contextIndex = aCharIterator.pos();
@@ -339,13 +391,13 @@ private Repeat handleRepeatInCurvyBraces(Node repeatNode) {
case '}':
if (min == -1) {
- return new Repeat(repeatNode, Integer.parseInt(sb.toString()));
+ return new Repeat(aCharIterator.substringToCurrPos(startPos), repeatNode, Integer.parseInt(sb.toString()));
} else {
if (sb.length() == 0) {
- return Repeat.minimum(repeatNode, min);
+ return Repeat.minimum(aCharIterator.substringToCurrPos(startPos), repeatNode, min);
} else {
try {
- return new Repeat(repeatNode, min, Integer.parseInt(sb.toString()));
+ return new Repeat(aCharIterator.substringToCurrPos(startPos), repeatNode, min, Integer.parseInt(sb.toString()));
} catch (NumberFormatException e) {
throw new RgxGenParseException("Malformed upper bound number." + aCharIterator.context(), e);
}
@@ -372,14 +424,26 @@ private Repeat handleRepeatInCurvyBraces(Node repeatNode) {
* @return Repeat node
*/
private Repeat handleRepeat(char c, Node repeatNode) {
- if (c == '*') {
- return Repeat.minimum(repeatNode, 0);
- } else if (c == '?') {
- return new Repeat(repeatNode, 0, 1);
- } else if (c == '+') {
- return Repeat.minimum(repeatNode, 1);
- } else if (c == '{') {
- return handleRepeatInCurvyBraces(repeatNode);
+ int startPos = aNodesStartPos.get(repeatNode);
+ Repeat node = null;
+ switch (c) {
+ case '*':
+ node = Repeat.minimum(aCharIterator.substringToCurrPos(startPos), repeatNode, 0);
+ break;
+ case '?':
+ node = new Repeat(aCharIterator.substringToCurrPos(startPos), repeatNode, 0, 1);
+ break;
+ case '+':
+ node = Repeat.minimum(aCharIterator.substringToCurrPos(startPos), repeatNode, 1);
+ break;
+ case '{':
+ node = handleRepeatInCurvyBraces(startPos, repeatNode);
+ break;
+ }
+
+ if (node != null) {
+ aNodesStartPos.put(node, startPos);
+ return node;
}
throw new RgxGenParseException("Unknown repetition character '" + c + '\'' + aCharIterator.context());
@@ -394,7 +458,7 @@ private Repeat handleRepeat(char c, Node repeatNode) {
* @param captureGroupIndex index of capture group
* @return Group, Node
*/
- private static Node sequenceOrNot(List nodes, List choices, boolean isChoice, Integer captureGroupIndex) {
+ private Node sequenceOrNot(int startPos, List nodes, List choices, boolean isChoice, Integer captureGroupIndex) {
Node resultNode;
if (nodes.size() == 1) {
@@ -404,31 +468,32 @@ private static Node sequenceOrNot(List nodes, List choices, boolean
if (choices.isEmpty()) {
throw new RuntimeException("Empty nodes");
}
- resultNode = new Choice(choices.toArray(new Node[0]));
+ resultNode = new Choice(aCharIterator.substringToCurrPos(startPos), choices.toArray(EMPTY_NODES_ARR));
} else {
if (nodes.isEmpty()) {
throw new RuntimeException("Empty nodes");
}
- resultNode = new Sequence(nodes.toArray(new Node[0]));
+ resultNode = new Sequence(aCharIterator.substringToCurrPos(startPos), nodes.toArray(EMPTY_NODES_ARR));
}
}
+ aNodesStartPos.put(resultNode, startPos);
if (captureGroupIndex == null) {
return resultNode;
} else {
- return new Group(captureGroupIndex, resultNode);
+ Group group = new Group(aCharIterator.substringToCurrPos(startPos), captureGroupIndex, resultNode);
+ aNodesStartPos.put(group, startPos);
+ return group;
}
}
- private static boolean handleRange(boolean rangeStarted, StringBuilder sb, List symbolRanges) {
+ private static void handleRange(boolean rangeStarted, StringBuilder sb, List symbolRanges) {
if (rangeStarted) {
char lastChar = sb.charAt(sb.length() - 1);
char firstChar = sb.charAt(sb.length() - 2);
sb.delete(sb.length() - 2, sb.length());
symbolRanges.add(new SymbolSet.SymbolRange(firstChar, lastChar));
}
-
- return false;
}
private boolean handleBackslashCharacter(boolean rangeStarted, StringBuilder sb, List symbolRanges) {
@@ -442,7 +507,8 @@ private boolean handleBackslashCharacter(boolean rangeStarted, StringBuilder sb,
if (!nodes.isEmpty()) {
throw new RgxGenParseException("Cannot make range with a shorthand escape sequences before '" + aCharIterator.context() + '\'');
}
- rangeStarted = handleRange(true, sb, symbolRanges);
+ handleRange(true, sb, symbolRanges);
+ rangeStarted = false;
} else {
StringBuilder tmpSb = new StringBuilder(0);
handleEscapedCharacter(tmpSb, nodes, false);
@@ -491,11 +557,14 @@ private Node handleCharacterVariations() {
handleRange(rangeStarted, sb, symbolRanges);
String[] strings;
if (sb.length() == 0) {
- strings = new String[0];
+ strings = EMPTY_STRINGS_ARR;
} else {
strings = Util.stringToCharsSubstrings(sb.toString());
}
- return new SymbolSet(symbolRanges, strings, symbolSetType);
+
+ SymbolSet symbolSet = new SymbolSet(aCharIterator.substringToCurrPos(openSquareBraceIndex), symbolRanges, strings, symbolSetType);
+ aNodesStartPos.put(symbolSet, openSquareBraceIndex);
+ return symbolSet;
case '-':
if (aCharIterator.peek() == ']' || aCharIterator.peek(-2) == '[') {
@@ -511,7 +580,8 @@ private Node handleCharacterVariations() {
default:
sb.append(c);
- rangeStarted = handleRange(rangeStarted, sb, symbolRanges);
+ handleRange(rangeStarted, sb, symbolRanges);
+ rangeStarted = false;
}
}
@@ -527,7 +597,7 @@ public void build() {
aCharIterator.setBound(-1);
}
- aNode = parseGroup(GroupType.NON_CAPTURE_GROUP);
+ aNode = parseGroup(aCharIterator.pos() + 1, GroupType.NON_CAPTURE_GROUP);
if (aCharIterator.hasNext()) {
throw new RgxGenParseException("Expression was not fully parsed: " + aCharIterator.context());
}
diff --git a/src/test/java/com/github/curiousoddman/rgxgen/CombinedRepeatableTests.java b/src/test/java/com/github/curiousoddman/rgxgen/CombinedRepeatableTests.java
new file mode 100644
index 00000000..1dcead8c
--- /dev/null
+++ b/src/test/java/com/github/curiousoddman/rgxgen/CombinedRepeatableTests.java
@@ -0,0 +1,65 @@
+package com.github.curiousoddman.rgxgen;
+
+import com.github.curiousoddman.rgxgen.generator.visitors.GenerationVisitor;
+import com.github.curiousoddman.rgxgen.generator.visitors.NotMatchingGenerationVisitor;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Random;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.junit.Assert.*;
+
+@RunWith(Parameterized.class)
+public class CombinedRepeatableTests extends CombinedTestTemplate {
+ @Parameterized.Parameters(name = "{1}: {0}")
+ public static Collection