Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the identifier detection regex to support non-ascii identifiers #291

Merged
merged 1 commit into from
Jul 5, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Improve the identifier detection regex to support non-ascii identifiers (and also support identifiers that start with @)

Fixes #269
  • Loading branch information
metoule committed Jul 4, 2023
commit 2c5edd88579d69c72f50264f91479e9a7b5eb15d
19 changes: 12 additions & 7 deletions src/DynamicExpresso.Core/Detector.cs
Original file line number Diff line number Diff line change
@@ -11,11 +11,11 @@ internal class Detector
{
private readonly ParserSettings _settings;

private static readonly string Type = @"\b(?<type>[a-zA-Z_]\w*)\b";
private static readonly string Id = @"\b(?<id>[a-zA-Z_]\w*)\b";
private static readonly Regex LambdaDetectionRegex = new Regex($@"(\((((?<withtype>({Type}\s+)?{Id}))(\s*,\s*)?)+\)|(?<withtype>{Id}))\s*=>", RegexOptions.Compiled);
private static readonly Regex IdentifiersDetectionRegex = new Regex(@"(?<id>@?[\p{L}\p{Nl}_][\p{L}\p{Nl}\p{Nd}\p{Mn}\p{Mc}\p{Pc}\p{Cf}_]*)", RegexOptions.Compiled);

private static readonly Regex IdentifiersDetectionRegex = new Regex(@"([^\.]|^)\b(?<id>[a-zA-Z_]\w*)\b", RegexOptions.Compiled);
private static readonly string Id = IdentifiersDetectionRegex.ToString();
private static readonly string Type = Id.Replace("<id>", "<type>");
private static readonly Regex LambdaDetectionRegex = new Regex($@"(\((((?<withtype>({Type}\s+)?{Id}))(\s*,\s*)?)+\)|(?<withtype>{Id}))\s*=>", RegexOptions.Compiled);

private static readonly Regex StringDetectionRegex = new Regex(@"(?<!\\)?"".*?(?<!\\)""", RegexOptions.Compiled);
private static readonly Regex CharDetectionRegex = new Regex(@"(?<!\\)?'.{1,2}?(?<!\\)'", RegexOptions.Compiled);
@@ -57,8 +57,8 @@ public IdentifiersInfo DetectIdentifiers(string expression)
t++;
}

// there might be several lambda parameters with the same name;
// in that case, we ignore the detected type
// there might be several lambda parameters with the same name
// -> in that case, we ignore the detected type
if (lambdaParameters.TryGetValue(identifier, out Identifier already) && already.Expression.Type != type)
type = typeof(object);

@@ -70,11 +70,16 @@ public IdentifiersInfo DetectIdentifiers(string expression)

foreach (Match match in IdentifiersDetectionRegex.Matches(expression))
{
var identifier = match.Groups["id"].Value;
var idGroup = match.Groups["id"];
var identifier = idGroup.Value;

if (IsReservedKeyword(identifier))
continue;

// don't consider member accesses as identifiers (e.g. "x.Length" will only return x but not Length)
if (idGroup.Index > 0 && expression[idGroup.Index - 1] == '.')
continue;

if (_settings.Identifiers.TryGetValue(identifier, out Identifier knownIdentifier))
knownIdentifiers.Add(knownIdentifier);
else if (lambdaParameters.TryGetValue(identifier, out Identifier knownLambdaParam))
89 changes: 57 additions & 32 deletions test/DynamicExpresso.UnitTest/DetectIdentifiersTest.cs
Original file line number Diff line number Diff line change
@@ -39,7 +39,7 @@ public void Detect_unknown_identifiers()
var detectedIdentifiers = target.DetectIdentifiers("x + y");

CollectionAssert.AreEqual(
new []{ "x", "y"},
new[] { "x", "y" },
detectedIdentifiers.UnknownIdentifiers.ToArray());
}

@@ -161,36 +161,30 @@ public void Detect_known_identifiers_types()
}

[Test]
public void Detect_identifiers_inside_other_expressions()
[TestCase("x + y")]
[TestCase("x + y + 654")]
[TestCase("x + y + 654.564")]
[TestCase("x.method + y[0]")]
[TestCase("x+y")]
[TestCase("x[y]")]
[TestCase("x.method1.method2(y)")]
[TestCase("x + y + \"z\"")]
[TestCase("x + y + \"lorem ipsum\"")]
[TestCase(@"x + y + ""literal \""2""")]
[TestCase("x + y + \"\"")]
[TestCase("x + y + 'z'")]
[TestCase("x + y + '\\a'")]
[TestCase("x + y + '\\''")]
[TestCase("x+y")]
public void Detect_identifiers_inside_other_expressions(string testCase)
{
var testCases = new[] {
"x + y",
"x + y + 654",
"x + y + 654.564",
"x.method + y[0]",
"x+y",
"x[y]",
"x.method1.method2(y)",
"x + y + \"z\"",
"x + y + \"lorem ipsum\"",
@"x + y + ""literal \""2""",
"x + y + \"\"",
"x + y + 'z'",
"x + y + '\\a'",
"x + y + '\\''",
"x+y",
};

foreach (var testCase in testCases)
{
var target = new Interpreter();
var target = new Interpreter();

var detectedIdentifiers = target.DetectIdentifiers(testCase);
var detectedIdentifiers = target.DetectIdentifiers(testCase);

Assert.AreEqual("x", detectedIdentifiers.UnknownIdentifiers.ElementAt(0));
Assert.AreEqual("y", detectedIdentifiers.UnknownIdentifiers.ElementAt(1));
Assert.AreEqual(2, detectedIdentifiers.UnknownIdentifiers.Count());
}
Assert.AreEqual(2, detectedIdentifiers.UnknownIdentifiers.Count());
Assert.AreEqual("x", detectedIdentifiers.UnknownIdentifiers.ElementAt(0));
Assert.AreEqual("y", detectedIdentifiers.UnknownIdentifiers.ElementAt(1));
}

[Test]
@@ -233,15 +227,15 @@ public void Detect_identifiers_inside_lambda_expression_multiple_params()
{
var target = new Interpreter(InterpreterOptions.Default | InterpreterOptions.LambdaExpressions);

var detectedIdentifiers = target.DetectIdentifiers("(x, y) => x + y");
var detectedIdentifiers = target.DetectIdentifiers("(x, _1y) => x + _1y");
Assert.IsEmpty(detectedIdentifiers.UnknownIdentifiers);

Assert.AreEqual(2, detectedIdentifiers.Identifiers.Count());

Assert.AreEqual("x", detectedIdentifiers.Identifiers.ElementAt(0).Name);
Assert.AreEqual(typeof(object), detectedIdentifiers.Identifiers.ElementAt(0).Expression.Type);

Assert.AreEqual("y", detectedIdentifiers.Identifiers.ElementAt(1).Name);
Assert.AreEqual("_1y", detectedIdentifiers.Identifiers.ElementAt(1).Name);
Assert.AreEqual(typeof(object), detectedIdentifiers.Identifiers.ElementAt(1).Expression.Type);
}

@@ -250,7 +244,7 @@ public void Detect_identifiers_inside_lambda_expression_multiple_params_with_typ
{
var target = new Interpreter(InterpreterOptions.Default | InterpreterOptions.LambdaExpressions);

var detectedIdentifiers = target.DetectIdentifiers("(int x, string y) => x + y");
var detectedIdentifiers = target.DetectIdentifiers("(int x, string @class) => x + @class");
Assert.IsEmpty(detectedIdentifiers.UnknownIdentifiers);

Assert.AreEqual(2, detectedIdentifiers.Types.Count());
@@ -264,7 +258,7 @@ public void Detect_identifiers_inside_lambda_expression_multiple_params_with_typ
Assert.AreEqual("x", detectedIdentifiers.Identifiers.ElementAt(0).Name);
Assert.AreEqual(typeof(int), detectedIdentifiers.Identifiers.ElementAt(0).Expression.Type);

Assert.AreEqual("y", detectedIdentifiers.Identifiers.ElementAt(1).Name);
Assert.AreEqual("@class", detectedIdentifiers.Identifiers.ElementAt(1).Name);
Assert.AreEqual(typeof(string), detectedIdentifiers.Identifiers.ElementAt(1).Expression.Type);
}

@@ -299,5 +293,36 @@ public void Detect_identifiers_inside_lambda_expression_duplicate_param_name()
Assert.AreEqual("b", detectedIdentifiers.Identifiers.ElementAt(4).Name);
Assert.AreEqual(typeof(string), detectedIdentifiers.Identifiers.ElementAt(4).Expression.Type);
}

[Test]
[TestCase("@class")]
[TestCase("français_holé")]
[TestCase("中文")]
[TestCase("_1中0文")]
[TestCase("日本語")]
[TestCase("русский")]
public void Detect_all_identifiers_including_not_ascii(string identifier)
{
    // The identifier under test is the receiver of a member call, so it is
    // the only name expected to be reported as unknown.
    var interpreter = new Interpreter(InterpreterOptions.Default | InterpreterOptions.LambdaExpressions);
    var expression = $"1 + {identifier}.Method()";

    var unknown = interpreter.DetectIdentifiers(expression).UnknownIdentifiers.ToArray();

    // Exactly one unknown identifier, equal to the one that was injected.
    CollectionAssert.AreEqual(new[] { identifier }, unknown);
}

[Test]
public void Dont_detect_members_with_at()
{
    var interpreter = new Interpreter(InterpreterOptions.Default | InterpreterOptions.LambdaExpressions);

    // "@class" is the receiver and must be reported; "@if" directly follows
    // a '.' (member access), so it must not show up as an identifier.
    var unknown = interpreter.DetectIdentifiers("@class.@if()").UnknownIdentifiers.ToArray();

    CollectionAssert.AreEqual(new[] { "@class" }, unknown);
}
}
}