Skip to content

Commit

Permalink
MatchExpressions: Support terminating by len(), CaptureSlashesByDefau…
Browse files Browse the repository at this point in the history
…lt is now configurable and overridden with **, ends(val) now captures val properly, reserved names are prohibited for var names, uints are supported, char classes now work even with ignore case (but aren't transformed)
  • Loading branch information
lilith committed Jun 12, 2024
1 parent c5f0b72 commit c41bc3c
Show file tree
Hide file tree
Showing 5 changed files with 345 additions and 18 deletions.
8 changes: 7 additions & 1 deletion src/Imazen.Routing/Matching/ExpressionParsingHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,12 @@ internal static bool ValidateSegmentName(string name, ReadOnlySpan<char> segment
{
if (name.Length == 0)
{
error = "Don't use empty segment names, only null or valid";
error = "Don't use empty segment names, only null or valid (internal error, callers should filter empty names)";
return false;
}
if (name.Contains('*') || name.Contains('?'))
{
// Nullified by external logic, actually.
error =
$"Invalid segment expression {{{segmentExpression.ToString()}}} Conditions and modifiers such as * and ? belong after the colon. Ex: {{name:*:?}} ";
return false;
Expand All @@ -121,6 +122,11 @@ internal static bool ValidateSegmentName(string name, ReadOnlySpan<char> segment
error = $"Invalid name '{name}' in segment expression {{{segmentExpression.ToString()}}}. Names must start with a letter or underscore, and contain only letters, numbers, or underscores";
return false;
}
if (StringCondition.IsReservedName(name))
{
error = $"Did you forget to put : before your condition? '{name}' cannot be used as a variable name in {{{segmentExpression.ToString()}}} (for clarity), since it has a function.";
return false;
}
error = null;
return true;
}
Expand Down
119 changes: 111 additions & 8 deletions src/Imazen.Routing/Matching/MatchExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

namespace Imazen.Routing.Matching;


// TODO: split into data used during runtime and during parsing..
public record MatchingContext
{
public bool OrdinalIgnoreCase { get; init; }
Expand All @@ -21,6 +21,11 @@ public record MatchingContext
OrdinalIgnoreCase = false,
SupportedImageExtensions = new []{"jpg", "jpeg", "png", "gif", "webp"}
};

/// <summary>
/// If true, all segments will capture the / character by default. If false, segments must specify {:**} to capture slashes.
/// </summary>
public bool CaptureSlashesByDefault { get; init; }
}

public partial record class MatchExpression
Expand Down Expand Up @@ -181,6 +186,22 @@ public bool TryMatchVerbose(in MatchingContext context, in ReadOnlyMemory<char>
return true;
}

/// <summary>
/// Matches <paramref name="input"/> against this expression via TryMatchVerbose and
/// returns the successful result, throwing when the match fails.
/// </summary>
/// <param name="context">Matching options forwarded to TryMatchVerbose.</param>
/// <param name="input">The text to match.</param>
/// <returns>The value of the result produced by TryMatchVerbose.</returns>
/// <exception cref="ArgumentException">
/// The expression did not match; the message carries the error reported by TryMatchVerbose.
/// </exception>
public MatchExpressionSuccess MatchOrThrow(in MatchingContext context, in ReadOnlyMemory<char> input)
{
    // Success path first; everything below the guard is the failure path.
    if (TryMatchVerbose(context, input, out var success, out var failure))
    {
        return success!.Value;
    }

    throw new ArgumentException($"Expression {this} incorrectly failed to match {input} with error {failure}");
}

/// <summary>
/// Matches <paramref name="input"/> with MatchOrThrow and returns the captures as a
/// name-to-value dictionary.
/// </summary>
/// <param name="context">Matching options forwarded to MatchOrThrow.</param>
/// <param name="input">The text to match.</param>
/// <returns>
/// One entry per capture, keyed by capture name, with the captured value converted to a
/// string. Empty when the match carries no capture list.
/// </returns>
/// <exception cref="ArgumentException">Thrown by MatchOrThrow when the expression does not match.</exception>
public Dictionary<string,string> CaptureDictOrThrow(in MatchingContext context, string input)
{
    var match = MatchOrThrow(context, input.AsMemory());
    // Captures may be null (the original needed a null-forgiving '!' here); a successful
    // match without a capture list yields an empty dictionary instead of a NullReferenceException.
    return match.Captures?.ToDictionary(x => x.Name, x => x.Value.ToString())
           ?? new Dictionary<string, string>();
}

public bool TryMatch(in MatchingContext context, in ReadOnlyMemory<char> input, [NotNullWhen(true)] out MatchExpressionSuccess? result,
[NotNullWhen(false)] out string? error, [NotNullWhen(false)] out int? failingSegmentIndex)
{
Expand All @@ -206,6 +227,7 @@ public bool TryMatch(in MatchingContext context, in ReadOnlyMemory<char> input,
var boundaryStarts = -1;
var boundaryFinishes = -1;
var foundBoundaryOrEnd = false;
SegmentBoundary foundBoundary = default;
var closingBoundary = false;
// No more segments to try?
if (currentSegment >= Segments.Length)
Expand All @@ -215,6 +237,7 @@ public bool TryMatch(in MatchingContext context, in ReadOnlyMemory<char> input,
// We still have an open segment, so we close it and capture it.
boundaryStarts = boundaryFinishes = inputSpan.Length;
foundBoundaryOrEnd = true;
foundBoundary = default;
closingBoundary = true;
}else if (remainingInput.Length == 0)
{
Expand Down Expand Up @@ -282,10 +305,11 @@ public bool TryMatch(in MatchingContext context, in ReadOnlyMemory<char> input,
boundaryStarts = s == -1 ? -1 : charactersConsumed + s;
boundaryFinishes = f == -1 ? -1 : charactersConsumed + f;
foundBoundaryOrEnd = searchResult;
foundBoundary = searchSegment;
}
if (!foundBoundaryOrEnd)
{

foundBoundary = default;
if (Segments[currentSegment].IsOptional)
{
// We didn't find the segment, but it's optional, so we can skip it.
Expand Down Expand Up @@ -318,7 +342,12 @@ public bool TryMatch(in MatchingContext context, in ReadOnlyMemory<char> input,
var variableStart = openSegment.StartsOn.IncludesMatchingTextInVariable
? openSegmentAbsoluteStart
: openSegmentAbsoluteEnd;
var variableEnd = boundaryStarts;

var variableEnd = (foundBoundary != default && foundBoundary.IsEndingBoundary &&
foundBoundary.IncludesMatchingTextInVariable)
? boundaryFinishes
: boundaryStarts;

var conditionsOk = openSegment.ConditionsMatch(context, inputSpan[variableStart..variableEnd]);
if (!conditionsOk)
{
Expand Down Expand Up @@ -431,9 +460,9 @@ internal static bool TryParseSegmentExpression(MatchingContext context,
}
// it's a literal
// Check for invalid characters like &
if (expr.IndexOfAny(new[] {'*', '?'}) != -1)
if (expr.IndexOf('*') != -1 || expr.IndexOf('?') != -1)
{
error = "Literals cannot contain * or ? operators, they must be enclosed in {} such as {name:?} or {name:*:?}";
error = "Literals cannot contain * or ? operators, they must be enclosed in {} such as {name:?} or {name:**:?}";
segment = null;
return false;
}
Expand Down Expand Up @@ -534,11 +563,20 @@ private static bool TryParseConditionOrSegment(MatchingContext context,
var makeOptional = (globChars & ExpressionParsingHelpers.GlobChars.Optional) ==
ExpressionParsingHelpers.GlobChars.Optional
|| conditionSpan.Is("optional");
var hasDoubleStar = (globChars & ExpressionParsingHelpers.GlobChars.DoubleStar) ==
ExpressionParsingHelpers.GlobChars.DoubleStar;
if (makeOptional)
{
segmentStartLogic ??= SegmentBoundary.DefaultStart;
segmentStartLogic = segmentStartLogic.Value.SetOptional(true);
}
if (!hasDoubleStar && context.CaptureSlashesByDefault)
{
conditions ??= new List<StringCondition>();
// exclude '/' from chars
conditions.Add(StringCondition.ExcludeForwardSlash);
}


// We ignore the glob chars, they don't constrain behavior any.
if (globChars != ExpressionParsingHelpers.GlobChars.None
Expand All @@ -554,6 +592,8 @@ private static bool TryParseConditionOrSegment(MatchingContext context,
}

var functionName = functionNameMemory.ToString() ?? throw new InvalidOperationException("Unreachable code");


var conditionConsumed = false;
if (args is { Count: 1 })
{
Expand All @@ -570,7 +610,7 @@ private static bool TryParseConditionOrSegment(MatchingContext context,
{
if (segmentEndLogic is { HasDefaultEndWhen: false })
{
error = $"The segment {segmentText.ToString()} has conflicting end conditions; do not mix equals and ends-with and suffix conditions";
error = $"The segment {segmentText.ToString()} has conflicting end conditions; do not mix equals, length, ends-with, and suffix conditions";
return false;
}
segmentEndLogic = sb;
Expand All @@ -597,6 +637,7 @@ private static bool TryParseConditionOrSegment(MatchingContext context,
//TODO: add more context to error
return false;
}

conditions.Add(condition.Value);
}
return true;
Expand Down Expand Up @@ -657,7 +698,8 @@ private enum SegmentBoundaryFunction
IgnoreCase = 16,
IncludeInVar = 32,
EndingBoundary = 64,
SegmentOptional = 128
SegmentOptional = 128,
FixedLength = 256
}

private static SegmentBoundaryFunction FromString(string name, bool useIgnoreCaseVariant, bool segmentOptional)
Expand All @@ -669,6 +711,7 @@ private static SegmentBoundaryFunction FromString(string name, bool useIgnoreCas
"ends_with" or "ends-with" or "ends" => SegmentBoundaryFunction.StartsWith | SegmentBoundaryFunction.IncludeInVar | SegmentBoundaryFunction.EndingBoundary,
"prefix" => SegmentBoundaryFunction.StartsWith,
"suffix" => SegmentBoundaryFunction.StartsWith | SegmentBoundaryFunction.EndingBoundary,
"len" or "length" => SegmentBoundaryFunction.FixedLength | SegmentBoundaryFunction.EndingBoundary | SegmentBoundaryFunction.IncludeInVar,
_ => SegmentBoundaryFunction.None
};
if (fn == SegmentBoundaryFunction.None)
Expand All @@ -681,13 +724,20 @@ private static SegmentBoundaryFunction FromString(string name, bool useIgnoreCas
}
if (segmentOptional)
{
if (fn == SegmentBoundaryFunction.FixedLength)
{
// When a fixed length segment is optional, we don't make a end boundary for it.
return SegmentBoundaryFunction.None;
}
fn |= SegmentBoundaryFunction.SegmentOptional;
}
return fn;
}

/// <summary>
/// Builds the boundary for a literal by delegating to StringEquals, with the
/// include-in-variable option turned off.
/// </summary>
/// <param name="literal">The literal text passed to StringEquals.</param>
/// <param name="ignoreCase">Forwarded to StringEquals as its ignore-case argument.</param>
public static SegmentBoundary Literal(ReadOnlySpan<char> literal, bool ignoreCase)
{
    const bool includeInVariable = false;
    return StringEquals(literal, ignoreCase, includeInVariable);
}



/// <summary>
/// Shared ending boundary constructed with Flags.EndingBoundary and
/// When.SegmentFullyMatchedByStartBoundary, with no match string or char.
/// Declared readonly so this shared instance cannot be reassigned at runtime.
/// </summary>
public static readonly SegmentBoundary LiteralEnd = new(Flags.EndingBoundary, When.SegmentFullyMatchedByStartBoundary, null, '\0');

Expand Down Expand Up @@ -745,17 +795,20 @@ public static bool TryCreate(string function, bool useIgnoreCase, bool segmentOp
private static bool TryCreate(SegmentBoundaryFunction function, ReadOnlySpan<char> arg0, out SegmentBoundary? result)
{
var argType = ExpressionParsingHelpers.GetArgType(arg0);

if ((argType & ExpressionParsingHelpers.ArgType.String) == 0)
{
result = null;
return false;
}

var includeInVar = (function & SegmentBoundaryFunction.IncludeInVar) == SegmentBoundaryFunction.IncludeInVar;
var ignoreCase = (function & SegmentBoundaryFunction.IgnoreCase) == SegmentBoundaryFunction.IgnoreCase;
var startsWith = (function & SegmentBoundaryFunction.StartsWith) == SegmentBoundaryFunction.StartsWith;
var equals = (function & SegmentBoundaryFunction.Equals) == SegmentBoundaryFunction.Equals;
var segmentOptional = (function & SegmentBoundaryFunction.SegmentOptional) == SegmentBoundaryFunction.SegmentOptional;
var endingBoundary = (function & SegmentBoundaryFunction.EndingBoundary) == SegmentBoundaryFunction.EndingBoundary;
var segmentFixedLength = (function & SegmentBoundaryFunction.FixedLength) == SegmentBoundaryFunction.FixedLength;
if (startsWith)
{
result = StartWith(arg0, ignoreCase, includeInVar, endingBoundary).SetOptional(segmentOptional);
Expand All @@ -767,6 +820,25 @@ private static bool TryCreate(SegmentBoundaryFunction function, ReadOnlySpan<cha
result = StringEquals(arg0, ignoreCase, includeInVar).SetOptional(segmentOptional);
return true;
}
if (segmentFixedLength)
{
if (segmentOptional)
{
// We don't support optional fixed length segments at this time.
result = null;
return false;
}
// len requires a number
if ((argType & ExpressionParsingHelpers.ArgType.UnsignedInteger) > 0)
{
//parse the number into char
var len = int.Parse(arg0.ToString());
result = FixedLengthEnd(len);
return true;
}
result = null;
return false;
}
throw new InvalidOperationException("Unreachable code");
}

Expand Down Expand Up @@ -801,7 +873,16 @@ private static SegmentBoundary StringEquals(ReadOnlySpan<char> asSpan, bool ordi
return new(includeInVar ? Flags.IncludeMatchingTextInVariable : Flags.None,
ordinalIgnoreCase ? When.EqualsOrdinalIgnoreCase : When.EqualsOrdinal, asSpan.ToString(), '\0');
}

/// <summary>
/// Creates an ending boundary that terminates a segment after a fixed number of characters.
/// The boundary is flagged to include the matched text in the variable.
/// </summary>
/// <param name="length">
/// Number of characters, from 1 to <see cref="char.MaxValue"/> inclusive. The upper limit
/// exists because the length is stored in the boundary's char slot.
/// </param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="length"/> is less than 1 or greater than <see cref="char.MaxValue"/>.
/// </exception>
private static SegmentBoundary FixedLengthEnd(int length)
{
    if (length < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(length), "Fixed length must be greater than 0");
    }
    if (length > char.MaxValue)
    {
        // Cast to int: concatenating char.MaxValue directly appends the character U+FFFF,
        // not the number 65535, which made the original message unreadable.
        throw new ArgumentOutOfRangeException(nameof(length),
            $"Fixed length must be less than or equal to {(int)char.MaxValue}");
    }
    // The length travels in the char argument; it is read back as a number when matching.
    return new SegmentBoundary(Flags.IncludeMatchingTextInVariable | Flags.EndingBoundary,
        When.FixedLength, null, (char)length);
}
[Flags]
private enum Flags : byte
{
Expand Down Expand Up @@ -833,6 +914,7 @@ private enum When : byte
EqualsOrdinal,
EqualsChar,
EqualsOrdinalIgnoreCase,
FixedLength,
}


Expand All @@ -858,6 +940,14 @@ public bool TryMatch(ReadOnlySpan<char> text, out int start, out int end)
if (text.Length == 0) return false;
switch (On)
{
case When.FixedLength:
if (text.Length >= this.Char)
{
start = 0;
end = this.Char;
return true;
}
return false;
case When.AtChar or When.EqualsChar:
if (text[0] == Char)
{
Expand Down Expand Up @@ -912,6 +1002,14 @@ public bool TryScan(ReadOnlySpan<char> text, out int start, out int end)
if (text.Length == 0) return false;
switch (On)
{
case When.FixedLength:
if (text.Length >= this.Char)
{
start = this.Char;
end = this.Char;
return true;
}
return false;
case When.AtChar or When.EqualsChar:
var index = text.IndexOf(Char);
if (index == -1) return false;
Expand Down Expand Up @@ -959,10 +1057,15 @@ public override string ToString()
? (isStartBoundary ? "starts-with" : "ends-with")
: (isStartBoundary ? "prefix" : "suffix"),
When.EqualsOrdinal or When.EqualsChar or When.EqualsOrdinalIgnoreCase => "equals",
When.FixedLength => $"len",
_ => throw new InvalidOperationException("Unreachable code")
};
var ignoreCase = On is When.AtStringIgnoreCase or When.EqualsOrdinalIgnoreCase ? "-i" : "";
var optional = (Behavior & Flags.SegmentOptional) != 0 ? "?": "";
if (On == When.FixedLength)
{
return $"{name}({(int)Char}){ignoreCase}{optional}";
}
if (Chars != null)
{
name = $"{name}({Chars}){ignoreCase}{optional}";
Expand Down
Loading

0 comments on commit c41bc3c

Please sign in to comment.