Skip to content

Commit

Permalink
support case insensitive patterns for toPatternExpr (#1127)
Browse files Browse the repository at this point in the history
Allows mapping case insensitive regex to systems that only
support simpler regex like RE2. Fixes #1126.
  • Loading branch information
brharrington authored Mar 19, 2024
1 parent 576c9f2 commit 77566c5
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;
import java.util.stream.Collectors;

/**
* Represents an expression of simpler patterns combined with AND, OR, and NOT clauses.
Expand All @@ -33,6 +34,11 @@ public interface PatternExpr {
*/
boolean matches(String value);

/**
 * Returns a copy of the expression that will ignore the case when matching.
 *
 * @return
 *     Copy of this expression that ignores case when matching.
 */
PatternExpr ignoreCase();

/**
* Convert this expression into a query string. A common example would be to implement
* an encoder that would convert it into part of a WHERE clause for a SQL DB.
Expand Down Expand Up @@ -163,6 +169,10 @@ public PatternMatcher matcher() {
return matcher.matches(str);
}

/**
 * Returns a new {@code Regex} expression wrapping the case-insensitive variant of the
 * underlying matcher.
 */
@Override
public PatternExpr ignoreCase() {
  final PatternMatcher relaxed = matcher.ignoreCase();
  return new Regex(relaxed);
}

/** Returns the string form of the underlying matcher wrapped in single quotes. */
@Override
public String toString() {
  final String quote = "'";
  return quote + matcher + quote;
}
Expand Down Expand Up @@ -200,6 +210,10 @@ public List<PatternExpr> exprs() {
return true;
}

/**
 * Returns a new {@code And} expression whose clauses are the case-insensitive copies of
 * this expression's clauses, in the same order.
 */
@Override
public PatternExpr ignoreCase() {
  final List<PatternExpr> relaxed = exprs.stream()
      .map(PatternExpr::ignoreCase)
      .collect(Collectors.toList());
  return new And(relaxed);
}

@Override public String toString() {
StringJoiner joiner = new StringJoiner(" AND ", "(", ")");
exprs.forEach(expr -> joiner.add(expr.toString()));
Expand Down Expand Up @@ -239,6 +253,10 @@ public List<PatternExpr> exprs() {
return false;
}

/**
 * Returns a new {@code Or} expression whose clauses are the case-insensitive copies of
 * this expression's clauses, in the same order.
 */
@Override
public PatternExpr ignoreCase() {
  final List<PatternExpr> relaxed = exprs.stream()
      .map(PatternExpr::ignoreCase)
      .collect(Collectors.toList());
  return new Or(relaxed);
}

@Override public String toString() {
StringJoiner joiner = new StringJoiner(" OR ", "(", ")");
exprs.forEach(expr -> joiner.add(expr.toString()));
Expand Down Expand Up @@ -273,6 +291,10 @@ public PatternExpr expr() {
return !expr.matches(str);
}

/**
 * Returns a new {@code Not} expression negating the case-insensitive copy of the wrapped
 * expression.
 */
@Override
public PatternExpr ignoreCase() {
  final PatternExpr relaxed = expr.ignoreCase();
  return new Not(relaxed);
}

/** Returns the string form of the wrapped expression prefixed with {@code "NOT "}. */
@Override
public String toString() {
  final String prefix = "NOT ";
  return prefix + expr;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package com.netflix.spectator.impl.matcher;

import com.netflix.spectator.impl.PatternExpr;
import com.netflix.spectator.impl.PatternMatcher;

import java.io.Serializable;
Expand All @@ -25,13 +26,13 @@ final class IgnoreCaseMatcher implements PatternMatcher, Serializable {

private static final long serialVersionUID = 1L;

private final PatternMatcher matcher;
private final Matcher matcher;

/**
* Underlying matcher to use for checking the string. It should have already been converted
* to match on the lower case version of the string.
*/
IgnoreCaseMatcher(PatternMatcher matcher) {
IgnoreCaseMatcher(Matcher matcher) {
this.matcher = matcher;
}

Expand All @@ -40,6 +41,12 @@ public boolean matches(String str) {
return matcher.matches(str);
}

/**
 * Converts the wrapped matcher with {@code PatternUtils.toPatternExpr}, passing
 * {@code max} through unchanged, and returns the case-insensitive copy of the result.
 *
 * @param max
 *     Limit forwarded as-is to {@code PatternUtils.toPatternExpr}.
 * @return
 *     Case-insensitive copy of the converted expression, or {@code null} when the
 *     conversion returns {@code null}.
 */
@Override
public PatternExpr toPatternExpr(int max) {
  final PatternExpr caseSensitive = PatternUtils.toPatternExpr(matcher, max);
  if (caseSensitive == null) {
    return null;
  }
  return caseSensitive.ignoreCase();
}

@Override
public String toString() {
return "(?i)" + matcher.toString();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright 2014-2024 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.netflix.spectator.impl.matcher;

import com.netflix.spectator.impl.PatternExpr;
import com.netflix.spectator.impl.PatternMatcher;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.util.regex.Pattern;

/**
 * Checks that pattern expressions produced from case-insensitive matchers agree with
 * {@code java.util.regex} in case-insensitive mode and, when the expression is a single
 * {@code Regex}, with RE2/J as well.
 */
public class Re2CaseInsensitivePatternMatcherTest extends AbstractPatternMatcherTest {

  /** Matches a Java style {@code \}{@code uNNNN} escape, capturing the four hex digits. */
  private static final Pattern UNICODE_ESCAPE = Pattern.compile("\\\\u([0-9a-fA-F]{4})");

  /** Both the plain and the case-insensitive matcher must yield a pattern expression. */
  @Test
  public void ignoreCasePatternExpr() {
    final PatternMatcher caseSensitive = PatternMatcher.compile("foo");
    Assertions.assertNotNull(caseSensitive.toPatternExpr(50));

    final PatternMatcher caseInsensitive = caseSensitive.ignoreCase();
    Assertions.assertNotNull(caseInsensitive.toPatternExpr(50));
  }

  /**
   * Decides whether {@code regex} should be compared against the JDK implementation.
   *
   * <p>Java regex has inconsistent behavior for POSIX character classes and the literal
   * version of the same character class, so expressions using them are skipped. See
   * https://bugs.openjdk.java.net/browse/JDK-8214245, which was fixed in jdk15.
   *
   * <p>NOTE(review): as written this returns {@code false} on JDK 15 and later, so
   * {@link #testRE(String, String)} verifies nothing there. Given the bug is reported as
   * fixed in jdk15, confirm whether the version condition is intentional.
   */
  private boolean shouldCheckRegex(String regex) {
    if (JavaVersion.major() >= 15) {
      return false;
    }
    final boolean usesPosixClass = regex.contains("\\p{") || regex.contains("\\P{");
    final boolean usesNegatedClass = regex.contains("[^");
    return !usesPosixClass && !usesNegatedClass;
  }

  /**
   * Compares the case-insensitive pattern expression for {@code regex} with the JDK
   * result for {@code value}. Does nothing when the regex is excluded by
   * {@link #shouldCheckRegex(String)} or when no pattern expression can be produced.
   */
  @Override
  protected void testRE(String regex, String value) {
    if (!shouldCheckRegex(regex)) {
      return;
    }

    final PatternExpr expr = PatternMatcher.compile(regex).ignoreCase().toPatternExpr(1000);
    if (expr == null) {
      return;
    }

    // Validate that all remaining patterns can be processed with RE2
    expr.toQueryString(new Re2Encoder());

    // Only a lone Regex expression can be compared directly against an RE2 pattern
    final com.google.re2j.Pattern re2 = (expr instanceof PatternExpr.Regex)
        ? compileRE2(((PatternExpr.Regex) expr).matcher().toString())
        : null;

    final int flags = Pattern.DOTALL | Pattern.CASE_INSENSITIVE;
    final Pattern reference = Pattern.compile("^.*(" + regex + ")", flags);
    final boolean expected = reference.matcher(value).find();

    if (expected) {
      Assertions.assertTrue(expr.matches(value), regex + " should match " + value);
      if (re2 != null) {
        Assertions.assertTrue(re2.matcher(value).find(), re2 + " should match " + value);
      }
    } else {
      Assertions.assertFalse(expr.matches(value), regex + " shouldn't match " + value);
      if (re2 != null) {
        Assertions.assertFalse(re2.matcher(value).find(), re2 + " shouldn't match " + value);
      }
    }
  }

  /**
   * Compiles the string form of a matcher as an RE2/J pattern, wrapped as
   * {@code ^.*(...)} with the DOTALL flag.
   */
  private static com.google.re2j.Pattern compileRE2(String matcher) {
    // RE2 unicode escape is \\x{NNNN} instead of \\uNNNN
    final String translated = UNICODE_ESCAPE.matcher(matcher).replaceAll("\\\\x{$1}");
    final String wrapped = "^.*(" + translated + ")";
    return com.google.re2j.Pattern.compile(wrapped, com.google.re2j.Pattern.DOTALL);
  }

  /**
   * Encoder that passes every regex clause through {@link #compileRE2(String)} and joins
   * clauses with textual AND, OR and NOT operators.
   */
  private static final class Re2Encoder implements PatternExpr.Encoder {

    /** Returns the pattern string of the RE2/J pattern compiled from {@code matcher}. */
    @Override
    public String regex(PatternMatcher matcher) {
      final com.google.re2j.Pattern compiled = compileRE2(matcher.toString());
      return compiled.pattern();
    }

    /** Opening delimiter for an AND group. */
    @Override
    public String startAnd() {
      return "(";
    }

    /** Separator placed between AND clauses. */
    @Override
    public String separatorAnd() {
      return " AND ";
    }

    /** Closing delimiter for an AND group. */
    @Override
    public String endAnd() {
      return ")";
    }

    /** Opening delimiter for an OR group. */
    @Override
    public String startOr() {
      return "(";
    }

    /** Separator placed between OR clauses. */
    @Override
    public String separatorOr() {
      return " OR ";
    }

    /** Closing delimiter for an OR group. */
    @Override
    public String endOr() {
      return ")";
    }

    /** Prefix for a negated clause. */
    @Override
    public String startNot() {
      return "NOT ";
    }

    /** Suffix for a negated clause; nothing is appended. */
    @Override
    public String endNot() {
      return "";
    }
  }
}

0 comments on commit 77566c5

Please sign in to comment.