diff --git a/docs/source-2.0/guides/model-linters.rst b/docs/source-2.0/guides/model-linters.rst index 05e0680a287..ac432618273 100644 --- a/docs/source-2.0/guides/model-linters.rst +++ b/docs/source-2.0/guides/model-linters.rst @@ -196,10 +196,6 @@ ReservedWords Validates that shape names and member names do not match a configured set of reserved words. -Reserved words are compared in a case-insensitive manner via substring match -and support a leading and trailing wildcard character, "*". See -:ref:`wildcard evaluation ` for more detail. - Rationale Tools that generate code from Smithy models SHOULD automatically convert reserved words into symbols that are safe to use in the targeted @@ -223,9 +219,15 @@ Configuration - Description * - words - [ ``string`` ] - - **Required**. A list of words that shape or member names MUST not - case-insensitively match. Supports only the leading and trailing - wildcard character of "*". + - A list of words that shape or member names MUST not case-insensitively + match. Supports a leading and trailing wildcard character of "*". + See :ref:`reserved-words-wildcards` for details. + * - terms + - [ ``string`` ] + - A list of search terms that match shape or member names + case-insensitively based on word boundaries (for example, the term + "access key id" matches "AccessKeyId", "access_key_id", and + "accesskeyid"). See :ref:`reserved-words-boundaries` for details. * - selector - ``string`` - Specifies a selector of shapes to validate for this configuration. @@ -343,6 +345,79 @@ be specified. * - **Codename** - Match +.. _reserved-words-boundaries: + +Reserved words boundary matching +-------------------------------- + +Word boundaries can be used to find reserved words. Word boundary search +text consists of one or more alphanumeric words separated by a single +space. When comparing against another string, the contents of the string +are separated into words based on word boundaries. Those words are +case-insensitively compared against the words in the search text for a match. + +Word boundaries are detected when the casing between two characters changes, +or the type of character between two characters changes. The following table +demonstrates how comparison text is parsed into words. + +.. list-table:: + :header-rows: 1 + :widths: 50 50 + + * - Comparison text + - Parsed words + * - accessKey + - access key + * - accessKeyID + - access key id + * - accessKeyIDValue + - access key id value + * - accesskeyId + - accesskey id + * - accessKey1 + - access key 1 + * - access_keyID + - access key id + +The following table shows matches for a reserved term of ``secret id``, +meaning the word "secret" needs to be followed by the word "id". Word +boundary searches also match if the search terms concatenated together with +no spaces is considered a word in the search text (for example, +``secret id`` will match the word ``secretid``). + +.. list-table:: + :header-rows: 1 + :widths: 75 25 + + * - Comparison text + - Result + * - Some\ **SecretId** + - Match + * - Some\ **SecretID**\ Value + - Match + * - Some\ **Secret__ID**\ __value + - Match + * - **secret_id** + - Match + * - **secret_id**\ 100 + - Match + * - **secretid** + - Match + * - **secretid**\ _value + - Match + * - secretidvalue + - No Match + * - SecretThingId + - No match + * - SomeSecretid + - No match + +.. admonition:: Syntax restrictions + + * Empty search terms are not valid. + * Only a single space can appear between words in word boundary patterns. + * Leading and trailing spaces are not permitted in word boundary patterns. + * Word boundary patterns can only contain alphanumeric characters. .. _StandardOperationVerb: diff --git a/smithy-linters/src/main/java/software/amazon/smithy/linters/ReservedWordsValidator.java b/smithy-linters/src/main/java/software/amazon/smithy/linters/ReservedWordsValidator.java index 85512d8b0e9..e75db76c2f8 100644 --- a/smithy-linters/src/main/java/software/amazon/smithy/linters/ReservedWordsValidator.java +++ b/smithy-linters/src/main/java/software/amazon/smithy/linters/ReservedWordsValidator.java @@ -20,11 +20,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.Locale; import java.util.Optional; -import java.util.regex.Pattern; -import java.util.stream.Collectors; -import java.util.stream.Stream; import software.amazon.smithy.model.Model; import software.amazon.smithy.model.node.NodeMapper; import software.amazon.smithy.model.selector.Selector; @@ -34,7 +30,6 @@ import software.amazon.smithy.model.validation.Severity; import software.amazon.smithy.model.validation.ValidationEvent; import software.amazon.smithy.model.validation.ValidatorService; -import software.amazon.smithy.utils.OptionalUtils; /** * Emits validation events for a configuration of reserved words. @@ -48,6 +43,7 @@ *
  • words: ([string]) A list of words that are * case-insensitively reserved. Leading and trailing wildcards * ("*") are supported. + *
  • terms: ([string]) A list of word boundary terms to test.
  • *
  • selector: (string) Specifies a selector for this * configuration. Defaults to validating all shapes, including * member names. @@ -83,9 +79,10 @@ public void setReserved(List reserved) { * A single reserved words configuration. */ public static final class ReservedWords { - private List words = Collections.emptyList(); private Selector selector = Selector.IDENTITY; private String reason = ""; + private final WildcardMatcher wildcardMatcher = new WildcardMatcher(); + private final WordBoundaryMatcher wordMatcher = new WordBoundaryMatcher(); /** * Sets the list of reserved word definitions. @@ -96,16 +93,16 @@ public static final class ReservedWords { * @param words Words to set. */ public void setWords(List words) { - this.words = new ArrayList<>(words.size()); - for (String word : words) { - if (word.equals("*")) { - throw new IllegalArgumentException("Reservations cannot be made against '*'"); - } - if (CONTAINS_INNER_WILDCARD.matcher(word).find()) { - throw new IllegalArgumentException("Only preceding and trailing wildcards ('*') are supported."); - } - this.words.add(word.toLowerCase(Locale.ENGLISH)); - } + words.forEach(wildcardMatcher::addSearch); + } + + /** + * Sets the list of reserved word terms to match based on word boundaries. + * + * @param terms Terms to set. + */ + public void setTerms(List terms) { + terms.forEach(wordMatcher::addSearch); } /** @@ -126,8 +123,10 @@ public void setReason(String reason) { this.reason = reason; } - private Stream validate(Model model) { - return selector.select(model).stream().flatMap(shape -> OptionalUtils.stream(validateShape(shape))); + private void validate(Model model, List events) { + for (Shape shape : selector.select(model)) { + validateShape(shape).ifPresent(events::add); + } } private Optional validateShape(Shape shape) { @@ -139,28 +138,13 @@ private Optional validateShape(Shape shape) { } /** - * Checks a passed word against the list of reserved words in this - * configuration. Validates these in a case-insensitive manner, and - * supports starting and ending wildcards '*'. + * Checks a passed word against the reserved words in this configuration. * * @param word A value that may be reserved. * @return Returns true if the word is reserved by this configuration */ private boolean isReservedWord(String word) { - String compare = word.toLowerCase(Locale.US); - return words.stream().anyMatch(reservation -> { - // Comparisons against '*' have been rejected at configuration load. - if (reservation.startsWith("*")) { - if (reservation.endsWith("*")) { - return compare.contains(reservation.substring(1, reservation.lastIndexOf("*"))); - } - return compare.endsWith(reservation.substring(1)); - } - if (reservation.endsWith("*")) { - return compare.startsWith(reservation.substring(0, reservation.lastIndexOf("*"))); - } - return compare.equals(reservation); - }); + return wildcardMatcher.test(word) || wordMatcher.test(word); } private ValidationEvent emit(Shape shape, String word, String reason) { @@ -182,21 +166,18 @@ public Provider() { } } - private static final Pattern CONTAINS_INNER_WILDCARD = Pattern.compile("^.+\\*.+$"); - private final Config config; private ReservedWordsValidator(Config config) { this.config = config; - - if (config.getReserved().isEmpty()) { - throw new IllegalArgumentException("Missing `reserved` words"); - } } @Override public List validate(Model model) { - return config.getReserved().stream().flatMap(reservation -> reservation.validate(model)) - .collect(Collectors.toList()); + List events = new ArrayList<>(); + for (ReservedWords reserved : config.getReserved()) { + reserved.validate(model, events); + } + return events; } } diff --git a/smithy-linters/src/main/java/software/amazon/smithy/linters/WildcardMatcher.java b/smithy-linters/src/main/java/software/amazon/smithy/linters/WildcardMatcher.java new file mode 100644 index 00000000000..6d4accee4d2 --- /dev/null +++ b/smithy-linters/src/main/java/software/amazon/smithy/linters/WildcardMatcher.java @@ -0,0 +1,86 @@ +/* + * Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package software.amazon.smithy.linters; + +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.function.Predicate; +import software.amazon.smithy.utils.StringUtils; + +final class WildcardMatcher implements Predicate { + + private final List> predicates = new ArrayList<>(); + + @Override + public boolean test(String text) { + if (StringUtils.isEmpty(text)) { + return false; + } + + text = text.toLowerCase(Locale.ENGLISH); + for (Predicate predicate : predicates) { + if (predicate.test(text)) { + return true; + } + } + + return false; + } + + void addSearch(String pattern) { + if (StringUtils.isEmpty(pattern)) { + throw new IllegalArgumentException("Invalid empty pattern"); + } else if (pattern.equals("*")) { + throw new IllegalArgumentException("Invalid wildcard pattern: *"); + } else { + predicates.add(parseWildcardPattern(pattern)); + } + } + + private static Predicate parseWildcardPattern(String pattern) { + boolean suffix = false; + boolean prefix = false; + + // Find any leading or ending star, ensure that no inner stars are used. + StringBuilder result = new StringBuilder(); + for (int i = 0; i < pattern.length(); i++) { + char c = pattern.charAt(i); + if (c == '*') { + if (i == 0) { + suffix = true; + } else if (i == pattern.length() - 1) { + prefix = true; + } else { + throw new IllegalArgumentException("Invalid inner '*' in wildcard pattern: " + pattern); + } + } else { + result.append(Character.toLowerCase(c)); + } + } + + String needle = result.toString(); + if (suffix && prefix) { + return text -> text.contains(needle); + } else if (suffix) { + return text -> text.endsWith(needle); + } else if (prefix) { + return text -> text.startsWith(needle); + } else { + return text -> text.equals(needle); + } + } +} diff --git a/smithy-linters/src/main/java/software/amazon/smithy/linters/WordBoundaryMatcher.java b/smithy-linters/src/main/java/software/amazon/smithy/linters/WordBoundaryMatcher.java new file mode 100644 index 00000000000..99abaa6a0c7 --- /dev/null +++ b/smithy-linters/src/main/java/software/amazon/smithy/linters/WordBoundaryMatcher.java @@ -0,0 +1,168 @@ +/* + * Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package software.amazon.smithy.linters; + +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; +import java.util.function.Predicate; +import software.amazon.smithy.utils.StringUtils; + +/** + * Matches text based on word boundaries. + * + *

    Note that this class is not thread safe due caching. If that ever needs + * to change, we can reevaluate how to make {@link #test(String)} thread safe + * (for example, by wrapping the cache in a synchronized map or by passing + * in a function to customize how to normalize search text). + */ +final class WordBoundaryMatcher implements Predicate { + + private final Set words = new HashSet<>(); + + // Use an LRU cache that stores up to 128 canonicalized search strings (e.g. don't parse "member" over and over). + private final Map searchCache = new LinkedHashMap(128, 1.0f, true) { + @Override + protected boolean removeEldestEntry(Map.Entry eldest) { + return this.size() > 127; + } + }; + + /** + * Add a word boundary search terms to the matcher. + * + * @param terms Words separated by a single space. + */ + public void addSearch(String terms) { + if (StringUtils.isEmpty(terms)) { + throw new IllegalArgumentException("Invalid empty search terms"); + } + String wordPattern = parseWordPattern(terms); + words.add(wordPattern); + words.add(wordPattern.replace(" ", "")); + } + + @Override + public boolean test(String text) { + if (text == null || text.isEmpty() || words.isEmpty()) { + return false; + } + + String searchString = searchCache.computeIfAbsent(text, WordBoundaryMatcher::splitWords); + for (String needle : words) { + if (testWordMatch(needle, searchString)) { + return true; + } + } + + return false; + } + + private boolean testWordMatch(String needle, String haystack) { + int position = haystack.indexOf(needle); + int haystackLength = haystack.length(); + int needleLength = needle.length(); + if (position == -1) { + return false; + } else if (needleLength == haystackLength) { + return true; + } else if (position == 0) { + return haystack.charAt(needleLength) == ' '; + } else if (position == haystackLength - needleLength) { + return haystack.charAt(position - 1) == ' '; + } else { + return haystack.charAt(position - 1) == ' ' && haystack.charAt(position + needleLength) == ' '; + } + } + + private static String parseWordPattern(String pattern) { + boolean previousSpace = false; + StringBuilder result = new StringBuilder(pattern.length() - 2); + for (int i = 0; i < pattern.length(); i++) { + char c = pattern.charAt(i); + result.append(Character.toLowerCase(c)); + if (c == ' ') { + // Ensure that extraneous spaces aren't found at the beginning, end, or between words. + if (i == 0 || i == pattern.length() - 1 || previousSpace) { + throw new IllegalArgumentException("Invalid spaces in word boundary search: " + pattern); + } else { + previousSpace = true; + } + } else if (!Character.isLetterOrDigit(c)) { + throw new IllegalArgumentException( + "Invalid non-alphanumeric characters in word boundary search:" + pattern); + } else { + previousSpace = false; + } + } + + return result.toString(); + } + + // Rather than return a list of words, this method canonicalizes words into a lowercase, space-delimited string. + // This allows substring checks to be used to determine if a search is found in the delimited words. + // Adapted from Apache Commons Lang3: https://github.com/apache/commons-lang/blob/master/src/main/java/org/apache/commons/lang3/StringUtils.java#L7457 + private static String splitWords(String str) { + if (str.isEmpty()) { + return ""; + } + + final StringBuilder result = new StringBuilder(); + int tokenStart = 0; + int currentType = Character.getType(str.charAt(tokenStart)); + + for (int pos = tokenStart + 1; pos < str.length(); pos++) { + final char c = str.charAt(pos); + final int type = Character.getType(c); + if (type == currentType) { + continue; + } + if (type == Character.LOWERCASE_LETTER && currentType == Character.UPPERCASE_LETTER) { + final int newTokenStart = pos - 1; + if (newTokenStart != tokenStart) { + addLowerCaseStringToBuilder(result, str, tokenStart, newTokenStart - tokenStart); + result.append(' '); + tokenStart = newTokenStart; + } + } else { + // Skip character groupings that are delimiters. We just want letters and numbers. + if (Character.isLetterOrDigit(str.charAt(pos - 1))) { + addLowerCaseStringToBuilder(result, str, tokenStart, pos - tokenStart); + result.append(' '); + } + tokenStart = pos; + } + currentType = type; + } + + if (Character.isLetterOrDigit(str.charAt(tokenStart))) { + // Add the last segment if it's a letter or number. + addLowerCaseStringToBuilder(result, str, tokenStart, str.length() - tokenStart); + } else { + // Since the last segment is ignored, remove the trailing space. + result.setLength(result.length() - 1); + } + + return result.toString(); + } + + private static void addLowerCaseStringToBuilder(StringBuilder result, String str, int start, int count) { + for (int i = start; i < start + count; i++) { + result.append(Character.toLowerCase(str.charAt(i))); + } + } +} diff --git a/smithy-linters/src/test/java/software/amazon/smithy/linters/WildcardMatcherTest.java b/smithy-linters/src/test/java/software/amazon/smithy/linters/WildcardMatcherTest.java new file mode 100644 index 00000000000..02155ab2548 --- /dev/null +++ b/smithy-linters/src/test/java/software/amazon/smithy/linters/WildcardMatcherTest.java @@ -0,0 +1,81 @@ +package software.amazon.smithy.linters; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.is; + +import java.util.stream.Stream; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +public class WildcardMatcherTest { + + @ParameterizedTest + @MethodSource("patternProvider") + public void matchesPatterns(String text, String pattern, boolean match) { + WildcardMatcher matcher = new WildcardMatcher(); + matcher.addSearch(pattern); + + assertThat("'" + text + "' matches '" + pattern + '\'', matcher.test(text), is(match)); + } + + public static Stream patternProvider() { + return Stream.of( + // Can't match empty or null. + Arguments.of("", "foo", false), + Arguments.of(null, "foo", false), + + // Not a contains match + Arguments.of("foo", "*hello*", false), + + // Good contains matches. + Arguments.of("__accessKeyId__", "*accesskeyid*", true), + Arguments.of("accessKeyId", "*accesskeyid*", true), + Arguments.of("hello", "*hello*", true), + Arguments.of("foo_hello_there", "*hello*", true), + + // Not a prefix match. + Arguments.of("foo", "hello*", false), + + // Good prefix matches. + Arguments.of("accessKeyId", "accesskeyid*", true), + Arguments.of("hello", "hello*", true), + Arguments.of("hello_there", "hello*", true), + + // Not a suffix match. + Arguments.of("foo", "*hello", false), + + // Good suffix matches. + Arguments.of("accessKeyId", "*accesskeyid", true), + Arguments.of("hello", "*hello", true), + Arguments.of("well_hello", "*hello", true), + + // An exact match. + Arguments.of("string", "string", true) + ); + } + + @ParameterizedTest + @MethodSource("invalidPatternProvider") + public void validatesSyntax(String invalidPattern) { + WildcardMatcher matcher = new WildcardMatcher(); + + IllegalArgumentException e = Assertions.assertThrows(IllegalArgumentException.class, + () -> matcher.addSearch(invalidPattern), + invalidPattern); + + // All syntax errors should show the invalid pattern. + assertThat(e.getMessage(), containsString(invalidPattern)); + } + + public static Stream invalidPatternProvider() { + return Stream.of( + Arguments.of("*"), + Arguments.of("**foo"), + Arguments.of("foo*bar"), + Arguments.of("") + ); + } +} diff --git a/smithy-linters/src/test/java/software/amazon/smithy/linters/WordBoundaryMatcherTest.java b/smithy-linters/src/test/java/software/amazon/smithy/linters/WordBoundaryMatcherTest.java new file mode 100644 index 00000000000..2d5dee57118 --- /dev/null +++ b/smithy-linters/src/test/java/software/amazon/smithy/linters/WordBoundaryMatcherTest.java @@ -0,0 +1,97 @@ +package software.amazon.smithy.linters; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.is; + +import java.util.stream.Stream; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +public class WordBoundaryMatcherTest { + + @ParameterizedTest + @MethodSource("patternProvider") + public void matchesPatterns(String text, String pattern, boolean match) { + WordBoundaryMatcher matcher = new WordBoundaryMatcher(); + matcher.addSearch(pattern); + + assertThat("'" + text + "' matches '" + pattern + '\'', matcher.test(text), is(match)); + } + + public static Stream patternProvider() { + return Stream.of( + // Can't match empty or null. + Arguments.of("", "access key id", false), + Arguments.of(null, "access key id", false), + + // Good word matches. + Arguments.of("accessKeyId", "access key id", true), + Arguments.of("accesskeyid", "access key id", true), + Arguments.of("access_key_id", "access key id", true), + Arguments.of("access_key_ID", "access key id", true), + Arguments.of("accessKey__Id", "access key id", true), + + // Tricky word boundary matches. + Arguments.of("accessKey__Id", "access key id", true), + Arguments.of("accessKey__Id", "access key id", true), + Arguments.of("accessKey__Id", "access key id", true), + Arguments.of("accessKey__Id", "access key id", true), + Arguments.of("accessKey__Id", "access key id", true), + Arguments.of("accessKey__Id", "access key id", true), + Arguments.of("accessKey__Id", "access key id", true), + Arguments.of("access:Key:Id", "access key id", true), + Arguments.of("access Key Id", "access key id", true), + Arguments.of("access-Key-Id", "access key id", true), + Arguments.of("access.Key.Id200", "access key id", true), + Arguments.of("AccessKeyIDValue", "access key id", true), + Arguments.of("__AccessKeyIDValue__", "access key id", true), + Arguments.of("zip", "zip", true), + Arguments.of("unzip", "zip", false), + Arguments.of("zipCode", "zip", true), + + // No match because zipcode is parsed as one word. + Arguments.of("zipcode", "zip", false), + + // No match is found because "accesskey_id" is split into "accesskey id" + Arguments.of("foo accesskey_id", "access key id", false), + + // Cases where no match is found and the word counts differ. + Arguments.of("string", "this is too long to match", false), + Arguments.of("this is not a match", "no", false), + + // An exact match. + Arguments.of("string", "string", true), + + Arguments.of("foo_bar_baz", "bar", true), + Arguments.of("foo_baz_bar", "bar", true), + Arguments.of("foo_bazbar", "bar", false), + Arguments.of("bazbarbaz", "bar", false) + ); + } + + @ParameterizedTest + @MethodSource("invalidPatternProvider") + public void validatesSyntax(String invalidPattern) { + WordBoundaryMatcher matcher = new WordBoundaryMatcher(); + + IllegalArgumentException e = Assertions.assertThrows(IllegalArgumentException.class, + () -> matcher.addSearch(invalidPattern), + invalidPattern); + + // All syntax errors should show the invalid pattern. + assertThat(e.getMessage(), containsString(invalidPattern)); + } + + public static Stream invalidPatternProvider() { + return Stream.of( + Arguments.of("foo bar"), + Arguments.of(" foo bar "), + Arguments.of(" foo"), + Arguments.of("foo_bar"), // non alphanumeric + Arguments.of("foo+bar") // non alphanumeric + ); + } +}