Skip to content

Commit

Permalink
Support unicode letters (#10)
Browse files Browse the repository at this point in the history
* Added initial support for Unicode letters

* Updated unit tests with Unicode characters
  • Loading branch information
markwhitaker authored Sep 17, 2019
1 parent 5b28e40 commit faadc02
Show file tree
Hide file tree
Showing 4 changed files with 808 additions and 434 deletions.
33 changes: 16 additions & 17 deletions src/main/kotlin/RegexBuilder.kt
Original file line number Diff line number Diff line change
Expand Up @@ -180,52 +180,52 @@ class RegexBuilder {
fun nonDigit(quantifier: RegexQuantifier? = null) = append("\\D", quantifier)

/**
* Add an element to match any letter in the Roman alphabet (a-z, A-Z)
* Add an element to match any Unicode letter.
*
* @param quantifier Quantifier to apply to this element
* @return The current [RegexBuilder] object, for method chaining
*/
fun letter(quantifier: RegexQuantifier? = null) = append("[a-zA-Z]", quantifier)
fun letter(quantifier: RegexQuantifier? = null) = append("\\p{L}", quantifier)

/**
* Add an element to match any character that is not a letter in the Roman alphabet (a-z, A-Z)
* Add an element to match any character that is not a Unicode letter.
*
* @param quantifier Quantifier to apply to this element
* @return The current [RegexBuilder] object, for method chaining
*/
fun nonLetter(quantifier: RegexQuantifier? = null) = append("[^a-zA-Z]", quantifier)
fun nonLetter(quantifier: RegexQuantifier? = null) = append("\\P{L}", quantifier)

/**
* Add an element to match any upper-case letter in the Roman alphabet (A-Z).
* Add an element to match any upper-case Unicode letter.
*
* @param quantifier Quantifier to apply to this element
* @return The current [RegexBuilder] object, for method chaining
*/
fun uppercaseLetter(quantifier: RegexQuantifier? = null) = append("[A-Z]", quantifier)
fun uppercaseLetter(quantifier: RegexQuantifier? = null) = append("\\p{Lu}", quantifier)

/**
* Add an element to match any lowercase letter in the Roman alphabet (a-z)
* Add an element to match any lowercase Unicode letter.
*
* @param quantifier Quantifier to apply to this element
* @return The current [RegexBuilder] object, for method chaining
*/
fun lowercaseLetter(quantifier: RegexQuantifier? = null) = append("[a-z]", quantifier)
fun lowercaseLetter(quantifier: RegexQuantifier? = null) = append("\\p{Ll}", quantifier)

/**
* Add an element to match any letter in the Roman alphabet or decimal digit (a-z, A-Z, 0-9)
* Add an element to match any Unicode letter or ASCII decimal digit (0-9).
*
* @param quantifier Quantifier to apply to this element
* @return The current [RegexBuilder] object, for method chaining
*/
fun letterOrDigit(quantifier: RegexQuantifier? = null) = append("[a-zA-Z0-9]", quantifier)
fun letterOrDigit(quantifier: RegexQuantifier? = null) = append("[\\p{L}0-9]", quantifier)

/**
* Add an element to match any character that is not letter in the Roman alphabet or a decimal digit (a-z, A-Z, 0-9)
* Add an element to match any character that is neither a Unicode letter nor an ASCII decimal digit (0-9).
*
* @param quantifier Quantifier to apply to this element
* @return The current [RegexBuilder] object, for method chaining
*/
fun nonLetterOrDigit(quantifier: RegexQuantifier? = null) = append("[^a-zA-Z0-9]", quantifier)
fun nonLetterOrDigit(quantifier: RegexQuantifier? = null) = append("[^\\p{L}0-9]", quantifier)

/**
* Add an element to match any hexadecimal digit (a-f, A-F, 0-9)
Expand Down Expand Up @@ -260,21 +260,20 @@ class RegexBuilder {
fun nonHexDigit(quantifier: RegexQuantifier? = null) = append("[^0-9A-Fa-f]", quantifier)

/**
* Add an element to match any Roman alphabet letter, decimal digit, or underscore (a-z, A-Z, 0-9, _)
* Add an element to match any Unicode letter, ASCII decimal digit (0-9), or underscore.
*
* @param quantifier Quantifier to apply to this element
* @return The current [RegexBuilder] object, for method chaining
*/
fun wordCharacter(quantifier: RegexQuantifier? = null) = append("\\w", quantifier)
fun wordCharacter(quantifier: RegexQuantifier? = null) = append("[\\p{L}0-9_]", quantifier)

/**
* Add an element to match any character that is not a Roman alphabet letter, decimal digit, or underscore
* (a-z, A-Z, 0-9, _)
* Add an element to match any character that is not a Unicode letter, ASCII decimal digit (0-9), or underscore.
*
* @param quantifier Quantifier to apply to this element
* @return The current [RegexBuilder] object, for method chaining
*/
fun nonWordCharacter(quantifier: RegexQuantifier? = null) = append("\\W", quantifier)
fun nonWordCharacter(quantifier: RegexQuantifier? = null) = append("[^\\p{L}0-9_]", quantifier)

/**
* Add an element (a character class) to match any of the characters provided.
Expand Down
Loading

0 comments on commit faadc02

Please sign in to comment.