add join validation check, use char or str for dlm (fix comments/example), formatting
acxz · Jun 16, 2024 · ae9c81e · ae9c81e
1 parent f189e47
commit ae9c81e
Show file tree

Hide file tree

Showing 3 changed files with 23 additions and 49 deletions.
diff --git a/README.md b/README.md
@@ -72,7 +72,7 @@ camel_case_acro = PatternStringCase(
 
 # Convert a string from our newly defined string case, camel_case_acro, to a
 # common string case already defined in StringCases.jl (StringCases.PASCAL_CASE)
-# For more string cases provided out of the box, take a look at the end of the
+# For more string cases provided out of the box, take a look at the
 # `src/commonstringcases.jl` file
 
 StringCases.convert("stringCasesFTW!", camel_case_acro, StringCases.PASCAL_CASE)
@@ -136,24 +136,6 @@ StringCases.convert("askBest30MWΠrice", my_pattern_case, StringCases.SNAKE_CASE
 # and is also lowercased as required by the snake case convention
 StringCases.convert("askBest30MWΠrice", camel_case_acro_num, StringCases.SNAKE_CASE)
 # Output: "ask_best_30mw_πrice"
-
-# You can also customize delimiters, in fact they can be any one of a string,
-# regular expression, function, single character or collection of characters.
-
-# Let's create a regex delimiter to split on one or more (+) characters in the
-# Unicode punctuation category (\p{P})
-dlm = r"\p{P}+"
-
-camel_case_punc = DelimiterStringCase(
-    "camel.Case-_Punctuation!",
-    lowercase,
-    uppercase,
-    lowercase,
-    dlm
-);
-
-StringCases.convert("string.Cases-_FTW!", camel_case_punc, StringCases.SNAKE_CASE)
-# Output: "string_cases_ftw"
 ```
 
 For more examples see the
@@ -185,11 +167,10 @@ taken in [kasechange](https://github.com/pearxteam/kasechange). However, this
 library is easier to read and more succinct. It is also more generic, using a
 union type to allow upper/lower/title/any cases and being
 able to split tokens either on a delimiter or to match tokens based on a pattern.
-The delimiter can be [specified as a character, collection of characters, string,
-regular expression, or a function](https://docs.julialang.org/en/v1/base/strings/#Base.split)
-and a pattern can be specified with a regex. Default pattern regexes are
-provided that allow you to match based on case changes, acronyms, numbers, and
-combinations thereof.
+The delimiter can be specified as a character or a string and a pattern can be
+specified with a regex. Default pattern regexes are provided that allow you to
+match based on case changes, acronyms, numbers, and combinations thereof with
+the constructor.
 Compared to [kasechange](https://github.com/pearxteam/kasechange) which is using
 a [boolean for allowing only upper/lower cases](https://github.com/pearxteam/kasechange/blob/6c274238ddae339b7cd0d50751855b710facf223/src/commonMain/kotlin/net/pearx/kasechange/formatter/CaseFormatterConfigurable.kt#L15)
 and only allows specifying string splits via

diff --git a/src/commonstringcases.jl b/src/commonstringcases.jl
@@ -1,19 +1,19 @@
 # Delimiter String Cases
-const TITLE_CASE = DelimiterStringCase("Title Case", lowercase, titlecase, titlecase, " ")
+const TITLE_CASE = DelimiterStringCase("Title Case", lowercase, titlecase, titlecase, ' ')
 const LENIENT_TITLE_CASE =
-    DelimiterStringCase("LEnient Title Case", anycase, titlecase, titlecase, " ")
+    DelimiterStringCase("LEnient Title Case", anycase, titlecase, titlecase, ' ')
 const SENTENCE_CASE =
-    DelimiterStringCase("Sentence case", lowercase, lowercase, uppercase, " ")
-const SNAKE_CASE = DelimiterStringCase("snake_case", lowercase, lowercase, lowercase, "_")
+    DelimiterStringCase("Sentence case", lowercase, lowercase, uppercase, ' ')
+const SNAKE_CASE = DelimiterStringCase("snake_case", lowercase, lowercase, lowercase, '_')
 const SCREAMING_SNAKE_CASE =
-    DelimiterStringCase("SCREAMING_SNAKE_CASE", uppercase, uppercase, uppercase, "_")
-const KEBAB_CASE = DelimiterStringCase("kebab-case", lowercase, lowercase, lowercase, "-")
-const COBOL_CASE = DelimiterStringCase("COBOL-CASE", uppercase, uppercase, uppercase, "-")
-const ADA_CASE = DelimiterStringCase("Ada_Case", lowercase, uppercase, uppercase, "_")
-const TRAIN_CASE = DelimiterStringCase("Train-Case", lowercase, uppercase, uppercase, "-")
-const SPACE_CASE = DelimiterStringCase("space case", anycase, anycase, anycase, " ")
-const PATH_CASE = DelimiterStringCase("path/case", anycase, anycase, anycase, "/")
-const DOT_CASE = DelimiterStringCase("dot.case", anycase, anycase, anycase, ".")
+    DelimiterStringCase("SCREAMING_SNAKE_CASE", uppercase, uppercase, uppercase, '_')
+const KEBAB_CASE = DelimiterStringCase("kebab-case", lowercase, lowercase, lowercase, '-')
+const COBOL_CASE = DelimiterStringCase("COBOL-CASE", uppercase, uppercase, uppercase, '-')
+const ADA_CASE = DelimiterStringCase("Ada_Case", lowercase, uppercase, uppercase, '_')
+const TRAIN_CASE = DelimiterStringCase("Train-Case", lowercase, uppercase, uppercase, '-')
+const SPACE_CASE = DelimiterStringCase("space case", anycase, anycase, anycase, ' ')
+const PATH_CASE = DelimiterStringCase("path/case", anycase, anycase, anycase, '/')
+const DOT_CASE = DelimiterStringCase("dot.case", anycase, anycase, anycase, '.')
 
 # Pattern String Cases
 const FLAT_CASE = PatternStringCase("flatcase", lowercase, lowercase, lowercase)

diff --git a/src/stringcases.jl b/src/stringcases.jl
@@ -41,10 +41,7 @@ struct DelimiterStringCase{
     tokencase::TC
     tokencasefirst::TCF
     strcasefirst::SCF
-
-    # delimiter to identify how to split a token
-    # See: Base.split's dlm documentation for more info
-    dlm::Any
+    dlm::Union{AbstractChar,AbstractString}
 end
 
 # String Case which determines how to split based on token patterns
@@ -66,7 +63,7 @@ struct PatternStringCase{
         tokencase::TC,
         tokencasefirst::TCF,
         strcasefirst::SCF,
-        pat::Regex
+        pat::Regex,
     ) where {TC,TCF,SCF}
         return new{TC,TCF,SCF}(name, tokencase, tokencasefirst, strcasefirst, pat)
     end
@@ -239,7 +236,7 @@ struct PatternStringCase{
 end
 
 function split(s::AbstractString, dsc::DelimiterStringCase)
-    return Base.split(s, dsc.dlm, keepempty=false)
+    return Base.split(s, dsc.dlm, keepempty = false)
 end
 
 function split(s::AbstractString, psc::PatternStringCase)
@@ -280,16 +277,12 @@ end
 # TODO: add isvalid, validated_tokens, and correct_tokens as output to an
 # encompassing validate function,
 # this helps avoid redundant splits if we run validate inside convert
-# TODO: validate regex for possible characters in token
-# have a validate for delimiter and one for pattern
-# pattern is same as delimiter but it makes sure that all the letters are
-# captured in the regex
-# add example of delimiter regex on all of punctuation
 function validate(s::AbstractString, sc::AbstractStringCase)
-    # Split string based on dlm or pat
+    # Split string based on the string case
     tokens = split(s, sc)
 
-    is_valid_str = true
+    # Validate split tokens with respect to the original string
+    is_valid_str = s == join(tokens, sc)
 
     # Check case for all but first token
     correct_tokens = Vector{SubString{typeof(s)}}()