From 7032e461689ad577154928b7886417f9f2672def Mon Sep 17 00:00:00 2001
From: Rafael Fourquet <fourquet.rafael@gmail.com>
Date: Tue, 22 Aug 2017 09:44:53 +0200
Subject: [PATCH 1/2] titlecase: chars not starting a word are converted to
 lowercase

A keyword argument `strict` is added to `titlecase` to control
whether to convert those chars to lowercase. The default value
is `true`, which makes this change breaking.
This is how some languages (e.g. Python) implement this function,
and is compatible with http://www.unicode.org/L2/L1999/99190.htm.
---
 NEWS.md                         |  6 ++++++
 base/strings/unicode.jl         | 15 ++++++++++-----
 stdlib/Unicode/test/runtests.jl |  7 +++++--
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index b512e2c1b363b..e0a45e111e003 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -365,6 +365,11 @@ This section lists changes that do not have deprecation warnings.
   * `findn(x::AbstractVector)` now return a 1-tuple with the vector of indices, to be
     consistent with higher order arrays ([#25365]).
 
+  * the default behavior of `titlecase` is changed such that characters not starting
+    a word are converted to lowercase; a new keyword argument `strict` is added which
+    allows to get the old behavior when it's `false`.
+
+
 Library improvements
 --------------------
 
@@ -918,6 +923,7 @@ Deprecated or removed
 
   * `findin(a, b)` has been deprecated in favor of `find(occursin(b), a)` ([#24673]).
 
+
 Command-line option changes
 ---------------------------
 
diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl
index 96eaa0d65342d..bdc1b54a1d5f7 100644
--- a/base/strings/unicode.jl
+++ b/base/strings/unicode.jl
@@ -649,19 +649,24 @@ julia> lowercase("STRINGS AND THINGS")
 lowercase(s::AbstractString) = map(lowercase, s)
 
 """
-    titlecase(s::AbstractString) -> String
+    titlecase(s::AbstractString; strict::Bool=true) -> String
 
-Capitalize the first character of each word in `s`.
+Capitalize the first character of each word in `s`;
+if `strict` is true, every other character is
+converted to lowercase, otherwise they are left unchanged.
 See also [`ucfirst`](@ref) to capitalize only the first
 character in `s`.
 
 # Examples
 ```jldoctest
-julia> titlecase("the Julia programming language")
+julia> titlecase("the JULIA programming language")
 "The Julia Programming Language"
+
+julia> titlecase("ISS - international space station", strict=false)
+"ISS - International Space Station"
 ```
 """
-function titlecase(s::AbstractString)
+function titlecase(s::AbstractString; strict::Bool=true)
     startword = true
     b = IOBuffer()
     for c in s
@@ -669,7 +674,7 @@ function titlecase(s::AbstractString)
             print(b, c)
             startword = true
         else
-            print(b, startword ? titlecase(c) : c)
+            print(b, startword ? titlecase(c) : strict ? lowercase(c) : c)
             startword = false
         end
     end
diff --git a/stdlib/Unicode/test/runtests.jl b/stdlib/Unicode/test/runtests.jl
index dacf266ccbaef..00f15d97925ba 100644
--- a/stdlib/Unicode/test/runtests.jl
+++ b/stdlib/Unicode/test/runtests.jl
@@ -366,8 +366,11 @@ end
     @testset "titlecase" begin
         @test titlecase('ǉ') == 'ǈ'
         @test titlecase("ǉubljana") == "ǈubljana"
-        @test titlecase("aBc ABC") == "ABc ABC"
-        @test titlecase("abcD   EFG\n\thij") == "AbcD   EFG\n\tHij"
+        @test titlecase("aBc ABC")               == "Abc Abc"
+        @test titlecase("aBc ABC", strict=true)  == "Abc Abc"
+        @test titlecase("aBc ABC", strict=false) == "ABc ABC"
+        @test titlecase("abcD   EFG\n\thij", strict=true)  == "Abcd   Efg\n\tHij"
+        @test titlecase("abcD   EFG\n\thij", strict=false) == "AbcD   EFG\n\tHij"
     end
 end
 

From f94ab0a73d62748cc99b9b23b78bb320fe10af99 Mon Sep 17 00:00:00 2001
From: Rafael Fourquet <fourquet.rafael@gmail.com>
Date: Tue, 22 Aug 2017 18:55:55 +0200
Subject: [PATCH 2/2] titlecase: all non-letters are considered word-separators

---
 NEWS.md                         | 10 +++++++---
 base/strings/unicode.jl         | 25 ++++++++++++++++++++++---
 stdlib/Unicode/src/Unicode.jl   |  2 +-
 stdlib/Unicode/test/runtests.jl |  5 ++++-
 4 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index e0a45e111e003..dfe7f1e3d4e76 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -365,9 +365,13 @@ This section lists changes that do not have deprecation warnings.
   * `findn(x::AbstractVector)` now return a 1-tuple with the vector of indices, to be
     consistent with higher order arrays ([#25365]).
 
-  * the default behavior of `titlecase` is changed such that characters not starting
-    a word are converted to lowercase; a new keyword argument `strict` is added which
-    allows to get the old behavior when it's `false`.
+  * the default behavior of `titlecase` is changed in two ways ([#23393]):
+    + characters not starting a word are converted to lowercase;
+      a new keyword argument `strict` is added, and passing
+      `strict=false` restores the old behavior.
+    + any character that is not a cased letter is treated as a word separator;
+      to get the old behavior (only "space" characters are considered as
+      word separators), use the keyword `wordsep=isspace`.
 
 
 Library improvements
diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl
index bdc1b54a1d5f7..8447a125601b1 100644
--- a/base/strings/unicode.jl
+++ b/base/strings/unicode.jl
@@ -384,6 +384,19 @@ function isupper(c::Char)
     cat == UTF8PROC_CATEGORY_LU || cat == UTF8PROC_CATEGORY_LT
 end
 
+"""
+    iscased(c::Char) -> Bool
+
+Tests whether a character is cased, i.e. is lower-, upper- or title-cased.
+"""
+function iscased(c::Char)
+    cat = category_code(c)
+    return cat == UTF8PROC_CATEGORY_LU ||
+           cat == UTF8PROC_CATEGORY_LT ||
+           cat == UTF8PROC_CATEGORY_LL
+end
+
+
 """
     isdigit(c::Char) -> Bool
 
@@ -649,11 +662,14 @@ julia> lowercase("STRINGS AND THINGS")
 lowercase(s::AbstractString) = map(lowercase, s)
 
 """
-    titlecase(s::AbstractString; strict::Bool=true) -> String
+    titlecase(s::AbstractString; [wordsep::Function], strict::Bool=true) -> String
 
 Capitalize the first character of each word in `s`;
 if `strict` is true, every other character is
 converted to lowercase, otherwise they are left unchanged.
+By default, every character that is not a cased (lower-, upper- or title-case)
+letter is considered as a word separator; a predicate can be passed as the
+`wordsep` keyword to determine which characters are word separators.
 See also [`ucfirst`](@ref) to capitalize only the first
 character in `s`.
 
@@ -664,13 +680,16 @@ julia> titlecase("the JULIA programming language")
 
 julia> titlecase("ISS - international space station", strict=false)
 "ISS - International Space Station"
+
+julia> titlecase("a-a b-b", wordsep = c->c==' ')
+"A-a B-b"
 ```
 """
-function titlecase(s::AbstractString; strict::Bool=true)
+function titlecase(s::AbstractString; wordsep::Function = !iscased, strict::Bool=true)
     startword = true
     b = IOBuffer()
     for c in s
-        if isspace(c)
+        if wordsep(c)
             print(b, c)
             startword = true
         else
diff --git a/stdlib/Unicode/src/Unicode.jl b/stdlib/Unicode/src/Unicode.jl
index 59077acb6a79c..e55a8f6cc39ef 100644
--- a/stdlib/Unicode/src/Unicode.jl
+++ b/stdlib/Unicode/src/Unicode.jl
@@ -7,7 +7,7 @@ module Unicode
 using Base.Unicode: normalize, graphemes, isassigned, textwidth, isvalid,
                     islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
                     iscntrl, ispunct, isspace, isprint, isgraph,
-                    lowercase, uppercase, titlecase, lcfirst, ucfirst
+                    lowercase, uppercase, titlecase, lcfirst, ucfirst, iscased
 
 export graphemes, textwidth, isvalid,
        islower, isupper, isalpha, isdigit, isxdigit, isnumeric, isalnum,
diff --git a/stdlib/Unicode/test/runtests.jl b/stdlib/Unicode/test/runtests.jl
index 00f15d97925ba..5a5b83eb12b87 100644
--- a/stdlib/Unicode/test/runtests.jl
+++ b/stdlib/Unicode/test/runtests.jl
@@ -2,7 +2,7 @@
 
 using Test
 using Unicode
-using Unicode: normalize, isassigned
+using Unicode: normalize, isassigned, iscased
 
 @testset "string normalization" begin
     # normalize (Unicode normalization etc.):
@@ -371,6 +371,9 @@ end
         @test titlecase("aBc ABC", strict=false) == "ABc ABC"
         @test titlecase("abcD   EFG\n\thij", strict=true)  == "Abcd   Efg\n\tHij"
         @test titlecase("abcD   EFG\n\thij", strict=false) == "AbcD   EFG\n\tHij"
+        @test titlecase("abc-def")                     == "Abc-Def"
+        @test titlecase("abc-def", wordsep = !iscased) == "Abc-Def"
+        @test titlecase("abc-def", wordsep = isspace)  == "Abc-def"
     end
 end