Skip to content

Commit

Permalink
Auto merge of rust-lang#124773 - Marcondiro:master, r=joboet
Browse files Browse the repository at this point in the history
fix rust-lang#124714 str.to_lowercase sigma handling

Hello,
This PR fixes issue rust-lang#124714 about 'Σ' handling in `str.to_lowercase()`.
The fix consists of considering the full original string during 'Σ' handling, instead of just the substring left after the optimized ASCII handling.
A new test is added to avoid regression.
Thanks!
  • Loading branch information
bors committed May 9, 2024
2 parents 24f4b51 + bb9678a commit 382db29
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
10 changes: 6 additions & 4 deletions alloc/src/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,14 +375,16 @@ impl str {
// Safety: We have written only valid ASCII to our vec
let mut s = unsafe { String::from_utf8_unchecked(out) };

for (i, c) in rest[..].char_indices() {
for (i, c) in rest.char_indices() {
if c == 'Σ' {
// Σ maps to σ, except at the end of a word where it maps to ς.
// This is the only conditional (contextual) but language-independent mapping
// in `SpecialCasing.txt`,
// so hard-code it rather than have a generic "condition" mechanism.
// See https://github.com/rust-lang/rust/issues/26035
map_uppercase_sigma(rest, i, &mut s)
let out_len = self.len() - rest.len();
let sigma_lowercase = map_uppercase_sigma(&self, i + out_len);
s.push(sigma_lowercase);
} else {
match conversions::to_lower(c) {
[a, '\0', _] => s.push(a),
Expand All @@ -400,13 +402,13 @@ impl str {
}
return s;

fn map_uppercase_sigma(from: &str, i: usize, to: &mut String) {
fn map_uppercase_sigma(from: &str, i: usize) -> char {
// See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
// for the definition of `Final_Sigma`.
debug_assert!('Σ'.len_utf8() == 2);
let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
&& !case_ignorable_then_cased(from[i + 2..].chars());
to.push_str(if is_word_final { "ς" } else { "σ" });
if is_word_final { 'ς' } else { 'σ' }
}

fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
Expand Down
3 changes: 3 additions & 0 deletions alloc/tests/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1848,6 +1848,9 @@ fn to_lowercase() {
assert_eq!("ΑΣ'Α".to_lowercase(), "ασ'α");
assert_eq!("ΑΣ''Α".to_lowercase(), "ασ''α");

// https://github.com/rust-lang/rust/issues/124714
assert_eq!("abcdefghijklmnopΣ".to_lowercase(), "abcdefghijklmnopς");

// a really long string that has its lowercase form
// even longer. this tests that implementations don't assume
// an incorrect upper bound on allocations
Expand Down

0 comments on commit 382db29

Please sign in to comment.