Skip to content

Commit 3058381

Browse files
committed
Add String::make_(lower|upper)case APIs
1 parent 362211d commit 3058381

7 files changed

Lines changed: 444 additions & 155 deletions

File tree

library/alloc/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@
147147
#![feature(slice_ptr_get)]
148148
#![feature(slice_range)]
149149
#![feature(std_internals)]
150+
#![feature(str_internals)]
150151
#![feature(temporary_niche_types)]
151152
#![feature(titlecase)]
152153
#![feature(transmutability)]

library/alloc/src/str.rs

Lines changed: 153 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -333,155 +333,6 @@ impl str {
333333
result
334334
}
335335

336-
/// Returns the lowercase equivalent of this string slice, as a new [`String`].
337-
///
338-
/// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
339-
/// `Lowercase`.
340-
///
341-
/// Since some characters can expand into multiple characters when changing
342-
/// the case, this function returns a [`String`] instead of modifying the
343-
/// parameter in-place.
344-
///
345-
/// # Examples
346-
///
347-
/// Basic usage:
348-
///
349-
/// ```
350-
/// let s = "HELLO";
351-
///
352-
/// assert_eq!("hello", s.to_lowercase());
353-
/// ```
354-
///
355-
/// A tricky example, with sigma:
356-
///
357-
/// ```
358-
/// let sigma = "Σ";
359-
///
360-
/// assert_eq!("σ", sigma.to_lowercase());
361-
///
362-
/// // but at the end of a word, it's ς, not σ:
363-
/// let odysseus = "ὈΔΥΣΣΕΎΣ";
364-
///
365-
/// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase());
366-
/// ```
367-
///
368-
/// Languages without case are not changed:
369-
///
370-
/// ```
371-
/// let new_year = "农历新年";
372-
///
373-
/// assert_eq!(new_year, new_year.to_lowercase());
374-
/// ```
375-
#[cfg(not(no_global_oom_handling))]
376-
#[rustc_allow_incoherent_impl]
377-
#[must_use = "this returns the lowercase string as a new String, \
378-
without modifying the original"]
379-
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
380-
pub fn to_lowercase(&self) -> String {
381-
let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_lowercase);
382-
383-
let prefix_len = s.len();
384-
385-
for (i, c) in rest.char_indices() {
386-
if c == 'Σ' {
387-
// Σ maps to σ, except at the end of a word where it maps to ς.
388-
// This is the only conditional (contextual) but language-independent mapping
389-
// in `SpecialCasing.txt`,
390-
// so hard-code it rather than have a generic "condition" mechanism.
391-
// See https://github.com/rust-lang/rust/issues/26035
392-
let sigma_lowercase = map_uppercase_sigma(self, prefix_len + i);
393-
s.push(sigma_lowercase);
394-
} else {
395-
match conversions::to_lower(c) {
396-
[a, '\0', _] => s.push(a),
397-
[a, b, '\0'] => {
398-
s.push(a);
399-
s.push(b);
400-
}
401-
[a, b, c] => {
402-
s.push(a);
403-
s.push(b);
404-
s.push(c);
405-
}
406-
}
407-
}
408-
}
409-
return s;
410-
411-
fn map_uppercase_sigma(from: &str, i: usize) -> char {
412-
// See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
413-
// for the definition of `Final_Sigma`.
414-
let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
415-
&& !case_ignorable_then_cased(from[i + const { 'Σ'.len_utf8() }..].chars());
416-
if is_word_final { 'ς' } else { 'σ' }
417-
}
418-
419-
fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
420-
match iter.skip_while(|&c| c.is_case_ignorable()).next() {
421-
Some(c) => c.is_cased(),
422-
None => false,
423-
}
424-
}
425-
}
426-
427-
/// Returns the uppercase equivalent of this string slice, as a new [`String`].
428-
///
429-
/// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
430-
/// `Uppercase`.
431-
///
432-
/// Since some characters can expand into multiple characters when changing
433-
/// the case, this function returns a [`String`] instead of modifying the
434-
/// parameter in-place.
435-
///
436-
/// # Examples
437-
///
438-
/// Basic usage:
439-
///
440-
/// ```
441-
/// let s = "hello";
442-
///
443-
/// assert_eq!("HELLO", s.to_uppercase());
444-
/// ```
445-
///
446-
/// Scripts without case are not changed:
447-
///
448-
/// ```
449-
/// let new_year = "农历新年";
450-
///
451-
/// assert_eq!(new_year, new_year.to_uppercase());
452-
/// ```
453-
///
454-
/// One character can become multiple:
455-
/// ```
456-
/// let s = "tschüß";
457-
///
458-
/// assert_eq!("TSCHÜSS", s.to_uppercase());
459-
/// ```
460-
#[cfg(not(no_global_oom_handling))]
461-
#[rustc_allow_incoherent_impl]
462-
#[must_use = "this returns the uppercase string as a new String, \
463-
without modifying the original"]
464-
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
465-
pub fn to_uppercase(&self) -> String {
466-
let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_uppercase);
467-
468-
for c in rest.chars() {
469-
match conversions::to_upper(c) {
470-
[a, '\0', _] => s.push(a),
471-
[a, b, '\0'] => {
472-
s.push(a);
473-
s.push(b);
474-
}
475-
[a, b, c] => {
476-
s.push(a);
477-
s.push(b);
478-
s.push(c);
479-
}
480-
}
481-
}
482-
s
483-
}
484-
485336
/// Converts a [`Box<str>`] into a [`String`] without copying or allocating.
486337
///
487338
/// # Examples
@@ -710,3 +561,156 @@ unsafe fn replace_ascii(utf8_bytes: &[u8], from: u8, to: u8) -> String {
710561
// SAFETY: We replaced ascii with ascii on valid utf8 strings.
711562
unsafe { String::from_utf8_unchecked(result) }
712563
}
564+
565+
// Case changes
566+
567+
pub(crate) fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
568+
match iter.skip_while(|&c| c.is_case_ignorable()).next() {
569+
Some(c) => c.is_cased(),
570+
None => false,
571+
}
572+
}
573+
574+
impl str {
575+
/// Returns the lowercase equivalent of this string slice, as a new [`String`].
576+
///
577+
/// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
578+
/// `Lowercase`.
579+
///
580+
/// Since some characters can expand into multiple characters when changing
581+
/// the case, this function returns a [`String`] instead of modifying the
582+
/// parameter in-place.
583+
///
584+
/// # Examples
585+
///
586+
/// Basic usage:
587+
///
588+
/// ```
589+
/// let s = "HELLO";
590+
///
591+
/// assert_eq!("hello", s.to_lowercase());
592+
/// ```
593+
///
594+
/// A tricky example, with sigma:
595+
///
596+
/// ```
597+
/// let sigma = "Σ";
598+
///
599+
/// assert_eq!("σ", sigma.to_lowercase());
600+
///
601+
/// // but at the end of a word, it's ς, not σ:
602+
/// let odysseus = "ὈΔΥΣΣΕΎΣ";
603+
///
604+
/// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase());
605+
/// ```
606+
///
607+
/// Languages without case are not changed:
608+
///
609+
/// ```
610+
/// let new_year = "农历新年";
611+
///
612+
/// assert_eq!(new_year, new_year.to_lowercase());
613+
/// ```
614+
#[cfg(not(no_global_oom_handling))]
615+
#[rustc_allow_incoherent_impl]
616+
#[must_use = "this returns the lowercase string as a new String, \
617+
without modifying the original"]
618+
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
619+
pub fn to_lowercase(&self) -> String {
620+
let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_lowercase);
621+
622+
let prefix_len = s.len();
623+
624+
for (i, c) in rest.char_indices() {
625+
if c == 'Σ' {
626+
// Σ maps to σ, except at the end of a word where it maps to ς.
627+
// This is the only conditional (contextual) but language-independent mapping
628+
// in `SpecialCasing.txt`,
629+
// so hard-code it rather than have a generic "condition" mechanism.
630+
// See https://github.com/rust-lang/rust/issues/26035
631+
let sigma_lowercase = map_uppercase_sigma(self, prefix_len + i);
632+
s.push(sigma_lowercase);
633+
} else {
634+
match conversions::to_lower(c) {
635+
[a, '\0', _] => s.push(a),
636+
[a, b, '\0'] => {
637+
s.push(a);
638+
s.push(b);
639+
}
640+
[a, b, c] => {
641+
s.push(a);
642+
s.push(b);
643+
s.push(c);
644+
}
645+
}
646+
}
647+
}
648+
return s;
649+
650+
fn map_uppercase_sigma(from: &str, i: usize) -> char {
651+
// See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
652+
// for the definition of `Final_Sigma`.
653+
let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
654+
&& !case_ignorable_then_cased(from[i + const { 'Σ'.len_utf8() }..].chars());
655+
if is_word_final { 'ς' } else { 'σ' }
656+
}
657+
}
658+
659+
/// Returns the uppercase equivalent of this string slice, as a new [`String`].
660+
///
661+
/// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
662+
/// `Uppercase`.
663+
///
664+
/// Since some characters can expand into multiple characters when changing
665+
/// the case, this function returns a [`String`] instead of modifying the
666+
/// parameter in-place.
667+
///
668+
/// # Examples
669+
///
670+
/// Basic usage:
671+
///
672+
/// ```
673+
/// let s = "hello";
674+
///
675+
/// assert_eq!("HELLO", s.to_uppercase());
676+
/// ```
677+
///
678+
/// Scripts without case are not changed:
679+
///
680+
/// ```
681+
/// let new_year = "农历新年";
682+
///
683+
/// assert_eq!(new_year, new_year.to_uppercase());
684+
/// ```
685+
///
686+
/// One character can become multiple:
687+
/// ```
688+
/// let s = "tschüß";
689+
///
690+
/// assert_eq!("TSCHÜSS", s.to_uppercase());
691+
/// ```
692+
#[cfg(not(no_global_oom_handling))]
693+
#[rustc_allow_incoherent_impl]
694+
#[must_use = "this returns the uppercase string as a new String, \
695+
without modifying the original"]
696+
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
697+
pub fn to_uppercase(&self) -> String {
698+
let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_uppercase);
699+
700+
for c in rest.chars() {
701+
match conversions::to_upper(c) {
702+
[a, '\0', _] => s.push(a),
703+
[a, b, '\0'] => {
704+
s.push(a);
705+
s.push(b);
706+
}
707+
[a, b, c] => {
708+
s.push(a);
709+
s.push(b);
710+
s.push(c);
711+
}
712+
}
713+
}
714+
s
715+
}
716+
}

0 commit comments

Comments
 (0)