@@ -333,155 +333,6 @@ impl str {
333333 result
334334 }
335335
336- /// Returns the lowercase equivalent of this string slice, as a new [`String`].
337- ///
338- /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
339- /// `Lowercase`.
340- ///
341- /// Since some characters can expand into multiple characters when changing
342- /// the case, this function returns a [`String`] instead of modifying the
343- /// parameter in-place.
344- ///
345- /// # Examples
346- ///
347- /// Basic usage:
348- ///
349- /// ```
350- /// let s = "HELLO";
351- ///
352- /// assert_eq!("hello", s.to_lowercase());
353- /// ```
354- ///
355- /// A tricky example, with sigma:
356- ///
357- /// ```
358- /// let sigma = "Σ";
359- ///
360- /// assert_eq!("σ", sigma.to_lowercase());
361- ///
362- /// // but at the end of a word, it's ς, not σ:
363- /// let odysseus = "ὈΔΥΣΣΕΎΣ";
364- ///
365- /// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase());
366- /// ```
367- ///
368- /// Languages without case are not changed:
369- ///
370- /// ```
371- /// let new_year = "农历新年";
372- ///
373- /// assert_eq!(new_year, new_year.to_lowercase());
374- /// ```
375- #[ cfg( not( no_global_oom_handling) ) ]
376- #[ rustc_allow_incoherent_impl]
377- #[ must_use = "this returns the lowercase string as a new String, \
378- without modifying the original"]
379- #[ stable( feature = "unicode_case_mapping" , since = "1.2.0" ) ]
380- pub fn to_lowercase ( & self ) -> String {
381- let ( mut s, rest) = convert_while_ascii ( self , u8:: to_ascii_lowercase) ;
382-
383- let prefix_len = s. len ( ) ;
384-
385- for ( i, c) in rest. char_indices ( ) {
386- if c == 'Σ' {
387- // Σ maps to σ, except at the end of a word where it maps to ς.
388- // This is the only conditional (contextual) but language-independent mapping
389- // in `SpecialCasing.txt`,
390- // so hard-code it rather than have a generic "condition" mechanism.
391- // See https://github.com/rust-lang/rust/issues/26035
392- let sigma_lowercase = map_uppercase_sigma ( self , prefix_len + i) ;
393- s. push ( sigma_lowercase) ;
394- } else {
395- match conversions:: to_lower ( c) {
396- [ a, '\0' , _] => s. push ( a) ,
397- [ a, b, '\0' ] => {
398- s. push ( a) ;
399- s. push ( b) ;
400- }
401- [ a, b, c] => {
402- s. push ( a) ;
403- s. push ( b) ;
404- s. push ( c) ;
405- }
406- }
407- }
408- }
409- return s;
410-
411- fn map_uppercase_sigma ( from : & str , i : usize ) -> char {
412- // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
413- // for the definition of `Final_Sigma`.
414- let is_word_final = case_ignorable_then_cased ( from[ ..i] . chars ( ) . rev ( ) )
415- && !case_ignorable_then_cased ( from[ i + const { 'Σ' . len_utf8 ( ) } ..] . chars ( ) ) ;
416- if is_word_final { 'ς' } else { 'σ' }
417- }
418-
419- fn case_ignorable_then_cased < I : Iterator < Item = char > > ( iter : I ) -> bool {
420- match iter. skip_while ( |& c| c. is_case_ignorable ( ) ) . next ( ) {
421- Some ( c) => c. is_cased ( ) ,
422- None => false ,
423- }
424- }
425- }
426-
427- /// Returns the uppercase equivalent of this string slice, as a new [`String`].
428- ///
429- /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
430- /// `Uppercase`.
431- ///
432- /// Since some characters can expand into multiple characters when changing
433- /// the case, this function returns a [`String`] instead of modifying the
434- /// parameter in-place.
435- ///
436- /// # Examples
437- ///
438- /// Basic usage:
439- ///
440- /// ```
441- /// let s = "hello";
442- ///
443- /// assert_eq!("HELLO", s.to_uppercase());
444- /// ```
445- ///
446- /// Scripts without case are not changed:
447- ///
448- /// ```
449- /// let new_year = "农历新年";
450- ///
451- /// assert_eq!(new_year, new_year.to_uppercase());
452- /// ```
453- ///
454- /// One character can become multiple:
455- /// ```
456- /// let s = "tschüß";
457- ///
458- /// assert_eq!("TSCHÜSS", s.to_uppercase());
459- /// ```
460- #[ cfg( not( no_global_oom_handling) ) ]
461- #[ rustc_allow_incoherent_impl]
462- #[ must_use = "this returns the uppercase string as a new String, \
463- without modifying the original"]
464- #[ stable( feature = "unicode_case_mapping" , since = "1.2.0" ) ]
465- pub fn to_uppercase ( & self ) -> String {
466- let ( mut s, rest) = convert_while_ascii ( self , u8:: to_ascii_uppercase) ;
467-
468- for c in rest. chars ( ) {
469- match conversions:: to_upper ( c) {
470- [ a, '\0' , _] => s. push ( a) ,
471- [ a, b, '\0' ] => {
472- s. push ( a) ;
473- s. push ( b) ;
474- }
475- [ a, b, c] => {
476- s. push ( a) ;
477- s. push ( b) ;
478- s. push ( c) ;
479- }
480- }
481- }
482- s
483- }
484-
485336 /// Converts a [`Box<str>`] into a [`String`] without copying or allocating.
486337 ///
487338 /// # Examples
@@ -710,3 +561,156 @@ unsafe fn replace_ascii(utf8_bytes: &[u8], from: u8, to: u8) -> String {
710561 // SAFETY: We replaced ascii with ascii on valid utf8 strings.
711562 unsafe { String :: from_utf8_unchecked ( result) }
712563}
564+
565+ // Case changes
566+
567+ pub ( crate ) fn case_ignorable_then_cased < I : Iterator < Item = char > > ( iter : I ) -> bool {
568+ match iter. skip_while ( |& c| c. is_case_ignorable ( ) ) . next ( ) {
569+ Some ( c) => c. is_cased ( ) ,
570+ None => false ,
571+ }
572+ }
573+
574+ impl str {
575+ /// Returns the lowercase equivalent of this string slice, as a new [`String`].
576+ ///
577+ /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
578+ /// `Lowercase`.
579+ ///
580+ /// Since some characters can expand into multiple characters when changing
581+ /// the case, this function returns a [`String`] instead of modifying the
582+ /// parameter in-place.
583+ ///
584+ /// # Examples
585+ ///
586+ /// Basic usage:
587+ ///
588+ /// ```
589+ /// let s = "HELLO";
590+ ///
591+ /// assert_eq!("hello", s.to_lowercase());
592+ /// ```
593+ ///
594+ /// A tricky example, with sigma:
595+ ///
596+ /// ```
597+ /// let sigma = "Σ";
598+ ///
599+ /// assert_eq!("σ", sigma.to_lowercase());
600+ ///
601+ /// // but at the end of a word, it's ς, not σ:
602+ /// let odysseus = "ὈΔΥΣΣΕΎΣ";
603+ ///
604+ /// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase());
605+ /// ```
606+ ///
607+ /// Languages without case are not changed:
608+ ///
609+ /// ```
610+ /// let new_year = "农历新年";
611+ ///
612+ /// assert_eq!(new_year, new_year.to_lowercase());
613+ /// ```
614+ #[ cfg( not( no_global_oom_handling) ) ]
615+ #[ rustc_allow_incoherent_impl]
616+ #[ must_use = "this returns the lowercase string as a new String, \
617+ without modifying the original"]
618+ #[ stable( feature = "unicode_case_mapping" , since = "1.2.0" ) ]
619+ pub fn to_lowercase ( & self ) -> String {
620+ let ( mut s, rest) = convert_while_ascii ( self , u8:: to_ascii_lowercase) ;
621+
622+ let prefix_len = s. len ( ) ;
623+
624+ for ( i, c) in rest. char_indices ( ) {
625+ if c == 'Σ' {
626+ // Σ maps to σ, except at the end of a word where it maps to ς.
627+ // This is the only conditional (contextual) but language-independent mapping
628+ // in `SpecialCasing.txt`,
629+ // so hard-code it rather than have a generic "condition" mechanism.
630+ // See https://github.com/rust-lang/rust/issues/26035
631+ let sigma_lowercase = map_uppercase_sigma ( self , prefix_len + i) ;
632+ s. push ( sigma_lowercase) ;
633+ } else {
634+ match conversions:: to_lower ( c) {
635+ [ a, '\0' , _] => s. push ( a) ,
636+ [ a, b, '\0' ] => {
637+ s. push ( a) ;
638+ s. push ( b) ;
639+ }
640+ [ a, b, c] => {
641+ s. push ( a) ;
642+ s. push ( b) ;
643+ s. push ( c) ;
644+ }
645+ }
646+ }
647+ }
648+ return s;
649+
650+ fn map_uppercase_sigma ( from : & str , i : usize ) -> char {
651+ // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
652+ // for the definition of `Final_Sigma`.
653+ let is_word_final = case_ignorable_then_cased ( from[ ..i] . chars ( ) . rev ( ) )
654+ && !case_ignorable_then_cased ( from[ i + const { 'Σ' . len_utf8 ( ) } ..] . chars ( ) ) ;
655+ if is_word_final { 'ς' } else { 'σ' }
656+ }
657+ }
658+
659+ /// Returns the uppercase equivalent of this string slice, as a new [`String`].
660+ ///
661+ /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
662+ /// `Uppercase`.
663+ ///
664+ /// Since some characters can expand into multiple characters when changing
665+ /// the case, this function returns a [`String`] instead of modifying the
666+ /// parameter in-place.
667+ ///
668+ /// # Examples
669+ ///
670+ /// Basic usage:
671+ ///
672+ /// ```
673+ /// let s = "hello";
674+ ///
675+ /// assert_eq!("HELLO", s.to_uppercase());
676+ /// ```
677+ ///
678+ /// Scripts without case are not changed:
679+ ///
680+ /// ```
681+ /// let new_year = "农历新年";
682+ ///
683+ /// assert_eq!(new_year, new_year.to_uppercase());
684+ /// ```
685+ ///
686+ /// One character can become multiple:
687+ /// ```
688+ /// let s = "tschüß";
689+ ///
690+ /// assert_eq!("TSCHÜSS", s.to_uppercase());
691+ /// ```
692+ #[ cfg( not( no_global_oom_handling) ) ]
693+ #[ rustc_allow_incoherent_impl]
694+ #[ must_use = "this returns the uppercase string as a new String, \
695+ without modifying the original"]
696+ #[ stable( feature = "unicode_case_mapping" , since = "1.2.0" ) ]
697+ pub fn to_uppercase ( & self ) -> String {
698+ let ( mut s, rest) = convert_while_ascii ( self , u8:: to_ascii_uppercase) ;
699+
700+ for c in rest. chars ( ) {
701+ match conversions:: to_upper ( c) {
702+ [ a, '\0' , _] => s. push ( a) ,
703+ [ a, b, '\0' ] => {
704+ s. push ( a) ;
705+ s. push ( b) ;
706+ }
707+ [ a, b, c] => {
708+ s. push ( a) ;
709+ s. push ( b) ;
710+ s. push ( c) ;
711+ }
712+ }
713+ }
714+ s
715+ }
716+ }
0 commit comments