focaccia/folding/
full.rs

1use core::cmp::Ordering;
2
3use crate::folding::mapping::{lookup, Mode};
4
5/// Compare two strings with Full Unicode case folding.
6///
7/// This function is implemented with a lookup table generated from Unicode case
8/// folding tables.
9///
10/// # Examples
11///
12/// ```
13/// # use core::cmp::Ordering;
14/// # use focaccia::unicode_full_casecmp;
15/// assert_eq!(unicode_full_casecmp("MASSE", "Maße"), Ordering::Equal);
16/// assert_eq!(unicode_full_casecmp("São Paulo", "Sao Paulo"), Ordering::Greater);
17/// ```
18#[inline]
19#[must_use]
20pub fn casecmp(left: &str, right: &str) -> Ordering {
21    let left = left.chars().flat_map(|c| lookup(c, Mode::Full));
22    let right = right.chars().flat_map(|c| lookup(c, Mode::Full));
23    left.cmp(right)
24}
25
26/// Check two strings for equality with Full Unicode case folding.
27///
28/// This function is implemented with a lookup table generated from Unicode case
29/// folding tables.
30///
31/// # Examples
32///
33/// ```
34/// # use focaccia::unicode_full_case_eq;
35/// assert!(unicode_full_case_eq("MASSE", "Maße"));
36/// assert!(!unicode_full_case_eq("São Paulo", "Sao Paulo"));
37/// ```
38#[inline]
39#[must_use]
40pub fn case_eq(left: &str, right: &str) -> bool {
41    let left = left.chars().flat_map(|c| lookup(c, Mode::Full));
42    let right = right.chars().flat_map(|c| lookup(c, Mode::Full));
43    left.eq(right)
44}
45
#[cfg(test)]
mod tests {
    use core::cmp::Ordering;

    use super::{case_eq, casecmp};

    // The empty string is equal only to itself and orders before any
    // non-empty string, ASCII or not.
    #[test]
    fn empty_string() {
        assert!(case_eq("", ""));
        assert_eq!(casecmp("", ""), Ordering::Equal);

        assert!(!case_eq("", "rake"));
        assert_eq!(casecmp("", "rake"), Ordering::Less);

        assert!(!case_eq("rake", ""));
        assert_eq!(casecmp("rake", ""), Ordering::Greater);

        assert!(!case_eq("", "São Paulo"));
        assert_eq!(casecmp("", "São Paulo"), Ordering::Less);

        assert!(!case_eq("São Paulo", ""));
        assert_eq!(casecmp("São Paulo", ""), Ordering::Greater);
    }

    // U+FFFD compares equal to itself, and a trailing suffix makes the longer
    // string unequal and greater.
    #[test]
    fn unicode_replacement_character() {
        assert!(case_eq("\u{FFFD}", "\u{FFFD}"));
        assert_eq!(casecmp("\u{FFFD}", "\u{FFFD}"), Ordering::Equal);

        assert!(!case_eq("\u{FFFD}", "\u{FFFD}yam"));
        assert_eq!(casecmp("\u{FFFD}", "\u{FFFD}yam"), Ordering::Less);

        assert!(!case_eq("\u{FFFD}yam", "\u{FFFD}"));
        assert_eq!(casecmp("\u{FFFD}yam", "\u{FFFD}"), Ordering::Greater);
    }

    // ASCII inputs: case differences are ignored, length differences are not.
    #[test]
    fn compares_symbols_without_regard_to_case() {
        assert!(!case_eq("abcdef", "abcde"));
        assert!(case_eq("aBcDeF", "abcdef"));
        assert!(!case_eq("abcdef", "abcdefg"));
        assert!(case_eq("abcdef", "ABCDEF"));

        assert_eq!(casecmp("abcdef", "abcde"), Ordering::Greater);
        assert_eq!(casecmp("aBcDeF", "abcdef"), Ordering::Equal);
        assert_eq!(casecmp("abcdef", "abcdefg"), Ordering::Less);
        assert_eq!(casecmp("abcdef", "ABCDEF"), Ordering::Equal);

        // The same comparisons viewed as the -1 / 0 / 1 integers that
        // `Ordering` casts to.
        assert_eq!(casecmp("abcdef", "abcde") as i32, 1);
        assert_eq!(casecmp("aBcDeF", "abcdef") as i32, 0);
        assert_eq!(casecmp("abcdef", "abcdefg") as i32, -1);
        assert_eq!(casecmp("abcdef", "ABCDEF") as i32, 0);
    }

    // Full folding may expand one character into several: `ß` folds to `ss`,
    // so these strings match even though their `char` counts differ.
    #[test]
    fn full_folding_expands_sharp_s() {
        assert!(case_eq("MASSE", "Maße"));
        assert!(case_eq("Maße", "MASSE"));

        assert_eq!(casecmp("MASSE", "Maße"), Ordering::Equal);
        assert_eq!(casecmp("Maße", "MASSE"), Ordering::Equal);
    }

    // Letters that differ by more than case (different diacritics) must not
    // be folded together.
    #[test]
    fn non_ascii_chars_that_are_not_fold_eq_are_not_eq() {
        // -- UTF-8 --
        let upper_a_tilde = "Ã";
        let lower_a_tilde = "ã";
        let upper_a_umlaut = "Ä";
        let lower_a_umlaut = "ä";

        // From `spec/core/symbol/casecmp_spec.rb`:
        //
        // ```ruby
        // lower_a_tilde.casecmp?(lower_a_umlaut).should_not == true
        // lower_a_umlaut.casecmp?(lower_a_tilde).should_not == true
        // upper_a_tilde.casecmp?(upper_a_umlaut).should_not == true
        // upper_a_umlaut.casecmp?(upper_a_tilde).should_not == true
        // ```
        assert!(!case_eq(lower_a_tilde, lower_a_umlaut));
        assert!(!case_eq(lower_a_umlaut, lower_a_tilde));
        assert!(!case_eq(upper_a_tilde, upper_a_umlaut));
        assert!(!case_eq(upper_a_umlaut, upper_a_tilde));

        assert_ne!(casecmp(lower_a_tilde, lower_a_umlaut), Ordering::Equal);
        assert_ne!(casecmp(lower_a_umlaut, lower_a_tilde), Ordering::Equal);
        assert_ne!(casecmp(upper_a_tilde, upper_a_umlaut), Ordering::Equal);
        assert_ne!(casecmp(upper_a_umlaut, upper_a_tilde), Ordering::Equal);
    }

    // Upper- and lowercase forms of the same non-ASCII letter fold to the
    // same value, in either argument order.
    #[test]
    fn does_case_mapping_for_unicode_chars() {
        // -- UTF-8 --
        let upper_a_tilde = "Ã";
        let lower_a_tilde = "ã";
        let upper_a_umlaut = "Ä";
        let lower_a_umlaut = "ä";

        // From `spec/core/symbol/casecmp_spec.rb`:
        //
        // ```ruby
        // upper_a_tilde.casecmp?(lower_a_tilde).should == true
        // upper_a_umlaut.casecmp?(lower_a_umlaut).should == true
        // lower_a_tilde.casecmp?(upper_a_tilde).should == true
        // lower_a_umlaut.casecmp?(upper_a_umlaut).should == true
        // ```
        assert!(case_eq(upper_a_tilde, lower_a_tilde));
        assert!(case_eq(upper_a_umlaut, lower_a_umlaut));
        assert!(case_eq(lower_a_tilde, upper_a_tilde));
        assert!(case_eq(lower_a_umlaut, upper_a_umlaut));

        assert_eq!(casecmp(upper_a_tilde, lower_a_tilde), Ordering::Equal);
        assert_eq!(casecmp(upper_a_umlaut, lower_a_umlaut), Ordering::Equal);
        assert_eq!(casecmp(lower_a_tilde, upper_a_tilde), Ordering::Equal);
        assert_eq!(casecmp(lower_a_umlaut, upper_a_umlaut), Ordering::Equal);
    }
}