focaccia/folding/
turkic.rs

1use core::cmp::Ordering;
2
3use crate::folding::mapping::{lookup, Mode};
4
5/// Compare two strings with Full Unicode case folding for Turkic languages.
6///
7/// This function is implemented with a lookup table generated from Unicode case
8/// folding tables.
9///
10/// # Examples
11///
12/// ```
13/// # use core::cmp::Ordering;
14/// # use focaccia::unicode_full_turkic_casecmp;
15/// assert_eq!(unicode_full_turkic_casecmp("İstanbul", "istanbul"), Ordering::Equal);
16/// assert_ne!(unicode_full_turkic_casecmp("İstanbul", "Istanbul"), Ordering::Equal);
17/// ```
18///
19/// # Examples – Full
20///
21/// Turkic case folding is largely compatible with full Unicode case folding.
22///
23/// ```
24/// # use core::cmp::Ordering;
25/// # use focaccia::unicode_full_turkic_casecmp;
26/// assert_eq!(unicode_full_turkic_casecmp("MASSE", "Maße"), Ordering::Equal);
27/// assert_eq!(unicode_full_turkic_casecmp("São Paulo", "Sao Paulo"), Ordering::Greater);
28/// ```
29#[inline]
30#[must_use]
31pub fn casecmp(left: &str, right: &str) -> Ordering {
32    let left = left.chars().flat_map(|c| lookup(c, Mode::Turkic));
33    let right = right.chars().flat_map(|c| lookup(c, Mode::Turkic));
34    left.cmp(right)
35}
36
37/// Check two strings for equality with Full Unicode case folding for Turkic
38/// languages.
39///
40/// This function is implemented with a lookup table generated from Unicode case
41/// folding tables.
42///
43/// # Examples
44///
45/// ```
46/// # use focaccia::unicode_full_turkic_case_eq;
47/// assert!(unicode_full_turkic_case_eq("İstanbul", "istanbul"));
48/// assert!(!unicode_full_turkic_case_eq("İstanbul", "Istanbul"));
49/// ```
50///
51/// # Examples – Full
52///
53/// Turkic case folding is largely compatible with full Unicode case folding.
54///
55/// ```
56/// # use focaccia::unicode_full_turkic_case_eq;
57/// assert!(unicode_full_turkic_case_eq("MASSE", "Maße"));
58/// assert!(!unicode_full_turkic_case_eq("São Paulo", "Sao Paulo"));
59/// ```
60#[inline]
61#[must_use]
62pub fn case_eq(left: &str, right: &str) -> bool {
63    let left = left.chars().flat_map(|c| lookup(c, Mode::Turkic));
64    let right = right.chars().flat_map(|c| lookup(c, Mode::Turkic));
65    left.eq(right)
66}
67
/// Unit tests for Turkic-mode case-insensitive comparison.
///
/// Each test exercises both `case_eq` and `casecmp` so the two entry points
/// are kept in agreement.
#[cfg(test)]
mod tests {
    use core::cmp::Ordering;

    use super::{case_eq, casecmp};

    /// The empty string equals only itself and sorts before any non-empty
    /// string, whether the other side is ASCII or not.
    #[test]
    fn empty_string() {
        assert!(case_eq("", ""));
        assert_eq!(casecmp("", ""), Ordering::Equal);

        assert!(!case_eq("", "rake"));
        assert_eq!(casecmp("", "rake"), Ordering::Less);

        assert!(!case_eq("rake", ""));
        assert_eq!(casecmp("rake", ""), Ordering::Greater);

        assert!(!case_eq("", "São Paulo"));
        assert_eq!(casecmp("", "São Paulo"), Ordering::Less);

        assert!(!case_eq("São Paulo", ""));
        assert_eq!(casecmp("São Paulo", ""), Ordering::Greater);

        assert!(!case_eq("", "İstanbul"));
        assert_eq!(casecmp("", "İstanbul"), Ordering::Less);

        assert!(!case_eq("İstanbul", ""));
        assert_eq!(casecmp("İstanbul", ""), Ordering::Greater);
    }

    /// U+FFFD compares equal to itself, and a string that merely starts with
    /// it is ordered after the bare replacement character.
    #[test]
    fn unicode_replacement_character() {
        assert!(case_eq("\u{FFFD}", "\u{FFFD}"));
        assert_eq!(casecmp("\u{FFFD}", "\u{FFFD}"), Ordering::Equal);

        // A shared prefix is not enough for equality in either direction.
        assert!(!case_eq("\u{FFFD}", "\u{FFFD}yam"));
        assert_eq!(casecmp("\u{FFFD}", "\u{FFFD}yam"), Ordering::Less);

        assert!(!case_eq("\u{FFFD}yam", "\u{FFFD}"));
        assert_eq!(casecmp("\u{FFFD}yam", "\u{FFFD}"), Ordering::Greater);
    }

    /// ASCII letters compare without regard to case; strings of different
    /// length are unequal, with the shorter prefix ordered first.
    #[test]
    fn compares_symbols_without_regard_to_case() {
        assert!(!case_eq("abcdef", "abcde"));
        assert!(case_eq("aBcDeF", "abcdef"));
        assert!(!case_eq("abcdef", "abcdefg"));
        assert!(case_eq("abcdef", "ABCDEF"));

        assert_eq!(casecmp("abcdef", "abcde"), Ordering::Greater);
        assert_eq!(casecmp("aBcDeF", "abcdef"), Ordering::Equal);
        assert_eq!(casecmp("abcdef", "abcdefg"), Ordering::Less);
        assert_eq!(casecmp("abcdef", "ABCDEF"), Ordering::Equal);

        // `Ordering` casts to -1 / 0 / 1, matching the integer convention of
        // Ruby's `casecmp`.
        assert_eq!(casecmp("abcdef", "abcde") as i32, 1);
        assert_eq!(casecmp("aBcDeF", "abcdef") as i32, 0);
        assert_eq!(casecmp("abcdef", "abcdefg") as i32, -1);
        assert_eq!(casecmp("abcdef", "ABCDEF") as i32, 0);
    }

    /// Letters that share a base glyph but carry different diacritics are
    /// distinct under case folding.
    #[test]
    fn non_ascii_chars_that_are_not_fold_eq_are_not_eq() {
        // -- UTF-8 --
        let upper_a_tilde = "Ã";
        let lower_a_tilde = "ã";
        let upper_a_umlaut = "Ä";
        let lower_a_umlaut = "ä";

        // From `spec/core/symbol/casecmp_spec.rb`:
        //
        // ```ruby
        // lower_a_tilde.casecmp?(lower_a_umlaut).should_not == true
        // lower_a_umlaut.casecmp?(lower_a_tilde).should_not == true
        // upper_a_tilde.casecmp?(upper_a_umlaut).should_not == true
        // upper_a_umlaut.casecmp?(upper_a_tilde).should_not == true
        // ```
        assert!(!case_eq(lower_a_tilde, lower_a_umlaut));
        assert!(!case_eq(lower_a_umlaut, lower_a_tilde));
        assert!(!case_eq(upper_a_tilde, upper_a_umlaut));
        assert!(!case_eq(upper_a_umlaut, upper_a_tilde));

        assert_ne!(casecmp(lower_a_tilde, lower_a_umlaut), Ordering::Equal);
        assert_ne!(casecmp(lower_a_umlaut, lower_a_tilde), Ordering::Equal);
        assert_ne!(casecmp(upper_a_tilde, upper_a_umlaut), Ordering::Equal);
        assert_ne!(casecmp(upper_a_umlaut, upper_a_tilde), Ordering::Equal);
    }

    /// Upper- and lowercase forms of the same non-ASCII letter fold together.
    #[test]
    fn does_case_mapping_for_unicode_chars() {
        // -- UTF-8 --
        let upper_a_tilde = "Ã";
        let lower_a_tilde = "ã";
        let upper_a_umlaut = "Ä";
        let lower_a_umlaut = "ä";

        // From `spec/core/symbol/casecmp_spec.rb`:
        //
        // ```ruby
        // upper_a_tilde.casecmp?(lower_a_tilde).should == true
        // upper_a_umlaut.casecmp?(lower_a_umlaut).should == true
        // lower_a_tilde.casecmp?(upper_a_tilde).should == true
        // lower_a_umlaut.casecmp?(upper_a_umlaut).should == true
        // ```
        assert!(case_eq(upper_a_tilde, lower_a_tilde));
        assert!(case_eq(upper_a_umlaut, lower_a_umlaut));
        assert!(case_eq(lower_a_tilde, upper_a_tilde));
        assert!(case_eq(lower_a_umlaut, upper_a_umlaut));

        assert_eq!(casecmp(upper_a_tilde, lower_a_tilde), Ordering::Equal);
        assert_eq!(casecmp(upper_a_umlaut, lower_a_umlaut), Ordering::Equal);
        assert_eq!(casecmp(lower_a_tilde, upper_a_tilde), Ordering::Equal);
        assert_eq!(casecmp(lower_a_umlaut, upper_a_umlaut), Ordering::Equal);
    }

    /// In Turkic mode the dotless pair (`I` / `ı`) and the dotted pair
    /// (`İ` / `i`) each fold together, and the two pairs stay distinct from
    /// one another.
    #[test]
    fn does_case_mapping_for_turkic_unicode_chars() {
        // -- UTF-8 --
        let upper_dotless_i = "I";
        let lower_dotless_i = "ı";
        let upper_dotted_i = "İ";
        let lower_dotted_i = "i";

        // Within each pair: equal in both argument orders.
        assert!(case_eq(upper_dotless_i, lower_dotless_i));
        assert!(case_eq(upper_dotted_i, lower_dotted_i));
        assert!(case_eq(lower_dotless_i, upper_dotless_i));
        assert!(case_eq(lower_dotted_i, upper_dotted_i));

        assert_eq!(casecmp(upper_dotless_i, lower_dotless_i), Ordering::Equal);
        assert_eq!(casecmp(upper_dotted_i, lower_dotted_i), Ordering::Equal);
        assert_eq!(casecmp(lower_dotless_i, upper_dotless_i), Ordering::Equal);
        assert_eq!(casecmp(lower_dotted_i, upper_dotted_i), Ordering::Equal);

        // Across pairs: never equal, whatever the case or argument order.
        assert!(!case_eq(upper_dotless_i, upper_dotted_i));
        assert!(!case_eq(upper_dotless_i, lower_dotted_i));
        assert!(!case_eq(lower_dotless_i, upper_dotted_i));
        assert!(!case_eq(lower_dotless_i, lower_dotted_i));
        assert!(!case_eq(upper_dotted_i, upper_dotless_i));
        assert!(!case_eq(upper_dotted_i, lower_dotless_i));
        assert!(!case_eq(lower_dotted_i, upper_dotless_i));
        assert!(!case_eq(lower_dotted_i, lower_dotless_i));

        assert_ne!(casecmp(upper_dotless_i, upper_dotted_i), Ordering::Equal);
        assert_ne!(casecmp(upper_dotless_i, lower_dotted_i), Ordering::Equal);
        assert_ne!(casecmp(lower_dotless_i, upper_dotted_i), Ordering::Equal);
        assert_ne!(casecmp(lower_dotless_i, lower_dotted_i), Ordering::Equal);
        assert_ne!(casecmp(upper_dotted_i, upper_dotless_i), Ordering::Equal);
        assert_ne!(casecmp(upper_dotted_i, lower_dotless_i), Ordering::Equal);
        assert_ne!(casecmp(lower_dotted_i, upper_dotless_i), Ordering::Equal);
        assert_ne!(casecmp(lower_dotted_i, lower_dotless_i), Ordering::Equal);
    }
}