focaccia/folding/
ascii.rs

1use core::cmp::Ordering;
2
/// Order two byte strings lexicographically, ignoring ASCII case.
///
/// Every byte is folded with [`u8::to_ascii_lowercase`] from Rust `core`
/// before it is compared, so only `A`-`Z` and `a`-`z` are treated as
/// equivalent; all other bytes, including non-ASCII ones, are compared by
/// their raw value. If one input is a case-insensitive prefix of the other,
/// the shorter input orders first.
///
/// # Examples
///
/// ```
/// # use core::cmp::Ordering;
/// # use focaccia::ascii_casecmp;
/// assert_eq!(ascii_casecmp(b"Artichoke Ruby", b"artichoke ruby"), Ordering::Equal);
/// assert_eq!(ascii_casecmp(b"artichoke ruby", b"Artichoke"), Ordering::Greater);
/// ```
#[inline]
#[must_use]
pub fn casecmp(left: &[u8], right: &[u8]) -> Ordering {
    left.iter()
        .zip(right)
        .map(|(l, r)| l.to_ascii_lowercase().cmp(&r.to_ascii_lowercase()))
        // The first position where the folded bytes differ decides the order.
        .find(|&ordering| ordering != Ordering::Equal)
        // Otherwise the common prefix matches and the shorter input sorts first.
        .unwrap_or_else(|| left.len().cmp(&right.len()))
}
22
/// Test whether two byte strings are equal when ASCII case is ignored.
///
/// The comparison relies on the ASCII folding functions in Rust `core`:
/// inputs of different lengths are never equal, and otherwise every pair of
/// bytes must match under [`u8::eq_ignore_ascii_case`]. Bytes outside the
/// ASCII letters only match themselves.
///
/// # Examples
///
/// ```
/// # use focaccia::ascii_case_eq;
/// assert!(ascii_case_eq(b"Artichoke Ruby", b"artichoke ruby"));
/// assert!(!ascii_case_eq(b"artichoke ruby", b"Artichoke"));
/// ```
#[inline]
#[must_use]
pub fn case_eq(left: &[u8], right: &[u8]) -> bool {
    if left.len() != right.len() {
        return false;
    }
    left.iter()
        .zip(right)
        .all(|(l, r)| l.eq_ignore_ascii_case(r))
}
39
#[cfg(test)]
mod tests {
    use core::cmp::Ordering;

    use super::{case_eq, casecmp};

    /// The empty string equals itself and orders before any non-empty string.
    #[test]
    fn empty_string() {
        assert!(case_eq(b"", b""));
        assert_eq!(casecmp(b"", b""), Ordering::Equal);

        assert!(!case_eq(b"", b"rake"));
        assert_eq!(casecmp(b"", b"rake"), Ordering::Less);

        assert!(!case_eq(b"rake", b""));
        assert_eq!(casecmp(b"rake", b""), Ordering::Greater);
    }

    /// Multi-byte UTF-8 sequences are compared byte-for-byte.
    #[test]
    fn unicode_replacement_character() {
        let replacement = "\u{FFFD}".as_bytes();
        let replacement_with_suffix = "\u{FFFD}yam".as_bytes();

        assert!(case_eq(replacement, replacement));
        assert_eq!(casecmp(replacement, replacement), Ordering::Equal);

        assert!(!case_eq(replacement, replacement_with_suffix));
        assert_eq!(
            casecmp(replacement, replacement_with_suffix),
            Ordering::Less
        );

        assert!(!case_eq(replacement_with_suffix, replacement));
        assert_eq!(
            casecmp(replacement_with_suffix, replacement),
            Ordering::Greater
        );
    }

    /// ASCII letters compare equal regardless of case; the integer value of
    /// the resulting `Ordering` is pinned to -1, 0 or 1.
    #[test]
    fn compares_symbols_without_regard_to_case() {
        // (left, right, expected ordering, expected ordering as an integer)
        let cases: [(&[u8], &[u8], Ordering, i32); 4] = [
            (b"abcdef", b"abcde", Ordering::Greater, 1),
            (b"aBcDeF", b"abcdef", Ordering::Equal, 0),
            (b"abcdef", b"abcdefg", Ordering::Less, -1),
            (b"abcdef", b"ABCDEF", Ordering::Equal, 0),
        ];

        for &(left, right, ordering, sign) in &cases {
            assert_eq!(
                case_eq(left, right),
                ordering == Ordering::Equal,
                "case_eq({:?}, {:?})",
                left,
                right
            );
            assert_eq!(
                casecmp(left, right),
                ordering,
                "casecmp({:?}, {:?})",
                left,
                right
            );
            assert_eq!(
                casecmp(left, right) as i32,
                sign,
                "casecmp({:?}, {:?}) as i32",
                left,
                right
            );
        }
    }

    /// Distinct non-ASCII characters of the same case never compare equal.
    #[test]
    fn non_ascii_chars_that_are_not_fold_eq_are_not_eq() {
        // From `spec/core/symbol/casecmp_spec.rb`:
        //
        // ```ruby
        // lower_a_tilde.casecmp(lower_a_umlaut).should_not == 0
        // lower_a_umlaut.casecmp(lower_a_tilde).should_not == 0
        // upper_a_tilde.casecmp(upper_a_umlaut).should_not == 0
        // upper_a_umlaut.casecmp(upper_a_tilde).should_not == 0
        // ```
        //
        // Each entry is an (a-tilde, a-umlaut) pair of the same case.
        let same_case_pairs: [(&[u8], &[u8]); 4] = [
            // -- Latin-1 --
            (b"\xE3", b"\xE4"),
            (b"\xC3", b"\xC4"),
            // -- UTF-8 --
            ("ã".as_bytes(), "ä".as_bytes()),
            ("Ã".as_bytes(), "Ä".as_bytes()),
        ];

        for &(a_tilde, a_umlaut) in &same_case_pairs {
            assert!(
                !case_eq(a_tilde, a_umlaut),
                "case_eq({:?}, {:?})",
                a_tilde,
                a_umlaut
            );
            assert!(
                !case_eq(a_umlaut, a_tilde),
                "case_eq({:?}, {:?})",
                a_umlaut,
                a_tilde
            );

            assert_ne!(
                casecmp(a_tilde, a_umlaut),
                Ordering::Equal,
                "casecmp({:?}, {:?})",
                a_tilde,
                a_umlaut
            );
            assert_ne!(
                casecmp(a_umlaut, a_tilde),
                Ordering::Equal,
                "casecmp({:?}, {:?})",
                a_umlaut,
                a_tilde
            );
        }
    }

    /// Upper- and lowercase forms of a non-ASCII character are not folded
    /// together: the uppercase encoding orders before the lowercase one.
    #[test]
    fn does_not_do_case_mapping_for_non_ascii_chars() {
        // From `spec/core/symbol/casecmp_spec.rb`:
        //
        // ```ruby
        // upper_a_tilde.casecmp(lower_a_tilde).should == -1
        // upper_a_umlaut.casecmp(lower_a_umlaut).should == -1
        // lower_a_tilde.casecmp(upper_a_tilde).should == 1
        // lower_a_umlaut.casecmp(upper_a_umlaut).should == 1
        // ```
        //
        // Each entry is an (uppercase, lowercase) pair of the same letter.
        let upper_lower_pairs: [(&[u8], &[u8]); 4] = [
            // -- Latin-1 --
            (b"\xC3", b"\xE3"),
            (b"\xC4", b"\xE4"),
            // -- UTF-8 --
            ("Ã".as_bytes(), "ã".as_bytes()),
            ("Ä".as_bytes(), "ä".as_bytes()),
        ];

        for &(upper, lower) in &upper_lower_pairs {
            assert!(!case_eq(upper, lower), "case_eq({:?}, {:?})", upper, lower);
            assert!(!case_eq(lower, upper), "case_eq({:?}, {:?})", lower, upper);

            assert_eq!(
                casecmp(upper, lower),
                Ordering::Less,
                "casecmp({:?}, {:?})",
                upper,
                lower
            );
            assert_eq!(
                casecmp(lower, upper),
                Ordering::Greater,
                "casecmp({:?}, {:?})",
                lower,
                upper
            );

            assert_eq!(
                casecmp(upper, lower) as i32,
                -1,
                "casecmp({:?}, {:?}) as i32",
                upper,
                lower
            );
            assert_eq!(
                casecmp(lower, upper) as i32,
                1,
                "casecmp({:?}, {:?}) as i32",
                lower,
                upper
            );
        }
    }

    /// Every ASCII letter folds to its counterpart of the other case, in both
    /// argument orders and for both functions.
    #[test]
    fn exhaustive() {
        for (lower, upper) in (b'a'..=b'z').zip(b'A'..=b'Z') {
            let lower = [lower];
            let upper = [upper];

            assert!(case_eq(&lower, &upper));
            assert!(case_eq(&upper, &lower));

            assert_eq!(casecmp(&lower, &upper), Ordering::Equal);
            assert_eq!(casecmp(&upper, &lower), Ordering::Equal);
        }
    }
}