posix_space/
lib.rs

1#![warn(clippy::all)]
2#![warn(clippy::pedantic)]
3#![warn(clippy::cargo)]
4#![allow(unknown_lints)]
5#![warn(missing_copy_implementations)]
6#![warn(missing_debug_implementations)]
7#![warn(missing_docs)]
8#![warn(rust_2018_idioms)]
9#![warn(trivial_casts, trivial_numeric_casts)]
10#![warn(unsafe_op_in_unsafe_fn)]
11#![warn(unused_qualifications)]
12#![warn(variant_size_differences)]
13#![forbid(unsafe_code)]
14// Enable feature callouts in generated documentation:
15// https://doc.rust-lang.org/beta/unstable-book/language-features/doc-cfg.html
16//
17// This approach is borrowed from tokio.
18#![cfg_attr(docsrs, feature(doc_cfg))]
19#![cfg_attr(docsrs, feature(doc_alias))]
20
21//! A small crate which determines if a byte is classified as a space in the
22//! POSIX locale per [POSIX.1-2017], chapter 7, [Locale].
23//!
24//! [POSIX.1-2017]: https://pubs.opengroup.org/onlinepubs/9699919799/mindex.html
25//! [Locale]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html
26//!
27//! > **space**
28//! >
29//! > Define characters to be classified as white-space characters.
30//! >
31//! > In the POSIX locale, exactly \<space\>, \<form-feed\>, \<newline\>, \<carriage-return\>,
32//! > \<tab\>, and \<vertical-tab\> shall be included.
33//!
//! The function defined in this crate should have equivalent behavior to the C
//! function [`isspace`], as declared in `ctype.h`, when the POSIX (`"C"`) locale
//! is in effect.
36//!
37//! [`isspace`]: https://linux.die.net/man/3/isspace
38
39#![doc(html_root_url = "https://docs.rs/posix-space/1.0.4")]
40#![no_std]
41
/// Determine whether the given byte is in the **space** POSIX character class.
///
/// In the POSIX locale, exactly \<space\>, \<form-feed\>, \<newline\>,
/// \<carriage-return\>, \<tab\>, and \<vertical-tab\> shall be included.
///
/// This is a `const fn`, so the classification may also be computed in
/// constant contexts.
///
/// # Compatibility
///
/// This function differs from [`u8::is_ascii_whitespace`] in that \<vertical-tab\>,
/// `\x0B`, is considered a **space**.
///
/// # Examples
///
/// ```
/// assert!(posix_space::is_space(b' '));
/// assert!(posix_space::is_space(b'\x0C'));
/// assert!(posix_space::is_space(b'\n'));
/// assert!(posix_space::is_space(b'\r'));
/// assert!(posix_space::is_space(b'\t'));
/// assert!(posix_space::is_space(b'\x0B'));
/// ```
///
/// Other ASCII characters are not POSIX spaces:
///
/// ```
/// assert!(!posix_space::is_space(b'C'));
/// assert!(!posix_space::is_space(b'&'));
/// assert!(!posix_space::is_space(b'\x7F'));
/// ```
///
/// Non-ASCII bytes are not POSIX spaces:
///
/// ```
/// assert!(!posix_space::is_space(b'\x80'));
/// assert!(!posix_space::is_space(b'\xFF'));
/// ```
///
/// The check can be evaluated at compile time:
///
/// ```
/// const NEWLINE_IS_SPACE: bool = posix_space::is_space(b'\n');
/// assert!(NEWLINE_IS_SPACE);
/// ```
#[must_use]
pub const fn is_space(byte: u8) -> bool {
    // Spell out all six members of the class in one place rather than
    // patching `is_ascii_whitespace`, which omits <vertical-tab> (`\x0B`).
    matches!(byte, b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ')
}
81
#[cfg(test)]
mod tests {
    use super::is_space;

    // Reference table used as an independent oracle for `is_space`.
    //
    // Each row is `(byte, expected classification, printable form of the byte)`.
    // The rows were produced with the Ruby one-liner below (prompt kept as
    // recorded), which matches every byte against `/[[:space:]]/`:
    //
    // ```
    // [3.1.2] > (0..255).each { |b| puts "(0x#{b.to_s(16).upcase}, #{b.chr.match?(/[[:space:]]/)}, #{b.chr.inspect})," }
    // ```
    const BYTE_TO_POSIX_SPACE: [(u8, bool, &str); 256] = [
        (0x0, false, r"\x00"),
        (0x1, false, r"\x01"),
        (0x2, false, r"\x02"),
        (0x3, false, r"\x03"),
        (0x4, false, r"\x04"),
        (0x5, false, r"\x05"),
        (0x6, false, r"\x06"),
        (0x7, false, r"\a"),
        (0x8, false, r"\b"),
        (0x9, true, r"\t"),
        (0xA, true, r"\n"),
        (0xB, true, r"\v"),
        (0xC, true, r"\f"),
        (0xD, true, r"\r"),
        (0xE, false, r"\x0E"),
        (0xF, false, r"\x0F"),
        (0x10, false, r"\x10"),
        (0x11, false, r"\x11"),
        (0x12, false, r"\x12"),
        (0x13, false, r"\x13"),
        (0x14, false, r"\x14"),
        (0x15, false, r"\x15"),
        (0x16, false, r"\x16"),
        (0x17, false, r"\x17"),
        (0x18, false, r"\x18"),
        (0x19, false, r"\x19"),
        (0x1A, false, r"\x1A"),
        (0x1B, false, r"\e"),
        (0x1C, false, r"\x1C"),
        (0x1D, false, r"\x1D"),
        (0x1E, false, r"\x1E"),
        (0x1F, false, r"\x1F"),
        (0x20, true, " "),
        (0x21, false, "!"),
        (0x22, false, r#"\""#),
        (0x23, false, "#"),
        (0x24, false, "$"),
        (0x25, false, "%"),
        (0x26, false, "&"),
        (0x27, false, "'"),
        (0x28, false, "("),
        (0x29, false, ")"),
        (0x2A, false, "*"),
        (0x2B, false, "+"),
        (0x2C, false, ","),
        (0x2D, false, "-"),
        (0x2E, false, "."),
        (0x2F, false, "/"),
        (0x30, false, "0"),
        (0x31, false, "1"),
        (0x32, false, "2"),
        (0x33, false, "3"),
        (0x34, false, "4"),
        (0x35, false, "5"),
        (0x36, false, "6"),
        (0x37, false, "7"),
        (0x38, false, "8"),
        (0x39, false, "9"),
        (0x3A, false, ":"),
        (0x3B, false, ";"),
        (0x3C, false, "<"),
        (0x3D, false, "="),
        (0x3E, false, ">"),
        (0x3F, false, "?"),
        (0x40, false, "@"),
        (0x41, false, "A"),
        (0x42, false, "B"),
        (0x43, false, "C"),
        (0x44, false, "D"),
        (0x45, false, "E"),
        (0x46, false, "F"),
        (0x47, false, "G"),
        (0x48, false, "H"),
        (0x49, false, "I"),
        (0x4A, false, "J"),
        (0x4B, false, "K"),
        (0x4C, false, "L"),
        (0x4D, false, "M"),
        (0x4E, false, "N"),
        (0x4F, false, "O"),
        (0x50, false, "P"),
        (0x51, false, "Q"),
        (0x52, false, "R"),
        (0x53, false, "S"),
        (0x54, false, "T"),
        (0x55, false, "U"),
        (0x56, false, "V"),
        (0x57, false, "W"),
        (0x58, false, "X"),
        (0x59, false, "Y"),
        (0x5A, false, "Z"),
        (0x5B, false, "["),
        (0x5C, false, r"\\"),
        (0x5D, false, "]"),
        (0x5E, false, "^"),
        (0x5F, false, "_"),
        (0x60, false, "`"),
        (0x61, false, "a"),
        (0x62, false, "b"),
        (0x63, false, "c"),
        (0x64, false, "d"),
        (0x65, false, "e"),
        (0x66, false, "f"),
        (0x67, false, "g"),
        (0x68, false, "h"),
        (0x69, false, "i"),
        (0x6A, false, "j"),
        (0x6B, false, "k"),
        (0x6C, false, "l"),
        (0x6D, false, "m"),
        (0x6E, false, "n"),
        (0x6F, false, "o"),
        (0x70, false, "p"),
        (0x71, false, "q"),
        (0x72, false, "r"),
        (0x73, false, "s"),
        (0x74, false, "t"),
        (0x75, false, "u"),
        (0x76, false, "v"),
        (0x77, false, "w"),
        (0x78, false, "x"),
        (0x79, false, "y"),
        (0x7A, false, "z"),
        (0x7B, false, "{"),
        (0x7C, false, "|"),
        (0x7D, false, "}"),
        (0x7E, false, "~"),
        (0x7F, false, r"\x7F"),
        (0x80, false, r"\x80"),
        (0x81, false, r"\x81"),
        (0x82, false, r"\x82"),
        (0x83, false, r"\x83"),
        (0x84, false, r"\x84"),
        (0x85, false, r"\x85"),
        (0x86, false, r"\x86"),
        (0x87, false, r"\x87"),
        (0x88, false, r"\x88"),
        (0x89, false, r"\x89"),
        (0x8A, false, r"\x8A"),
        (0x8B, false, r"\x8B"),
        (0x8C, false, r"\x8C"),
        (0x8D, false, r"\x8D"),
        (0x8E, false, r"\x8E"),
        (0x8F, false, r"\x8F"),
        (0x90, false, r"\x90"),
        (0x91, false, r"\x91"),
        (0x92, false, r"\x92"),
        (0x93, false, r"\x93"),
        (0x94, false, r"\x94"),
        (0x95, false, r"\x95"),
        (0x96, false, r"\x96"),
        (0x97, false, r"\x97"),
        (0x98, false, r"\x98"),
        (0x99, false, r"\x99"),
        (0x9A, false, r"\x9A"),
        (0x9B, false, r"\x9B"),
        (0x9C, false, r"\x9C"),
        (0x9D, false, r"\x9D"),
        (0x9E, false, r"\x9E"),
        (0x9F, false, r"\x9F"),
        (0xA0, false, r"\xA0"),
        (0xA1, false, r"\xA1"),
        (0xA2, false, r"\xA2"),
        (0xA3, false, r"\xA3"),
        (0xA4, false, r"\xA4"),
        (0xA5, false, r"\xA5"),
        (0xA6, false, r"\xA6"),
        (0xA7, false, r"\xA7"),
        (0xA8, false, r"\xA8"),
        (0xA9, false, r"\xA9"),
        (0xAA, false, r"\xAA"),
        (0xAB, false, r"\xAB"),
        (0xAC, false, r"\xAC"),
        (0xAD, false, r"\xAD"),
        (0xAE, false, r"\xAE"),
        (0xAF, false, r"\xAF"),
        (0xB0, false, r"\xB0"),
        (0xB1, false, r"\xB1"),
        (0xB2, false, r"\xB2"),
        (0xB3, false, r"\xB3"),
        (0xB4, false, r"\xB4"),
        (0xB5, false, r"\xB5"),
        (0xB6, false, r"\xB6"),
        (0xB7, false, r"\xB7"),
        (0xB8, false, r"\xB8"),
        (0xB9, false, r"\xB9"),
        (0xBA, false, r"\xBA"),
        (0xBB, false, r"\xBB"),
        (0xBC, false, r"\xBC"),
        (0xBD, false, r"\xBD"),
        (0xBE, false, r"\xBE"),
        (0xBF, false, r"\xBF"),
        (0xC0, false, r"\xC0"),
        (0xC1, false, r"\xC1"),
        (0xC2, false, r"\xC2"),
        (0xC3, false, r"\xC3"),
        (0xC4, false, r"\xC4"),
        (0xC5, false, r"\xC5"),
        (0xC6, false, r"\xC6"),
        (0xC7, false, r"\xC7"),
        (0xC8, false, r"\xC8"),
        (0xC9, false, r"\xC9"),
        (0xCA, false, r"\xCA"),
        (0xCB, false, r"\xCB"),
        (0xCC, false, r"\xCC"),
        (0xCD, false, r"\xCD"),
        (0xCE, false, r"\xCE"),
        (0xCF, false, r"\xCF"),
        (0xD0, false, r"\xD0"),
        (0xD1, false, r"\xD1"),
        (0xD2, false, r"\xD2"),
        (0xD3, false, r"\xD3"),
        (0xD4, false, r"\xD4"),
        (0xD5, false, r"\xD5"),
        (0xD6, false, r"\xD6"),
        (0xD7, false, r"\xD7"),
        (0xD8, false, r"\xD8"),
        (0xD9, false, r"\xD9"),
        (0xDA, false, r"\xDA"),
        (0xDB, false, r"\xDB"),
        (0xDC, false, r"\xDC"),
        (0xDD, false, r"\xDD"),
        (0xDE, false, r"\xDE"),
        (0xDF, false, r"\xDF"),
        (0xE0, false, r"\xE0"),
        (0xE1, false, r"\xE1"),
        (0xE2, false, r"\xE2"),
        (0xE3, false, r"\xE3"),
        (0xE4, false, r"\xE4"),
        (0xE5, false, r"\xE5"),
        (0xE6, false, r"\xE6"),
        (0xE7, false, r"\xE7"),
        (0xE8, false, r"\xE8"),
        (0xE9, false, r"\xE9"),
        (0xEA, false, r"\xEA"),
        (0xEB, false, r"\xEB"),
        (0xEC, false, r"\xEC"),
        (0xED, false, r"\xED"),
        (0xEE, false, r"\xEE"),
        (0xEF, false, r"\xEF"),
        (0xF0, false, r"\xF0"),
        (0xF1, false, r"\xF1"),
        (0xF2, false, r"\xF2"),
        (0xF3, false, r"\xF3"),
        (0xF4, false, r"\xF4"),
        (0xF5, false, r"\xF5"),
        (0xF6, false, r"\xF6"),
        (0xF7, false, r"\xF7"),
        (0xF8, false, r"\xF8"),
        (0xF9, false, r"\xF9"),
        (0xFA, false, r"\xFA"),
        (0xFB, false, r"\xFB"),
        (0xFC, false, r"\xFC"),
        (0xFD, false, r"\xFD"),
        (0xFE, false, r"\xFE"),
        (0xFF, false, r"\xFF"),
    ];

    // Guards the hand-pasted table itself: every row must sit at the index
    // equal to its byte value, so no byte is skipped, repeated, or reordered.
    #[test]
    fn table_rows_are_indexed_by_byte() {
        for (index, &(byte, _, display)) in BYTE_TO_POSIX_SPACE.iter().enumerate() {
            assert_eq!(usize::from(byte), index, "misplaced row for {}", display);
        }
    }

    // `is_space` must agree with the reference classification for every row.
    #[test]
    fn space_character_class() {
        // Iterate the table in place; there is no need to copy it to a local.
        for &(byte, is_posix_space, display) in BYTE_TO_POSIX_SPACE.iter() {
            assert_eq!(is_space(byte), is_posix_space, "{} - {}", byte, display);
        }
    }

    // No byte outside the ASCII range may be classified as a space.
    #[test]
    fn non_ascii_bytes_are_not_posix_spaces() {
        for byte in (0..=u8::MAX).filter(|byte| !byte.is_ascii()) {
            assert!(!is_space(byte), "for byte {}", byte);
        }
    }
}
366
// Compile the code blocks in `README.md` as doctests.
//
// The macro and its invocation should stay the last items in this file so that
// they do not interfere with code coverage.
#[cfg(doctest)]
macro_rules! readme {
    // No arguments: load the README located one directory above this source
    // file and hand its contents to the arm below.
    () => {
        readme!(include_str!("../README.md"));
    };
    // Attach the given text as the documentation of an empty module.
    ($markdown:expr) => {
        #[doc = $markdown]
        mod readme {}
    };
}
#[cfg(doctest)]
readme!();