spinoso_string/enc/utf8/
inspect.rs

1use core::iter::FusedIterator;
2
3use scolapasta_string_escape::{InvalidUtf8ByteSequence, ascii_char_with_escape};
4
5use super::{Utf8Str, Utf8String};
6use crate::inspect::Flags;
7
8#[derive(Debug, Clone)]
9#[must_use = "this `Inspect` is an `Iterator`, which should be consumed if constructed"]
10pub struct Inspect<'a> {
11    flags: Flags,
12    escaped_bytes: &'static [u8],
13    byte_literal: InvalidUtf8ByteSequence,
14    bytes: &'a [u8],
15}
16
17impl<'a> From<&'a Utf8String> for Inspect<'a> {
18    #[inline]
19    fn from(value: &'a Utf8String) -> Self {
20        Self::new(value.as_bytes())
21    }
22}
23
24impl<'a> From<&'a Utf8Str> for Inspect<'a> {
25    #[inline]
26    fn from(value: &'a Utf8Str) -> Self {
27        Self::new(value.as_bytes())
28    }
29}
30
31impl<'a> From<&'a str> for Inspect<'a> {
32    #[inline]
33    fn from(value: &'a str) -> Self {
34        Self::new(value.as_bytes())
35    }
36}
37
38impl<'a> Inspect<'a> {
39    /// Construct a UTF-8 `Inspect` for the given byte slice.
40    ///
41    /// This constructor produces inspect contents like `"fred"`.
42    #[inline]
43    fn new(bytes: &'a [u8]) -> Self {
44        Self {
45            flags: Flags::DEFAULT,
46            escaped_bytes: &[],
47            byte_literal: InvalidUtf8ByteSequence::new(),
48            bytes,
49        }
50    }
51}
52
53impl Default for Inspect<'_> {
54    /// Construct an `Inspect` that will render debug output for the empty
55    /// slice.
56    ///
57    /// This constructor produces inspect contents like `""`.
58    #[inline]
59    fn default() -> Self {
60        Self::new(b"")
61    }
62}
63
64impl Iterator for Inspect<'_> {
65    type Item = char;
66
67    #[inline]
68    fn next(&mut self) -> Option<Self::Item> {
69        if let Some(ch) = self.flags.emit_leading_quote() {
70            return Some(ch);
71        }
72        if let Some((&head, tail)) = self.escaped_bytes.split_first() {
73            self.escaped_bytes = tail;
74            return Some(head.into());
75        }
76        if let Some(ch) = self.byte_literal.next() {
77            return Some(ch);
78        }
79        let (ch, size) = bstr::decode_utf8(self.bytes);
80        // SAFETY: bstr guarantees that the size is within the bounds of the slice.
81        let (chunk, remainder) = unsafe { self.bytes.split_at_unchecked(size) };
82        self.bytes = remainder;
83
84        match ch.map(|ch| {
85            ascii_char_with_escape(ch)
86                .and_then(|esc| esc.as_bytes().split_first())
87                .ok_or(ch)
88        }) {
89            Some(Ok((&head, tail))) => {
90                self.escaped_bytes = tail;
91                return Some(head.into());
92            }
93            Some(Err(ch)) => {
94                return Some(ch);
95            }
96            None if size == 0 => {}
97            None => {
98                let invalid_utf8_bytes = chunk;
99                // This conversion is safe to unwrap due to the documented
100                // behavior of `bstr::decode_utf8` and `InvalidUtf8ByteSequence`
101                // which indicate that `size` is always in the range of 0..=3.
102                self.byte_literal = InvalidUtf8ByteSequence::try_from(invalid_utf8_bytes)
103                    .expect("Invalid UTF-8 byte sequence should be at most 3 bytes long");
104                return self.byte_literal.next();
105            }
106        };
107        self.flags.emit_trailing_quote()
108    }
109}
110
111impl FusedIterator for Inspect<'_> {}
112
#[cfg(test)]
mod tests {
    use alloc::string::String;

    use super::{Inspect, Utf8Str};

    /// Drain an `Inspect` iterator into an owned `String`.
    fn rendered(inspect: Inspect<'_>) -> String {
        inspect.collect()
    }

    #[test]
    fn empty() {
        assert_eq!(rendered(Inspect::from("")), r#""""#);
    }

    #[test]
    fn fred() {
        assert_eq!(rendered(Inspect::from("fred")), r#""fred""#);
    }

    #[test]
    fn invalid_utf8_byte() {
        let s = Utf8Str::new(b"\xFF");

        assert_eq!(rendered(Inspect::from(s)), r#""\xFF""#);
    }

    #[test]
    fn invalid_utf8() {
        let s = Utf8Str::new(b"invalid-\xFF-utf8");

        assert_eq!(rendered(Inspect::from(s)), r#""invalid-\xFF-utf8""#);
    }

    #[test]
    fn quote_collect() {
        assert_eq!(rendered(Inspect::from(r#"a"b"#)), r#""a\"b""#);
    }

    #[test]
    fn quote_iter() {
        let mut inspect = Inspect::from(r#"a"b"#);

        // Step the iterator by hand to pin the exact order of emitted chars.
        for expected in ['"', 'a', '\\', '"', 'b', '"'] {
            assert_eq!(inspect.next(), Some(expected));
        }
        assert_eq!(inspect.next(), None);
    }

    #[test]
    fn emoji() {
        assert_eq!(rendered(Inspect::from("💎")), r#""💎""#);
    }

    #[test]
    fn emoji_global() {
        assert_eq!(rendered(Inspect::from("$💎")), r#""$💎""#);
    }

    #[test]
    fn emoji_ivar() {
        assert_eq!(rendered(Inspect::from("@💎")), r#""@💎""#);
    }

    #[test]
    fn emoji_cvar() {
        assert_eq!(rendered(Inspect::from("@@💎")), r#""@@💎""#);
    }

    #[test]
    fn unicode_replacement_char() {
        assert_eq!(rendered(Inspect::from("�")), r#""�""#);
    }

    #[test]
    fn unicode_replacement_char_global() {
        assert_eq!(rendered(Inspect::from("$�")), r#""$�""#);
    }

    #[test]
    fn unicode_replacement_char_ivar() {
        assert_eq!(rendered(Inspect::from("@�")), r#""@�""#);
    }

    #[test]
    fn unicode_replacement_char_cvar() {
        assert_eq!(rendered(Inspect::from("@@�")), r#""@@�""#);
    }

    #[test]
    fn escape_slash() {
        assert_eq!(rendered(Inspect::from(r"\")), r#""\\""#);
    }

    #[test]
    fn escape_inner_slash() {
        assert_eq!(rendered(Inspect::from(r"foo\bar")), r#""foo\\bar""#);
    }

    #[test]
    fn nul() {
        assert_eq!(rendered(Inspect::from("\0")), r#""\x00""#);
    }

    #[test]
    fn del() {
        assert_eq!(rendered(Inspect::from("\x7F")), r#""\x7F""#);
    }

    #[test]
    fn ascii_control() {
        // (input, expected inspect output) for every byte in 0x00..=0x20.
        let test_cases = [
            ("\x00", r#""\x00""#),
            ("\x01", r#""\x01""#),
            ("\x02", r#""\x02""#),
            ("\x03", r#""\x03""#),
            ("\x04", r#""\x04""#),
            ("\x05", r#""\x05""#),
            ("\x06", r#""\x06""#),
            ("\x07", r#""\a""#),
            ("\x08", r#""\b""#),
            ("\x09", r#""\t""#),
            ("\x0A", r#""\n""#),
            ("\x0B", r#""\v""#),
            ("\x0C", r#""\f""#),
            ("\x0D", r#""\r""#),
            ("\x0E", r#""\x0E""#),
            ("\x0F", r#""\x0F""#),
            ("\x10", r#""\x10""#),
            ("\x11", r#""\x11""#),
            ("\x12", r#""\x12""#),
            ("\x13", r#""\x13""#),
            ("\x14", r#""\x14""#),
            ("\x15", r#""\x15""#),
            ("\x16", r#""\x16""#),
            ("\x17", r#""\x17""#),
            ("\x18", r#""\x18""#),
            ("\x19", r#""\x19""#),
            ("\x1A", r#""\x1A""#),
            ("\x1B", r#""\e""#),
            ("\x1C", r#""\x1C""#),
            ("\x1D", r#""\x1D""#),
            ("\x1E", r#""\x1E""#),
            ("\x1F", r#""\x1F""#),
            ("\x20", r#"" ""#),
        ];
        for (s, r) in test_cases {
            assert_eq!(rendered(Inspect::from(s)), r, "For {s:?}, expected {r}");
        }
    }

    #[test]
    fn special_double_quote() {
        // The hex escape and the literal escape denote the same character.
        for input in ["\x22", "\""] {
            assert_eq!(rendered(Inspect::from(input)), r#""\"""#);
        }
    }

    #[test]
    fn special_backslash() {
        // The hex escape and the literal escape denote the same character.
        for input in ["\x5C", "\\"] {
            assert_eq!(rendered(Inspect::from(input)), r#""\\""#);
        }
    }

    #[test]
    fn invalid_utf8_special_global() {
        let s = Utf8Str::from_bytes(b"$-\xFF");

        assert_eq!(rendered(Inspect::from(s)), r#""$-\xFF""#);
    }

    #[test]
    fn replacement_char_special_global() {
        let cases = [
            ("$-�", r#""$-�""#),
            ("$-�a", r#""$-�a""#),
            ("$-��", r#""$-��""#),
        ];
        for (input, expected) in cases {
            assert_eq!(rendered(Inspect::from(input)), expected);
        }
    }
}