1use core::iter::FusedIterator;
2
3use scolapasta_string_escape::{InvalidUtf8ByteSequence, ascii_char_with_escape};
4
5use super::{Utf8Str, Utf8String};
6use crate::inspect::Flags;
7
/// An iterator that yields the quoted, escaped `char` representation of a
/// byte string that is treated as UTF-8.
///
/// The output is wrapped in double quotes. Characters that have an ASCII
/// escape (control characters, `"`, `\`) are yielded as their escape sequence,
/// invalid UTF-8 bytes are yielded as `\xNN` byte literals, and every other
/// character is yielded unchanged.
#[derive(Debug, Clone)]
#[must_use = "this `Inspect` is an `Iterator`, which should be consumed if constructed"]
pub struct Inspect<'a> {
    /// Quote-emission state; consulted for the leading quote at the start of
    /// every `next` call and for the trailing quote once the input is drained.
    flags: Flags,
    /// Bytes of an ASCII escape sequence that have not been yielded yet. The
    /// first byte of an escape is returned immediately; the rest is parked
    /// here.
    escaped_bytes: &'static [u8],
    /// Iterator over the escaped form of the most recently decoded invalid
    /// UTF-8 byte sequence.
    byte_literal: InvalidUtf8ByteSequence,
    /// Input bytes that have not been decoded yet.
    bytes: &'a [u8],
}
16
17impl<'a> From<&'a Utf8String> for Inspect<'a> {
18 #[inline]
19 fn from(value: &'a Utf8String) -> Self {
20 Self::new(value.as_bytes())
21 }
22}
23
24impl<'a> From<&'a Utf8Str> for Inspect<'a> {
25 #[inline]
26 fn from(value: &'a Utf8Str) -> Self {
27 Self::new(value.as_bytes())
28 }
29}
30
31impl<'a> From<&'a str> for Inspect<'a> {
32 #[inline]
33 fn from(value: &'a str) -> Self {
34 Self::new(value.as_bytes())
35 }
36}
37
38impl<'a> Inspect<'a> {
39 #[inline]
43 fn new(bytes: &'a [u8]) -> Self {
44 Self {
45 flags: Flags::DEFAULT,
46 escaped_bytes: &[],
47 byte_literal: InvalidUtf8ByteSequence::new(),
48 bytes,
49 }
50 }
51}
52
53impl Default for Inspect<'_> {
54 #[inline]
59 fn default() -> Self {
60 Self::new(b"")
61 }
62}
63
64impl Iterator for Inspect<'_> {
65 type Item = char;
66
67 #[inline]
68 fn next(&mut self) -> Option<Self::Item> {
69 if let Some(ch) = self.flags.emit_leading_quote() {
70 return Some(ch);
71 }
72 if let Some((&head, tail)) = self.escaped_bytes.split_first() {
73 self.escaped_bytes = tail;
74 return Some(head.into());
75 }
76 if let Some(ch) = self.byte_literal.next() {
77 return Some(ch);
78 }
79 let (ch, size) = bstr::decode_utf8(self.bytes);
80 let (chunk, remainder) = unsafe { self.bytes.split_at_unchecked(size) };
82 self.bytes = remainder;
83
84 match ch.map(|ch| {
85 ascii_char_with_escape(ch)
86 .and_then(|esc| esc.as_bytes().split_first())
87 .ok_or(ch)
88 }) {
89 Some(Ok((&head, tail))) => {
90 self.escaped_bytes = tail;
91 return Some(head.into());
92 }
93 Some(Err(ch)) => {
94 return Some(ch);
95 }
96 None if size == 0 => {}
97 None => {
98 let invalid_utf8_bytes = chunk;
99 self.byte_literal = InvalidUtf8ByteSequence::try_from(invalid_utf8_bytes)
103 .expect("Invalid UTF-8 byte sequence should be at most 3 bytes long");
104 return self.byte_literal.next();
105 }
106 };
107 self.flags.emit_trailing_quote()
108 }
109}
110
// Marks `Inspect` as fused: once `next` returns `None` it is promised to keep
// returning `None`.
// NOTE(review): this relies on `Flags::emit_trailing_quote` yielding the
// trailing quote only once; `Flags` is defined elsewhere, so confirm there.
impl FusedIterator for Inspect<'_> {}
112
#[cfg(test)]
mod tests {
    use alloc::string::String;

    use super::{Inspect, Utf8Str};

    /// Builds an `Inspect` from `input` and drains it into an owned `String`.
    fn render<'a>(input: impl Into<Inspect<'a>>) -> String {
        input.into().collect()
    }

    #[test]
    fn empty() {
        assert_eq!(render(""), r#""""#);
    }

    #[test]
    fn fred() {
        assert_eq!(render("fred"), r#""fred""#);
    }

    #[test]
    fn invalid_utf8_byte() {
        let input = Utf8Str::new(b"\xFF");

        assert_eq!(render(input), r#""\xFF""#);
    }

    #[test]
    fn invalid_utf8() {
        let input = Utf8Str::new(b"invalid-\xFF-utf8");

        assert_eq!(render(input), r#""invalid-\xFF-utf8""#);
    }

    #[test]
    fn quote_collect() {
        assert_eq!(render(r#"a"b"#), r#""a\"b""#);
    }

    #[test]
    fn quote_iter() {
        let mut inspect = Inspect::from(r#"a"b"#);

        // Step the iterator by hand to pin the exact order of yielded chars.
        for expected in ['"', 'a', '\\', '"', 'b', '"'] {
            assert_eq!(inspect.next(), Some(expected));
        }
        assert_eq!(inspect.next(), None);
    }

    #[test]
    fn emoji() {
        assert_eq!(render("💎"), r#""💎""#);
    }

    #[test]
    fn emoji_global() {
        assert_eq!(render("$💎"), r#""$💎""#);
    }

    #[test]
    fn emoji_ivar() {
        assert_eq!(render("@💎"), r#""@💎""#);
    }

    #[test]
    fn emoji_cvar() {
        assert_eq!(render("@@💎"), r#""@@💎""#);
    }

    #[test]
    fn unicode_replacement_char() {
        assert_eq!(render("�"), r#""�""#);
    }

    #[test]
    fn unicode_replacement_char_global() {
        assert_eq!(render("$�"), r#""$�""#);
    }

    #[test]
    fn unicode_replacement_char_ivar() {
        assert_eq!(render("@�"), r#""@�""#);
    }

    #[test]
    fn unicode_replacement_char_cvar() {
        assert_eq!(render("@@�"), r#""@@�""#);
    }

    #[test]
    fn escape_slash() {
        assert_eq!(render(r"\"), r#""\\""#);
    }

    #[test]
    fn escape_inner_slash() {
        assert_eq!(render(r"foo\bar"), r#""foo\\bar""#);
    }

    #[test]
    fn nul() {
        assert_eq!(render("\0"), r#""\x00""#);
    }

    #[test]
    fn del() {
        assert_eq!(render("\x7F"), r#""\x7F""#);
    }

    #[test]
    fn ascii_control() {
        // Every byte from 0x00 through 0x20 paired with its expected rendering.
        let test_cases = [
            ("\x00", r#""\x00""#),
            ("\x01", r#""\x01""#),
            ("\x02", r#""\x02""#),
            ("\x03", r#""\x03""#),
            ("\x04", r#""\x04""#),
            ("\x05", r#""\x05""#),
            ("\x06", r#""\x06""#),
            ("\x07", r#""\a""#),
            ("\x08", r#""\b""#),
            ("\x09", r#""\t""#),
            ("\x0A", r#""\n""#),
            ("\x0B", r#""\v""#),
            ("\x0C", r#""\f""#),
            ("\x0D", r#""\r""#),
            ("\x0E", r#""\x0E""#),
            ("\x0F", r#""\x0F""#),
            ("\x10", r#""\x10""#),
            ("\x11", r#""\x11""#),
            ("\x12", r#""\x12""#),
            ("\x13", r#""\x13""#),
            ("\x14", r#""\x14""#),
            ("\x15", r#""\x15""#),
            ("\x16", r#""\x16""#),
            ("\x17", r#""\x17""#),
            ("\x18", r#""\x18""#),
            ("\x19", r#""\x19""#),
            ("\x1A", r#""\x1A""#),
            ("\x1B", r#""\e""#),
            ("\x1C", r#""\x1C""#),
            ("\x1D", r#""\x1D""#),
            ("\x1E", r#""\x1E""#),
            ("\x1F", r#""\x1F""#),
            ("\x20", r#"" ""#),
        ];
        for (s, r) in test_cases {
            assert_eq!(render(s), r, "For {s:?}, expected {r}");
        }
    }

    #[test]
    fn special_double_quote() {
        // Both spellings denote the same single `"` character.
        for input in ["\x22", "\""] {
            assert_eq!(render(input), r#""\"""#);
        }
    }

    #[test]
    fn special_backslash() {
        // Both spellings denote the same single `\` character.
        for input in ["\x5C", "\\"] {
            assert_eq!(render(input), r#""\\""#);
        }
    }

    #[test]
    fn invalid_utf8_special_global() {
        let input = Utf8Str::from_bytes(b"$-\xFF");

        assert_eq!(render(input), r#""$-\xFF""#);
    }

    #[test]
    fn replacement_char_special_global() {
        let cases = [
            ("$-�", r#""$-�""#),
            ("$-�a", r#""$-�a""#),
            ("$-��", r#""$-��""#),
        ];
        for (input, expected) in cases {
            assert_eq!(render(input), expected);
        }
    }
}