1use core::fmt;
2use core::iter::FusedIterator;
3
4use scolapasta_string_escape::{InvalidUtf8ByteSequence, ascii_char_with_escape};
5
6use crate::ident::IdentifierType;
7
/// An iterator that yields the characters of the debug (`inspect`-style)
/// representation of a symbol's bytes.
///
/// The output always starts with `:`. Byte strings accepted by
/// `IdentifierType` are emitted bare (for example `:fred`); all other
/// inputs, including the empty byte string, are wrapped in double quotes
/// with escape sequences applied (for example `:"foo bar"`).
///
/// Construct one with `Inspect::from` on a `&str` or a `&[u8]`.
#[derive(Default, Debug, Clone)]
#[must_use = "this `Inspect` is an `Iterator`, which should be consumed if constructed"]
#[cfg_attr(docsrs, doc(cfg(feature = "inspect")))]
pub struct Inspect<'a>(State<'a>);
60
61impl<'a> From<&'a str> for Inspect<'a> {
62 #[inline]
63 fn from(value: &'a str) -> Self {
64 Self::from(value.as_bytes())
65 }
66}
67
68impl<'a> From<&'a [u8]> for Inspect<'a> {
69 #[inline]
70 fn from(value: &'a [u8]) -> Self {
71 match value {
72 [] => Self::default(),
73 value if IdentifierType::try_from(value).is_ok() => Self(State::ident(value)),
74 value => Self(State::quoted(value)),
75 }
76 }
77}
78
79impl Iterator for Inspect<'_> {
80 type Item = char;
81
82 fn next(&mut self) -> Option<Self::Item> {
83 self.0.next()
84 }
85}
86
// `Inspect::next` delegates directly to the wrapped `State`, which is itself
// declared a `FusedIterator`.
impl FusedIterator for Inspect<'_> {}
88
89impl Inspect<'_> {
90 #[inline]
124 pub fn format_into<W>(self, mut dest: W) -> fmt::Result
125 where
126 W: fmt::Write,
127 {
128 for ch in self {
129 dest.write_char(ch)?;
130 }
131 Ok(())
132 }
133
134 #[inline]
168 #[cfg(feature = "std")]
169 #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
170 pub fn write_into<W>(self, mut dest: W) -> std::io::Result<()>
171 where
172 W: std::io::Write,
173 {
174 let mut buf = [0; 4];
175 for ch in self {
176 let utf8 = ch.encode_utf8(&mut buf);
177 dest.write_all(utf8.as_bytes())?;
178 }
179 Ok(())
180 }
181}
182
/// Bit set recording which delimiters still have to be emitted and whether
/// the input is rendered as a bare identifier.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct Flags {
    bits: u8,
}

impl Flags {
    /// The input is a bare identifier (no surrounding quotes).
    const IS_IDENT: Self = Self { bits: 0b1000_0000 };
    /// The leading `:` has not been emitted yet.
    const EMIT_LEADING_COLON: Self = Self { bits: 0b0000_1000 };
    /// The opening `"` has not been emitted yet.
    const EMIT_LEADING_QUOTE: Self = Self { bits: 0b0000_0001 };
    /// The closing `"` has not been emitted yet.
    const EMIT_TRAILING_QUOTE: Self = Self { bits: 0b0000_0010 };

    /// Initial flags for a bare identifier: only a leading colon is pending.
    const IDENT: Self = Self {
        bits: Self::IS_IDENT.bits | Self::EMIT_LEADING_COLON.bits,
    };
    /// Initial flags for a quoted symbol: colon and both quotes are pending.
    const QUOTED: Self = Self {
        bits: Self::EMIT_LEADING_COLON.bits
            | Self::EMIT_LEADING_QUOTE.bits
            | Self::EMIT_TRAILING_QUOTE.bits,
    };

    /// If every bit of `flag` is set, clears those bits and returns `ch`;
    /// otherwise leaves the flags untouched and returns `None`.
    #[inline]
    fn take(&mut self, flag: Self, ch: char) -> Option<char> {
        if (self.bits & flag.bits) != flag.bits {
            return None;
        }
        self.bits &= !flag.bits;
        Some(ch)
    }

    /// Returns `':'` the first time it is called on flags with a pending
    /// leading colon, and `None` afterwards.
    #[inline]
    fn emit_leading_colon(&mut self) -> Option<char> {
        self.take(Self::EMIT_LEADING_COLON, ':')
    }

    /// Returns `'"'` the first time it is called on flags with a pending
    /// opening quote, and `None` afterwards.
    #[inline]
    fn emit_leading_quote(&mut self) -> Option<char> {
        self.take(Self::EMIT_LEADING_QUOTE, '"')
    }

    /// Returns `'"'` the first time it is called on flags with a pending
    /// closing quote, and `None` afterwards.
    #[inline]
    fn emit_trailing_quote(&mut self) -> Option<char> {
        self.take(Self::EMIT_TRAILING_QUOTE, '"')
    }

    /// Returns `true` when the identifier bit is set.
    #[inline]
    const fn is_ident(self) -> bool {
        (self.bits & Self::IS_IDENT.bits) != 0
    }
}
238
/// Internal state machine that produces the characters yielded by `Inspect`.
#[derive(Debug, Clone)]
#[must_use = "this `State` is an `Iterator`, which should be consumed if constructed"]
struct State<'a> {
    /// Delimiters (`:` and quotes) still to be emitted, plus whether the
    /// input is rendered as a bare identifier.
    flags: Flags,
    /// Remaining bytes of an escape sequence that is partway through being
    /// yielded; empty when no escape is in progress.
    escaped_bytes: &'static [u8],
    /// Pending characters for the most recently encountered invalid UTF-8
    /// chunk.
    forward_byte_literal: InvalidUtf8ByteSequence,
    /// Input bytes that have not been decoded yet.
    bytes: &'a [u8],
}
247
248impl<'a> State<'a> {
249 #[inline]
254 fn ident(bytes: &'a [u8]) -> Self {
255 Self {
256 flags: Flags::IDENT,
257 escaped_bytes: &[],
258 forward_byte_literal: InvalidUtf8ByteSequence::new(),
259 bytes,
260 }
261 }
262
263 #[inline]
267 fn quoted(bytes: &'a [u8]) -> Self {
268 Self {
269 flags: Flags::QUOTED,
270 escaped_bytes: &[],
271 forward_byte_literal: InvalidUtf8ByteSequence::new(),
272 bytes,
273 }
274 }
275}
276
277impl Default for State<'_> {
278 #[inline]
282 fn default() -> Self {
283 Self::quoted(b"")
284 }
285}
286
/// Produces the inspect output one `char` per call.
///
/// Each call returns the first available item from, in order: the pending
/// leading colon, the pending leading quote, the rest of an in-progress
/// escape sequence, the rest of an in-progress invalid-byte literal, the next
/// decoded chunk of input, and finally the pending trailing quote.
impl Iterator for State<'_> {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        // Leading delimiters are emitted at most once each; the flag is
        // cleared as the character is handed out.
        if let Some(ch) = self.flags.emit_leading_colon() {
            return Some(ch);
        }
        if let Some(ch) = self.flags.emit_leading_quote() {
            return Some(ch);
        }
        // Finish an escape sequence started by a previous call before
        // touching any new input.
        if let Some((&head, tail)) = self.escaped_bytes.split_first() {
            self.escaped_bytes = tail;
            return Some(head.into());
        }
        // Likewise finish the literal for a previously seen invalid chunk.
        if let Some(ch) = self.forward_byte_literal.next() {
            return Some(ch);
        }
        let (ch, size) = bstr::decode_utf8(self.bytes);
        // SAFETY: `size` is the number of bytes `bstr::decode_utf8` consumed
        // from `self.bytes`, so it cannot exceed `self.bytes.len()`.
        let (chunk, remainder) = unsafe { self.bytes.split_at_unchecked(size) };
        self.bytes = remainder;

        match ch {
            // In a bare identifier, `"` and `\` are passed through unescaped.
            Some('"' | '\\') if self.flags.is_ident() => {
                return ch;
            }
            Some(ch) => {
                // When the character has an escape sequence, yield its first
                // byte now and queue the remainder for subsequent calls.
                if let Some([head, tail @ ..]) = ascii_char_with_escape(ch).map(str::as_bytes) {
                    self.escaped_bytes = tail;
                    return Some(char::from(*head));
                }
                return Some(ch);
            }
            // Nothing was consumed: the input is exhausted, so fall through
            // to the trailing quote.
            None if size == 0 => {}
            None => {
                // NOTE(review): the `unwrap` relies on
                // `InvalidUtf8ByteSequence::try_from` accepting every invalid
                // chunk that `bstr::decode_utf8` can report — confirm against
                // that conversion's documented contract.
                self.forward_byte_literal = InvalidUtf8ByteSequence::try_from(chunk).unwrap();
                return self.forward_byte_literal.next();
            }
        };
        if let Some(ch) = self.flags.emit_trailing_quote() {
            return Some(ch);
        }
        None
    }
}
336
// When `next` returns `None`, every emit flag has been cleared and no input
// bytes remain, so later calls take the same path again (provided the pending
// invalid-byte iterator also stays exhausted — TODO confirm).
impl FusedIterator for State<'_> {}
338
#[cfg(test)]
mod tests {
    use std::string::String;

    use super::Inspect;

    /// Collects the complete inspect output for `symbol` into a `String`.
    fn inspect<'a, T>(symbol: T) -> String
    where
        T: Into<Inspect<'a>>,
    {
        symbol.into().collect()
    }

    #[test]
    fn empty() {
        assert_eq!(inspect(""), r#":"""#);
    }

    #[test]
    fn fred() {
        assert_eq!(inspect("fred"), ":fred");
    }

    #[test]
    fn invalid_utf8_byte() {
        assert_eq!(inspect(&b"\xFF"[..]), r#":"\xFF""#);
    }

    #[test]
    fn invalid_utf8() {
        assert_eq!(inspect(&b"invalid-\xFF-utf8"[..]), r#":"invalid-\xFF-utf8""#);
    }

    #[test]
    fn quoted() {
        let mut iter = Inspect::from(r#"a"b"#);
        for expected in [':', '"', 'a', '\\', '"', 'b', '"'] {
            assert_eq!(iter.next(), Some(expected));
        }
        // The iterator must terminate after the closing quote and, being
        // fused, must keep returning `None`.
        assert_eq!(iter.next(), None);
        assert_eq!(iter.next(), None);

        assert_eq!(inspect(r#"a"b"#), r#":"a\"b""#);
    }

    #[test]
    fn emoji() {
        let cases = [
            ("💎", ":💎"),
            ("$💎", ":$💎"),
            ("@💎", ":@💎"),
            ("@@💎", ":@@💎"),
        ];
        for (input, expected) in cases {
            assert_eq!(inspect(input), expected, "inspect of {input:?}");
        }
    }

    #[test]
    fn unicode_replacement_char() {
        let cases = [
            ("�", ":�"),
            ("$�", ":$�"),
            ("@�", ":@�"),
            ("@@�", ":@@�"),
            ("abc�", ":abc�"),
            ("$abc�", ":$abc�"),
            ("@abc�", ":@abc�"),
            ("@@abc�", ":@@abc�"),
        ];
        for (input, expected) in cases {
            assert_eq!(inspect(input), expected, "inspect of {input:?}");
        }
    }

    #[test]
    fn escape_slash() {
        assert_eq!(inspect("\\"), r#":"\\""#);
        assert_eq!(inspect("foo\\bar"), r#":"foo\\bar""#);
    }

    #[test]
    fn nul() {
        assert_eq!(inspect("\0"), r#":"\x00""#);
    }

    #[test]
    fn del() {
        assert_eq!(inspect("\x7F"), r#":"\x7F""#);
    }

    #[test]
    fn ascii_control() {
        let cases = [
            ("\0", r#":"\x00""#),
            ("\x01", r#":"\x01""#),
            ("\x02", r#":"\x02""#),
            ("\x03", r#":"\x03""#),
            ("\x04", r#":"\x04""#),
            ("\x05", r#":"\x05""#),
            ("\x06", r#":"\x06""#),
            ("\x07", r#":"\a""#),
            ("\x08", r#":"\b""#),
            ("\x09", r#":"\t""#),
            ("\x0A", r#":"\n""#),
            ("\x0B", r#":"\v""#),
            ("\x0C", r#":"\f""#),
            ("\x0D", r#":"\r""#),
            ("\x0E", r#":"\x0E""#),
            ("\x0F", r#":"\x0F""#),
            ("\x10", r#":"\x10""#),
            ("\x11", r#":"\x11""#),
            ("\x12", r#":"\x12""#),
            ("\x13", r#":"\x13""#),
            ("\x14", r#":"\x14""#),
            ("\x15", r#":"\x15""#),
            ("\x16", r#":"\x16""#),
            ("\x17", r#":"\x17""#),
            ("\x18", r#":"\x18""#),
            ("\x19", r#":"\x19""#),
            ("\x1A", r#":"\x1A""#),
            ("\x1B", r#":"\e""#),
            ("\x1C", r#":"\x1C""#),
            ("\x1D", r#":"\x1D""#),
            ("\x1E", r#":"\x1E""#),
            ("\x1F", r#":"\x1F""#),
            ("\x20", r#":" ""#),
        ];
        for (input, expected) in cases {
            assert_eq!(inspect(input), expected, "inspect of {input:?}");
        }
    }

    #[test]
    fn special_escapes() {
        let cases = [
            // Double quote, spelled as a hex escape and as a literal.
            ("\x22", r#":"\"""#),
            ("\"", r#":"\"""#),
            // Backslash, spelled as a hex escape and as a literal.
            ("\x5C", r#":"\\""#),
            ("\\", r#":"\\""#),
        ];
        for (input, expected) in cases {
            assert_eq!(inspect(input), expected, "inspect of {input:?}");
        }
    }

    #[test]
    fn invalid_utf8_special_global() {
        assert_eq!(inspect(&b"$-\xFF"[..]), r#":"$-\xFF""#);
    }

    #[test]
    fn replacement_char_special_global() {
        assert_eq!(inspect("$-�"), ":$-�");
        assert_eq!(inspect("$-�a"), r#":"$-�a""#);
        assert_eq!(inspect("$-��"), r#":"$-��""#);
    }
}
481
#[cfg(test)]
mod specs {
    use std::string::String;

    use super::{Flags, Inspect};

    /// Signature shared by the `Flags::emit_*` methods.
    type Emit = fn(&mut Flags) -> Option<char>;

    #[test]
    fn flags_ident() {
        let mut flags = Flags::IDENT;
        assert!(flags.is_ident());

        // Only the leading colon is pending, and the identifier bit survives
        // every emit call.
        let steps: [(Emit, Option<char>); 4] = [
            (Flags::emit_leading_colon, Some(':')),
            (Flags::emit_leading_colon, None),
            (Flags::emit_leading_quote, None),
            (Flags::emit_trailing_quote, None),
        ];
        for (emit, expected) in steps {
            assert_eq!(emit(&mut flags), expected);
            assert!(flags.is_ident());
        }
    }

    #[test]
    fn flags_quoted() {
        let mut flags = Flags::QUOTED;
        assert!(!flags.is_ident());

        // Each delimiter is handed out exactly once, and the identifier bit
        // is never set along the way.
        let steps: [(Emit, Option<char>); 6] = [
            (Flags::emit_leading_colon, Some(':')),
            (Flags::emit_leading_colon, None),
            (Flags::emit_leading_quote, Some('"')),
            (Flags::emit_leading_quote, None),
            (Flags::emit_trailing_quote, Some('"')),
            (Flags::emit_trailing_quote, None),
        ];
        for (emit, expected) in steps {
            assert_eq!(emit(&mut flags), expected);
            assert!(!flags.is_ident());
        }
    }

    #[test]
    fn specs() {
        const CASES: &[(&str, &str)] = &[
            // Plain, global, instance and class variable identifiers.
            ("fred", ":fred"),
            ("fred?", ":fred?"),
            ("fred!", ":fred!"),
            ("$ruby", ":$ruby"),
            ("@ruby", ":@ruby"),
            ("@@ruby", ":@@ruby"),
            // Sigil-prefixed names may not end in `!` or `?`.
            ("$ruby!", r#":"$ruby!""#),
            ("$ruby?", r#":"$ruby?""#),
            ("@ruby!", r#":"@ruby!""#),
            ("@ruby?", r#":"@ruby?""#),
            ("@@ruby!", r#":"@@ruby!""#),
            ("@@ruby?", r#":"@@ruby?""#),
            // Special global variables.
            ("$-w", ":$-w"),
            ("$-ww", r#":"$-ww""#),
            ("$+", ":$+"),
            ("$~", ":$~"),
            ("$:", ":$:"),
            ("$?", ":$?"),
            ("$<", ":$<"),
            ("$_", ":$_"),
            ("$/", ":$/"),
            ("$\"", ":$\""),
            ("$$", ":$$"),
            ("$.", ":$."),
            ("$,", ":$,"),
            ("$`", ":$`"),
            ("$!", ":$!"),
            ("$;", ":$;"),
            ("$\\", ":$\\"),
            ("$=", ":$="),
            ("$*", ":$*"),
            ("$>", ":$>"),
            ("$&", ":$&"),
            ("$@", ":$@"),
            ("$1234", ":$1234"),
            // Operator symbols.
            ("-@", ":-@"),
            ("+@", ":+@"),
            ("%", ":%"),
            ("&", ":&"),
            ("*", ":*"),
            ("**", ":**"),
            ("/", ":/"),
            ("<", ":<"),
            ("<=", ":<="),
            ("<=>", ":<=>"),
            ("==", ":=="),
            ("===", ":==="),
            ("=~", ":=~"),
            (">", ":>"),
            (">=", ":>="),
            (">>", ":>>"),
            ("[]", ":[]"),
            ("[]=", ":[]="),
            ("<<", ":<<"),
            ("^", ":^"),
            ("`", ":`"),
            ("~", ":~"),
            ("|", ":|"),
            // Negation operators, followed by punctuation that is quoted.
            ("!", ":!"),
            ("!=", ":!="),
            ("!~", ":!~"),
            ("$", r#":"$""#),
            ("&&", r#":"&&""#),
            ("'", r#":"'""#),
            (",", r#":",""#),
            (".", r#":".""#),
            ("..", r#":"..""#),
            ("...", r#":"...""#),
            (":", r#":":""#),
            ("::", r#":"::""#),
            (";", r#":";""#),
            ("=", r#":"=""#),
            ("=>", r#":"=>""#),
            ("?", r#":"?""#),
            ("@", r#":"@""#),
            ("||", r#":"||""#),
            ("|||", r#":"|||""#),
            ("++", r#":"++""#),
            // Double quotes are escaped inside the quoted form.
            (r#"""#, r#":"\"""#),
            (r#""""#, r#":"\"\"""#),
            // Names that are not valid identifiers.
            ("9", r#":"9""#),
            ("foo bar", r#":"foo bar""#),
            ("*foo", r#":"*foo""#),
            ("foo ", r#":"foo ""#),
            (" foo", r#":" foo""#),
            (" ", r#":" ""#),
        ];

        for &(symbol, expected) in CASES {
            let rendered = Inspect::from(symbol).collect::<String>();
            assert_eq!(rendered, expected, "inspect of {symbol:?}");
        }
    }
}
725
#[cfg(test)]
mod functionals {
    use std::string::String;

    use super::Inspect;
    use crate::fixtures::{IDENT_INSPECTS, IDENTS};

    /// Checks every fixture identifier against its expected inspect output.
    #[test]
    fn mri_symbol_idents() {
        // `zip` stops at the shorter input, so a length mismatch between the
        // two fixture lists would otherwise leave trailing cases untested
        // without failing.
        assert_eq!(
            IDENTS.len(),
            IDENT_INSPECTS.len(),
            "fixture lists must pair up one-to-one",
        );

        let pairs = IDENTS.iter().copied().zip(IDENT_INSPECTS.iter().copied());
        for (sym, expected) in pairs {
            let inspect = Inspect::from(sym).collect::<String>();
            assert_eq!(
                inspect, expected,
                "Expected '{expected}', to be the result of '{sym}'.inspect; got '{inspect}'",
            );
        }
    }
}