use core::fmt;
use core::iter::FusedIterator;
use scolapasta_string_escape::{ascii_char_with_escape, InvalidUtf8ByteSequence};
use crate::ident::IdentifierType;
/// An iterator over the `char`s of the debug ("inspect") representation of a
/// symbol's byte content.
///
/// The output always begins with `:`. Byte content that parses as an
/// `IdentifierType` is emitted bare (for example `:fred`); any other content,
/// including the empty slice, is wrapped in double quotes with escapes applied
/// (for example `:"a\"b"` or `:""`).
///
/// Construct one with `Inspect::from` on a `&str` or `&[u8]`. The `Default`
/// value behaves like an `Inspect` over the empty slice.
#[derive(Default, Debug, Clone)]
#[must_use = "this `Inspect` is an `Iterator`, which should be consumed if constructed"]
#[cfg_attr(docsrs, doc(cfg(feature = "inspect")))]
pub struct Inspect<'a>(State<'a>);
impl<'a> From<&'a str> for Inspect<'a> {
    /// Builds an `Inspect` over the UTF-8 bytes of `value`.
    ///
    /// This is a thin adapter: all classification happens in the byte slice
    /// conversion.
    #[inline]
    fn from(value: &'a str) -> Self {
        let bytes: &'a [u8] = value.as_bytes();
        <Self as From<&'a [u8]>>::from(bytes)
    }
}
impl<'a> From<&'a [u8]> for Inspect<'a> {
    /// Builds an `Inspect` over `value`, choosing the bare or quoted form.
    ///
    /// - An empty slice yields the default iterator.
    /// - Bytes accepted by `IdentifierType::try_from` are emitted unquoted.
    /// - Everything else is emitted in the quoted, escaped form.
    #[inline]
    fn from(value: &'a [u8]) -> Self {
        // The empty symbol is handled up front so the identifier check only
        // ever sees non-empty input.
        if value.is_empty() {
            return Self::default();
        }
        let state = if IdentifierType::try_from(value).is_ok() {
            State::ident(value)
        } else {
            State::quoted(value)
        };
        Self(state)
    }
}
impl Iterator for Inspect<'_> {
    type Item = char;

    /// Yields the next `char` of the inspect output by delegating to the
    /// wrapped `State`.
    fn next(&mut self) -> Option<Self::Item> {
        Iterator::next(&mut self.0)
    }
}
// Marker impl: `Inspect::next` forwards to `State::next`, and `State` is
// itself declared `FusedIterator`.
impl FusedIterator for Inspect<'_> {}
impl Inspect<'_> {
    /// Writes the full inspect output into a [`fmt::Write`] destination.
    ///
    /// The iterator is consumed. Each `char` is written with `write_char`.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by `dest`; no further characters are
    /// written after a failure.
    #[inline]
    pub fn format_into<W>(self, mut dest: W) -> fmt::Result
    where
        W: fmt::Write,
    {
        let mut chars = self;
        chars.try_for_each(|ch| dest.write_char(ch))
    }

    /// Writes the full inspect output, UTF-8 encoded, into an
    /// [`std::io::Write`] destination.
    ///
    /// The iterator is consumed. Each `char` is encoded into a small stack
    /// buffer and written with `write_all`.
    ///
    /// # Errors
    ///
    /// Returns the first I/O error reported by `dest`; no further characters
    /// are written after a failure.
    #[inline]
    #[cfg(feature = "std")]
    #[cfg_attr(docsrs, doc(cfg(feature = "std")))]
    pub fn write_into<W>(self, mut dest: W) -> std::io::Result<()>
    where
        W: std::io::Write,
    {
        // Four bytes is the maximum length of a UTF-8 encoded `char`.
        let mut scratch = [0_u8; 4];
        let mut chars = self;
        chars.try_for_each(|ch| {
            let encoded = ch.encode_utf8(&mut scratch);
            dest.write_all(encoded.as_bytes())
        })
    }
}
/// Bit set tracking which fixed punctuation the inspect iterator still has to
/// emit and whether the content is a bare identifier.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct Flags {
    bits: u8,
}

impl Flags {
    /// The content is a bare identifier and is emitted without quotes.
    const IS_IDENT: Self = Self { bits: 0b1000_0000 };
    /// The leading `:` has not been emitted yet.
    const EMIT_LEADING_COLON: Self = Self { bits: 0b0000_1000 };
    /// The opening `"` has not been emitted yet.
    const EMIT_LEADING_QUOTE: Self = Self { bits: 0b0000_0001 };
    /// The closing `"` has not been emitted yet.
    const EMIT_TRAILING_QUOTE: Self = Self { bits: 0b0000_0010 };

    /// Initial flags for a bare identifier: a colon and no quotes.
    const IDENT: Self = Self {
        bits: Self::IS_IDENT.bits | Self::EMIT_LEADING_COLON.bits,
    };
    /// Initial flags for quoted content: a colon plus both quotes.
    const QUOTED: Self = Self {
        bits: Self::EMIT_LEADING_COLON.bits
            | Self::EMIT_LEADING_QUOTE.bits
            | Self::EMIT_TRAILING_QUOTE.bits,
    };

    /// Reports whether every bit of `flag` is set, clearing those bits if so.
    ///
    /// The bits are left untouched when the flag is not fully set.
    #[inline]
    fn clear_if_set(&mut self, flag: Self) -> bool {
        let pending = (self.bits & flag.bits) == flag.bits;
        if pending {
            self.bits &= !flag.bits;
        }
        pending
    }

    /// Returns `Some(':')` the first time it is called on flags that request
    /// a leading colon, and `None` afterwards.
    #[inline]
    fn emit_leading_colon(&mut self) -> Option<char> {
        self.clear_if_set(Self::EMIT_LEADING_COLON).then_some(':')
    }

    /// Returns `Some('"')` the first time it is called on flags that request
    /// an opening quote, and `None` afterwards.
    #[inline]
    fn emit_leading_quote(&mut self) -> Option<char> {
        self.clear_if_set(Self::EMIT_LEADING_QUOTE).then_some('"')
    }

    /// Returns `Some('"')` the first time it is called on flags that request
    /// a closing quote, and `None` afterwards.
    #[inline]
    fn emit_trailing_quote(&mut self) -> Option<char> {
        self.clear_if_set(Self::EMIT_TRAILING_QUOTE).then_some('"')
    }

    /// Returns `true` when the identifier bit is set.
    ///
    /// None of the emit methods clear this bit.
    #[inline]
    const fn is_ident(self) -> bool {
        (self.bits & Self::IS_IDENT.bits) == Self::IS_IDENT.bits
    }
}
/// Internal state machine behind `Inspect`.
///
/// Holds the punctuation still to emit, any partially emitted escape
/// sequence, and the not-yet-decoded remainder of the input.
#[derive(Debug, Clone)]
#[must_use = "this `State` is an `Iterator`, which should be consumed if constructed"]
struct State<'a> {
// Pending colon/quote emission and the identifier marker.
flags: Flags,
// Remaining bytes of an escape sequence whose first byte was already
// yielded; drained one byte per `next` call.
escaped_bytes: &'static [u8],
// Iterator over the escaped form of the most recent invalid UTF-8 chunk.
forward_byte_literal: InvalidUtf8ByteSequence,
// Input bytes that have not been decoded yet.
bytes: &'a [u8],
}
impl<'a> State<'a> {
#[inline]
fn ident(bytes: &'a [u8]) -> Self {
Self {
flags: Flags::IDENT,
escaped_bytes: &[],
forward_byte_literal: InvalidUtf8ByteSequence::new(),
bytes,
}
}
#[inline]
fn quoted(bytes: &'a [u8]) -> Self {
Self {
flags: Flags::QUOTED,
escaped_bytes: &[],
forward_byte_literal: InvalidUtf8ByteSequence::new(),
bytes,
}
}
}
impl Default for State<'_> {
    /// Returns the state for the empty symbol, which is emitted in quoted
    /// form.
    #[inline]
    fn default() -> Self {
        let empty: &'static [u8] = &[];
        Self::quoted(empty)
    }
}
// Produces the inspect output one `char` at a time. Each call works through
// the phases below in order and returns from the first one that has
// something to emit, so the statement order is significant.
impl Iterator for State<'_> {
type Item = char;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
// Phase 1: fixed prefix. The colon comes first, then the opening quote when
// the flags request one.
if let Some(ch) = self.flags.emit_leading_colon() {
return Some(ch);
}
if let Some(ch) = self.flags.emit_leading_quote() {
return Some(ch);
}
// Phase 2: finish an ASCII escape sequence whose first byte was returned by
// an earlier call.
if let Some((&head, tail)) = self.escaped_bytes.split_first() {
self.escaped_bytes = tail;
return Some(head.into());
}
// Phase 3: finish the escaped form of an invalid UTF-8 chunk started by an
// earlier call.
if let Some(ch) = self.forward_byte_literal.next() {
return Some(ch);
}
// Phase 4: decode the next character from the remaining input.
let (ch, size) = bstr::decode_utf8(self.bytes);
// SAFETY: `size` is the number of bytes `bstr::decode_utf8` consumed from
// `self.bytes`; per bstr's documentation this never exceeds the length of
// the slice it was given, so the split point is in bounds.
let (chunk, remainder) = unsafe { self.bytes.split_at_unchecked(size) };
self.bytes = remainder;
match ch {
// In a bare identifier, `"` and `\` are emitted as-is rather than escaped.
Some('"' | '\\') if self.flags.is_ident() => {
return ch;
}
Some(ch) => {
// When the character has an escape, yield its first byte now and stash the
// rest for phase 2 on later calls.
if let Some([head, tail @ ..]) = ascii_char_with_escape(ch).map(str::as_bytes) {
self.escaped_bytes = tail;
return Some(char::from(*head));
}
// No escape applies: emit the character itself.
return Some(ch);
}
// Nothing was decoded and nothing was consumed: the input is exhausted, so
// fall through to the trailing quote.
None if size == 0 => {}
// Invalid UTF-8: `chunk` holds the offending bytes. Start emitting their
// escaped form now; phase 3 drains the rest on later calls.
None => {
// NOTE(review): the `unwrap` assumes `InvalidUtf8ByteSequence::try_from`
// accepts every chunk length `decode_utf8` can report for invalid input
// (bstr documents 1 to 3 bytes) — confirm against scolapasta_string_escape.
self.forward_byte_literal = InvalidUtf8ByteSequence::try_from(chunk).unwrap();
return self.forward_byte_literal.next();
}
};
// Phase 5: closing quote, emitted at most once and only when the flags
// request one.
if let Some(ch) = self.flags.emit_trailing_quote() {
return Some(ch);
}
None
}
}
// Marker impl: once the input is exhausted and the trailing-quote flag has
// been cleared, `State::next` has nothing left to emit.
impl FusedIterator for State<'_> {}
// Unit tests for the inspect output on individual inputs: empty content,
// identifiers, invalid UTF-8, and escaping of quotes, backslashes and ASCII
// control characters.
#[cfg(test)]
mod tests {
use std::string::String;
use super::Inspect;
// The empty symbol is rendered as an empty quoted string.
#[test]
fn empty() {
let inspect = Inspect::from("");
let debug = inspect.collect::<String>();
assert_eq!(debug, r#":"""#);
}
// A plain identifier is rendered bare.
#[test]
fn fred() {
let inspect = Inspect::from("fred");
let debug = inspect.collect::<String>();
assert_eq!(debug, ":fred");
}
// A lone invalid byte is rendered as a `\xNN` escape inside quotes.
#[test]
fn invalid_utf8_byte() {
assert_eq!(Inspect::from(&b"\xFF"[..]).collect::<String>(), r#":"\xFF""#);
}
// Invalid bytes in the middle of valid text are escaped in place.
#[test]
fn invalid_utf8() {
let inspect = Inspect::from(&b"invalid-\xFF-utf8"[..]);
let debug = inspect.collect::<String>();
assert_eq!(debug, r#":"invalid-\xFF-utf8""#);
}
// Steps through the iterator to check that an embedded quote is emitted as
// a backslash followed by the quote.
#[test]
fn quoted() {
let mut inspect = Inspect::from(r#"a"b"#);
assert_eq!(inspect.next(), Some(':'));
assert_eq!(inspect.next(), Some('"'));
assert_eq!(inspect.next(), Some('a'));
assert_eq!(inspect.next(), Some('\\'));
assert_eq!(inspect.next(), Some('"'));
assert_eq!(inspect.next(), Some('b'));
assert_eq!(inspect.next(), Some('"'));
assert_eq!(Inspect::from(r#"a"b"#).collect::<String>(), r#":"a\"b""#);
}
// Non-ASCII characters are accepted in identifiers and emitted unescaped.
#[test]
fn emoji() {
assert_eq!(Inspect::from("💎").collect::<String>(), ":💎");
assert_eq!(Inspect::from("$💎").collect::<String>(), ":$💎");
assert_eq!(Inspect::from("@💎").collect::<String>(), ":@💎");
assert_eq!(Inspect::from("@@💎").collect::<String>(), ":@@💎");
}
// A literal U+FFFD in valid UTF-8 input is treated like any other character.
#[test]
fn unicode_replacement_char() {
assert_eq!(Inspect::from("�").collect::<String>(), ":�");
assert_eq!(Inspect::from("$�").collect::<String>(), ":$�");
assert_eq!(Inspect::from("@�").collect::<String>(), ":@�");
assert_eq!(Inspect::from("@@�").collect::<String>(), ":@@�");
assert_eq!(Inspect::from("abc�").collect::<String>(), ":abc�");
assert_eq!(Inspect::from("$abc�").collect::<String>(), ":$abc�");
assert_eq!(Inspect::from("@abc�").collect::<String>(), ":@abc�");
assert_eq!(Inspect::from("@@abc�").collect::<String>(), ":@@abc�");
}
// Backslashes in quoted output are doubled.
#[test]
fn escape_slash() {
assert_eq!(Inspect::from("\\").collect::<String>(), r#":"\\""#);
assert_eq!(Inspect::from("foo\\bar").collect::<String>(), r#":"foo\\bar""#);
}
// NUL is rendered as a hex escape.
#[test]
fn nul() {
assert_eq!(Inspect::from("\0").collect::<String>(), r#":"\x00""#);
}
// DEL is rendered as a hex escape.
#[test]
fn del() {
assert_eq!(Inspect::from("\x7F").collect::<String>(), r#":"\x7F""#);
}
// Covers 0x00 through 0x20: control characters use a short escape where one
// is expected below and a hex escape otherwise; space is left as-is.
#[test]
fn ascii_control() {
assert_eq!(Inspect::from("\0").collect::<String>(), r#":"\x00""#);
assert_eq!(Inspect::from("\x01").collect::<String>(), r#":"\x01""#);
assert_eq!(Inspect::from("\x02").collect::<String>(), r#":"\x02""#);
assert_eq!(Inspect::from("\x03").collect::<String>(), r#":"\x03""#);
assert_eq!(Inspect::from("\x04").collect::<String>(), r#":"\x04""#);
assert_eq!(Inspect::from("\x05").collect::<String>(), r#":"\x05""#);
assert_eq!(Inspect::from("\x06").collect::<String>(), r#":"\x06""#);
assert_eq!(Inspect::from("\x07").collect::<String>(), r#":"\a""#);
assert_eq!(Inspect::from("\x08").collect::<String>(), r#":"\b""#);
assert_eq!(Inspect::from("\x09").collect::<String>(), r#":"\t""#);
assert_eq!(Inspect::from("\x0A").collect::<String>(), r#":"\n""#);
assert_eq!(Inspect::from("\x0B").collect::<String>(), r#":"\v""#);
assert_eq!(Inspect::from("\x0C").collect::<String>(), r#":"\f""#);
assert_eq!(Inspect::from("\x0D").collect::<String>(), r#":"\r""#);
assert_eq!(Inspect::from("\x0E").collect::<String>(), r#":"\x0E""#);
assert_eq!(Inspect::from("\x0F").collect::<String>(), r#":"\x0F""#);
assert_eq!(Inspect::from("\x10").collect::<String>(), r#":"\x10""#);
assert_eq!(Inspect::from("\x11").collect::<String>(), r#":"\x11""#);
assert_eq!(Inspect::from("\x12").collect::<String>(), r#":"\x12""#);
assert_eq!(Inspect::from("\x13").collect::<String>(), r#":"\x13""#);
assert_eq!(Inspect::from("\x14").collect::<String>(), r#":"\x14""#);
assert_eq!(Inspect::from("\x15").collect::<String>(), r#":"\x15""#);
assert_eq!(Inspect::from("\x16").collect::<String>(), r#":"\x16""#);
assert_eq!(Inspect::from("\x17").collect::<String>(), r#":"\x17""#);
assert_eq!(Inspect::from("\x18").collect::<String>(), r#":"\x18""#);
assert_eq!(Inspect::from("\x19").collect::<String>(), r#":"\x19""#);
assert_eq!(Inspect::from("\x1A").collect::<String>(), r#":"\x1A""#);
assert_eq!(Inspect::from("\x1B").collect::<String>(), r#":"\e""#);
assert_eq!(Inspect::from("\x1C").collect::<String>(), r#":"\x1C""#);
assert_eq!(Inspect::from("\x1D").collect::<String>(), r#":"\x1D""#);
assert_eq!(Inspect::from("\x1E").collect::<String>(), r#":"\x1E""#);
assert_eq!(Inspect::from("\x1F").collect::<String>(), r#":"\x1F""#);
assert_eq!(Inspect::from("\x20").collect::<String>(), r#":" ""#);
}
// A lone quote or backslash is quoted and escaped, whether written as a hex
// literal or directly.
#[test]
fn special_escapes() {
assert_eq!(Inspect::from("\x22").collect::<String>(), r#":"\"""#);
assert_eq!(Inspect::from("\"").collect::<String>(), r#":"\"""#);
assert_eq!(Inspect::from("\x5C").collect::<String>(), r#":"\\""#);
assert_eq!(Inspect::from("\\").collect::<String>(), r#":"\\""#);
}
// `$-` followed by an invalid byte is rendered quoted with the byte escaped.
#[test]
fn invalid_utf8_special_global() {
assert_eq!(Inspect::from(&b"$-\xFF"[..]).collect::<String>(), r#":"$-\xFF""#);
}
// `$-` followed by exactly one character is rendered bare; with more than
// one trailing character the output is quoted.
#[test]
fn replacement_char_special_global() {
assert_eq!(Inspect::from("$-�").collect::<String>(), ":$-�");
assert_eq!(Inspect::from("$-�a").collect::<String>(), r#":"$-�a""#);
assert_eq!(Inspect::from("$-��").collect::<String>(), r#":"$-��""#);
}
}
// Behavioral specs: the `Flags` state transitions, plus a broad table of
// inputs and the exact inspect output expected for each.
#[cfg(test)]
mod specs {
use std::string::String;
use super::{Flags, Inspect};
// Identifier flags emit the colon exactly once, never emit quotes, and keep
// the identifier marker throughout.
#[test]
fn flags_ident() {
let mut flags = Flags::IDENT;
assert!(flags.is_ident());
assert_eq!(flags.emit_leading_colon(), Some(':'));
assert!(flags.is_ident());
assert_eq!(flags.emit_leading_colon(), None);
assert!(flags.is_ident());
assert_eq!(flags.emit_leading_quote(), None);
assert!(flags.is_ident());
assert_eq!(flags.emit_trailing_quote(), None);
assert!(flags.is_ident());
}
// Quoted flags emit the colon, the opening quote and the closing quote
// exactly once each, and are never marked as an identifier.
#[test]
fn flags_quoted() {
let mut flags = Flags::QUOTED;
assert!(!flags.is_ident());
assert_eq!(flags.emit_leading_colon(), Some(':'));
assert!(!flags.is_ident());
assert_eq!(flags.emit_leading_colon(), None);
assert!(!flags.is_ident());
assert_eq!(flags.emit_leading_quote(), Some('"'));
assert!(!flags.is_ident());
assert_eq!(flags.emit_leading_quote(), None);
assert!(!flags.is_ident());
assert_eq!(flags.emit_trailing_quote(), Some('"'));
assert!(!flags.is_ident());
assert_eq!(flags.emit_trailing_quote(), None);
assert!(!flags.is_ident());
}
// Input/output table. Cases expecting a bare `:name` are inputs treated as
// identifiers; cases expecting `:"..."` are inputs that must be quoted.
#[test]
fn specs() {
assert_eq!(Inspect::from("fred").collect::<String>(), ":fred");
assert_eq!(Inspect::from("fred?").collect::<String>(), ":fred?");
assert_eq!(Inspect::from("fred!").collect::<String>(), ":fred!");
assert_eq!(Inspect::from("$ruby").collect::<String>(), ":$ruby");
assert_eq!(Inspect::from("@ruby").collect::<String>(), ":@ruby");
assert_eq!(Inspect::from("@@ruby").collect::<String>(), ":@@ruby");
assert_eq!(Inspect::from("$ruby!").collect::<String>(), r#":"$ruby!""#);
assert_eq!(Inspect::from("$ruby?").collect::<String>(), r#":"$ruby?""#);
assert_eq!(Inspect::from("@ruby!").collect::<String>(), r#":"@ruby!""#);
assert_eq!(Inspect::from("@ruby?").collect::<String>(), r#":"@ruby?""#);
assert_eq!(Inspect::from("@@ruby!").collect::<String>(), r#":"@@ruby!""#);
assert_eq!(Inspect::from("@@ruby?").collect::<String>(), r#":"@@ruby?""#);
assert_eq!(Inspect::from("$-w").collect::<String>(), ":$-w");
assert_eq!(Inspect::from("$-ww").collect::<String>(), r#":"$-ww""#);
assert_eq!(Inspect::from("$+").collect::<String>(), ":$+");
assert_eq!(Inspect::from("$~").collect::<String>(), ":$~");
assert_eq!(Inspect::from("$:").collect::<String>(), ":$:");
assert_eq!(Inspect::from("$?").collect::<String>(), ":$?");
assert_eq!(Inspect::from("$<").collect::<String>(), ":$<");
assert_eq!(Inspect::from("$_").collect::<String>(), ":$_");
assert_eq!(Inspect::from("$/").collect::<String>(), ":$/");
assert_eq!(Inspect::from("$\"").collect::<String>(), ":$\"");
assert_eq!(Inspect::from("$$").collect::<String>(), ":$$");
assert_eq!(Inspect::from("$.").collect::<String>(), ":$.");
assert_eq!(Inspect::from("$,").collect::<String>(), ":$,");
assert_eq!(Inspect::from("$`").collect::<String>(), ":$`");
assert_eq!(Inspect::from("$!").collect::<String>(), ":$!");
assert_eq!(Inspect::from("$;").collect::<String>(), ":$;");
assert_eq!(Inspect::from("$\\").collect::<String>(), ":$\\");
assert_eq!(Inspect::from("$=").collect::<String>(), ":$=");
assert_eq!(Inspect::from("$*").collect::<String>(), ":$*");
assert_eq!(Inspect::from("$>").collect::<String>(), ":$>");
assert_eq!(Inspect::from("$&").collect::<String>(), ":$&");
assert_eq!(Inspect::from("$@").collect::<String>(), ":$@");
assert_eq!(Inspect::from("$1234").collect::<String>(), ":$1234");
assert_eq!(Inspect::from("-@").collect::<String>(), ":-@");
assert_eq!(Inspect::from("+@").collect::<String>(), ":+@");
assert_eq!(Inspect::from("%").collect::<String>(), ":%");
assert_eq!(Inspect::from("&").collect::<String>(), ":&");
assert_eq!(Inspect::from("*").collect::<String>(), ":*");
assert_eq!(Inspect::from("**").collect::<String>(), ":**");
assert_eq!(Inspect::from("/").collect::<String>(), ":/");
assert_eq!(Inspect::from("<").collect::<String>(), ":<");
assert_eq!(Inspect::from("<=").collect::<String>(), ":<=");
assert_eq!(Inspect::from("<=>").collect::<String>(), ":<=>");
assert_eq!(Inspect::from("==").collect::<String>(), ":==");
assert_eq!(Inspect::from("===").collect::<String>(), ":===");
assert_eq!(Inspect::from("=~").collect::<String>(), ":=~");
assert_eq!(Inspect::from(">").collect::<String>(), ":>");
assert_eq!(Inspect::from(">=").collect::<String>(), ":>=");
assert_eq!(Inspect::from(">>").collect::<String>(), ":>>");
assert_eq!(Inspect::from("[]").collect::<String>(), ":[]");
assert_eq!(Inspect::from("[]=").collect::<String>(), ":[]=");
assert_eq!(Inspect::from("<<").collect::<String>(), ":<<");
assert_eq!(Inspect::from("^").collect::<String>(), ":^");
assert_eq!(Inspect::from("`").collect::<String>(), ":`");
assert_eq!(Inspect::from("~").collect::<String>(), ":~");
assert_eq!(Inspect::from("|").collect::<String>(), ":|");
assert_eq!(Inspect::from("!").collect::<String>(), ":!");
assert_eq!(Inspect::from("!=").collect::<String>(), ":!=");
assert_eq!(Inspect::from("!~").collect::<String>(), ":!~");
assert_eq!(Inspect::from("$").collect::<String>(), r#":"$""#);
assert_eq!(Inspect::from("&&").collect::<String>(), r#":"&&""#);
assert_eq!(Inspect::from("'").collect::<String>(), r#":"'""#);
assert_eq!(Inspect::from(",").collect::<String>(), r#":",""#);
assert_eq!(Inspect::from(".").collect::<String>(), r#":".""#);
assert_eq!(Inspect::from("..").collect::<String>(), r#":"..""#);
assert_eq!(Inspect::from("...").collect::<String>(), r#":"...""#);
assert_eq!(Inspect::from(":").collect::<String>(), r#":":""#);
assert_eq!(Inspect::from("::").collect::<String>(), r#":"::""#);
assert_eq!(Inspect::from(";").collect::<String>(), r#":";""#);
assert_eq!(Inspect::from("=").collect::<String>(), r#":"=""#);
assert_eq!(Inspect::from("=>").collect::<String>(), r#":"=>""#);
assert_eq!(Inspect::from("?").collect::<String>(), r#":"?""#);
assert_eq!(Inspect::from("@").collect::<String>(), r#":"@""#);
assert_eq!(Inspect::from("||").collect::<String>(), r#":"||""#);
assert_eq!(Inspect::from("|||").collect::<String>(), r#":"|||""#);
assert_eq!(Inspect::from("++").collect::<String>(), r#":"++""#);
assert_eq!(Inspect::from(r#"""#).collect::<String>(), r#":"\"""#);
assert_eq!(Inspect::from(r#""""#).collect::<String>(), r#":"\"\"""#);
assert_eq!(Inspect::from("9").collect::<String>(), r#":"9""#);
assert_eq!(Inspect::from("foo bar").collect::<String>(), r#":"foo bar""#);
assert_eq!(Inspect::from("*foo").collect::<String>(), r#":"*foo""#);
assert_eq!(Inspect::from("foo ").collect::<String>(), r#":"foo ""#);
assert_eq!(Inspect::from(" foo").collect::<String>(), r#":" foo""#);
assert_eq!(Inspect::from(" ").collect::<String>(), r#":" ""#);
}
}
// Fixture-driven test: checks `Inspect` against paired input/expected-output
// tables from `crate::fixtures`.
#[cfg(test)]
mod functionals {
use std::string::String;
use super::Inspect;
use crate::fixtures::{IDENTS, IDENT_INSPECTS};
// NOTE(review): the test name suggests the fixtures were captured from MRI
// Ruby's `Symbol#inspect` — confirm against the fixtures module.
#[test]
fn mri_symbol_idents() {
// Pair each input with the expected output at the same index. `zip` stops at
// the shorter table, so a length mismatch would go unnoticed here.
let pairs = IDENTS.iter().copied().zip(IDENT_INSPECTS.iter().copied());
for (sym, expected) in pairs {
let inspect = Inspect::from(sym).collect::<String>();
assert_eq!(
inspect, expected,
"Expected '{expected}', to be the result of '{sym}'.inspect; got '{inspect}'",
);
}
}
}