spinoso_symbol/
ident.rs

1//! Parser for classifying byte strings as Ruby identifiers.
2//!
3//! This module exposes a parser for determining if a sequence of bytes is a
4//! valid Ruby identifier. These routines also classify idents by type, for
5//! example, a local variable (`is_spinoso`), constant name (`SPINOSO_SYMBOL`),
6//! or class variable (`@@spinoso_symbol`).
7//!
8//! # Examples – local variable
9//!
10//! ```
11//! # use spinoso_symbol::IdentifierType;
12//! assert_eq!(
13//!     "spinoso".parse::<IdentifierType>(),
14//!     Ok(IdentifierType::Local)
15//! );
16//! assert_eq!(
17//!     "spinoso_symbol_features".parse::<IdentifierType>(),
18//!     Ok(IdentifierType::Local)
19//! );
20//! ```
21//!
22//! # Examples – constant
23//!
24//! ```
25//! # use spinoso_symbol::IdentifierType;
26//! assert_eq!(
27//!     "Spinoso".parse::<IdentifierType>(),
28//!     Ok(IdentifierType::Constant)
29//! );
30//! assert_eq!(
31//!     "SpinosoSymbol".parse::<IdentifierType>(),
32//!     Ok(IdentifierType::Constant)
33//! );
34//! assert_eq!(
35//!     "SPINOSO_SYMBOL_FEATURES".parse::<IdentifierType>(),
36//!     Ok(IdentifierType::Constant)
37//! );
38//! ```
39//!
40//! # Examples – global
41//!
42//! ```
43//! # use spinoso_symbol::IdentifierType;
44//! assert_eq!(
45//!     "$use_spinoso_symbol".parse::<IdentifierType>(),
46//!     Ok(IdentifierType::Global)
47//! );
48//! assert_eq!(
49//!     "$USE_SPINOSO_SYMBOL".parse::<IdentifierType>(),
50//!     Ok(IdentifierType::Global)
51//! );
52//! ```
53//!
54//! # Examples – instance and class variables
55//!
56//! ```
57//! # use spinoso_symbol::IdentifierType;
58//! assert_eq!(
59//!     "@artichoke".parse::<IdentifierType>(),
60//!     Ok(IdentifierType::Instance)
61//! );
62//! assert_eq!(
63//!     "@@rumble".parse::<IdentifierType>(),
64//!     Ok(IdentifierType::Class)
65//! );
66//! ```
67//!
68//! # Example – attribute setter
69//!
70//! Attribute setters are local idents that end in `=`.
71//!
72//! ```
73//! # use spinoso_symbol::IdentifierType;
74//! assert_eq!(
75//!     "artichoke=".parse::<IdentifierType>(),
76//!     Ok(IdentifierType::AttrSet)
77//! );
78//! assert_eq!(
79//!     "spinoso_symbol=".parse::<IdentifierType>(),
80//!     Ok(IdentifierType::AttrSet)
81//! );
82//! ```
83
84use core::fmt;
85use core::str::FromStr;
86
87use bstr::ByteSlice;
88
89/// Valid types for Ruby identifiers.
90///
91/// Spinoso symbol parses byte strings to determine if they are valid idents for
92/// the [`Inspect`] iterator (which requires the **inspect** Cargo feature to be
93/// enabled). Symbols that are valid idents do not get wrapped in `"` when
94/// generating their debug output.
95///
96/// See variant documentation for the set of ident types.
97///
98/// `IdentifierType`'s primary interface is through the [`TryFrom`] and
99/// [`FromStr`] conversion traits. Parsing `&str` and `&[u8]` is supported.
100///
101/// # Examples – local variable
102///
103/// ```
104/// # use spinoso_symbol::IdentifierType;
105/// assert_eq!(
106///     "spinoso".parse::<IdentifierType>(),
107///     Ok(IdentifierType::Local)
108/// );
109/// assert_eq!(
110///     "spinoso_symbol_features".parse::<IdentifierType>(),
111///     Ok(IdentifierType::Local)
112/// );
113/// ```
114///
115/// # Examples – constant
116///
117/// ```
118/// # use spinoso_symbol::IdentifierType;
119/// assert_eq!(
120///     "Spinoso".parse::<IdentifierType>(),
121///     Ok(IdentifierType::Constant)
122/// );
123/// assert_eq!(
124///     "SpinosoSymbol".parse::<IdentifierType>(),
125///     Ok(IdentifierType::Constant)
126/// );
127/// assert_eq!(
128///     "SPINOSO_SYMBOL_FEATURES".parse::<IdentifierType>(),
129///     Ok(IdentifierType::Constant)
130/// );
131/// ```
132///
133/// # Examples – global
134///
135/// ```
136/// # use spinoso_symbol::IdentifierType;
137/// assert_eq!(
138///     "$use_spinoso_symbol".parse::<IdentifierType>(),
139///     Ok(IdentifierType::Global)
140/// );
141/// assert_eq!(
142///     "$USE_SPINOSO_SYMBOL".parse::<IdentifierType>(),
143///     Ok(IdentifierType::Global)
144/// );
145/// ```
146///
147/// # Examples – instance and class variables
148///
149/// ```
150/// # use spinoso_symbol::IdentifierType;
151/// assert_eq!(
152///     "@artichoke".parse::<IdentifierType>(),
153///     Ok(IdentifierType::Instance)
154/// );
155/// assert_eq!(
156///     "@@rumble".parse::<IdentifierType>(),
157///     Ok(IdentifierType::Class)
158/// );
159/// ```
160///
161/// # Example – attribute setter
162///
163/// Attribute setters are local idents that end in `=`.
164///
165/// ```
166/// # use spinoso_symbol::IdentifierType;
167/// assert_eq!(
168///     "artichoke=".parse::<IdentifierType>(),
169///     Ok(IdentifierType::AttrSet)
170/// );
171/// assert_eq!(
172///     "spinoso_symbol=".parse::<IdentifierType>(),
173///     Ok(IdentifierType::AttrSet)
174/// );
175/// ```
176///
177/// [`Inspect`]: crate::Inspect
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum IdentifierType {
    // NOTE: the derived `PartialOrd`/`Ord` impls compare variants by
    // declaration order, so reordering the variants below changes the
    // observable ordering of `IdentifierType` values.

    /// Identifier that contains "junk".
    ///
    /// Junk mostly equates to non-sigil ASCII symbols. Identifiers like
    /// `empty?` and `flatten!` are junk idents. All special symbolic Ruby
    /// methods like `<=>` and `!~` are junk identifiers.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::IdentifierType;
    /// assert_eq!("empty?".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!(
    ///     "flatten!".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Junk)
    /// );
    /// assert_eq!("<=>".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("!~".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("[]".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("[]=".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("=~".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("==".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("===".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// ```
    Junk,
    /// Identifier that is a global variable name.
    ///
    /// Global variables are prefixed with the sigil `$`. There are two types of
    /// global variables:
    ///
    /// - `$` followed by a sequence of ident characters that would itself be
    ///   a valid [`Constant`] or [`Local`] ident.
    /// - Special global variables, which include `Regexp` globals (`$1`..`$9`)
    ///   and `$-w` type globals.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "$".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("$foo".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!(
    ///     "$@foo".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("$0".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!("$1".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!("$9".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!("$-w".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!(
    ///     "$-www".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    Global,
    /// Identifier that is an instance variable name.
    ///
    /// Instance variables are prefixed with a single `@` sigil. The remaining
    /// bytes must be a valid [`Constant`] or [`Local`] ident.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "@".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@foo".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@Foo".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@FOO".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@foo_bar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@FooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@FOO_BAR".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@$foo".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@0".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@1".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@9".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    Instance,
    /// Identifier that is a class variable name.
    ///
    /// Class variables are prefixed with a double `@@` sigil. The remaining
    /// bytes must be a valid [`Constant`] or [`Local`] ident.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "@@".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("@@foo".parse::<IdentifierType>(), Ok(IdentifierType::Class));
    /// assert_eq!("@@Foo".parse::<IdentifierType>(), Ok(IdentifierType::Class));
    /// assert_eq!("@@FOO".parse::<IdentifierType>(), Ok(IdentifierType::Class));
    /// assert_eq!(
    ///     "@@foo_bar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Class)
    /// );
    /// assert_eq!(
    ///     "@@FooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Class)
    /// );
    /// assert_eq!(
    ///     "@@FOO_BAR".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Class)
    /// );
    /// assert_eq!(
    ///     "@@$foo".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@@0".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@@1".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@@9".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    Class,
    /// Identifier that is an "attribute setter" method name.
    ///
    /// `AttrSet` idents end in the `=` sigil and are otherwise valid [`Local`]
    /// or [`Constant`] idents. `AttrSet` idents cannot have any other "junk"
    /// symbols.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "Foo=".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::AttrSet)
    /// );
    /// assert_eq!(
    ///     "foo=".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::AttrSet)
    /// );
    /// assert_eq!(
    ///     "foo_bar=".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::AttrSet)
    /// );
    /// assert_eq!(
    ///     "foo_bar?=".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("ω=".parse::<IdentifierType>(), Ok(IdentifierType::AttrSet));
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    AttrSet,
    /// Identifier that is a constant name.
    ///
    /// Constant names can be either ASCII or Unicode and must start with an
    /// uppercase character.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "Foo".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!(
    ///     "FOO".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!(
    ///     "FooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!(
    ///     "FOO_BAR".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!("Ω".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
    /// ```
    Constant,
    /// Identifier that is a local variable or method name.
    ///
    /// Local names can be either ASCII or Unicode and typically start with a
    /// lowercase character. More precisely, any valid ident whose first
    /// character is not uppercase (including a leading `_`) is classified as
    /// a local.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!("foo".parse::<IdentifierType>(), Ok(IdentifierType::Local));
    /// assert_eq!("fOO".parse::<IdentifierType>(), Ok(IdentifierType::Local));
    /// assert_eq!(
    ///     "fooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Local)
    /// );
    /// assert_eq!(
    ///     "foo_bar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Local)
    /// );
    /// assert_eq!("ω".parse::<IdentifierType>(), Ok(IdentifierType::Local));
    /// ```
    Local,
}
426
427impl IdentifierType {
428    /// Return a new, default `IdentifierType`.
429    ///
430    /// Prefer to use `new()` over `default()` since `new()` is const.
431    ///
432    /// # Examples
433    ///
434    /// ```
435    /// # use spinoso_symbol::IdentifierType;
436    /// const ID_TYPE: IdentifierType = IdentifierType::new();
437    /// assert_eq!(ID_TYPE, IdentifierType::Junk);
438    /// assert_eq!(ID_TYPE, IdentifierType::default());
439    /// ```
440    #[must_use]
441    pub const fn new() -> Self {
442        Self::Junk
443    }
444}
445
446impl Default for IdentifierType {
447    /// Construct a "junk" identifier type.
448    ///
449    /// # Examples
450    ///
451    /// ```
452    /// # use spinoso_symbol::IdentifierType;
453    /// const ID_TYPE: IdentifierType = IdentifierType::new();
454    /// assert_eq!(ID_TYPE, IdentifierType::Junk);
455    /// assert_eq!(ID_TYPE, IdentifierType::default());
456    /// ```
457    #[inline]
458    fn default() -> Self {
459        Self::new()
460    }
461}
462
463impl FromStr for IdentifierType {
464    type Err = ParseIdentifierError;
465
466    #[inline]
467    fn from_str(s: &str) -> Result<Self, Self::Err> {
468        parse(s.as_bytes()).ok_or_else(ParseIdentifierError::new)
469    }
470}
471
472impl TryFrom<&str> for IdentifierType {
473    type Error = ParseIdentifierError;
474
475    #[inline]
476    fn try_from(value: &str) -> Result<Self, Self::Error> {
477        parse(value.as_bytes()).ok_or_else(ParseIdentifierError::new)
478    }
479}
480
481impl TryFrom<&[u8]> for IdentifierType {
482    type Error = ParseIdentifierError;
483
484    #[inline]
485    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
486        parse(value).ok_or_else(ParseIdentifierError::new)
487    }
488}
489
/// Error returned when input cannot be classified as a known identifier type.
///
/// This is the error type of the [`FromStr`] and `TryFrom` conversions on
/// [`IdentifierType`]. It carries no data; every instance compares equal.
///
/// # Examples
///
/// ```
/// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
/// const ERR: ParseIdentifierError = ParseIdentifierError::new();
/// assert_eq!("not a valid ident".parse::<IdentifierType>(), Err(ERR));
/// ```
#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct ParseIdentifierError {
    // Private unit field: prevents construction via struct literal outside
    // this module, so callers must go through `new()` or `default()`.
    _private: (),
}

impl ParseIdentifierError {
    /// Build a `ParseIdentifierError`.
    ///
    /// This constructor is `const`, so it can initialize constants and
    /// statics; `default()` cannot.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// const ERR: ParseIdentifierError = ParseIdentifierError::new();
    /// assert_eq!("not a valid ident".parse::<IdentifierType>(), Err(ERR));
    /// assert_eq!(ERR, ParseIdentifierError::default());
    /// ```
    #[must_use]
    pub const fn new() -> Self {
        ParseIdentifierError { _private: () }
    }
}

impl fmt::Display for ParseIdentifierError {
    /// Write the fixed, human-readable description of this error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const MESSAGE: &str = "Failed to parse given string as a known identifier type";
        f.write_str(MESSAGE)
    }
}
528
529#[inline]
530fn parse(name: &[u8]) -> Option<IdentifierType> {
531    match name {
532        [] | [b'\0'] => None,
533        // special global variable
534        [b'$', name @ ..] if is_special_global_name(name) => Some(IdentifierType::Global),
535        // global variable
536        [b'$', name @ ..] => parse_ident(name, IdentifierType::Global),
537        // class variable
538        [b'@', b'@', name @ ..] => parse_ident(name, IdentifierType::Class),
539        // instance variable
540        [b'@', name @ ..] => parse_ident(name, IdentifierType::Instance),
541        // Symbolic method names
542        name if is_symbolic_method_name(name) => Some(IdentifierType::Junk),
543        [b'=' | b'!' | b'[', ..] => None,
544        [first, ..] if *first != b'_' && first.is_ascii() && !first.is_ascii_alphabetic() => None,
545        // Constant name
546        name if is_const_name(name) => parse_ident(name, IdentifierType::Constant),
547        // Local variable
548        name => parse_ident(name, IdentifierType::Local),
549    }
550}
551
552#[inline]
553fn parse_ident(name: &[u8], id_type: IdentifierType) -> Option<IdentifierType> {
554    match name {
555        [] => None,
556        [first, name @ .., b'='] if *first != b'_' && first.is_ascii() && !first.is_ascii_alphabetic() => {
557            if let None | Some(IdentifierType::AttrSet) = parse_ident(name, id_type) {
558                None
559            } else {
560                Some(id_type)
561            }
562        }
563        [first, ..] if *first != b'_' && first.is_ascii() && !first.is_ascii_alphabetic() => None,
564        name if is_ident_until(name).is_none() => Some(id_type),
565        [name @ .., b'!' | b'?'] if is_ident_until(name).is_none() => {
566            if matches!(
567                id_type,
568                IdentifierType::Global | IdentifierType::Class | IdentifierType::Instance
569            ) {
570                return None;
571            }
572            Some(IdentifierType::Junk)
573        }
574        [name @ .., b'='] if is_ident_until(name).is_none() => {
575            if matches!(id_type, IdentifierType::Local | IdentifierType::Constant) {
576                return Some(IdentifierType::AttrSet);
577            }
578            None
579        }
580        _ => None,
581    }
582}
583
584#[inline]
585#[expect(clippy::match_same_arms, reason = "for clarity")]
586fn is_special_global_name(name: &[u8]) -> bool {
587    match name {
588        [] => false,
589        [first, rest @ ..] if is_special_global_punct(*first) => rest.is_empty(),
590        [b'-'] => false,
591        [b'-', rest @ ..] if is_next_ident_exhausting(rest) => true,
592        [b'-', ..] => false,
593        name => name.chars().all(char::is_numeric),
594    }
595}
596
/// Return whether the input is one of Ruby's symbolic ("junk") method names.
///
/// Ruby has a fixed, closed set of method names made up solely of ASCII
/// punctuation (operators, element reference/assignment, unary plus and
/// minus, backtick). The input must match one of them exactly.
#[inline]
fn is_symbolic_method_name(name: &[u8]) -> bool {
    /// Every operator-style method name recognized by the parser.
    const SYMBOLIC_METHODS: &[&[u8]] = &[
        b"<", b"<<", b"<=", b"<=>", b">", b">>", b">=", b"=~", b"==", b"===", b"*", b"**", b"+", b"-", b"+@",
        b"-@", b"|", b"^", b"&", b"/", b"%", b"~", b"`", b"[]", b"[]=", b"!", b"!=", b"!~",
    ];
    SYMBOLIC_METHODS.iter().any(|method| *method == name)
}
634
635/// Return whether the input is a valid constant name.
636///
637/// Constant names require the first character to be either ASCII or Unicode
638/// uppercase.
639#[inline]
640fn is_const_name(name: &[u8]) -> bool {
641    match name {
642        [] => false,
643        name if name.is_ascii() => name.iter().next().is_some_and(u8::is_ascii_uppercase),
644        // uses Unicode `Uppercase` property
645        name if name.is_utf8() => name.chars().next().is_some_and(char::is_uppercase),
646        _ => false,
647    }
648}
649
/// Return whether a [`char`] may appear in an identifier.
///
/// Every non-ASCII character is accepted; within ASCII only letters, digits
/// and `_` are.
///
/// # Header declaration
///
/// Ported from the following C macro in `string.c`:
///
/// ```c
/// #define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))
/// ```
#[inline]
fn is_ident_char(ch: char) -> bool {
    // Non-ASCII characters are always accepted, so only the ASCII range needs
    // the alphanumeric test.
    !ch.is_ascii() || ch == '_' || ch.is_ascii_alphanumeric()
}
663
664/// Consume the input until a non-ident character is found.
665///
666/// Scan the [`char`]s in the input until either invalid UTF-8 or an invalid
667/// ident is found. See [`is_ident_char`].
668///
669/// This method returns `Some(index)` of the start of the first invalid ident
670/// or `None` if the whole input is a valid ident.
671///
672/// Empty slices are not valid idents.
673#[inline]
674fn is_ident_until(mut name: &[u8]) -> Option<usize> {
675    // Empty strings are not idents.
676    if name.is_empty() {
677        return Some(0);
678    }
679    let mut start = 0;
680    while !name.is_empty() {
681        let (ch, size) = bstr::decode_utf8(name);
682        match ch {
683            Some(ch) if !is_ident_char(ch) => return Some(start),
684            None => return Some(start),
685            Some(_) => {
686                name = &name[size..];
687                start += size;
688            }
689        }
690    }
691    None
692}
693
694/// Determine if the next char is a valid ident char and consumes all bytes in
695/// the input.
696///
697/// This function is used to determine whether certain kinds of single character
698/// globals are valid idents.
699///
700/// See also [`is_ident_char`].
701#[inline]
702fn is_next_ident_exhausting(name: &[u8]) -> bool {
703    let (ch, size) = bstr::decode_utf8(name);
704    match ch {
705        Some(ch) if is_ident_char(ch) => name.len() == size,
706        Some(_) | None => false,
707    }
708}
709
/// Returns true if `ch` is one of the "special global punctuation" bytes
/// as defined by MRI's `SPECIAL_PUNCT` macro.
///
/// These are the 21 bytes that may directly follow `$` to form a special
/// global variable name such as `$~`, `$!` or `$0`.
#[inline]
pub const fn is_special_global_punct(ch: u8) -> bool {
    // Derived from Ruby MRI `parse.y`:
    //
    // ```c
    // #define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
    // #define SPECIAL_PUNCT(idx) (
    //     BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) |
    //     BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) |
    //     BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) |
    //     BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) |
    //     BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) |
    //     BIT('0', idx))
    // ```
    //
    // MRI packs this set into three 32-bit masks covering bytes 32..=63,
    // 64..=95 and 96..=127 (4227980502, 268435457 and 1073741825). Testing
    // membership in the same 21-byte set directly is equivalent: every byte
    // outside the list, including control bytes and bytes >= 128, is rejected.
    matches!(
        ch,
        b'~' | b'*'
            | b'$'
            | b'?'
            | b'!'
            | b'@'
            | b'/'
            | b'\\'
            | b';'
            | b','
            | b'.'
            | b'='
            | b':'
            | b'<'
            | b'>'
            | b'"'
            | b'&'
            | b'`'
            | b'\''
            | b'+'
            | b'0'
    )
}
767
768#[cfg(test)]
769mod tests {
770    use super::{
771        IdentifierType, ParseIdentifierError, is_ident_until, is_next_ident_exhausting, is_special_global_name,
772    };
773
774    #[test]
775    fn special_global_name() {
776        let name = &b"a"[..];
777        assert!(!is_special_global_name(name));
778        let name = "💎";
779        assert!(!is_special_global_name(name.as_bytes()));
780        let name = &b"ab"[..];
781        assert!(!is_special_global_name(name));
782        let name = "-💎";
783        assert!(is_special_global_name(name.as_bytes()));
784        let name = &b"$"[..];
785        assert!(is_special_global_name(name));
786        let name = &b"~"[..];
787        assert!(is_special_global_name(name));
788        let name = "�";
789        assert!(!is_special_global_name(name.as_bytes()));
790        let name = "-�";
791        assert!(is_special_global_name(name.as_bytes()));
792    }
793
794    #[test]
795    fn is_ident_until_empty() {
796        let name = &[];
797        assert_eq!(is_ident_until(name), Some(0));
798    }
799
800    #[test]
801    fn is_ident_until_lowercase_ascii() {
802        let name = &b"abc"[..];
803        assert_eq!(is_ident_until(name), None);
804        let name = &b"abc_123"[..];
805        assert_eq!(is_ident_until(name), None);
806        let name = &b"_"[..];
807        assert_eq!(is_ident_until(name), None);
808        let name = &b"_e"[..];
809        assert_eq!(is_ident_until(name), None);
810        let name = &b"_1"[..];
811        assert_eq!(is_ident_until(name), None);
812    }
813
814    #[test]
815    fn is_ident_until_ascii_constant() {
816        let name = &b"Abc"[..];
817        assert_eq!(is_ident_until(name), None);
818        let name = &b"ABC"[..];
819        assert_eq!(is_ident_until(name), None);
820        let name = &b"ABC_XYZ"[..];
821        assert_eq!(is_ident_until(name), None);
822        let name = &b"ABC_123"[..];
823        assert_eq!(is_ident_until(name), None);
824        let name = &b"HTTP2"[..];
825        assert_eq!(is_ident_until(name), None);
826    }
827
828    #[test]
829    fn is_ident_until_unicode() {
830        let name = "ábc";
831        assert_eq!(is_ident_until(name.as_bytes()), None);
832        let name = "abç";
833        assert_eq!(is_ident_until(name.as_bytes()), None);
834        let name = "abc_�";
835        assert_eq!(is_ident_until(name.as_bytes()), None);
836        let name = "abc_💎";
837        assert_eq!(is_ident_until(name.as_bytes()), None);
838
839        let name = "Ábc";
840        assert_eq!(is_ident_until(name.as_bytes()), None);
841        let name = "Abç";
842        assert_eq!(is_ident_until(name.as_bytes()), None);
843        let name = "Abc_�";
844        assert_eq!(is_ident_until(name.as_bytes()), None);
845        let name = "Abc_💎";
846        assert_eq!(is_ident_until(name.as_bytes()), None);
847
848        let name = "💎";
849        assert_eq!(is_ident_until(name.as_bytes()), None);
850        let name = "💎abc";
851        assert_eq!(is_ident_until(name.as_bytes()), None);
852    }
853
854    #[test]
855    fn is_ident_until_invalid_utf8() {
856        let name = &b"\xFF"[..];
857        assert_eq!(is_ident_until(name), Some(0));
858        let name = &b"abc\xFF"[..];
859        assert_eq!(is_ident_until(name), Some(3));
860        let name = &b"abc\xFFxyz"[..];
861        assert_eq!(is_ident_until(name), Some(3));
862
863        let name = &b"\xFF\xFE"[..];
864        assert_eq!(is_ident_until(name), Some(0));
865        let name = &b"abc\xFF\xFE"[..];
866        assert_eq!(is_ident_until(name), Some(3));
867        let name = &b"abc\xFF\xFExyz"[..];
868        assert_eq!(is_ident_until(name), Some(3));
869
870        let name = &b"\xEF\xBF\xBD\xFF"[..];
871        assert_eq!(is_ident_until(name), Some(3));
872        let name = &b"\xF0\x9F\x92\x8E\xFF"[..];
873        assert_eq!(is_ident_until(name), Some(4));
874    }
875
876    #[test]
877    fn is_next_ident_exhausting_empty() {
878        let name = &[];
879        assert!(!is_next_ident_exhausting(name));
880    }
881
882    #[test]
883    fn is_next_ident_exhausting_lowercase_ascii() {
884        let name = &b"a"[..];
885        assert!(is_next_ident_exhausting(name));
886        let name = &b"abc"[..];
887        assert!(!is_next_ident_exhausting(name));
888        let name = &b"1"[..];
889        assert!(is_next_ident_exhausting(name));
890        let name = &b"abc_123"[..];
891        assert!(!is_next_ident_exhausting(name));
892        let name = &b"_"[..];
893        assert!(is_next_ident_exhausting(name));
894        let name = &b"_e"[..];
895        assert!(!is_next_ident_exhausting(name));
896        let name = &b"_1"[..];
897        assert!(!is_next_ident_exhausting(name));
898    }
899
900    #[test]
901    fn is_next_ident_exhausting_ascii_constant() {
902        let name = &b"A"[..];
903        assert!(is_next_ident_exhausting(name));
904        let name = &b"Abc"[..];
905        assert!(!is_next_ident_exhausting(name));
906        let name = &b"ABC"[..];
907        assert!(!is_next_ident_exhausting(name));
908        let name = &b"ABC_XYZ"[..];
909        assert!(!is_next_ident_exhausting(name));
910        let name = &b"ABC_123"[..];
911        assert!(!is_next_ident_exhausting(name));
912        let name = &b"HTTP2"[..];
913        assert!(!is_next_ident_exhausting(name));
914    }
915
916    #[test]
917    fn is_next_ident_exhausting_unicode() {
918        let name = "ábc";
919        assert!(!is_next_ident_exhausting(name.as_bytes()));
920        let name = "abç";
921        assert!(!is_next_ident_exhausting(name.as_bytes()));
922        let name = "abc_�";
923        assert!(!is_next_ident_exhausting(name.as_bytes()));
924        let name = "abc_💎";
925        assert!(!is_next_ident_exhausting(name.as_bytes()));
926
927        let name = "Ábc";
928        assert!(!is_next_ident_exhausting(name.as_bytes()));
929        let name = "Abç";
930        assert!(!is_next_ident_exhausting(name.as_bytes()));
931        let name = "Abc_�";
932        assert!(!is_next_ident_exhausting(name.as_bytes()));
933        let name = "Abc_💎";
934        assert!(!is_next_ident_exhausting(name.as_bytes()));
935        let name = "💎abc";
936        assert!(!is_next_ident_exhausting(name.as_bytes()));
937
938        let name = "á";
939        assert!(is_next_ident_exhausting(name.as_bytes()));
940        let name = "ç";
941        assert!(is_next_ident_exhausting(name.as_bytes()));
942        let name = "�";
943        assert!(is_next_ident_exhausting(name.as_bytes()));
944        let name = "💎";
945        assert!(is_next_ident_exhausting(name.as_bytes()));
946    }
947
948    #[test]
949    fn is_next_ident_exhausting_invalid_utf8() {
950        let name = &b"\xFF"[..];
951        assert!(!is_next_ident_exhausting(name));
952        let name = &b"abc\xFF"[..];
953        assert!(!is_next_ident_exhausting(name));
954        let name = &b"abc\xFFxyz"[..];
955        assert!(!is_next_ident_exhausting(name));
956
957        let name = &b"\xFF\xFE"[..];
958        assert!(!is_next_ident_exhausting(name));
959        let name = &b"abc\xFF\xFE"[..];
960        assert!(!is_next_ident_exhausting(name));
961        let name = &b"abc\xFF\xFExyz"[..];
962        assert!(!is_next_ident_exhausting(name));
963
964        let name = &b"\xEF\xBF\xBD\xFF"[..];
965        assert!(!is_next_ident_exhausting(name));
966        let name = &b"\xF0\x9F\x92\x8E\xFF"[..];
967        assert!(!is_next_ident_exhausting(name));
968    }
969
970    #[test]
971    fn ascii_ident() {
972        assert_eq!("foobar".parse::<IdentifierType>(), Ok(IdentifierType::Local));
973        assert_eq!("ruby_is_simple".parse::<IdentifierType>(), Ok(IdentifierType::Local));
974    }
975
976    #[test]
977    fn ascii_constant() {
978        assert_eq!("Foobar".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
979        assert_eq!("FooBar".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
980        assert_eq!("FOOBAR".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
981        assert_eq!("FOO_BAR".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
982        assert_eq!("RUBY_IS_SIMPLE".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
983    }
984
985    #[test]
986    fn empty() {
987        assert_eq!("".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
988    }
989
990    #[test]
991    fn single_nul() {
992        assert_eq!("\0".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
993    }
994
995    #[test]
996    fn non_ascii_numerics() {
997        assert_eq!("١".parse::<IdentifierType>(), Ok(IdentifierType::Local));
998        assert_eq!(
999            "١١١١١١١١١١١١١١١١١١".parse::<IdentifierType>(),
1000            Ok(IdentifierType::Local)
1001        );
1002        assert_eq!("①".parse::<IdentifierType>(), Ok(IdentifierType::Local));
1003    }
1004
1005    #[test]
1006    fn recursive_ident() {
1007        assert_eq!("@@@foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1008        assert_eq!("@@@@foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1009        assert_eq!("@$foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1010        assert_eq!("@$-w".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1011        assert_eq!("@@$foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1012        assert_eq!("@@$-w".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1013        assert_eq!("$@foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1014        assert_eq!("$@@foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1015        assert_eq!("$$-w".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1016    }
1017
1018    #[test]
1019    fn attr_bang() {
1020        assert_eq!("@foo!".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1021        assert_eq!("@@foo!".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1022        assert_eq!("$foo!".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1023    }
1024
1025    #[test]
1026    fn attr_question() {
1027        assert_eq!("@foo?".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1028        assert_eq!("@@foo?".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1029        assert_eq!("$foo?".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1030    }
1031
1032    #[test]
1033    fn attr_setter() {
1034        assert_eq!("@foo=".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1035        assert_eq!("@@foo=".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1036        assert_eq!("$foo=".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1037    }
1038
1039    #[test]
1040    fn invalid_utf8() {
1041        assert_eq!(
1042            IdentifierType::try_from(&b"invalid-\xFF-utf8"[..]),
1043            Err(ParseIdentifierError::new())
1044        );
1045    }
1046
1047    #[test]
1048    fn emoji() {
1049        assert_eq!(IdentifierType::try_from("💎"), Ok(IdentifierType::Local));
1050        assert_eq!(IdentifierType::try_from("$💎"), Ok(IdentifierType::Global));
1051        assert_eq!(IdentifierType::try_from("@💎"), Ok(IdentifierType::Instance));
1052        assert_eq!(IdentifierType::try_from("@@💎"), Ok(IdentifierType::Class));
1053    }
1054
1055    #[test]
1056    fn unicode_replacement_char() {
1057        assert_eq!(IdentifierType::try_from("�"), Ok(IdentifierType::Local));
1058        assert_eq!(IdentifierType::try_from("$�"), Ok(IdentifierType::Global));
1059        assert_eq!(IdentifierType::try_from("@�"), Ok(IdentifierType::Instance));
1060        assert_eq!(IdentifierType::try_from("@@�"), Ok(IdentifierType::Class));
1061
1062        assert_eq!(IdentifierType::try_from("abc�"), Ok(IdentifierType::Local));
1063        assert_eq!(IdentifierType::try_from("$abc�"), Ok(IdentifierType::Global));
1064        assert_eq!(IdentifierType::try_from("@abc�"), Ok(IdentifierType::Instance));
1065        assert_eq!(IdentifierType::try_from("@@abc�"), Ok(IdentifierType::Class));
1066    }
1067
1068    #[test]
1069    fn invalid_utf8_special_global() {
1070        assert_eq!(
1071            IdentifierType::try_from(&b"$-\xFF"[..]),
1072            Err(ParseIdentifierError::new())
1073        );
1074    }
1075
1076    #[test]
1077    fn replacement_char_special_global() {
1078        assert_eq!(IdentifierType::try_from("$-�"), Ok(IdentifierType::Global));
1079        assert_eq!(IdentifierType::try_from("$-�a"), Err(ParseIdentifierError::new()));
1080        assert_eq!(IdentifierType::try_from("$-��"), Err(ParseIdentifierError::new()));
1081    }
1082}
1083
/// Tests transcribed from the ruby/spec table for `Symbol#inspect`.
///
/// A symbol that `inspect`s without surrounding quotes in the table below is
/// expected to parse as an [`IdentifierType`]; a symbol that is quoted is
/// expected to be rejected.
#[cfg(test)]
mod specs {
    use super::IdentifierType;

    // From `spec/core/symbol/inspect_spec.rb`:
    //
    // ```ruby
    // symbols = {
    //   fred:         ":fred",
    //   :fred?     => ":fred?",
    //   :fred!     => ":fred!",
    //   :$ruby     => ":$ruby",
    //   :@ruby     => ":@ruby",
    //   :@@ruby    => ":@@ruby",
    //   :"$ruby!"  => ":\"$ruby!\"",
    //   :"$ruby?"  => ":\"$ruby?\"",
    //   :"@ruby!"  => ":\"@ruby!\"",
    //   :"@ruby?"  => ":\"@ruby?\"",
    //   :"@@ruby!" => ":\"@@ruby!\"",
    //   :"@@ruby?" => ":\"@@ruby?\"",
    //
    //   :$-w       => ":$-w",
    //   :"$-ww"    => ":\"$-ww\"",
    //   :"$+"      => ":$+",
    //   :"$~"      => ":$~",
    //   :"$:"      => ":$:",
    //   :"$?"      => ":$?",
    //   :"$<"      => ":$<",
    //   :"$_"      => ":$_",
    //   :"$/"      => ":$/",
    //   :"$'"      => ":$'",
    //   :"$\""     => ":$\"",
    //   :"$$"      => ":$$",
    //   :"$."      => ":$.",
    //   :"$,"      => ":$,",
    //   :"$`"      => ":$`",
    //   :"$!"      => ":$!",
    //   :"$;"      => ":$;",
    //   :"$\\"     => ":$\\",
    //   :"$="      => ":$=",
    //   :"$*"      => ":$*",
    //   :"$>"      => ":$>",
    //   :"$&"      => ":$&",
    //   :"$@"      => ":$@",
    //   :"$1234"   => ":$1234",
    //
    //   :-@        => ":-@",
    //   :+@        => ":+@",
    //   :%         => ":%",
    //   :&         => ":&",
    //   :*         => ":*",
    //   :**        => ":**",
    //   :"/"       => ":/",     # lhs quoted for emacs happiness
    //   :<         => ":<",
    //   :<=        => ":<=",
    //   :<=>       => ":<=>",
    //   :==        => ":==",
    //   :===       => ":===",
    //   :=~        => ":=~",
    //   :>         => ":>",
    //   :>=        => ":>=",
    //   :>>        => ":>>",
    //   :[]        => ":[]",
    //   :[]=       => ":[]=",
    //   :"\<\<"    => ":\<\<",
    //   :^         => ":^",
    //   :"`"       => ":`",     # for emacs, and justice!
    //   :~         => ":~",
    //   :|         => ":|",
    //
    //   :"!"       => [":\"!\"",  ":!" ],
    //   :"!="      => [":\"!=\"", ":!="],
    //   :"!~"      => [":\"!~\"", ":!~"],
    //   :"\$"      => ":\"$\"", # for justice!
    //   :"&&"      => ":\"&&\"",
    //   :"'"       => ":\"\'\"",
    //   :","       => ":\",\"",
    //   :"."       => ":\".\"",
    //   :".."      => ":\"..\"",
    //   :"..."     => ":\"...\"",
    //   :":"       => ":\":\"",
    //   :"::"      => ":\"::\"",
    //   :";"       => ":\";\"",
    //   :"="       => ":\"=\"",
    //   :"=>"      => ":\"=>\"",
    //   :"\?"      => ":\"?\"", # rawr!
    //   :"@"       => ":\"@\"",
    //   :"||"      => ":\"||\"",
    //   :"|||"     => ":\"|||\"",
    //   :"++"      => ":\"++\"",
    //
    //   :"\""      => ":\"\\\"\"",
    //   :"\"\""    => ":\"\\\"\\\"\"",
    //
    //   :"9"       => ":\"9\"",
    //   :"foo bar" => ":\"foo bar\"",
    //   :"*foo"    => ":\"*foo\"",
    //   :"foo "    => ":\"foo \"",
    //   :" foo"    => ":\" foo\"",
    //   :" "       => ":\" \"",
    // }
    // ```

    #[test]
    fn specs() {
        // idents
        assert!("fred".parse::<IdentifierType>().is_ok());
        assert!("fred?".parse::<IdentifierType>().is_ok());
        assert!("fred!".parse::<IdentifierType>().is_ok());
        assert!("$ruby".parse::<IdentifierType>().is_ok());
        assert!("@ruby".parse::<IdentifierType>().is_ok());
        assert!("@@ruby".parse::<IdentifierType>().is_ok());

        // sigil-prefixed idents (globals, instance variables, and class
        // variables) can't end in bang or question, unlike bare idents above
        assert!("$ruby!".parse::<IdentifierType>().is_err());
        assert!("$ruby?".parse::<IdentifierType>().is_err());
        assert!("@ruby!".parse::<IdentifierType>().is_err());
        assert!("@ruby?".parse::<IdentifierType>().is_err());
        assert!("@@ruby!".parse::<IdentifierType>().is_err());
        assert!("@@ruby?".parse::<IdentifierType>().is_err());

        // globals
        assert!("$-w".parse::<IdentifierType>().is_ok());
        assert!("$-ww".parse::<IdentifierType>().is_err());
        assert!("$+".parse::<IdentifierType>().is_ok());
        assert!("$~".parse::<IdentifierType>().is_ok());
        assert!("$:".parse::<IdentifierType>().is_ok());
        assert!("$?".parse::<IdentifierType>().is_ok());
        assert!("$<".parse::<IdentifierType>().is_ok());
        assert!("$_".parse::<IdentifierType>().is_ok());
        assert!("$/".parse::<IdentifierType>().is_ok());
        // `:"$'" => ":$'"` is listed in the spec table above.
        assert!("$'".parse::<IdentifierType>().is_ok());
        assert!("$\"".parse::<IdentifierType>().is_ok());
        assert!("$$".parse::<IdentifierType>().is_ok());
        assert!("$.".parse::<IdentifierType>().is_ok());
        assert!("$,".parse::<IdentifierType>().is_ok());
        assert!("$`".parse::<IdentifierType>().is_ok());
        assert!("$!".parse::<IdentifierType>().is_ok());
        assert!("$;".parse::<IdentifierType>().is_ok());
        assert!("$\\".parse::<IdentifierType>().is_ok());
        assert!("$=".parse::<IdentifierType>().is_ok());
        assert!("$*".parse::<IdentifierType>().is_ok());
        assert!("$>".parse::<IdentifierType>().is_ok());
        assert!("$&".parse::<IdentifierType>().is_ok());
        assert!("$@".parse::<IdentifierType>().is_ok());
        assert!("$1234".parse::<IdentifierType>().is_ok());

        // symbolic methods
        assert!("-@".parse::<IdentifierType>().is_ok());
        assert!("+@".parse::<IdentifierType>().is_ok());
        assert!("%".parse::<IdentifierType>().is_ok());
        assert!("&".parse::<IdentifierType>().is_ok());
        assert!("*".parse::<IdentifierType>().is_ok());
        assert!("**".parse::<IdentifierType>().is_ok());
        assert!("/".parse::<IdentifierType>().is_ok());
        assert!("<".parse::<IdentifierType>().is_ok());
        assert!("<=".parse::<IdentifierType>().is_ok());
        assert!("<=>".parse::<IdentifierType>().is_ok());
        assert!("==".parse::<IdentifierType>().is_ok());
        assert!("===".parse::<IdentifierType>().is_ok());
        assert!("=~".parse::<IdentifierType>().is_ok());
        assert!(">".parse::<IdentifierType>().is_ok());
        assert!(">=".parse::<IdentifierType>().is_ok());
        assert!(">>".parse::<IdentifierType>().is_ok());
        assert!("[]".parse::<IdentifierType>().is_ok());
        assert!("[]=".parse::<IdentifierType>().is_ok());
        assert!("<<".parse::<IdentifierType>().is_ok());
        assert!("^".parse::<IdentifierType>().is_ok());
        assert!("`".parse::<IdentifierType>().is_ok());
        assert!("~".parse::<IdentifierType>().is_ok());
        assert!("|".parse::<IdentifierType>().is_ok());

        // non-symbol symbolics: the spec table allows `!`, `!=`, and `!~` to
        // inspect unquoted, so they are accepted; the remaining punctuation is
        // always quoted and must be rejected
        assert!("!".parse::<IdentifierType>().is_ok());
        assert!("!=".parse::<IdentifierType>().is_ok());
        assert!("!~".parse::<IdentifierType>().is_ok());
        assert!("$".parse::<IdentifierType>().is_err());
        assert!("&&".parse::<IdentifierType>().is_err());
        assert!("'".parse::<IdentifierType>().is_err());
        assert!(",".parse::<IdentifierType>().is_err());
        assert!(".".parse::<IdentifierType>().is_err());
        assert!("..".parse::<IdentifierType>().is_err());
        assert!("...".parse::<IdentifierType>().is_err());
        assert!(":".parse::<IdentifierType>().is_err());
        assert!("::".parse::<IdentifierType>().is_err());
        assert!(";".parse::<IdentifierType>().is_err());
        assert!("=".parse::<IdentifierType>().is_err());
        assert!("=>".parse::<IdentifierType>().is_err());
        assert!("?".parse::<IdentifierType>().is_err());
        assert!("@".parse::<IdentifierType>().is_err());
        assert!("||".parse::<IdentifierType>().is_err());
        assert!("|||".parse::<IdentifierType>().is_err());
        assert!("++".parse::<IdentifierType>().is_err());

        // quotes
        assert!(r#"""#.parse::<IdentifierType>().is_err());
        assert!(r#""""#.parse::<IdentifierType>().is_err());

        // leading digits, embedded or surrounding whitespace, and stray
        // operator prefixes
        assert!("9".parse::<IdentifierType>().is_err());
        assert!("foo bar".parse::<IdentifierType>().is_err());
        assert!("*foo".parse::<IdentifierType>().is_err());
        assert!("foo ".parse::<IdentifierType>().is_err());
        assert!(" foo".parse::<IdentifierType>().is_err());
        assert!(" ".parse::<IdentifierType>().is_err());
    }
}
1289
/// Functional tests driven by the symbols present at MRI interpreter boot.
///
/// # Generation
///
/// The fixture consumed here is produced by the following script, which keeps
/// only the symbols whose `inspect` output is not wrapped in quotes:
///
/// ```shell
/// cat <<EOF | ruby --disable-gems --disable-did_you_mean
/// def boot_identifier_symbols
///   syms = Symbol.all_symbols.map(&:inspect)
///   # remove symbols that must be debug wrapped in quotes
///   syms = syms.reject { |s| s[0..1] == ':"' }
///
///   fixture = syms.map { |s| "r##\"#{s[1..]}\"##" }
///   puts fixture.join(",\n")
/// end
///
/// boot_identifier_symbols
/// EOF
/// ```
#[cfg(test)]
mod functionals {
    use super::IdentifierType;
    use crate::fixtures::IDENTS;

    #[test]
    fn mri_symbol_idents() {
        // Every fixture entry must be classified as some kind of identifier.
        for sym in IDENTS.iter().copied() {
            let parsed = sym.parse::<IdentifierType>();
            assert!(
                parsed.is_ok(),
                "'{sym}' should parse as a valid identifier, but did not."
            );
        }
    }
}