spinoso_symbol/
ident.rs

1//! Parser for classifying byte strings as Ruby identifiers.
2//!
3//! This module exposes a parser for determining if a sequence of bytes is a
4//! valid Ruby identifier. These routines also classify idents by type, for
5//! example, a local variable (`is_spinoso`), constant name (`SPINOSO_SYMBOL`),
6//! or class variable (`@@spinoso_symbol`).
7//!
8//! # Examples – local variable
9//!
10//! ```
11//! # use spinoso_symbol::IdentifierType;
12//! assert_eq!(
13//!     "spinoso".parse::<IdentifierType>(),
14//!     Ok(IdentifierType::Local)
15//! );
16//! assert_eq!(
17//!     "spinoso_symbol_features".parse::<IdentifierType>(),
18//!     Ok(IdentifierType::Local)
19//! );
20//! ```
21//!
22//! # Examples – constant
23//!
24//! ```
25//! # use spinoso_symbol::IdentifierType;
26//! assert_eq!(
27//!     "Spinoso".parse::<IdentifierType>(),
28//!     Ok(IdentifierType::Constant)
29//! );
30//! assert_eq!(
31//!     "SpinosoSymbol".parse::<IdentifierType>(),
32//!     Ok(IdentifierType::Constant)
33//! );
34//! assert_eq!(
35//!     "SPINOSO_SYMBOL_FEATURES".parse::<IdentifierType>(),
36//!     Ok(IdentifierType::Constant)
37//! );
38//! ```
39//!
40//! # Examples – global
41//!
42//! ```
43//! # use spinoso_symbol::IdentifierType;
44//! assert_eq!(
45//!     "$use_spinoso_symbol".parse::<IdentifierType>(),
46//!     Ok(IdentifierType::Global)
47//! );
48//! assert_eq!(
49//!     "$USE_SPINOSO_SYMBOL".parse::<IdentifierType>(),
50//!     Ok(IdentifierType::Global)
51//! );
52//! ```
53//!
54//! # Examples – instance and class variables
55//!
56//! ```
57//! # use spinoso_symbol::IdentifierType;
58//! assert_eq!(
59//!     "@artichoke".parse::<IdentifierType>(),
60//!     Ok(IdentifierType::Instance)
61//! );
62//! assert_eq!(
63//!     "@@rumble".parse::<IdentifierType>(),
64//!     Ok(IdentifierType::Class)
65//! );
66//! ```
67//!
68//! # Example – attribute setter
69//!
70//! Attribute setters are local idents that end in `=`.
71//!
72//! ```
73//! # use spinoso_symbol::IdentifierType;
74//! assert_eq!(
75//!     "artichoke=".parse::<IdentifierType>(),
76//!     Ok(IdentifierType::AttrSet)
77//! );
78//! assert_eq!(
79//!     "spinoso_symbol=".parse::<IdentifierType>(),
80//!     Ok(IdentifierType::AttrSet)
81//! );
82//! ```
83
84use core::fmt;
85use core::str::FromStr;
86
87use bstr::ByteSlice;
88
89/// Valid types for Ruby identifiers.
90///
91/// Spinoso symbol parses byte strings to determine if they are valid idents for
92/// the [`Inspect`] iterator (which requires the **inspect** Cargo feature to be
93/// enabled). Symbols that are valid idents do not get wrapped in `"` when
94/// generating their debug output.
95///
96/// See variant documentation for the set of ident types.
97///
98/// `IdentifierType`'s primary interface is through the [`TryFrom`] and
99/// [`FromStr`] conversion traits. Parsing `&str` and `&[u8]` is supported.
100///
101/// # Examples – local variable
102///
103/// ```
104/// # use spinoso_symbol::IdentifierType;
105/// assert_eq!(
106///     "spinoso".parse::<IdentifierType>(),
107///     Ok(IdentifierType::Local)
108/// );
109/// assert_eq!(
110///     "spinoso_symbol_features".parse::<IdentifierType>(),
111///     Ok(IdentifierType::Local)
112/// );
113/// ```
114///
115/// # Examples – constant
116///
117/// ```
118/// # use spinoso_symbol::IdentifierType;
119/// assert_eq!(
120///     "Spinoso".parse::<IdentifierType>(),
121///     Ok(IdentifierType::Constant)
122/// );
123/// assert_eq!(
124///     "SpinosoSymbol".parse::<IdentifierType>(),
125///     Ok(IdentifierType::Constant)
126/// );
127/// assert_eq!(
128///     "SPINOSO_SYMBOL_FEATURES".parse::<IdentifierType>(),
129///     Ok(IdentifierType::Constant)
130/// );
131/// ```
132///
133/// # Examples – global
134///
135/// ```
136/// # use spinoso_symbol::IdentifierType;
137/// assert_eq!(
138///     "$use_spinoso_symbol".parse::<IdentifierType>(),
139///     Ok(IdentifierType::Global)
140/// );
141/// assert_eq!(
142///     "$USE_SPINOSO_SYMBOL".parse::<IdentifierType>(),
143///     Ok(IdentifierType::Global)
144/// );
145/// ```
146///
147/// # Examples – instance and class variables
148///
149/// ```
150/// # use spinoso_symbol::IdentifierType;
151/// assert_eq!(
152///     "@artichoke".parse::<IdentifierType>(),
153///     Ok(IdentifierType::Instance)
154/// );
155/// assert_eq!(
156///     "@@rumble".parse::<IdentifierType>(),
157///     Ok(IdentifierType::Class)
158/// );
159/// ```
160///
161/// # Example – attribute setter
162///
163/// Attribute setters are local idents that end in `=`.
164///
165/// ```
166/// # use spinoso_symbol::IdentifierType;
167/// assert_eq!(
168///     "artichoke=".parse::<IdentifierType>(),
169///     Ok(IdentifierType::AttrSet)
170/// );
171/// assert_eq!(
172///     "spinoso_symbol=".parse::<IdentifierType>(),
173///     Ok(IdentifierType::AttrSet)
174/// );
175/// ```
176///
177/// [`Inspect`]: crate::Inspect
// NOTE: the derived `PartialOrd`/`Ord` impls compare variants by declaration
// order, so reordering the variants below changes comparison results.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum IdentifierType {
    /// Identifier that contains "junk".
    ///
    /// Junk mostly equates to non-sigil ASCII symbols. Identifiers like
    /// `empty?` and `flatten!` are junk idents. All special symbolic Ruby
    /// methods like `<=>` and `!~` are junk identifiers.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::IdentifierType;
    /// assert_eq!("empty?".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!(
    ///     "flatten!".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Junk)
    /// );
    /// assert_eq!("<=>".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("!~".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("[]".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("[]=".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("=~".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("==".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("===".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// ```
    Junk,
    /// Identifier that is a global variable name.
    ///
    /// Global variables are prefixed with the sigil `$`. There are two types of
    /// global variables:
    ///
    /// - `$` followed by a valid [`Constant`] or [`Local`] ident sequence.
    /// - Special global variables, which include single punctuation globals
    ///   (such as `$0` and `$~`), numbered `Regexp` globals (`$1`..`$9`), and
    ///   `$-w` type globals.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "$".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("$foo".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!(
    ///     "$@foo".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("$0".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!("$1".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!("$9".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!("$-w".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!(
    ///     "$-www".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    Global,
    /// Identifier that is an instance variable name.
    ///
    /// Instance variables are prefixed with a single `@` sigil. The remaining
    /// bytes must be a valid [`Constant`] or [`Local`] ident.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "@".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@foo".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@Foo".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@FOO".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@foo_bar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@FooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@FOO_BAR".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@$foo".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@0".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@1".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@9".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    Instance,
    /// Identifier that is a class variable name.
    ///
    /// Class variables are prefixed with a double `@@` sigil. The remaining
    /// bytes must be a valid [`Constant`] or [`Local`] ident.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "@@".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("@@foo".parse::<IdentifierType>(), Ok(IdentifierType::Class));
    /// assert_eq!("@@Foo".parse::<IdentifierType>(), Ok(IdentifierType::Class));
    /// assert_eq!("@@FOO".parse::<IdentifierType>(), Ok(IdentifierType::Class));
    /// assert_eq!(
    ///     "@@foo_bar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Class)
    /// );
    /// assert_eq!(
    ///     "@@FooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Class)
    /// );
    /// assert_eq!(
    ///     "@@FOO_BAR".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Class)
    /// );
    /// assert_eq!(
    ///     "@@$foo".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@@0".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@@1".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@@9".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    Class,
    /// Identifier that is an "attribute setter" method name.
    ///
    /// `AttrSet` idents end in the `=` sigil and are otherwise valid [`Local`]
    /// or [`Constant`] idents. `AttrSet` idents cannot have any other "junk"
    /// symbols.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "Foo=".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::AttrSet)
    /// );
    /// assert_eq!(
    ///     "foo=".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::AttrSet)
    /// );
    /// assert_eq!(
    ///     "foo_bar=".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::AttrSet)
    /// );
    /// assert_eq!(
    ///     "foo_bar?=".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("ω=".parse::<IdentifierType>(), Ok(IdentifierType::AttrSet));
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    AttrSet,
    /// Identifier that is a constant name.
    ///
    /// Constant names can be either ASCII or Unicode and must start with an
    /// uppercase character.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "Foo".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!(
    ///     "FOO".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!(
    ///     "FooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!(
    ///     "FOO_BAR".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!("Ω".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
    /// ```
    Constant,
    /// Identifier that is a local variable or method name.
    ///
    /// Local names can be either ASCII or Unicode and must start with a
    /// lowercase character.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!("foo".parse::<IdentifierType>(), Ok(IdentifierType::Local));
    /// assert_eq!("fOO".parse::<IdentifierType>(), Ok(IdentifierType::Local));
    /// assert_eq!(
    ///     "fooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Local)
    /// );
    /// assert_eq!(
    ///     "foo_bar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Local)
    /// );
    /// assert_eq!("ω".parse::<IdentifierType>(), Ok(IdentifierType::Local));
    /// ```
    Local,
}
426
impl IdentifierType {
    /// Return a new, default `IdentifierType`.
    ///
    /// The value returned is always [`IdentifierType::Junk`].
    ///
    /// Prefer to use `new()` over `default()` since `new()` is const.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::IdentifierType;
    /// const ID_TYPE: IdentifierType = IdentifierType::new();
    /// assert_eq!(ID_TYPE, IdentifierType::Junk);
    /// assert_eq!(ID_TYPE, IdentifierType::default());
    /// ```
    #[must_use]
    pub const fn new() -> Self {
        Self::Junk
    }
}
445
impl Default for IdentifierType {
    /// Construct a "junk" identifier type.
    ///
    /// This delegates to [`IdentifierType::new`], so the default value is
    /// [`IdentifierType::Junk`].
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::IdentifierType;
    /// const ID_TYPE: IdentifierType = IdentifierType::new();
    /// assert_eq!(ID_TYPE, IdentifierType::Junk);
    /// assert_eq!(ID_TYPE, IdentifierType::default());
    /// ```
    #[inline]
    fn default() -> Self {
        Self::new()
    }
}
462
463impl FromStr for IdentifierType {
464    type Err = ParseIdentifierError;
465
466    #[inline]
467    fn from_str(s: &str) -> Result<Self, Self::Err> {
468        parse(s.as_bytes()).ok_or_else(ParseIdentifierError::new)
469    }
470}
471
472impl TryFrom<&str> for IdentifierType {
473    type Error = ParseIdentifierError;
474
475    #[inline]
476    fn try_from(value: &str) -> Result<Self, Self::Error> {
477        parse(value.as_bytes()).ok_or_else(ParseIdentifierError::new)
478    }
479}
480
481impl TryFrom<&[u8]> for IdentifierType {
482    type Error = ParseIdentifierError;
483
484    #[inline]
485    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
486        parse(value).ok_or_else(ParseIdentifierError::new)
487    }
488}
489
/// Error type returned when a string or byte slice cannot be classified as an
/// [`IdentifierType`].
///
/// This error is returned from the [`FromStr`] implementation and from the
/// [`TryFrom`] implementations for `&str` and `&[u8]` on [`IdentifierType`].
///
/// # Examples
///
/// ```
/// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
/// const ERR: ParseIdentifierError = ParseIdentifierError::new();
/// assert_eq!("not a valid ident".parse::<IdentifierType>(), Err(ERR));
/// ```
#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct ParseIdentifierError {
    // Private unit field: the error carries no data, and the field keeps the
    // struct from being built with a struct literal outside this module.
    _private: (),
}
503
impl ParseIdentifierError {
    /// Construct a new `ParseIdentifierError`.
    ///
    /// The error carries no data, so every value compares equal to every
    /// other value.
    ///
    /// Prefer to use `new()` over `default()` since `new()` is const.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// const ERR: ParseIdentifierError = ParseIdentifierError::new();
    /// assert_eq!("not a valid ident".parse::<IdentifierType>(), Err(ERR));
    /// assert_eq!(ERR, ParseIdentifierError::default());
    /// ```
    #[must_use]
    pub const fn new() -> Self {
        Self { _private: () }
    }
}
522
523impl fmt::Display for ParseIdentifierError {
524    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
525        f.write_str("Failed to parse given string as a known identifier type")
526    }
527}
528
529#[inline]
530fn parse(name: &[u8]) -> Option<IdentifierType> {
531    match name {
532        [] | [b'\0'] => None,
533        // special global variable
534        [b'$', name @ ..] if is_special_global_name(name) => Some(IdentifierType::Global),
535        // global variable
536        [b'$', name @ ..] => parse_ident(name, IdentifierType::Global),
537        // class variable
538        [b'@', b'@', name @ ..] => parse_ident(name, IdentifierType::Class),
539        // instance variable
540        [b'@', name @ ..] => parse_ident(name, IdentifierType::Instance),
541        // Symbolic method names
542        name if is_symbolic_method_name(name) => Some(IdentifierType::Junk),
543        [b'=' | b'!' | b'[', ..] => None,
544        [first, ..] if *first != b'_' && first.is_ascii() && !first.is_ascii_alphabetic() => None,
545        // Constant name
546        name if is_const_name(name) => parse_ident(name, IdentifierType::Constant),
547        // Local variable
548        name => parse_ident(name, IdentifierType::Local),
549    }
550}
551
552#[inline]
553fn parse_ident(name: &[u8], id_type: IdentifierType) -> Option<IdentifierType> {
554    match name {
555        [] => None,
556        [first, name @ .., b'='] if *first != b'_' && first.is_ascii() && !first.is_ascii_alphabetic() => {
557            if let None | Some(IdentifierType::AttrSet) = parse_ident(name, id_type) {
558                None
559            } else {
560                Some(id_type)
561            }
562        }
563        [first, ..] if *first != b'_' && first.is_ascii() && !first.is_ascii_alphabetic() => None,
564        name if is_ident_until(name).is_none() => Some(id_type),
565        [name @ .., b'!' | b'?'] if is_ident_until(name).is_none() => {
566            if matches!(
567                id_type,
568                IdentifierType::Global | IdentifierType::Class | IdentifierType::Instance
569            ) {
570                return None;
571            }
572            Some(IdentifierType::Junk)
573        }
574        [name @ .., b'='] if is_ident_until(name).is_none() => {
575            if matches!(id_type, IdentifierType::Local | IdentifierType::Constant) {
576                return Some(IdentifierType::AttrSet);
577            }
578            None
579        }
580        _ => None,
581    }
582}
583
584#[inline]
585#[expect(clippy::match_same_arms, reason = "for clarity")]
586fn is_special_global_name(name: &[u8]) -> bool {
587    match name {
588        [] => false,
589        [first, rest @ ..] if is_special_global_punct(*first) => rest.is_empty(),
590        [b'-'] => false,
591        [b'-', rest @ ..] if is_next_ident_exhausting(rest) => true,
592        [b'-', ..] => false,
593        name => name.chars().all(char::is_numeric),
594    }
595}
596
/// Return whether `name` is one of Ruby's symbolic ("junk") method names.
///
/// Only a fixed set of method names consists purely of ASCII symbols, for
/// example `<=>`, `[]=`, and `!~`. The comparison is an exact, whole-name
/// match.
#[inline]
fn is_symbolic_method_name(name: &[u8]) -> bool {
    const SYMBOLIC_METHOD_NAMES: &[&[u8]] = &[
        b"<", b"<<", b"<=", b"<=>", b">", b">>", b">=", b"=~", b"==", b"===", b"*", b"**", b"+",
        b"-", b"+@", b"-@", b"|", b"^", b"&", b"/", b"%", b"~", b"`", b"[]", b"[]=", b"!", b"!=",
        b"!~",
    ];
    SYMBOLIC_METHOD_NAMES
        .iter()
        .any(|candidate| *candidate == name)
}
634
635/// Return whether the input is a valid constant name.
636///
637/// Constant names require the first character to be either ASCII or Unicode
638/// uppercase.
639#[inline]
640fn is_const_name(name: &[u8]) -> bool {
641    match name {
642        [] => false,
643        name if name.is_ascii() => name.iter().next().is_some_and(u8::is_ascii_uppercase),
644        // uses Unicode `Uppercase` property
645        name if name.is_utf8() => name.chars().next().is_some_and(char::is_uppercase),
646        _ => false,
647    }
648}
649
/// Determine if a [`char`] can be used in a valid identifier.
///
/// A char is an ident char when it is an ASCII letter, an ASCII digit, `_`,
/// or any non-ASCII character.
///
/// # Header declaration
///
/// Ported from the following C macro in `string.c`:
///
/// ```c
/// #define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))
/// ```
#[inline]
fn is_ident_char(ch: char) -> bool {
    match ch {
        '_' | '0'..='9' | 'A'..='Z' | 'a'..='z' => true,
        // Every non-ASCII character is accepted; all other ASCII is rejected.
        other => !other.is_ascii(),
    }
}
663
664/// Consume the input until a non-ident character is found.
665///
666/// Scan the [`char`]s in the input until either invalid UTF-8 or an invalid
667/// ident is found. See [`is_ident_char`].
668///
669/// This method returns `Some(index)` of the start of the first invalid ident
670/// or `None` if the whole input is a valid ident.
671///
672/// Empty slices are not valid idents.
673#[inline]
674fn is_ident_until(mut name: &[u8]) -> Option<usize> {
675    // Empty strings are not idents.
676    if name.is_empty() {
677        return Some(0);
678    }
679    let mut start = 0;
680    while !name.is_empty() {
681        let (ch, size) = bstr::decode_utf8(name);
682        match ch {
683            Some(ch) if !is_ident_char(ch) => return Some(start),
684            None => return Some(start),
685            Some(_) => {
686                name = &name[size..];
687                start += size;
688            }
689        }
690    }
691    None
692}
693
694/// Determine if the next char is a valid ident char and consumes all bytes in
695/// the input.
696///
697/// This function is used to determine whether certain kinds of single character
698/// globals are valid idents.
699///
700/// See also [`is_ident_char`].
701#[inline]
702fn is_next_ident_exhausting(name: &[u8]) -> bool {
703    let (ch, size) = bstr::decode_utf8(name);
704    match ch {
705        Some(ch) if is_ident_char(ch) => name.len() == size,
706        Some(_) | None => false,
707    }
708}
709
710// This function is defined by a macro in `parse.y` in MRI.
711//
712// ```c
713// #define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
714// #define SPECIAL_PUNCT(idx) ( \
715// 	BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
716// 	BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
717// 	BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
718// 	BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
719// 	BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
720// 	BIT('0', idx))
721// const unsigned int ruby_global_name_punct_bits[] = {
722//     SPECIAL_PUNCT(0),
723//     SPECIAL_PUNCT(1),
724//     SPECIAL_PUNCT(2),
725// };
726// ```
727//
728// The contents of `ruby_global_name_punct_bits` are:
729//
730// ```console
731// [2.6.6] > def bit(c, idx); c / 32 - 1 == idx ? 1 << (c % 32) : 0; end
732// [2.6.6] > chars = ["~", "*", "$", "?", "!", "@", "/", "\\", ";", ",", ".", "=", ":", "<", ">", "\"", "&", "`", "'", "+", "0"]
733//
734// [2.6.6] > chars.map(&:ord).map { |ch| bit(ch, 0) }.reduce(0, :|)
735// => 4227980502
736// [2.6.6] > chars.map(&:ord).map { |ch| bit(ch, 1) }.reduce(0, :|)
737// => 268435457
738// [2.6.6] > chars.map(&:ord).map { |ch| bit(ch, 2) }.reduce(0, :|)
739// => 1073741825
740// ```
741//
742// Which corresponds to a fixed set of 21 ASCII symbols:
743//
744// ```ruby
745// def is_special_global_punct(ch)
746//   idx = (ch - 0x20) / 32;
747//   case idx
748//   when 0 then (4_227_980_502 >> (ch % 32)) & 1 > 0
749//   when 1 then (268_435_457 >> (ch % 32)) & 1 > 0
750//   when 2 then (1_073_741_825 >> (ch % 32)) & 1 > 0
751//   else
752//     false
753//   end
754// end
755//
756// h = {}
757// (0..255).each do |ch|
758//   h[ch.chr] = ch if is_special_global_punct(ch)
759// end
760// h.keys.map {|k| "b'#{k.inspect[1..-2]}'"}.join(" | ")
761// ```
762//
// TODO: Switch to generating this table inside a const function. This requires
// const `match`, `if`, and `loop`, which have been stable since Rust 1.46.0.
/// Return whether `ch` is one of the 21 ASCII bytes that form a special
/// punctuation global when they follow `$`, such as `$~`, `$!`, or `$0`.
#[inline]
fn is_special_global_punct(ch: u8) -> bool {
    // Listed in ascending byte order.
    const SPECIAL_PUNCT: &[u8] = b"!\"$&'*+,./0:;<=>?@\\`~";
    SPECIAL_PUNCT.contains(&ch)
}
792
793#[cfg(test)]
794mod tests {
795    use super::{
796        IdentifierType, ParseIdentifierError, is_ident_until, is_next_ident_exhausting, is_special_global_name,
797    };
798
799    #[test]
800    fn special_global_name() {
801        let name = &b"a"[..];
802        assert!(!is_special_global_name(name));
803        let name = "💎";
804        assert!(!is_special_global_name(name.as_bytes()));
805        let name = &b"ab"[..];
806        assert!(!is_special_global_name(name));
807        let name = "-💎";
808        assert!(is_special_global_name(name.as_bytes()));
809        let name = &b"$"[..];
810        assert!(is_special_global_name(name));
811        let name = &b"~"[..];
812        assert!(is_special_global_name(name));
813        let name = "�";
814        assert!(!is_special_global_name(name.as_bytes()));
815        let name = "-�";
816        assert!(is_special_global_name(name.as_bytes()));
817    }
818
819    #[test]
820    fn is_ident_until_empty() {
821        let name = &[];
822        assert_eq!(is_ident_until(name), Some(0));
823    }
824
825    #[test]
826    fn is_ident_until_lowercase_ascii() {
827        let name = &b"abc"[..];
828        assert_eq!(is_ident_until(name), None);
829        let name = &b"abc_123"[..];
830        assert_eq!(is_ident_until(name), None);
831        let name = &b"_"[..];
832        assert_eq!(is_ident_until(name), None);
833        let name = &b"_e"[..];
834        assert_eq!(is_ident_until(name), None);
835        let name = &b"_1"[..];
836        assert_eq!(is_ident_until(name), None);
837    }
838
839    #[test]
840    fn is_ident_until_ascii_constant() {
841        let name = &b"Abc"[..];
842        assert_eq!(is_ident_until(name), None);
843        let name = &b"ABC"[..];
844        assert_eq!(is_ident_until(name), None);
845        let name = &b"ABC_XYZ"[..];
846        assert_eq!(is_ident_until(name), None);
847        let name = &b"ABC_123"[..];
848        assert_eq!(is_ident_until(name), None);
849        let name = &b"HTTP2"[..];
850        assert_eq!(is_ident_until(name), None);
851    }
852
853    #[test]
854    fn is_ident_until_unicode() {
855        let name = "ábc";
856        assert_eq!(is_ident_until(name.as_bytes()), None);
857        let name = "abç";
858        assert_eq!(is_ident_until(name.as_bytes()), None);
859        let name = "abc_�";
860        assert_eq!(is_ident_until(name.as_bytes()), None);
861        let name = "abc_💎";
862        assert_eq!(is_ident_until(name.as_bytes()), None);
863
864        let name = "Ábc";
865        assert_eq!(is_ident_until(name.as_bytes()), None);
866        let name = "Abç";
867        assert_eq!(is_ident_until(name.as_bytes()), None);
868        let name = "Abc_�";
869        assert_eq!(is_ident_until(name.as_bytes()), None);
870        let name = "Abc_💎";
871        assert_eq!(is_ident_until(name.as_bytes()), None);
872
873        let name = "💎";
874        assert_eq!(is_ident_until(name.as_bytes()), None);
875        let name = "💎abc";
876        assert_eq!(is_ident_until(name.as_bytes()), None);
877    }
878
879    #[test]
880    fn is_ident_until_invalid_utf8() {
881        let name = &b"\xFF"[..];
882        assert_eq!(is_ident_until(name), Some(0));
883        let name = &b"abc\xFF"[..];
884        assert_eq!(is_ident_until(name), Some(3));
885        let name = &b"abc\xFFxyz"[..];
886        assert_eq!(is_ident_until(name), Some(3));
887
888        let name = &b"\xFF\xFE"[..];
889        assert_eq!(is_ident_until(name), Some(0));
890        let name = &b"abc\xFF\xFE"[..];
891        assert_eq!(is_ident_until(name), Some(3));
892        let name = &b"abc\xFF\xFExyz"[..];
893        assert_eq!(is_ident_until(name), Some(3));
894
895        let name = &b"\xEF\xBF\xBD\xFF"[..];
896        assert_eq!(is_ident_until(name), Some(3));
897        let name = &b"\xF0\x9F\x92\x8E\xFF"[..];
898        assert_eq!(is_ident_until(name), Some(4));
899    }
900
901    #[test]
902    fn is_next_ident_exhausting_empty() {
903        let name = &[];
904        assert!(!is_next_ident_exhausting(name));
905    }
906
907    #[test]
908    fn is_next_ident_exhausting_lowercase_ascii() {
909        let name = &b"a"[..];
910        assert!(is_next_ident_exhausting(name));
911        let name = &b"abc"[..];
912        assert!(!is_next_ident_exhausting(name));
913        let name = &b"1"[..];
914        assert!(is_next_ident_exhausting(name));
915        let name = &b"abc_123"[..];
916        assert!(!is_next_ident_exhausting(name));
917        let name = &b"_"[..];
918        assert!(is_next_ident_exhausting(name));
919        let name = &b"_e"[..];
920        assert!(!is_next_ident_exhausting(name));
921        let name = &b"_1"[..];
922        assert!(!is_next_ident_exhausting(name));
923    }
924
925    #[test]
926    fn is_next_ident_exhausting_ascii_constant() {
927        let name = &b"A"[..];
928        assert!(is_next_ident_exhausting(name));
929        let name = &b"Abc"[..];
930        assert!(!is_next_ident_exhausting(name));
931        let name = &b"ABC"[..];
932        assert!(!is_next_ident_exhausting(name));
933        let name = &b"ABC_XYZ"[..];
934        assert!(!is_next_ident_exhausting(name));
935        let name = &b"ABC_123"[..];
936        assert!(!is_next_ident_exhausting(name));
937        let name = &b"HTTP2"[..];
938        assert!(!is_next_ident_exhausting(name));
939    }
940
941    #[test]
942    fn is_next_ident_exhausting_unicode() {
943        let name = "ábc";
944        assert!(!is_next_ident_exhausting(name.as_bytes()));
945        let name = "abç";
946        assert!(!is_next_ident_exhausting(name.as_bytes()));
947        let name = "abc_�";
948        assert!(!is_next_ident_exhausting(name.as_bytes()));
949        let name = "abc_💎";
950        assert!(!is_next_ident_exhausting(name.as_bytes()));
951
952        let name = "Ábc";
953        assert!(!is_next_ident_exhausting(name.as_bytes()));
954        let name = "Abç";
955        assert!(!is_next_ident_exhausting(name.as_bytes()));
956        let name = "Abc_�";
957        assert!(!is_next_ident_exhausting(name.as_bytes()));
958        let name = "Abc_💎";
959        assert!(!is_next_ident_exhausting(name.as_bytes()));
960        let name = "💎abc";
961        assert!(!is_next_ident_exhausting(name.as_bytes()));
962
963        let name = "á";
964        assert!(is_next_ident_exhausting(name.as_bytes()));
965        let name = "ç";
966        assert!(is_next_ident_exhausting(name.as_bytes()));
967        let name = "�";
968        assert!(is_next_ident_exhausting(name.as_bytes()));
969        let name = "💎";
970        assert!(is_next_ident_exhausting(name.as_bytes()));
971    }
972
973    #[test]
974    fn is_next_ident_exhausting_invalid_utf8() {
975        let name = &b"\xFF"[..];
976        assert!(!is_next_ident_exhausting(name));
977        let name = &b"abc\xFF"[..];
978        assert!(!is_next_ident_exhausting(name));
979        let name = &b"abc\xFFxyz"[..];
980        assert!(!is_next_ident_exhausting(name));
981
982        let name = &b"\xFF\xFE"[..];
983        assert!(!is_next_ident_exhausting(name));
984        let name = &b"abc\xFF\xFE"[..];
985        assert!(!is_next_ident_exhausting(name));
986        let name = &b"abc\xFF\xFExyz"[..];
987        assert!(!is_next_ident_exhausting(name));
988
989        let name = &b"\xEF\xBF\xBD\xFF"[..];
990        assert!(!is_next_ident_exhausting(name));
991        let name = &b"\xF0\x9F\x92\x8E\xFF"[..];
992        assert!(!is_next_ident_exhausting(name));
993    }
994
995    #[test]
996    fn ascii_ident() {
997        assert_eq!("foobar".parse::<IdentifierType>(), Ok(IdentifierType::Local));
998        assert_eq!("ruby_is_simple".parse::<IdentifierType>(), Ok(IdentifierType::Local));
999    }
1000
1001    #[test]
1002    fn ascii_constant() {
1003        assert_eq!("Foobar".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
1004        assert_eq!("FooBar".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
1005        assert_eq!("FOOBAR".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
1006        assert_eq!("FOO_BAR".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
1007        assert_eq!("RUBY_IS_SIMPLE".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
1008    }
1009
1010    #[test]
1011    fn empty() {
1012        assert_eq!("".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1013    }
1014
1015    #[test]
1016    fn single_nul() {
1017        assert_eq!("\0".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1018    }
1019
1020    #[test]
1021    fn non_ascii_numerics() {
1022        assert_eq!("١".parse::<IdentifierType>(), Ok(IdentifierType::Local));
1023        assert_eq!(
1024            "١١١١١١١١١١١١١١١١١١".parse::<IdentifierType>(),
1025            Ok(IdentifierType::Local)
1026        );
1027        assert_eq!("①".parse::<IdentifierType>(), Ok(IdentifierType::Local));
1028    }
1029
1030    #[test]
1031    fn recursive_ident() {
1032        assert_eq!("@@@foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1033        assert_eq!("@@@@foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1034        assert_eq!("@$foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1035        assert_eq!("@$-w".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1036        assert_eq!("@@$foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1037        assert_eq!("@@$-w".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1038        assert_eq!("$@foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1039        assert_eq!("$@@foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1040        assert_eq!("$$-w".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1041    }
1042
1043    #[test]
1044    fn attr_bang() {
1045        assert_eq!("@foo!".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1046        assert_eq!("@@foo!".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1047        assert_eq!("$foo!".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1048    }
1049
1050    #[test]
1051    fn attr_question() {
1052        assert_eq!("@foo?".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1053        assert_eq!("@@foo?".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1054        assert_eq!("$foo?".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1055    }
1056
1057    #[test]
1058    fn attr_setter() {
1059        assert_eq!("@foo=".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1060        assert_eq!("@@foo=".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1061        assert_eq!("$foo=".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1062    }
1063
1064    #[test]
1065    fn invalid_utf8() {
1066        assert_eq!(
1067            IdentifierType::try_from(&b"invalid-\xFF-utf8"[..]),
1068            Err(ParseIdentifierError::new())
1069        );
1070    }
1071
1072    #[test]
1073    fn emoji() {
1074        assert_eq!(IdentifierType::try_from("💎"), Ok(IdentifierType::Local));
1075        assert_eq!(IdentifierType::try_from("$💎"), Ok(IdentifierType::Global));
1076        assert_eq!(IdentifierType::try_from("@💎"), Ok(IdentifierType::Instance));
1077        assert_eq!(IdentifierType::try_from("@@💎"), Ok(IdentifierType::Class));
1078    }
1079
1080    #[test]
1081    fn unicode_replacement_char() {
1082        assert_eq!(IdentifierType::try_from("�"), Ok(IdentifierType::Local));
1083        assert_eq!(IdentifierType::try_from("$�"), Ok(IdentifierType::Global));
1084        assert_eq!(IdentifierType::try_from("@�"), Ok(IdentifierType::Instance));
1085        assert_eq!(IdentifierType::try_from("@@�"), Ok(IdentifierType::Class));
1086
1087        assert_eq!(IdentifierType::try_from("abc�"), Ok(IdentifierType::Local));
1088        assert_eq!(IdentifierType::try_from("$abc�"), Ok(IdentifierType::Global));
1089        assert_eq!(IdentifierType::try_from("@abc�"), Ok(IdentifierType::Instance));
1090        assert_eq!(IdentifierType::try_from("@@abc�"), Ok(IdentifierType::Class));
1091    }
1092
1093    #[test]
1094    fn invalid_utf8_special_global() {
1095        assert_eq!(
1096            IdentifierType::try_from(&b"$-\xFF"[..]),
1097            Err(ParseIdentifierError::new())
1098        );
1099    }
1100
1101    #[test]
1102    fn replacement_char_special_global() {
1103        assert_eq!(IdentifierType::try_from("$-�"), Ok(IdentifierType::Global));
1104        assert_eq!(IdentifierType::try_from("$-�a"), Err(ParseIdentifierError::new()));
1105        assert_eq!(IdentifierType::try_from("$-��"), Err(ParseIdentifierError::new()));
1106    }
1107}
1108
#[cfg(test)]
mod specs {
    use super::IdentifierType;

    /// Asserts that every symbol in `syms` parses as some [`IdentifierType`].
    ///
    /// The failure message names the offending symbol.
    #[track_caller]
    fn assert_idents(syms: &[&str]) {
        for sym in syms {
            assert!(
                sym.parse::<IdentifierType>().is_ok(),
                "'{sym}' should parse as a valid identifier, but did not."
            );
        }
    }

    /// Asserts that no symbol in `syms` parses as an [`IdentifierType`].
    ///
    /// The failure message names the offending symbol.
    #[track_caller]
    fn assert_not_idents(syms: &[&str]) {
        for sym in syms {
            assert!(
                sym.parse::<IdentifierType>().is_err(),
                "'{sym}' should not parse as a valid identifier, but did."
            );
        }
    }

    // From `spec/core/symbol/inspect_spec.rb`:
    //
    // ```ruby
    // symbols = {
    //   fred:         ":fred",
    //   :fred?     => ":fred?",
    //   :fred!     => ":fred!",
    //   :$ruby     => ":$ruby",
    //   :@ruby     => ":@ruby",
    //   :@@ruby    => ":@@ruby",
    //   :"$ruby!"  => ":\"$ruby!\"",
    //   :"$ruby?"  => ":\"$ruby?\"",
    //   :"@ruby!"  => ":\"@ruby!\"",
    //   :"@ruby?"  => ":\"@ruby?\"",
    //   :"@@ruby!" => ":\"@@ruby!\"",
    //   :"@@ruby?" => ":\"@@ruby?\"",
    //
    //   :$-w       => ":$-w",
    //   :"$-ww"    => ":\"$-ww\"",
    //   :"$+"      => ":$+",
    //   :"$~"      => ":$~",
    //   :"$:"      => ":$:",
    //   :"$?"      => ":$?",
    //   :"$<"      => ":$<",
    //   :"$_"      => ":$_",
    //   :"$/"      => ":$/",
    //   :"$'"      => ":$'",
    //   :"$\""     => ":$\"",
    //   :"$$"      => ":$$",
    //   :"$."      => ":$.",
    //   :"$,"      => ":$,",
    //   :"$`"      => ":$`",
    //   :"$!"      => ":$!",
    //   :"$;"      => ":$;",
    //   :"$\\"     => ":$\\",
    //   :"$="      => ":$=",
    //   :"$*"      => ":$*",
    //   :"$>"      => ":$>",
    //   :"$&"      => ":$&",
    //   :"$@"      => ":$@",
    //   :"$1234"   => ":$1234",
    //
    //   :-@        => ":-@",
    //   :+@        => ":+@",
    //   :%         => ":%",
    //   :&         => ":&",
    //   :*         => ":*",
    //   :**        => ":**",
    //   :"/"       => ":/",     # lhs quoted for emacs happiness
    //   :<         => ":<",
    //   :<=        => ":<=",
    //   :<=>       => ":<=>",
    //   :==        => ":==",
    //   :===       => ":===",
    //   :=~        => ":=~",
    //   :>         => ":>",
    //   :>=        => ":>=",
    //   :>>        => ":>>",
    //   :[]        => ":[]",
    //   :[]=       => ":[]=",
    //   :"\<\<"    => ":\<\<",
    //   :^         => ":^",
    //   :"`"       => ":`",     # for emacs, and justice!
    //   :~         => ":~",
    //   :|         => ":|",
    //
    //   :"!"       => [":\"!\"",  ":!" ],
    //   :"!="      => [":\"!=\"", ":!="],
    //   :"!~"      => [":\"!~\"", ":!~"],
    //   :"\$"      => ":\"$\"", # for justice!
    //   :"&&"      => ":\"&&\"",
    //   :"'"       => ":\"\'\"",
    //   :","       => ":\",\"",
    //   :"."       => ":\".\"",
    //   :".."      => ":\"..\"",
    //   :"..."     => ":\"...\"",
    //   :":"       => ":\":\"",
    //   :"::"      => ":\"::\"",
    //   :";"       => ":\";\"",
    //   :"="       => ":\"=\"",
    //   :"=>"      => ":\"=>\"",
    //   :"\?"      => ":\"?\"", # rawr!
    //   :"@"       => ":\"@\"",
    //   :"||"      => ":\"||\"",
    //   :"|||"     => ":\"|||\"",
    //   :"++"      => ":\"++\"",
    //
    //   :"\""      => ":\"\\\"\"",
    //   :"\"\""    => ":\"\\\"\\\"\"",
    //
    //   :"9"       => ":\"9\"",
    //   :"foo bar" => ":\"foo bar\"",
    //   :"*foo"    => ":\"*foo\"",
    //   :"foo "    => ":\"foo \"",
    //   :" foo"    => ":\" foo\"",
    //   :" "       => ":\" \"",
    // }
    // ```

    /// Checks the parser against the symbol table quoted above: symbols that
    /// `Symbol#inspect` renders without quotes must parse, and symbols it
    /// renders quoted must not.
    #[test]
    fn specs() {
        // idents
        assert_idents(&["fred", "fred?", "fred!", "$ruby", "@ruby", "@@ruby"]);

        // idents can't end in bang or question
        assert_not_idents(&["$ruby!", "$ruby?", "@ruby!", "@ruby?", "@@ruby!", "@@ruby?"]);

        // globals
        assert_idents(&[
            "$-w", "$+", "$~", "$:", "$?", "$<", "$_", "$/", "$'", "$\"", "$$", "$.", "$,", "$`",
            "$!", "$;", "$\\", "$=", "$*", "$>", "$&", "$@", "$1234",
        ]);
        // a `$-` global takes exactly one trailing character
        assert_not_idents(&["$-ww"]);

        // symbolic methods
        assert_idents(&[
            "-@", "+@", "%", "&", "*", "**", "/", "<", "<=", "<=>", "==", "===", "=~", ">", ">=",
            ">>", "[]", "[]=", "<<", "^", "`", "~", "|",
        ]);

        // non-symbol symbolics
        assert_idents(&["!", "!=", "!~"]);
        assert_not_idents(&[
            "$", "&&", "'", ",", ".", "..", "...", ":", "::", ";", "=", "=>", "?", "@", "||",
            "|||", "++",
        ]);

        // quotes
        assert_not_idents(&[r#"""#, r#""""#]);

        // leading digits, embedded or surrounding whitespace, stray operators
        assert_not_idents(&["9", "foo bar", "*foo", "foo ", " foo", " "]);
    }
}
1314
/// Functional tests driven by the symbols MRI has interned at interpreter
/// boot.
///
/// # Generation
///
/// The fixture list is produced by running the following script:
///
/// ```shell
/// cat <<EOF | ruby --disable-gems --disable-did_you_mean
/// def boot_identifier_symbols
///   syms = Symbol.all_symbols.map(&:inspect)
///   # remove symbols that must be debug wrapped in quotes
///   syms = syms.reject { |s| s[0..1] == ':"' }
///
///   fixture = syms.map { |s| "r##\"#{s[1..]}\"##" }
///   puts fixture.join(",\n")
/// end
///
/// boot_identifier_symbols
/// EOF
/// ```
#[cfg(test)]
mod functionals {
    use super::IdentifierType;
    use crate::fixtures::IDENTS;

    /// Every symbol in the fixture list must parse as some identifier type.
    #[test]
    fn mri_symbol_idents() {
        IDENTS.iter().for_each(|sym| {
            let parsed = sym.parse::<IdentifierType>();
            assert!(
                parsed.is_ok(),
                "'{sym}' should parse as a valid identifier, but did not."
            );
        });
    }
}