spinoso_symbol/ident.rs
1//! Parser for classifying byte strings as Ruby identifiers.
2//!
3//! This module exposes a parser for determining if a sequence of bytes is a
4//! valid Ruby identifier. These routines also classify idents by type, for
5//! example, a local variable (`is_spinoso`), constant name (`SPINOSO_SYMBOL`),
6//! or class variable (`@@spinoso_symbol`).
7//!
8//! # Examples – local variable
9//!
10//! ```
11//! # use spinoso_symbol::IdentifierType;
12//! assert_eq!(
13//! "spinoso".parse::<IdentifierType>(),
14//! Ok(IdentifierType::Local)
15//! );
16//! assert_eq!(
17//! "spinoso_symbol_features".parse::<IdentifierType>(),
18//! Ok(IdentifierType::Local)
19//! );
20//! ```
21//!
22//! # Examples – constant
23//!
24//! ```
25//! # use spinoso_symbol::IdentifierType;
26//! assert_eq!(
27//! "Spinoso".parse::<IdentifierType>(),
28//! Ok(IdentifierType::Constant)
29//! );
30//! assert_eq!(
31//! "SpinosoSymbol".parse::<IdentifierType>(),
32//! Ok(IdentifierType::Constant)
33//! );
34//! assert_eq!(
35//! "SPINOSO_SYMBOL_FEATURES".parse::<IdentifierType>(),
36//! Ok(IdentifierType::Constant)
37//! );
38//! ```
39//!
40//! # Examples – global
41//!
42//! ```
43//! # use spinoso_symbol::IdentifierType;
44//! assert_eq!(
45//! "$use_spinoso_symbol".parse::<IdentifierType>(),
46//! Ok(IdentifierType::Global)
47//! );
48//! assert_eq!(
49//! "$USE_SPINOSO_SYMBOL".parse::<IdentifierType>(),
50//! Ok(IdentifierType::Global)
51//! );
52//! ```
53//!
54//! # Examples – instance and class variables
55//!
56//! ```
57//! # use spinoso_symbol::IdentifierType;
58//! assert_eq!(
59//! "@artichoke".parse::<IdentifierType>(),
60//! Ok(IdentifierType::Instance)
61//! );
62//! assert_eq!(
63//! "@@rumble".parse::<IdentifierType>(),
64//! Ok(IdentifierType::Class)
65//! );
66//! ```
67//!
68//! # Example – attribute setter
69//!
70//! Attribute setters are local idents that end in `=`.
71//!
72//! ```
73//! # use spinoso_symbol::IdentifierType;
74//! assert_eq!(
75//! "artichoke=".parse::<IdentifierType>(),
76//! Ok(IdentifierType::AttrSet)
77//! );
78//! assert_eq!(
79//! "spinoso_symbol=".parse::<IdentifierType>(),
80//! Ok(IdentifierType::AttrSet)
81//! );
82//! ```
83
84use core::fmt;
85use core::str::FromStr;
86
87use bstr::ByteSlice;
88
89/// Valid types for Ruby identifiers.
90///
91/// Spinoso symbol parses byte strings to determine if they are valid idents for
92/// the [`Inspect`] iterator (which requires the **inspect** Cargo feature to be
93/// enabled). Symbols that are valid idents do not get wrapped in `"` when
94/// generating their debug output.
95///
96/// See variant documentation for the set of ident types.
97///
98/// `IdentifierType`'s primary interface is through the [`TryFrom`] and
99/// [`FromStr`] conversion traits. Parsing `&str` and `&[u8]` is supported.
100///
101/// # Examples – local variable
102///
103/// ```
104/// # use spinoso_symbol::IdentifierType;
105/// assert_eq!(
106/// "spinoso".parse::<IdentifierType>(),
107/// Ok(IdentifierType::Local)
108/// );
109/// assert_eq!(
110/// "spinoso_symbol_features".parse::<IdentifierType>(),
111/// Ok(IdentifierType::Local)
112/// );
113/// ```
114///
115/// # Examples – constant
116///
117/// ```
118/// # use spinoso_symbol::IdentifierType;
119/// assert_eq!(
120/// "Spinoso".parse::<IdentifierType>(),
121/// Ok(IdentifierType::Constant)
122/// );
123/// assert_eq!(
124/// "SpinosoSymbol".parse::<IdentifierType>(),
125/// Ok(IdentifierType::Constant)
126/// );
127/// assert_eq!(
128/// "SPINOSO_SYMBOL_FEATURES".parse::<IdentifierType>(),
129/// Ok(IdentifierType::Constant)
130/// );
131/// ```
132///
133/// # Examples – global
134///
135/// ```
136/// # use spinoso_symbol::IdentifierType;
137/// assert_eq!(
138/// "$use_spinoso_symbol".parse::<IdentifierType>(),
139/// Ok(IdentifierType::Global)
140/// );
141/// assert_eq!(
142/// "$USE_SPINOSO_SYMBOL".parse::<IdentifierType>(),
143/// Ok(IdentifierType::Global)
144/// );
145/// ```
146///
147/// # Examples – instance and class variables
148///
149/// ```
150/// # use spinoso_symbol::IdentifierType;
151/// assert_eq!(
152/// "@artichoke".parse::<IdentifierType>(),
153/// Ok(IdentifierType::Instance)
154/// );
155/// assert_eq!(
156/// "@@rumble".parse::<IdentifierType>(),
157/// Ok(IdentifierType::Class)
158/// );
159/// ```
160///
161/// # Example – attribute setter
162///
163/// Attribute setters are local idents that end in `=`.
164///
165/// ```
166/// # use spinoso_symbol::IdentifierType;
167/// assert_eq!(
168/// "artichoke=".parse::<IdentifierType>(),
169/// Ok(IdentifierType::AttrSet)
170/// );
171/// assert_eq!(
172/// "spinoso_symbol=".parse::<IdentifierType>(),
173/// Ok(IdentifierType::AttrSet)
174/// );
175/// ```
176///
177/// [`Inspect`]: crate::Inspect
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum IdentifierType {
    // NOTE: `PartialOrd` and `Ord` are derived, so the declaration order of the
    // variants below defines how `IdentifierType` values compare.

    /// Identifier that contains "junk".
    ///
    /// Junk mostly equates to non-sigil ASCII symbols. Identifiers like
    /// `empty?` and `flatten!` are junk idents. All special symbolic Ruby
    /// methods like `<=>` and `!~` are junk identifiers.
    ///
    /// This is the variant returned by [`IdentifierType::new`] and by the
    /// [`Default`] implementation.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::IdentifierType;
    /// assert_eq!("empty?".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!(
    ///     "flatten!".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Junk)
    /// );
    /// assert_eq!("<=>".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("!~".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("[]".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("[]=".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("=~".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("==".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("===".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// ```
    Junk,
    /// Identifier that is a global variable name.
    ///
    /// Global variables are prefixed with the sigil `$`. There are two types of
    /// global variables:
    ///
    /// - `$` followed by bytes that are otherwise a valid [`Constant`] or
    ///   [`Local`] ident.
    /// - Special global variables, which include single punctuation globals
    ///   (such as `$~` and `$0`), `Regexp` globals (`$1`..`$9`) and `$-w`
    ///   type globals.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "$".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("$foo".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!(
    ///     "$@foo".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("$0".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!("$1".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!("$9".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!("$-w".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!(
    ///     "$-www".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    Global,
    /// Identifier that is an instance variable name.
    ///
    /// Instance variables are prefixed with a single `@` sigil. The remaining
    /// bytes must be a valid [`Constant`] or [`Local`] ident.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "@".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@foo".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@Foo".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@FOO".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@foo_bar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@FooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@FOO_BAR".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@$foo".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@0".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@1".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@9".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    Instance,
    /// Identifier that is a class variable name.
    ///
    /// Class variables are prefixed with a double `@@` sigil. The remaining
    /// bytes must be a valid [`Constant`] or [`Local`] ident.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "@@".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("@@foo".parse::<IdentifierType>(), Ok(IdentifierType::Class));
    /// assert_eq!("@@Foo".parse::<IdentifierType>(), Ok(IdentifierType::Class));
    /// assert_eq!("@@FOO".parse::<IdentifierType>(), Ok(IdentifierType::Class));
    /// assert_eq!(
    ///     "@@foo_bar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Class)
    /// );
    /// assert_eq!(
    ///     "@@FooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Class)
    /// );
    /// assert_eq!(
    ///     "@@FOO_BAR".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Class)
    /// );
    /// assert_eq!(
    ///     "@@$foo".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@@0".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@@1".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@@9".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    Class,
    /// Identifier that is an "attribute setter" method name.
    ///
    /// `AttrSet` idents end in the `=` sigil and are otherwise valid [`Local`]
    /// or [`Constant`] idents. `AttrSet` idents cannot have any other "junk"
    /// symbols.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "Foo=".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::AttrSet)
    /// );
    /// assert_eq!(
    ///     "foo=".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::AttrSet)
    /// );
    /// assert_eq!(
    ///     "foo_bar=".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::AttrSet)
    /// );
    /// assert_eq!(
    ///     "foo_bar?=".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("ω=".parse::<IdentifierType>(), Ok(IdentifierType::AttrSet));
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    AttrSet,
    /// Identifier that is a constant name.
    ///
    /// Constant names can be either ASCII or Unicode and must start with an
    /// uppercase character.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "Foo".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!(
    ///     "FOO".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!(
    ///     "FooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!(
    ///     "FOO_BAR".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!("Ω".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
    /// ```
    Constant,
    /// Identifier that is a local variable or method name.
    ///
    /// Local names can be either ASCII or Unicode. Any name that is a valid
    /// ident and does not start with an uppercase character is a local,
    /// including names that start with `_` or a non-ASCII character.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!("foo".parse::<IdentifierType>(), Ok(IdentifierType::Local));
    /// assert_eq!("fOO".parse::<IdentifierType>(), Ok(IdentifierType::Local));
    /// assert_eq!(
    ///     "fooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Local)
    /// );
    /// assert_eq!(
    ///     "foo_bar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Local)
    /// );
    /// assert_eq!("ω".parse::<IdentifierType>(), Ok(IdentifierType::Local));
    /// ```
    Local,
}
426
427impl IdentifierType {
428 /// Return a new, default `IdentifierType`.
429 ///
430 /// Prefer to use `new()` over `default()` since `new()` is const.
431 ///
432 /// # Examples
433 ///
434 /// ```
435 /// # use spinoso_symbol::IdentifierType;
436 /// const ID_TYPE: IdentifierType = IdentifierType::new();
437 /// assert_eq!(ID_TYPE, IdentifierType::Junk);
438 /// assert_eq!(ID_TYPE, IdentifierType::default());
439 /// ```
440 #[must_use]
441 pub const fn new() -> Self {
442 Self::Junk
443 }
444}
445
446impl Default for IdentifierType {
447 /// Construct a "junk" identifier type.
448 ///
449 /// # Examples
450 ///
451 /// ```
452 /// # use spinoso_symbol::IdentifierType;
453 /// const ID_TYPE: IdentifierType = IdentifierType::new();
454 /// assert_eq!(ID_TYPE, IdentifierType::Junk);
455 /// assert_eq!(ID_TYPE, IdentifierType::default());
456 /// ```
457 #[inline]
458 fn default() -> Self {
459 Self::new()
460 }
461}
462
463impl FromStr for IdentifierType {
464 type Err = ParseIdentifierError;
465
466 #[inline]
467 fn from_str(s: &str) -> Result<Self, Self::Err> {
468 parse(s.as_bytes()).ok_or_else(ParseIdentifierError::new)
469 }
470}
471
472impl TryFrom<&str> for IdentifierType {
473 type Error = ParseIdentifierError;
474
475 #[inline]
476 fn try_from(value: &str) -> Result<Self, Self::Error> {
477 parse(value.as_bytes()).ok_or_else(ParseIdentifierError::new)
478 }
479}
480
481impl TryFrom<&[u8]> for IdentifierType {
482 type Error = ParseIdentifierError;
483
484 #[inline]
485 fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
486 parse(value).ok_or_else(ParseIdentifierError::new)
487 }
488}
489
/// Error type returned when input cannot be classified as an
/// [`IdentifierType`].
///
/// This error is produced by the [`FromStr`] implementation on
/// [`IdentifierType`] as well as by its [`TryFrom`] implementations for `&str`
/// and `&[u8]`. It carries no detail beyond the fact that parsing failed.
///
/// # Examples
///
/// ```
/// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
/// const ERR: ParseIdentifierError = ParseIdentifierError::new();
/// assert_eq!("not a valid ident".parse::<IdentifierType>(), Err(ERR));
/// ```
#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct ParseIdentifierError {
    // Private zero-sized field: because it is not `pub`, code outside this
    // module cannot build the error with a struct literal and must go through
    // `new()` or `default()`.
    _private: (),
}
503
504impl ParseIdentifierError {
505 /// Construct a new `ParseIdentifierError`.
506 ///
507 /// Prefer to use `new()` over `default()` since `new()` is const.
508 ///
509 /// # Examples
510 ///
511 /// ```
512 /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
513 /// const ERR: ParseIdentifierError = ParseIdentifierError::new();
514 /// assert_eq!("not a valid ident".parse::<IdentifierType>(), Err(ERR));
515 /// assert_eq!(ERR, ParseIdentifierError::default());
516 /// ```
517 #[must_use]
518 pub const fn new() -> Self {
519 Self { _private: () }
520 }
521}
522
523impl fmt::Display for ParseIdentifierError {
524 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
525 f.write_str("Failed to parse given string as a known identifier type")
526 }
527}
528
529#[inline]
530fn parse(name: &[u8]) -> Option<IdentifierType> {
531 match name {
532 [] | [b'\0'] => None,
533 // special global variable
534 [b'$', name @ ..] if is_special_global_name(name) => Some(IdentifierType::Global),
535 // global variable
536 [b'$', name @ ..] => parse_ident(name, IdentifierType::Global),
537 // class variable
538 [b'@', b'@', name @ ..] => parse_ident(name, IdentifierType::Class),
539 // instance variable
540 [b'@', name @ ..] => parse_ident(name, IdentifierType::Instance),
541 // Symbolic method names
542 name if is_symbolic_method_name(name) => Some(IdentifierType::Junk),
543 [b'=' | b'!' | b'[', ..] => None,
544 [first, ..] if *first != b'_' && first.is_ascii() && !first.is_ascii_alphabetic() => None,
545 // Constant name
546 name if is_const_name(name) => parse_ident(name, IdentifierType::Constant),
547 // Local variable
548 name => parse_ident(name, IdentifierType::Local),
549 }
550}
551
552#[inline]
553fn parse_ident(name: &[u8], id_type: IdentifierType) -> Option<IdentifierType> {
554 match name {
555 [] => None,
556 [first, name @ .., b'='] if *first != b'_' && first.is_ascii() && !first.is_ascii_alphabetic() => {
557 if let None | Some(IdentifierType::AttrSet) = parse_ident(name, id_type) {
558 None
559 } else {
560 Some(id_type)
561 }
562 }
563 [first, ..] if *first != b'_' && first.is_ascii() && !first.is_ascii_alphabetic() => None,
564 name if is_ident_until(name).is_none() => Some(id_type),
565 [name @ .., b'!' | b'?'] if is_ident_until(name).is_none() => {
566 if matches!(
567 id_type,
568 IdentifierType::Global | IdentifierType::Class | IdentifierType::Instance
569 ) {
570 return None;
571 }
572 Some(IdentifierType::Junk)
573 }
574 [name @ .., b'='] if is_ident_until(name).is_none() => {
575 if matches!(id_type, IdentifierType::Local | IdentifierType::Constant) {
576 return Some(IdentifierType::AttrSet);
577 }
578 None
579 }
580 _ => None,
581 }
582}
583
584#[inline]
585#[expect(clippy::match_same_arms, reason = "for clarity")]
586fn is_special_global_name(name: &[u8]) -> bool {
587 match name {
588 [] => false,
589 [first, rest @ ..] if is_special_global_punct(*first) => rest.is_empty(),
590 [b'-'] => false,
591 [b'-', rest @ ..] if is_next_ident_exhausting(rest) => true,
592 [b'-', ..] => false,
593 name => name.chars().all(char::is_numeric),
594 }
595}
596
/// Return whether the input is a "junk" symbolic method name.
///
/// Ruby has a fixed set of method names that consist solely of ASCII symbols,
/// such as `<=>`, `[]=` and `!~`. The input must match one of them exactly.
#[inline]
fn is_symbolic_method_name(name: &[u8]) -> bool {
    /// Every symbolic method name, grouped by operator family.
    const SYMBOLIC_METHOD_NAMES: &[&[u8]] = &[
        // comparison and shifts
        b"<", b"<<", b"<=", b"<=>", b">", b">>", b">=",
        // matching and equality
        b"=~", b"==", b"===",
        // arithmetic, including unary plus and minus
        b"*", b"**", b"+", b"-", b"+@", b"-@",
        // bitwise and miscellaneous single-byte operators
        b"|", b"^", b"&", b"/", b"%", b"~", b"`",
        // element reference and assignment
        b"[]", b"[]=",
        // negation
        b"!", b"!=", b"!~",
    ];
    SYMBOLIC_METHOD_NAMES.contains(&name)
}
634
635/// Return whether the input is a valid constant name.
636///
637/// Constant names require the first character to be either ASCII or Unicode
638/// uppercase.
639#[inline]
640fn is_const_name(name: &[u8]) -> bool {
641 match name {
642 [] => false,
643 name if name.is_ascii() => name.iter().next().is_some_and(u8::is_ascii_uppercase),
644 // uses Unicode `Uppercase` property
645 name if name.is_utf8() => name.chars().next().is_some_and(char::is_uppercase),
646 _ => false,
647 }
648}
649
/// Determine if a [`char`] can be used in a valid identifier.
///
/// A char is an ident char when it is `_`, an ASCII letter or digit, or any
/// non-ASCII char.
///
/// # Header declaration
///
/// Ported from the following C macro in MRI:
///
/// ```c
/// #define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))
/// ```
#[inline]
fn is_ident_char(ch: char) -> bool {
    match ch {
        '_' => true,
        ascii if ascii.is_ascii() => ascii.is_ascii_alphanumeric(),
        // Every non-ASCII char is accepted.
        _ => true,
    }
}
663
664/// Consume the input until a non-ident character is found.
665///
666/// Scan the [`char`]s in the input until either invalid UTF-8 or an invalid
667/// ident is found. See [`is_ident_char`].
668///
669/// This method returns `Some(index)` of the start of the first invalid ident
670/// or `None` if the whole input is a valid ident.
671///
672/// Empty slices are not valid idents.
673#[inline]
674fn is_ident_until(mut name: &[u8]) -> Option<usize> {
675 // Empty strings are not idents.
676 if name.is_empty() {
677 return Some(0);
678 }
679 let mut start = 0;
680 while !name.is_empty() {
681 let (ch, size) = bstr::decode_utf8(name);
682 match ch {
683 Some(ch) if !is_ident_char(ch) => return Some(start),
684 None => return Some(start),
685 Some(_) => {
686 name = &name[size..];
687 start += size;
688 }
689 }
690 }
691 None
692}
693
694/// Determine if the next char is a valid ident char and consumes all bytes in
695/// the input.
696///
697/// This function is used to determine whether certain kinds of single character
698/// globals are valid idents.
699///
700/// See also [`is_ident_char`].
701#[inline]
702fn is_next_ident_exhausting(name: &[u8]) -> bool {
703 let (ch, size) = bstr::decode_utf8(name);
704 match ch {
705 Some(ch) if is_ident_char(ch) => name.len() == size,
706 Some(_) | None => false,
707 }
708}
709
/// Returns true if `ch` is one of the “special global punctuation” bytes
/// as defined by MRI’s `SPECIAL_PUNCT` macro.
///
/// MRI's `parse.y` stores this set as the three-word bit table
/// `ruby_global_name_punct_bits`, indexed by `ch / 32 - 1` with bit `ch % 32`:
///
/// ```c
/// #define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
/// #define SPECIAL_PUNCT(idx) (
///     BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) |
///     BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) |
///     BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) |
///     BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) |
///     BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) |
///     BIT('0', idx))
/// ```
///
/// The resulting words are `4227980502`, `268435457` and `1073741825`. This
/// function tests membership in the same set of 21 bytes directly.
#[inline]
pub const fn is_special_global_punct(ch: u8) -> bool {
    // exactly the 21 chars from SPECIAL_PUNCT in MRI
    matches!(
        ch,
        b'~' | b'*'
            | b'$'
            | b'?'
            | b'!'
            | b'@'
            | b'/'
            | b'\\'
            | b';'
            | b','
            | b'.'
            | b'='
            | b':'
            | b'<'
            | b'>'
            | b'"'
            | b'&'
            | b'`'
            | b'\''
            | b'+'
            | b'0'
    )
}
767
768#[cfg(test)]
769mod tests {
770 use super::{
771 IdentifierType, ParseIdentifierError, is_ident_until, is_next_ident_exhausting, is_special_global_name,
772 };
773
774 #[test]
775 fn special_global_name() {
776 let name = &b"a"[..];
777 assert!(!is_special_global_name(name));
778 let name = "💎";
779 assert!(!is_special_global_name(name.as_bytes()));
780 let name = &b"ab"[..];
781 assert!(!is_special_global_name(name));
782 let name = "-💎";
783 assert!(is_special_global_name(name.as_bytes()));
784 let name = &b"$"[..];
785 assert!(is_special_global_name(name));
786 let name = &b"~"[..];
787 assert!(is_special_global_name(name));
788 let name = "�";
789 assert!(!is_special_global_name(name.as_bytes()));
790 let name = "-�";
791 assert!(is_special_global_name(name.as_bytes()));
792 }
793
794 #[test]
795 fn is_ident_until_empty() {
796 let name = &[];
797 assert_eq!(is_ident_until(name), Some(0));
798 }
799
800 #[test]
801 fn is_ident_until_lowercase_ascii() {
802 let name = &b"abc"[..];
803 assert_eq!(is_ident_until(name), None);
804 let name = &b"abc_123"[..];
805 assert_eq!(is_ident_until(name), None);
806 let name = &b"_"[..];
807 assert_eq!(is_ident_until(name), None);
808 let name = &b"_e"[..];
809 assert_eq!(is_ident_until(name), None);
810 let name = &b"_1"[..];
811 assert_eq!(is_ident_until(name), None);
812 }
813
814 #[test]
815 fn is_ident_until_ascii_constant() {
816 let name = &b"Abc"[..];
817 assert_eq!(is_ident_until(name), None);
818 let name = &b"ABC"[..];
819 assert_eq!(is_ident_until(name), None);
820 let name = &b"ABC_XYZ"[..];
821 assert_eq!(is_ident_until(name), None);
822 let name = &b"ABC_123"[..];
823 assert_eq!(is_ident_until(name), None);
824 let name = &b"HTTP2"[..];
825 assert_eq!(is_ident_until(name), None);
826 }
827
828 #[test]
829 fn is_ident_until_unicode() {
830 let name = "ábc";
831 assert_eq!(is_ident_until(name.as_bytes()), None);
832 let name = "abç";
833 assert_eq!(is_ident_until(name.as_bytes()), None);
834 let name = "abc_�";
835 assert_eq!(is_ident_until(name.as_bytes()), None);
836 let name = "abc_💎";
837 assert_eq!(is_ident_until(name.as_bytes()), None);
838
839 let name = "Ábc";
840 assert_eq!(is_ident_until(name.as_bytes()), None);
841 let name = "Abç";
842 assert_eq!(is_ident_until(name.as_bytes()), None);
843 let name = "Abc_�";
844 assert_eq!(is_ident_until(name.as_bytes()), None);
845 let name = "Abc_💎";
846 assert_eq!(is_ident_until(name.as_bytes()), None);
847
848 let name = "💎";
849 assert_eq!(is_ident_until(name.as_bytes()), None);
850 let name = "💎abc";
851 assert_eq!(is_ident_until(name.as_bytes()), None);
852 }
853
854 #[test]
855 fn is_ident_until_invalid_utf8() {
856 let name = &b"\xFF"[..];
857 assert_eq!(is_ident_until(name), Some(0));
858 let name = &b"abc\xFF"[..];
859 assert_eq!(is_ident_until(name), Some(3));
860 let name = &b"abc\xFFxyz"[..];
861 assert_eq!(is_ident_until(name), Some(3));
862
863 let name = &b"\xFF\xFE"[..];
864 assert_eq!(is_ident_until(name), Some(0));
865 let name = &b"abc\xFF\xFE"[..];
866 assert_eq!(is_ident_until(name), Some(3));
867 let name = &b"abc\xFF\xFExyz"[..];
868 assert_eq!(is_ident_until(name), Some(3));
869
870 let name = &b"\xEF\xBF\xBD\xFF"[..];
871 assert_eq!(is_ident_until(name), Some(3));
872 let name = &b"\xF0\x9F\x92\x8E\xFF"[..];
873 assert_eq!(is_ident_until(name), Some(4));
874 }
875
876 #[test]
877 fn is_next_ident_exhausting_empty() {
878 let name = &[];
879 assert!(!is_next_ident_exhausting(name));
880 }
881
882 #[test]
883 fn is_next_ident_exhausting_lowercase_ascii() {
884 let name = &b"a"[..];
885 assert!(is_next_ident_exhausting(name));
886 let name = &b"abc"[..];
887 assert!(!is_next_ident_exhausting(name));
888 let name = &b"1"[..];
889 assert!(is_next_ident_exhausting(name));
890 let name = &b"abc_123"[..];
891 assert!(!is_next_ident_exhausting(name));
892 let name = &b"_"[..];
893 assert!(is_next_ident_exhausting(name));
894 let name = &b"_e"[..];
895 assert!(!is_next_ident_exhausting(name));
896 let name = &b"_1"[..];
897 assert!(!is_next_ident_exhausting(name));
898 }
899
900 #[test]
901 fn is_next_ident_exhausting_ascii_constant() {
902 let name = &b"A"[..];
903 assert!(is_next_ident_exhausting(name));
904 let name = &b"Abc"[..];
905 assert!(!is_next_ident_exhausting(name));
906 let name = &b"ABC"[..];
907 assert!(!is_next_ident_exhausting(name));
908 let name = &b"ABC_XYZ"[..];
909 assert!(!is_next_ident_exhausting(name));
910 let name = &b"ABC_123"[..];
911 assert!(!is_next_ident_exhausting(name));
912 let name = &b"HTTP2"[..];
913 assert!(!is_next_ident_exhausting(name));
914 }
915
916 #[test]
917 fn is_next_ident_exhausting_unicode() {
918 let name = "ábc";
919 assert!(!is_next_ident_exhausting(name.as_bytes()));
920 let name = "abç";
921 assert!(!is_next_ident_exhausting(name.as_bytes()));
922 let name = "abc_�";
923 assert!(!is_next_ident_exhausting(name.as_bytes()));
924 let name = "abc_💎";
925 assert!(!is_next_ident_exhausting(name.as_bytes()));
926
927 let name = "Ábc";
928 assert!(!is_next_ident_exhausting(name.as_bytes()));
929 let name = "Abç";
930 assert!(!is_next_ident_exhausting(name.as_bytes()));
931 let name = "Abc_�";
932 assert!(!is_next_ident_exhausting(name.as_bytes()));
933 let name = "Abc_💎";
934 assert!(!is_next_ident_exhausting(name.as_bytes()));
935 let name = "💎abc";
936 assert!(!is_next_ident_exhausting(name.as_bytes()));
937
938 let name = "á";
939 assert!(is_next_ident_exhausting(name.as_bytes()));
940 let name = "ç";
941 assert!(is_next_ident_exhausting(name.as_bytes()));
942 let name = "�";
943 assert!(is_next_ident_exhausting(name.as_bytes()));
944 let name = "💎";
945 assert!(is_next_ident_exhausting(name.as_bytes()));
946 }
947
948 #[test]
949 fn is_next_ident_exhausting_invalid_utf8() {
950 let name = &b"\xFF"[..];
951 assert!(!is_next_ident_exhausting(name));
952 let name = &b"abc\xFF"[..];
953 assert!(!is_next_ident_exhausting(name));
954 let name = &b"abc\xFFxyz"[..];
955 assert!(!is_next_ident_exhausting(name));
956
957 let name = &b"\xFF\xFE"[..];
958 assert!(!is_next_ident_exhausting(name));
959 let name = &b"abc\xFF\xFE"[..];
960 assert!(!is_next_ident_exhausting(name));
961 let name = &b"abc\xFF\xFExyz"[..];
962 assert!(!is_next_ident_exhausting(name));
963
964 let name = &b"\xEF\xBF\xBD\xFF"[..];
965 assert!(!is_next_ident_exhausting(name));
966 let name = &b"\xF0\x9F\x92\x8E\xFF"[..];
967 assert!(!is_next_ident_exhausting(name));
968 }
969
970 #[test]
971 fn ascii_ident() {
972 assert_eq!("foobar".parse::<IdentifierType>(), Ok(IdentifierType::Local));
973 assert_eq!("ruby_is_simple".parse::<IdentifierType>(), Ok(IdentifierType::Local));
974 }
975
976 #[test]
977 fn ascii_constant() {
978 assert_eq!("Foobar".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
979 assert_eq!("FooBar".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
980 assert_eq!("FOOBAR".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
981 assert_eq!("FOO_BAR".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
982 assert_eq!("RUBY_IS_SIMPLE".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
983 }
984
985 #[test]
986 fn empty() {
987 assert_eq!("".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
988 }
989
990 #[test]
991 fn single_nul() {
992 assert_eq!("\0".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
993 }
994
995 #[test]
996 fn non_ascii_numerics() {
997 assert_eq!("١".parse::<IdentifierType>(), Ok(IdentifierType::Local));
998 assert_eq!(
999 "١١١١١١١١١١١١١١١١١١".parse::<IdentifierType>(),
1000 Ok(IdentifierType::Local)
1001 );
1002 assert_eq!("①".parse::<IdentifierType>(), Ok(IdentifierType::Local));
1003 }
1004
1005 #[test]
1006 fn recursive_ident() {
1007 assert_eq!("@@@foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1008 assert_eq!("@@@@foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1009 assert_eq!("@$foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1010 assert_eq!("@$-w".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1011 assert_eq!("@@$foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1012 assert_eq!("@@$-w".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1013 assert_eq!("$@foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1014 assert_eq!("$@@foo".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1015 assert_eq!("$$-w".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1016 }
1017
1018 #[test]
1019 fn attr_bang() {
1020 assert_eq!("@foo!".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1021 assert_eq!("@@foo!".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1022 assert_eq!("$foo!".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1023 }
1024
// A trailing `?` is rejected on instance, class and global variable names.
#[test]
fn attr_question() {
    let parse = |candidate: &str| candidate.parse::<IdentifierType>();
    let rejected: Result<IdentifierType, ParseIdentifierError> = Err(ParseIdentifierError::new());

    assert_eq!(parse("@foo?"), rejected);
    assert_eq!(parse("@@foo?"), rejected);
    assert_eq!(parse("$foo?"), rejected);
}
1031
// A trailing `=` is rejected on instance, class and global variable names.
#[test]
fn attr_setter() {
    let parse = |candidate: &str| candidate.parse::<IdentifierType>();
    let rejected: Result<IdentifierType, ParseIdentifierError> = Err(ParseIdentifierError::new());

    assert_eq!(parse("@foo="), rejected);
    assert_eq!(parse("@@foo="), rejected);
    assert_eq!(parse("$foo="), rejected);
}
1038
// A byte string containing the invalid UTF-8 byte `0xFF` (and `-`
// separators) is rejected when converted from raw bytes.
#[test]
fn invalid_utf8() {
    let bytes: &[u8] = b"invalid-\xFF-utf8";
    let parsed = IdentifierType::try_from(bytes);
    assert_eq!(parsed, Err(ParseIdentifierError::new()));
}
1046
// An emoji is accepted as an identifier character; the leading sigil (or
// lack of one) alone decides the resulting classification.
#[test]
fn emoji() {
    let classify = |ident: &str| IdentifierType::try_from(ident);

    let bare = classify("💎");
    assert_eq!(bare, Ok(IdentifierType::Local));

    let global = classify("$💎");
    assert_eq!(global, Ok(IdentifierType::Global));

    let instance = classify("@💎");
    assert_eq!(instance, Ok(IdentifierType::Instance));

    let class = classify("@@💎");
    assert_eq!(class, Ok(IdentifierType::Class));
}
1054
// The Unicode replacement character is accepted as an identifier character,
// both as the entire name and as the last character of a longer name.
#[test]
fn unicode_replacement_char() {
    let classify = |ident: &str| IdentifierType::try_from(ident);

    // The replacement character is the whole name.
    assert_eq!(classify("�"), Ok(IdentifierType::Local));
    assert_eq!(classify("$�"), Ok(IdentifierType::Global));
    assert_eq!(classify("@�"), Ok(IdentifierType::Instance));
    assert_eq!(classify("@@�"), Ok(IdentifierType::Class));

    // The replacement character trails an ASCII name.
    assert_eq!(classify("abc�"), Ok(IdentifierType::Local));
    assert_eq!(classify("$abc�"), Ok(IdentifierType::Global));
    assert_eq!(classify("@abc�"), Ok(IdentifierType::Instance));
    assert_eq!(classify("@@abc�"), Ok(IdentifierType::Class));
}
1067
// `$-` followed by the invalid UTF-8 byte `0xFF` is rejected.
#[test]
fn invalid_utf8_special_global() {
    let bytes: &[u8] = b"$-\xFF";
    let parsed = IdentifierType::try_from(bytes);
    assert_eq!(parsed, Err(ParseIdentifierError::new()));
}
1075
// `$-` followed by exactly one replacement character is a global; anything
// after that single character makes the name invalid.
#[test]
fn replacement_char_special_global() {
    let classify = |ident: &str| IdentifierType::try_from(ident);

    let single = classify("$-�");
    assert_eq!(single, Ok(IdentifierType::Global));

    let with_ascii_tail = classify("$-�a");
    assert_eq!(with_ascii_tail, Err(ParseIdentifierError::new()));

    let doubled = classify("$-��");
    assert_eq!(doubled, Err(ParseIdentifierError::new()));
}
1082}
1083
#[cfg(test)]
mod specs {
    use super::IdentifierType;

    // From `spec/core/symbol/inspect_spec.rb`:
    //
    // ```ruby
    // symbols = {
    //   fred: ":fred",
    //   :fred? => ":fred?",
    //   :fred! => ":fred!",
    //   :$ruby => ":$ruby",
    //   :@ruby => ":@ruby",
    //   :@@ruby => ":@@ruby",
    //   :"$ruby!" => ":\"$ruby!\"",
    //   :"$ruby?" => ":\"$ruby?\"",
    //   :"@ruby!" => ":\"@ruby!\"",
    //   :"@ruby?" => ":\"@ruby?\"",
    //   :"@@ruby!" => ":\"@@ruby!\"",
    //   :"@@ruby?" => ":\"@@ruby?\"",
    //
    //   :$-w => ":$-w",
    //   :"$-ww" => ":\"$-ww\"",
    //   :"$+" => ":$+",
    //   :"$~" => ":$~",
    //   :"$:" => ":$:",
    //   :"$?" => ":$?",
    //   :"$<" => ":$<",
    //   :"$_" => ":$_",
    //   :"$/" => ":$/",
    //   :"$'" => ":$'",
    //   :"$\"" => ":$\"",
    //   :"$$" => ":$$",
    //   :"$." => ":$.",
    //   :"$," => ":$,",
    //   :"$`" => ":$`",
    //   :"$!" => ":$!",
    //   :"$;" => ":$;",
    //   :"$\\" => ":$\\",
    //   :"$=" => ":$=",
    //   :"$*" => ":$*",
    //   :"$>" => ":$>",
    //   :"$&" => ":$&",
    //   :"$@" => ":$@",
    //   :"$1234" => ":$1234",
    //
    //   :-@ => ":-@",
    //   :+@ => ":+@",
    //   :% => ":%",
    //   :& => ":&",
    //   :* => ":*",
    //   :** => ":**",
    //   :"/" => ":/", # lhs quoted for emacs happiness
    //   :< => ":<",
    //   :<= => ":<=",
    //   :<=> => ":<=>",
    //   :== => ":==",
    //   :=== => ":===",
    //   :=~ => ":=~",
    //   :> => ":>",
    //   :>= => ":>=",
    //   :>> => ":>>",
    //   :[] => ":[]",
    //   :[]= => ":[]=",
    //   :"\<\<" => ":\<\<",
    //   :^ => ":^",
    //   :"`" => ":`", # for emacs, and justice!
    //   :~ => ":~",
    //   :| => ":|",
    //
    //   :"!" => [":\"!\"", ":!" ],
    //   :"!=" => [":\"!=\"", ":!="],
    //   :"!~" => [":\"!~\"", ":!~"],
    //   :"\$" => ":\"$\"", # for justice!
    //   :"&&" => ":\"&&\"",
    //   :"'" => ":\"\'\"",
    //   :"," => ":\",\"",
    //   :"." => ":\".\"",
    //   :".." => ":\"..\"",
    //   :"..." => ":\"...\"",
    //   :":" => ":\":\"",
    //   :"::" => ":\"::\"",
    //   :";" => ":\";\"",
    //   :"=" => ":\"=\"",
    //   :"=>" => ":\"=>\"",
    //   :"\?" => ":\"?\"", # rawr!
    //   :"@" => ":\"@\"",
    //   :"||" => ":\"||\"",
    //   :"|||" => ":\"|||\"",
    //   :"++" => ":\"++\"",
    //
    //   :"\"" => ":\"\\\"\"",
    //   :"\"\"" => ":\"\\\"\\\"\"",
    //
    //   :"9" => ":\"9\"",
    //   :"foo bar" => ":\"foo bar\"",
    //   :"*foo" => ":\"*foo\"",
    //   :"foo " => ":\"foo \"",
    //   :" foo" => ":\" foo\"",
    //   :" " => ":\" \"",
    // }
    // ```

    // Mirrors the spec table above: a symbol whose `inspect` output is
    // unquoted must parse as an identifier, and a symbol that is always
    // inspected in quotes must not.
    #[test]
    fn specs() {
        // idents
        assert!("fred".parse::<IdentifierType>().is_ok());
        assert!("fred?".parse::<IdentifierType>().is_ok());
        assert!("fred!".parse::<IdentifierType>().is_ok());
        assert!("$ruby".parse::<IdentifierType>().is_ok());
        assert!("@ruby".parse::<IdentifierType>().is_ok());
        assert!("@@ruby".parse::<IdentifierType>().is_ok());

        // sigil-prefixed idents (`$`, `@`, `@@`) can't end in bang or
        // question; bare idents like `fred?` and `fred!` above can.
        assert!("$ruby!".parse::<IdentifierType>().is_err());
        assert!("$ruby?".parse::<IdentifierType>().is_err());
        assert!("@ruby!".parse::<IdentifierType>().is_err());
        assert!("@ruby?".parse::<IdentifierType>().is_err());
        assert!("@@ruby!".parse::<IdentifierType>().is_err());
        assert!("@@ruby?".parse::<IdentifierType>().is_err());

        // globals
        assert!("$-w".parse::<IdentifierType>().is_ok());
        assert!("$-ww".parse::<IdentifierType>().is_err());
        assert!("$+".parse::<IdentifierType>().is_ok());
        assert!("$~".parse::<IdentifierType>().is_ok());
        assert!("$:".parse::<IdentifierType>().is_ok());
        assert!("$?".parse::<IdentifierType>().is_ok());
        assert!("$<".parse::<IdentifierType>().is_ok());
        assert!("$_".parse::<IdentifierType>().is_ok());
        assert!("$/".parse::<IdentifierType>().is_ok());
        // `:"$'" => ":$'"` is listed in the spec table, so it is exercised
        // here alongside the other special globals.
        assert!("$'".parse::<IdentifierType>().is_ok());
        assert!("$\"".parse::<IdentifierType>().is_ok());
        assert!("$$".parse::<IdentifierType>().is_ok());
        assert!("$.".parse::<IdentifierType>().is_ok());
        assert!("$,".parse::<IdentifierType>().is_ok());
        assert!("$`".parse::<IdentifierType>().is_ok());
        assert!("$!".parse::<IdentifierType>().is_ok());
        assert!("$;".parse::<IdentifierType>().is_ok());
        assert!("$\\".parse::<IdentifierType>().is_ok());
        assert!("$=".parse::<IdentifierType>().is_ok());
        assert!("$*".parse::<IdentifierType>().is_ok());
        assert!("$>".parse::<IdentifierType>().is_ok());
        assert!("$&".parse::<IdentifierType>().is_ok());
        assert!("$@".parse::<IdentifierType>().is_ok());
        assert!("$1234".parse::<IdentifierType>().is_ok());

        // symbolic methods
        assert!("-@".parse::<IdentifierType>().is_ok());
        assert!("+@".parse::<IdentifierType>().is_ok());
        assert!("%".parse::<IdentifierType>().is_ok());
        assert!("&".parse::<IdentifierType>().is_ok());
        assert!("*".parse::<IdentifierType>().is_ok());
        assert!("**".parse::<IdentifierType>().is_ok());
        assert!("/".parse::<IdentifierType>().is_ok());
        assert!("<".parse::<IdentifierType>().is_ok());
        assert!("<=".parse::<IdentifierType>().is_ok());
        assert!("<=>".parse::<IdentifierType>().is_ok());
        assert!("==".parse::<IdentifierType>().is_ok());
        assert!("===".parse::<IdentifierType>().is_ok());
        assert!("=~".parse::<IdentifierType>().is_ok());
        assert!(">".parse::<IdentifierType>().is_ok());
        assert!(">=".parse::<IdentifierType>().is_ok());
        assert!(">>".parse::<IdentifierType>().is_ok());
        assert!("[]".parse::<IdentifierType>().is_ok());
        assert!("[]=".parse::<IdentifierType>().is_ok());
        assert!("<<".parse::<IdentifierType>().is_ok());
        assert!("^".parse::<IdentifierType>().is_ok());
        assert!("`".parse::<IdentifierType>().is_ok());
        assert!("~".parse::<IdentifierType>().is_ok());
        assert!("|".parse::<IdentifierType>().is_ok());

        // non-symbol symbolics
        //
        // `!`, `!=` and `!~` are accepted because the spec table permits
        // the unquoted inspect form for them.
        assert!("!".parse::<IdentifierType>().is_ok());
        assert!("!=".parse::<IdentifierType>().is_ok());
        assert!("!~".parse::<IdentifierType>().is_ok());
        assert!("$".parse::<IdentifierType>().is_err());
        assert!("&&".parse::<IdentifierType>().is_err());
        assert!("'".parse::<IdentifierType>().is_err());
        assert!(",".parse::<IdentifierType>().is_err());
        assert!(".".parse::<IdentifierType>().is_err());
        assert!("..".parse::<IdentifierType>().is_err());
        assert!("...".parse::<IdentifierType>().is_err());
        assert!(":".parse::<IdentifierType>().is_err());
        assert!("::".parse::<IdentifierType>().is_err());
        assert!(";".parse::<IdentifierType>().is_err());
        assert!("=".parse::<IdentifierType>().is_err());
        assert!("=>".parse::<IdentifierType>().is_err());
        assert!("?".parse::<IdentifierType>().is_err());
        assert!("@".parse::<IdentifierType>().is_err());
        assert!("||".parse::<IdentifierType>().is_err());
        assert!("|||".parse::<IdentifierType>().is_err());
        assert!("++".parse::<IdentifierType>().is_err());

        // quotes
        assert!(r#"""#.parse::<IdentifierType>().is_err());
        assert!(r#""""#.parse::<IdentifierType>().is_err());

        // leading digit, embedded / leading / trailing whitespace, and an
        // operator glued to a name
        assert!("9".parse::<IdentifierType>().is_err());
        assert!("foo bar".parse::<IdentifierType>().is_err());
        assert!("*foo".parse::<IdentifierType>().is_err());
        assert!("foo ".parse::<IdentifierType>().is_err());
        assert!(" foo".parse::<IdentifierType>().is_err());
        assert!(" ".parse::<IdentifierType>().is_err());
    }
}
1289
/// Functional tests driven by the symbols MRI has interned right after boot.
///
/// Each entry in the `IDENTS` fixture must parse as some kind of identifier.
///
/// # Generation
///
/// The fixture is produced by the script below, which drops every symbol
/// whose `inspect` output has to be wrapped in quotes:
///
/// ```shell
/// cat <<EOF | ruby --disable-gems --disable-did_you_mean
/// def boot_identifier_symbols
///   syms = Symbol.all_symbols.map(&:inspect)
///   # remove symbols that must be debug wrapped in quotes
///   syms = syms.reject { |s| s[0..1] == ':"' }
///
///   fixture = syms.map { |s| "r##\"#{s[1..]}\"##" }
///   puts fixture.join(",\n")
/// end
///
/// boot_identifier_symbols
/// EOF
/// ```
#[cfg(test)]
mod functionals {
    use super::IdentifierType;
    use crate::fixtures::IDENTS;

    // Every fixture symbol is expected to classify successfully; the failure
    // message names the offending symbol.
    #[test]
    fn mri_symbol_idents() {
        IDENTS.iter().copied().for_each(|sym| {
            let parsed = sym.parse::<IdentifierType>();
            assert!(
                parsed.is_ok(),
                "'{sym}' should parse as a valid identifier, but did not."
            );
        });
    }
}