spinoso_symbol/ident.rs
1//! Parser for classifying byte strings as Ruby identifiers.
2//!
3//! This module exposes a parser for determining if a sequence of bytes is a
4//! valid Ruby identifier. These routines also classify idents by type, for
5//! example, a local variable (`is_spinoso`), constant name (`SPINOSO_SYMBOL`),
6//! or class variable (`@@spinoso_symbol`).
7//!
8//! # Examples – local variable
9//!
10//! ```
11//! # use spinoso_symbol::IdentifierType;
12//! assert_eq!(
13//! "spinoso".parse::<IdentifierType>(),
14//! Ok(IdentifierType::Local)
15//! );
16//! assert_eq!(
17//! "spinoso_symbol_features".parse::<IdentifierType>(),
18//! Ok(IdentifierType::Local)
19//! );
20//! ```
21//!
22//! # Examples – constant
23//!
24//! ```
25//! # use spinoso_symbol::IdentifierType;
26//! assert_eq!(
27//! "Spinoso".parse::<IdentifierType>(),
28//! Ok(IdentifierType::Constant)
29//! );
30//! assert_eq!(
31//! "SpinosoSymbol".parse::<IdentifierType>(),
32//! Ok(IdentifierType::Constant)
33//! );
34//! assert_eq!(
35//! "SPINOSO_SYMBOL_FEATURES".parse::<IdentifierType>(),
36//! Ok(IdentifierType::Constant)
37//! );
38//! ```
39//!
40//! # Examples – global
41//!
42//! ```
43//! # use spinoso_symbol::IdentifierType;
44//! assert_eq!(
45//! "$use_spinoso_symbol".parse::<IdentifierType>(),
46//! Ok(IdentifierType::Global)
47//! );
48//! assert_eq!(
49//! "$USE_SPINOSO_SYMBOL".parse::<IdentifierType>(),
50//! Ok(IdentifierType::Global)
51//! );
52//! ```
53//!
54//! # Examples – instance and class variables
55//!
56//! ```
57//! # use spinoso_symbol::IdentifierType;
58//! assert_eq!(
59//! "@artichoke".parse::<IdentifierType>(),
60//! Ok(IdentifierType::Instance)
61//! );
62//! assert_eq!(
63//! "@@rumble".parse::<IdentifierType>(),
64//! Ok(IdentifierType::Class)
65//! );
66//! ```
67//!
68//! # Example – attribute setter
69//!
70//! Attribute setters are local idents that end in `=`.
71//!
72//! ```
73//! # use spinoso_symbol::IdentifierType;
74//! assert_eq!(
75//! "artichoke=".parse::<IdentifierType>(),
76//! Ok(IdentifierType::AttrSet)
77//! );
78//! assert_eq!(
79//! "spinoso_symbol=".parse::<IdentifierType>(),
80//! Ok(IdentifierType::AttrSet)
81//! );
82//! ```
83
84use core::fmt;
85use core::str::FromStr;
86
87use bstr::ByteSlice;
88
89/// Valid types for Ruby identifiers.
90///
91/// Spinoso symbol parses byte strings to determine if they are valid idents for
92/// the [`Inspect`] iterator (which requires the **inspect** Cargo feature to be
93/// enabled). Symbols that are valid idents do not get wrapped in `"` when
94/// generating their debug output.
95///
96/// See variant documentation for the set of ident types.
97///
98/// `IdentifierType`'s primary interface is through the [`TryFrom`] and
99/// [`FromStr`] conversion traits. Parsing `&str` and `&[u8]` is supported.
100///
101/// # Examples – local variable
102///
103/// ```
104/// # use spinoso_symbol::IdentifierType;
105/// assert_eq!(
106/// "spinoso".parse::<IdentifierType>(),
107/// Ok(IdentifierType::Local)
108/// );
109/// assert_eq!(
110/// "spinoso_symbol_features".parse::<IdentifierType>(),
111/// Ok(IdentifierType::Local)
112/// );
113/// ```
114///
115/// # Examples – constant
116///
117/// ```
118/// # use spinoso_symbol::IdentifierType;
119/// assert_eq!(
120/// "Spinoso".parse::<IdentifierType>(),
121/// Ok(IdentifierType::Constant)
122/// );
123/// assert_eq!(
124/// "SpinosoSymbol".parse::<IdentifierType>(),
125/// Ok(IdentifierType::Constant)
126/// );
127/// assert_eq!(
128/// "SPINOSO_SYMBOL_FEATURES".parse::<IdentifierType>(),
129/// Ok(IdentifierType::Constant)
130/// );
131/// ```
132///
133/// # Examples – global
134///
135/// ```
136/// # use spinoso_symbol::IdentifierType;
137/// assert_eq!(
138/// "$use_spinoso_symbol".parse::<IdentifierType>(),
139/// Ok(IdentifierType::Global)
140/// );
141/// assert_eq!(
142/// "$USE_SPINOSO_SYMBOL".parse::<IdentifierType>(),
143/// Ok(IdentifierType::Global)
144/// );
145/// ```
146///
147/// # Examples – instance and class variables
148///
149/// ```
150/// # use spinoso_symbol::IdentifierType;
151/// assert_eq!(
152/// "@artichoke".parse::<IdentifierType>(),
153/// Ok(IdentifierType::Instance)
154/// );
155/// assert_eq!(
156/// "@@rumble".parse::<IdentifierType>(),
157/// Ok(IdentifierType::Class)
158/// );
159/// ```
160///
161/// # Example – attribute setter
162///
163/// Attribute setters are local idents that end in `=`.
164///
165/// ```
166/// # use spinoso_symbol::IdentifierType;
167/// assert_eq!(
168/// "artichoke=".parse::<IdentifierType>(),
169/// Ok(IdentifierType::AttrSet)
170/// );
171/// assert_eq!(
172/// "spinoso_symbol=".parse::<IdentifierType>(),
173/// Ok(IdentifierType::AttrSet)
174/// );
175/// ```
176///
177/// [`Inspect`]: crate::Inspect
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum IdentifierType {
    /// Identifier that contains "junk".
    ///
    /// Junk mostly equates to non-sigil ASCII symbols. Identifiers like
    /// `empty?` and `flatten!` are junk idents. All special symbolic Ruby
    /// methods like `<=>` and `!~` are junk identifiers.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::IdentifierType;
    /// assert_eq!("empty?".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!(
    ///     "flatten!".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Junk)
    /// );
    /// assert_eq!("<=>".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("!~".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("[]".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("[]=".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("=~".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("==".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// assert_eq!("===".parse::<IdentifierType>(), Ok(IdentifierType::Junk));
    /// ```
    Junk,
    /// Identifier that is a global variable name.
    ///
    /// Global variables are prefixed with the sigil `$`. There are two types of
    /// global variables:
    ///
    /// - `$` followed by a valid [`Constant`] or [`Local`] ident.
    /// - Special global variables, which include `Regexp` globals (`$1`..`$9`)
    ///   and `$-w` type globals.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "$".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("$foo".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!(
    ///     "$@foo".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("$0".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!("$1".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!("$9".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!("$-w".parse::<IdentifierType>(), Ok(IdentifierType::Global));
    /// assert_eq!(
    ///     "$-www".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    Global,
    /// Identifier that is an instance variable name.
    ///
    /// Instance variables are prefixed with a single `@` sigil. The remaining
    /// bytes must be a valid [`Constant`] or [`Local`] ident.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "@".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@foo".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@Foo".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@FOO".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@foo_bar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@FooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@FOO_BAR".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Instance)
    /// );
    /// assert_eq!(
    ///     "@$foo".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@0".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@1".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@9".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    Instance,
    /// Identifier that is a class variable name.
    ///
    /// Class variables are prefixed with a double `@@` sigil. The remaining
    /// bytes must be a valid [`Constant`] or [`Local`] ident.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "@@".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("@@foo".parse::<IdentifierType>(), Ok(IdentifierType::Class));
    /// assert_eq!("@@Foo".parse::<IdentifierType>(), Ok(IdentifierType::Class));
    /// assert_eq!("@@FOO".parse::<IdentifierType>(), Ok(IdentifierType::Class));
    /// assert_eq!(
    ///     "@@foo_bar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Class)
    /// );
    /// assert_eq!(
    ///     "@@FooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Class)
    /// );
    /// assert_eq!(
    ///     "@@FOO_BAR".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Class)
    /// );
    /// assert_eq!(
    ///     "@@$foo".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@@0".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@@1".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!(
    ///     "@@9".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    Class,
    /// Identifier that is an "attribute setter" method name.
    ///
    /// `AttrSet` idents end in the `=` sigil and are otherwise valid [`Local`]
    /// or [`Constant`] idents. `AttrSet` idents cannot have any other "junk"
    /// symbols.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "Foo=".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::AttrSet)
    /// );
    /// assert_eq!(
    ///     "foo=".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::AttrSet)
    /// );
    /// assert_eq!(
    ///     "foo_bar=".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::AttrSet)
    /// );
    /// assert_eq!(
    ///     "foo_bar?=".parse::<IdentifierType>(),
    ///     Err(ParseIdentifierError::new())
    /// );
    /// assert_eq!("ω=".parse::<IdentifierType>(), Ok(IdentifierType::AttrSet));
    /// ```
    ///
    /// [`Constant`]: Self::Constant
    /// [`Local`]: Self::Local
    AttrSet,
    /// Identifier that is a constant name.
    ///
    /// Constant names can be either ASCII or Unicode and must start with an
    /// uppercase character.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!(
    ///     "Foo".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!(
    ///     "FOO".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!(
    ///     "FooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!(
    ///     "FOO_BAR".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Constant)
    /// );
    /// assert_eq!("Ω".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
    /// ```
    Constant,
    /// Identifier that is a local variable or method name.
    ///
    /// Local names can be either ASCII or Unicode and must start with a
    /// lowercase character.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
    /// assert_eq!("foo".parse::<IdentifierType>(), Ok(IdentifierType::Local));
    /// assert_eq!("fOO".parse::<IdentifierType>(), Ok(IdentifierType::Local));
    /// assert_eq!(
    ///     "fooBar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Local)
    /// );
    /// assert_eq!(
    ///     "foo_bar".parse::<IdentifierType>(),
    ///     Ok(IdentifierType::Local)
    /// );
    /// assert_eq!("ω".parse::<IdentifierType>(), Ok(IdentifierType::Local));
    /// ```
    Local,
}
426
427impl IdentifierType {
428 /// Return a new, default `IdentifierType`.
429 ///
430 /// Prefer to use `new()` over `default()` since `new()` is const.
431 ///
432 /// # Examples
433 ///
434 /// ```
435 /// # use spinoso_symbol::IdentifierType;
436 /// const ID_TYPE: IdentifierType = IdentifierType::new();
437 /// assert_eq!(ID_TYPE, IdentifierType::Junk);
438 /// assert_eq!(ID_TYPE, IdentifierType::default());
439 /// ```
440 #[must_use]
441 pub const fn new() -> Self {
442 Self::Junk
443 }
444}
445
446impl Default for IdentifierType {
447 /// Construct a "junk" identifier type.
448 ///
449 /// # Examples
450 ///
451 /// ```
452 /// # use spinoso_symbol::IdentifierType;
453 /// const ID_TYPE: IdentifierType = IdentifierType::new();
454 /// assert_eq!(ID_TYPE, IdentifierType::Junk);
455 /// assert_eq!(ID_TYPE, IdentifierType::default());
456 /// ```
457 #[inline]
458 fn default() -> Self {
459 Self::new()
460 }
461}
462
463impl FromStr for IdentifierType {
464 type Err = ParseIdentifierError;
465
466 #[inline]
467 fn from_str(s: &str) -> Result<Self, Self::Err> {
468 parse(s.as_bytes()).ok_or_else(ParseIdentifierError::new)
469 }
470}
471
472impl TryFrom<&str> for IdentifierType {
473 type Error = ParseIdentifierError;
474
475 #[inline]
476 fn try_from(value: &str) -> Result<Self, Self::Error> {
477 parse(value.as_bytes()).ok_or_else(ParseIdentifierError::new)
478 }
479}
480
481impl TryFrom<&[u8]> for IdentifierType {
482 type Error = ParseIdentifierError;
483
484 #[inline]
485 fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
486 parse(value).ok_or_else(ParseIdentifierError::new)
487 }
488}
489
/// Error type returned when a string or byte string cannot be classified as an
/// [`IdentifierType`].
///
/// This error is produced by the [`FromStr`] implementation and by the
/// [`TryFrom`] implementations for `&str` and `&[u8]` on [`IdentifierType`].
/// It carries no additional detail about why parsing failed.
///
/// # Examples
///
/// ```
/// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
/// const ERR: ParseIdentifierError = ParseIdentifierError::new();
/// assert_eq!("not a valid ident".parse::<IdentifierType>(), Err(ERR));
/// ```
#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct ParseIdentifierError {
    // Private unit field: the struct cannot be built with a struct literal
    // outside this module, so callers go through `new()` or `default()`.
    _private: (),
}
503
504impl ParseIdentifierError {
505 /// Construct a new `ParseIdentifierError`.
506 ///
507 /// Prefer to use `new()` over `default()` since `new()` is const.
508 ///
509 /// # Examples
510 ///
511 /// ```
512 /// # use spinoso_symbol::{IdentifierType, ParseIdentifierError};
513 /// const ERR: ParseIdentifierError = ParseIdentifierError::new();
514 /// assert_eq!("not a valid ident".parse::<IdentifierType>(), Err(ERR));
515 /// assert_eq!(ERR, ParseIdentifierError::default());
516 /// ```
517 #[must_use]
518 pub const fn new() -> Self {
519 Self { _private: () }
520 }
521}
522
523impl fmt::Display for ParseIdentifierError {
524 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
525 f.write_str("Failed to parse given string as a known identifier type")
526 }
527}
528
529#[inline]
530fn parse(name: &[u8]) -> Option<IdentifierType> {
531 match name {
532 [] | [b'\0'] => None,
533 // special global variable
534 [b'$', name @ ..] if is_special_global_name(name) => Some(IdentifierType::Global),
535 // global variable
536 [b'$', name @ ..] => parse_ident(name, IdentifierType::Global),
537 // class variable
538 [b'@', b'@', name @ ..] => parse_ident(name, IdentifierType::Class),
539 // instance variable
540 [b'@', name @ ..] => parse_ident(name, IdentifierType::Instance),
541 // Symbolic method names
542 name if is_symbolic_method_name(name) => Some(IdentifierType::Junk),
543 [b'=' | b'!' | b'[', ..] => None,
544 [first, ..] if *first != b'_' && first.is_ascii() && !first.is_ascii_alphabetic() => None,
545 // Constant name
546 name if is_const_name(name) => parse_ident(name, IdentifierType::Constant),
547 // Local variable
548 name => parse_ident(name, IdentifierType::Local),
549 }
550}
551
552#[inline]
553fn parse_ident(name: &[u8], id_type: IdentifierType) -> Option<IdentifierType> {
554 match name {
555 [] => None,
556 [first, name @ .., b'='] if *first != b'_' && first.is_ascii() && !first.is_ascii_alphabetic() => {
557 if let None | Some(IdentifierType::AttrSet) = parse_ident(name, id_type) {
558 None
559 } else {
560 Some(id_type)
561 }
562 }
563 [first, ..] if *first != b'_' && first.is_ascii() && !first.is_ascii_alphabetic() => None,
564 name if is_ident_until(name).is_none() => Some(id_type),
565 [name @ .., b'!' | b'?'] if is_ident_until(name).is_none() => {
566 if matches!(
567 id_type,
568 IdentifierType::Global | IdentifierType::Class | IdentifierType::Instance
569 ) {
570 return None;
571 }
572 Some(IdentifierType::Junk)
573 }
574 [name @ .., b'='] if is_ident_until(name).is_none() => {
575 if matches!(id_type, IdentifierType::Local | IdentifierType::Constant) {
576 return Some(IdentifierType::AttrSet);
577 }
578 None
579 }
580 _ => None,
581 }
582}
583
584#[inline]
585#[expect(clippy::match_same_arms, reason = "for clarity")]
586fn is_special_global_name(name: &[u8]) -> bool {
587 match name {
588 [] => false,
589 [first, rest @ ..] if is_special_global_punct(*first) => rest.is_empty(),
590 [b'-'] => false,
591 [b'-', rest @ ..] if is_next_ident_exhausting(rest) => true,
592 [b'-', ..] => false,
593 name => name.chars().all(char::is_numeric),
594 }
595}
596
/// Return whether the input is a "junk" symbolic method name.
///
/// Ruby has a fixed set of valid method names that consist only of ASCII
/// symbols, for example `<=>`, `[]=` and `!~`. This function checks `name`
/// against that set.
#[inline]
fn is_symbolic_method_name(name: &[u8]) -> bool {
    const SYMBOLIC_METHODS: [&[u8]; 28] = [
        // comparison
        b"<", b"<<", b"<=", b"<=>", b">", b">>", b">=",
        // matching and equality
        b"=~", b"==", b"===",
        // arithmetic, including unary plus and minus
        b"*", b"**", b"+", b"-", b"+@", b"-@",
        // bitwise and miscellaneous operators
        b"|", b"^", b"&", b"/", b"%", b"~", b"`",
        // element reference and assignment
        b"[]", b"[]=",
        // negation
        b"!", b"!=", b"!~",
    ];
    SYMBOLIC_METHODS.iter().any(|&method| method == name)
}
634
635/// Return whether the input is a valid constant name.
636///
637/// Constant names require the first character to be either ASCII or Unicode
638/// uppercase.
639#[inline]
640fn is_const_name(name: &[u8]) -> bool {
641 match name {
642 [] => false,
643 name if name.is_ascii() => name.iter().next().is_some_and(u8::is_ascii_uppercase),
644 // uses Unicode `Uppercase` property
645 name if name.is_utf8() => name.chars().next().is_some_and(char::is_uppercase),
646 _ => false,
647 }
648}
649
/// Return whether a [`char`] may appear in a valid identifier.
///
/// Ident characters are ASCII letters, ASCII digits, `_`, and every non-ASCII
/// character.
///
/// # Header declaration
///
/// This mirrors the following C macro from MRI:
///
/// ```c
/// #define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))
/// ```
#[inline]
fn is_ident_char(ch: char) -> bool {
    // Every non-ASCII character is accepted, so within ASCII only letters,
    // digits and the underscore need an explicit check.
    !ch.is_ascii() || ch == '_' || ch.is_ascii_alphanumeric()
}
663
664/// Consume the input until a non-ident character is found.
665///
666/// Scan the [`char`]s in the input until either invalid UTF-8 or an invalid
667/// ident is found. See [`is_ident_char`].
668///
669/// This method returns `Some(index)` of the start of the first invalid ident
670/// or `None` if the whole input is a valid ident.
671///
672/// Empty slices are not valid idents.
673#[inline]
674fn is_ident_until(mut name: &[u8]) -> Option<usize> {
675 // Empty strings are not idents.
676 if name.is_empty() {
677 return Some(0);
678 }
679 let mut start = 0;
680 while !name.is_empty() {
681 let (ch, size) = bstr::decode_utf8(name);
682 match ch {
683 Some(ch) if !is_ident_char(ch) => return Some(start),
684 None => return Some(start),
685 Some(_) => {
686 name = &name[size..];
687 start += size;
688 }
689 }
690 }
691 None
692}
693
694/// Determine if the next char is a valid ident char and consumes all bytes in
695/// the input.
696///
697/// This function is used to determine whether certain kinds of single character
698/// globals are valid idents.
699///
700/// See also [`is_ident_char`].
701#[inline]
702fn is_next_ident_exhausting(name: &[u8]) -> bool {
703 let (ch, size) = bstr::decode_utf8(name);
704 match ch {
705 Some(ch) if is_ident_char(ch) => name.len() == size,
706 Some(_) | None => false,
707 }
708}
709
710// This function is defined by a macro in `parse.y` in MRI.
711//
712// ```c
713// #define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
714// #define SPECIAL_PUNCT(idx) ( \
715// BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
716// BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
717// BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
718// BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
719// BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
720// BIT('0', idx))
721// const unsigned int ruby_global_name_punct_bits[] = {
722// SPECIAL_PUNCT(0),
723// SPECIAL_PUNCT(1),
724// SPECIAL_PUNCT(2),
725// };
726// ```
727//
728// The contents of `ruby_global_name_punct_bits` are:
729//
730// ```console
731// [2.6.6] > def bit(c, idx); c / 32 - 1 == idx ? 1 << (c % 32) : 0; end
732// [2.6.6] > chars = ["~", "*", "$", "?", "!", "@", "/", "\\", ";", ",", ".", "=", ":", "<", ">", "\"", "&", "`", "'", "+", "0"]
733//
734// [2.6.6] > chars.map(&:ord).map { |ch| bit(ch, 0) }.reduce(0, :|)
735// => 4227980502
736// [2.6.6] > chars.map(&:ord).map { |ch| bit(ch, 1) }.reduce(0, :|)
737// => 268435457
738// [2.6.6] > chars.map(&:ord).map { |ch| bit(ch, 2) }.reduce(0, :|)
739// => 1073741825
740// ```
741//
742// Which corresponds to a fixed set of 21 ASCII symbols:
743//
744// ```ruby
745// def is_special_global_punct(ch)
746// idx = (ch - 0x20) / 32;
747// case idx
748// when 0 then (4_227_980_502 >> (ch % 32)) & 1 > 0
749// when 1 then (268_435_457 >> (ch % 32)) & 1 > 0
750// when 2 then (1_073_741_825 >> (ch % 32)) & 1 > 0
751// else
752// false
753// end
754// end
755//
756// h = {}
757// (0..255).each do |ch|
758// h[ch.chr] = ch if is_special_global_punct(ch)
759// end
760// h.keys.map {|k| "b'#{k.inspect[1..-2]}'"}.join(" | ")
761// ```
762//
// TODO: Generate this table inside a const function. The const `match`, `if`,
// and loop support this requires has been stable since Rust 1.46.0.
/// Return whether `ch` is one of the 21 ASCII bytes that may follow `$` to
/// form a single-character special global such as `$~` or `$0`.
#[inline]
fn is_special_global_punct(ch: u8) -> bool {
    // The accepted bytes, listed in ascending ASCII order.
    const SPECIAL_PUNCT: &[u8] = b"!\"$&'*+,./0:;<=>?@\\`~";
    SPECIAL_PUNCT.contains(&ch)
}
792
793#[cfg(test)]
794mod tests {
795 use super::{
796 IdentifierType, ParseIdentifierError, is_ident_until, is_next_ident_exhausting, is_special_global_name,
797 };
798
799 #[test]
800 fn special_global_name() {
801 let name = &b"a"[..];
802 assert!(!is_special_global_name(name));
803 let name = "💎";
804 assert!(!is_special_global_name(name.as_bytes()));
805 let name = &b"ab"[..];
806 assert!(!is_special_global_name(name));
807 let name = "-💎";
808 assert!(is_special_global_name(name.as_bytes()));
809 let name = &b"$"[..];
810 assert!(is_special_global_name(name));
811 let name = &b"~"[..];
812 assert!(is_special_global_name(name));
813 let name = "�";
814 assert!(!is_special_global_name(name.as_bytes()));
815 let name = "-�";
816 assert!(is_special_global_name(name.as_bytes()));
817 }
818
819 #[test]
820 fn is_ident_until_empty() {
821 let name = &[];
822 assert_eq!(is_ident_until(name), Some(0));
823 }
824
825 #[test]
826 fn is_ident_until_lowercase_ascii() {
827 let name = &b"abc"[..];
828 assert_eq!(is_ident_until(name), None);
829 let name = &b"abc_123"[..];
830 assert_eq!(is_ident_until(name), None);
831 let name = &b"_"[..];
832 assert_eq!(is_ident_until(name), None);
833 let name = &b"_e"[..];
834 assert_eq!(is_ident_until(name), None);
835 let name = &b"_1"[..];
836 assert_eq!(is_ident_until(name), None);
837 }
838
839 #[test]
840 fn is_ident_until_ascii_constant() {
841 let name = &b"Abc"[..];
842 assert_eq!(is_ident_until(name), None);
843 let name = &b"ABC"[..];
844 assert_eq!(is_ident_until(name), None);
845 let name = &b"ABC_XYZ"[..];
846 assert_eq!(is_ident_until(name), None);
847 let name = &b"ABC_123"[..];
848 assert_eq!(is_ident_until(name), None);
849 let name = &b"HTTP2"[..];
850 assert_eq!(is_ident_until(name), None);
851 }
852
853 #[test]
854 fn is_ident_until_unicode() {
855 let name = "ábc";
856 assert_eq!(is_ident_until(name.as_bytes()), None);
857 let name = "abç";
858 assert_eq!(is_ident_until(name.as_bytes()), None);
859 let name = "abc_�";
860 assert_eq!(is_ident_until(name.as_bytes()), None);
861 let name = "abc_💎";
862 assert_eq!(is_ident_until(name.as_bytes()), None);
863
864 let name = "Ábc";
865 assert_eq!(is_ident_until(name.as_bytes()), None);
866 let name = "Abç";
867 assert_eq!(is_ident_until(name.as_bytes()), None);
868 let name = "Abc_�";
869 assert_eq!(is_ident_until(name.as_bytes()), None);
870 let name = "Abc_💎";
871 assert_eq!(is_ident_until(name.as_bytes()), None);
872
873 let name = "💎";
874 assert_eq!(is_ident_until(name.as_bytes()), None);
875 let name = "💎abc";
876 assert_eq!(is_ident_until(name.as_bytes()), None);
877 }
878
879 #[test]
880 fn is_ident_until_invalid_utf8() {
881 let name = &b"\xFF"[..];
882 assert_eq!(is_ident_until(name), Some(0));
883 let name = &b"abc\xFF"[..];
884 assert_eq!(is_ident_until(name), Some(3));
885 let name = &b"abc\xFFxyz"[..];
886 assert_eq!(is_ident_until(name), Some(3));
887
888 let name = &b"\xFF\xFE"[..];
889 assert_eq!(is_ident_until(name), Some(0));
890 let name = &b"abc\xFF\xFE"[..];
891 assert_eq!(is_ident_until(name), Some(3));
892 let name = &b"abc\xFF\xFExyz"[..];
893 assert_eq!(is_ident_until(name), Some(3));
894
895 let name = &b"\xEF\xBF\xBD\xFF"[..];
896 assert_eq!(is_ident_until(name), Some(3));
897 let name = &b"\xF0\x9F\x92\x8E\xFF"[..];
898 assert_eq!(is_ident_until(name), Some(4));
899 }
900
901 #[test]
902 fn is_next_ident_exhausting_empty() {
903 let name = &[];
904 assert!(!is_next_ident_exhausting(name));
905 }
906
907 #[test]
908 fn is_next_ident_exhausting_lowercase_ascii() {
909 let name = &b"a"[..];
910 assert!(is_next_ident_exhausting(name));
911 let name = &b"abc"[..];
912 assert!(!is_next_ident_exhausting(name));
913 let name = &b"1"[..];
914 assert!(is_next_ident_exhausting(name));
915 let name = &b"abc_123"[..];
916 assert!(!is_next_ident_exhausting(name));
917 let name = &b"_"[..];
918 assert!(is_next_ident_exhausting(name));
919 let name = &b"_e"[..];
920 assert!(!is_next_ident_exhausting(name));
921 let name = &b"_1"[..];
922 assert!(!is_next_ident_exhausting(name));
923 }
924
925 #[test]
926 fn is_next_ident_exhausting_ascii_constant() {
927 let name = &b"A"[..];
928 assert!(is_next_ident_exhausting(name));
929 let name = &b"Abc"[..];
930 assert!(!is_next_ident_exhausting(name));
931 let name = &b"ABC"[..];
932 assert!(!is_next_ident_exhausting(name));
933 let name = &b"ABC_XYZ"[..];
934 assert!(!is_next_ident_exhausting(name));
935 let name = &b"ABC_123"[..];
936 assert!(!is_next_ident_exhausting(name));
937 let name = &b"HTTP2"[..];
938 assert!(!is_next_ident_exhausting(name));
939 }
940
941 #[test]
942 fn is_next_ident_exhausting_unicode() {
943 let name = "ábc";
944 assert!(!is_next_ident_exhausting(name.as_bytes()));
945 let name = "abç";
946 assert!(!is_next_ident_exhausting(name.as_bytes()));
947 let name = "abc_�";
948 assert!(!is_next_ident_exhausting(name.as_bytes()));
949 let name = "abc_💎";
950 assert!(!is_next_ident_exhausting(name.as_bytes()));
951
952 let name = "Ábc";
953 assert!(!is_next_ident_exhausting(name.as_bytes()));
954 let name = "Abç";
955 assert!(!is_next_ident_exhausting(name.as_bytes()));
956 let name = "Abc_�";
957 assert!(!is_next_ident_exhausting(name.as_bytes()));
958 let name = "Abc_💎";
959 assert!(!is_next_ident_exhausting(name.as_bytes()));
960 let name = "💎abc";
961 assert!(!is_next_ident_exhausting(name.as_bytes()));
962
963 let name = "á";
964 assert!(is_next_ident_exhausting(name.as_bytes()));
965 let name = "ç";
966 assert!(is_next_ident_exhausting(name.as_bytes()));
967 let name = "�";
968 assert!(is_next_ident_exhausting(name.as_bytes()));
969 let name = "💎";
970 assert!(is_next_ident_exhausting(name.as_bytes()));
971 }
972
973 #[test]
974 fn is_next_ident_exhausting_invalid_utf8() {
975 let name = &b"\xFF"[..];
976 assert!(!is_next_ident_exhausting(name));
977 let name = &b"abc\xFF"[..];
978 assert!(!is_next_ident_exhausting(name));
979 let name = &b"abc\xFFxyz"[..];
980 assert!(!is_next_ident_exhausting(name));
981
982 let name = &b"\xFF\xFE"[..];
983 assert!(!is_next_ident_exhausting(name));
984 let name = &b"abc\xFF\xFE"[..];
985 assert!(!is_next_ident_exhausting(name));
986 let name = &b"abc\xFF\xFExyz"[..];
987 assert!(!is_next_ident_exhausting(name));
988
989 let name = &b"\xEF\xBF\xBD\xFF"[..];
990 assert!(!is_next_ident_exhausting(name));
991 let name = &b"\xF0\x9F\x92\x8E\xFF"[..];
992 assert!(!is_next_ident_exhausting(name));
993 }
994
995 #[test]
996 fn ascii_ident() {
997 assert_eq!("foobar".parse::<IdentifierType>(), Ok(IdentifierType::Local));
998 assert_eq!("ruby_is_simple".parse::<IdentifierType>(), Ok(IdentifierType::Local));
999 }
1000
1001 #[test]
1002 fn ascii_constant() {
1003 assert_eq!("Foobar".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
1004 assert_eq!("FooBar".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
1005 assert_eq!("FOOBAR".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
1006 assert_eq!("FOO_BAR".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
1007 assert_eq!("RUBY_IS_SIMPLE".parse::<IdentifierType>(), Ok(IdentifierType::Constant));
1008 }
1009
1010 #[test]
1011 fn empty() {
1012 assert_eq!("".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1013 }
1014
1015 #[test]
1016 fn single_nul() {
1017 assert_eq!("\0".parse::<IdentifierType>(), Err(ParseIdentifierError::new()));
1018 }
1019
1020 #[test]
1021 fn non_ascii_numerics() {
1022 assert_eq!("١".parse::<IdentifierType>(), Ok(IdentifierType::Local));
1023 assert_eq!(
1024 "١١١١١١١١١١١١١١١١١١".parse::<IdentifierType>(),
1025 Ok(IdentifierType::Local)
1026 );
1027 assert_eq!("①".parse::<IdentifierType>(), Ok(IdentifierType::Local));
1028 }
1029
/// Sigils do not nest: stacking or mixing the `@`, `@@`, and `$` prefixes
/// never yields a valid identifier.
#[test]
fn recursive_ident() {
    let classify = |name: &str| name.parse::<IdentifierType>();
    let rejected = Err(ParseIdentifierError::new());

    // Too many `@` sigils.
    assert_eq!(classify("@@@foo"), rejected);
    assert_eq!(classify("@@@@foo"), rejected);
    // Instance and class variable sigils wrapping a global.
    assert_eq!(classify("@$foo"), rejected);
    assert_eq!(classify("@$-w"), rejected);
    assert_eq!(classify("@@$foo"), rejected);
    assert_eq!(classify("@@$-w"), rejected);
    // Global sigil wrapping another sigiled name.
    assert_eq!(classify("$@foo"), rejected);
    assert_eq!(classify("$@@foo"), rejected);
    assert_eq!(classify("$$-w"), rejected);
}
1042
/// A trailing `!` is rejected on instance, class, and global variable names.
#[test]
fn attr_bang() {
    let classify = |name: &str| name.parse::<IdentifierType>();
    let rejected = Err(ParseIdentifierError::new());

    assert_eq!(classify("@foo!"), rejected);
    assert_eq!(classify("@@foo!"), rejected);
    assert_eq!(classify("$foo!"), rejected);
}
1049
/// A trailing `?` is rejected on instance, class, and global variable names.
#[test]
fn attr_question() {
    let classify = |name: &str| name.parse::<IdentifierType>();
    let rejected = Err(ParseIdentifierError::new());

    assert_eq!(classify("@foo?"), rejected);
    assert_eq!(classify("@@foo?"), rejected);
    assert_eq!(classify("$foo?"), rejected);
}
1056
/// A trailing `=` is rejected on instance, class, and global variable names.
#[test]
fn attr_setter() {
    let classify = |name: &str| name.parse::<IdentifierType>();
    let rejected = Err(ParseIdentifierError::new());

    assert_eq!(classify("@foo="), rejected);
    assert_eq!(classify("@@foo="), rejected);
    assert_eq!(classify("$foo="), rejected);
}
1063
/// A byte slice containing the invalid UTF-8 byte `0xFF` is rejected.
#[test]
fn invalid_utf8() {
    let bytes: &[u8] = b"invalid-\xFF-utf8";
    let parsed = IdentifierType::try_from(bytes);
    assert_eq!(parsed, Err(ParseIdentifierError::new()));
}
1071
/// An emoji is accepted as an identifier character, both bare and behind
/// each of the variable sigils.
#[test]
fn emoji() {
    let classify = |name: &'static str| IdentifierType::try_from(name);

    assert_eq!(classify("💎"), Ok(IdentifierType::Local));
    assert_eq!(classify("$💎"), Ok(IdentifierType::Global));
    assert_eq!(classify("@💎"), Ok(IdentifierType::Instance));
    assert_eq!(classify("@@💎"), Ok(IdentifierType::Class));
}
1079
/// The Unicode replacement character is accepted as an identifier character,
/// both as the whole name and as the last character of a longer name.
#[test]
fn unicode_replacement_char() {
    let classify = |name: &'static str| IdentifierType::try_from(name);

    // Replacement character as the entire name.
    assert_eq!(classify("�"), Ok(IdentifierType::Local));
    assert_eq!(classify("$�"), Ok(IdentifierType::Global));
    assert_eq!(classify("@�"), Ok(IdentifierType::Instance));
    assert_eq!(classify("@@�"), Ok(IdentifierType::Class));

    // Replacement character trailing an ASCII name.
    assert_eq!(classify("abc�"), Ok(IdentifierType::Local));
    assert_eq!(classify("$abc�"), Ok(IdentifierType::Global));
    assert_eq!(classify("@abc�"), Ok(IdentifierType::Instance));
    assert_eq!(classify("@@abc�"), Ok(IdentifierType::Class));
}
1092
/// `$-` followed by the invalid UTF-8 byte `0xFF` is rejected.
#[test]
fn invalid_utf8_special_global() {
    let bytes: &[u8] = b"$-\xFF";
    let parsed = IdentifierType::try_from(bytes);
    assert_eq!(parsed, Err(ParseIdentifierError::new()));
}
1100
/// `$-` followed by exactly one Unicode replacement character is a global;
/// any further character after it makes the name invalid.
#[test]
fn replacement_char_special_global() {
    let classify = |name: &'static str| IdentifierType::try_from(name);
    let rejected = Err(ParseIdentifierError::new());

    assert_eq!(classify("$-�"), Ok(IdentifierType::Global));
    assert_eq!(classify("$-�a"), rejected);
    assert_eq!(classify("$-��"), rejected);
}
1107}
1108
/// Tests derived from the `Symbol#inspect` table in ruby/spec.
///
/// A symbol that the table shows inspecting without quotes is expected to
/// parse as an identifier; a symbol that inspects with quotes is expected to
/// be rejected.
#[cfg(test)]
mod specs {
    use super::IdentifierType;

    // From `spec/core/symbol/inspect_spec.rb`:
    //
    // ```ruby
    // symbols = {
    // fred: ":fred",
    // :fred? => ":fred?",
    // :fred! => ":fred!",
    // :$ruby => ":$ruby",
    // :@ruby => ":@ruby",
    // :@@ruby => ":@@ruby",
    // :"$ruby!" => ":\"$ruby!\"",
    // :"$ruby?" => ":\"$ruby?\"",
    // :"@ruby!" => ":\"@ruby!\"",
    // :"@ruby?" => ":\"@ruby?\"",
    // :"@@ruby!" => ":\"@@ruby!\"",
    // :"@@ruby?" => ":\"@@ruby?\"",
    //
    // :$-w => ":$-w",
    // :"$-ww" => ":\"$-ww\"",
    // :"$+" => ":$+",
    // :"$~" => ":$~",
    // :"$:" => ":$:",
    // :"$?" => ":$?",
    // :"$<" => ":$<",
    // :"$_" => ":$_",
    // :"$/" => ":$/",
    // :"$'" => ":$'",
    // :"$\"" => ":$\"",
    // :"$$" => ":$$",
    // :"$." => ":$.",
    // :"$," => ":$,",
    // :"$`" => ":$`",
    // :"$!" => ":$!",
    // :"$;" => ":$;",
    // :"$\\" => ":$\\",
    // :"$=" => ":$=",
    // :"$*" => ":$*",
    // :"$>" => ":$>",
    // :"$&" => ":$&",
    // :"$@" => ":$@",
    // :"$1234" => ":$1234",
    //
    // :-@ => ":-@",
    // :+@ => ":+@",
    // :% => ":%",
    // :& => ":&",
    // :* => ":*",
    // :** => ":**",
    // :"/" => ":/", # lhs quoted for emacs happiness
    // :< => ":<",
    // :<= => ":<=",
    // :<=> => ":<=>",
    // :== => ":==",
    // :=== => ":===",
    // :=~ => ":=~",
    // :> => ":>",
    // :>= => ":>=",
    // :>> => ":>>",
    // :[] => ":[]",
    // :[]= => ":[]=",
    // :"\<\<" => ":\<\<",
    // :^ => ":^",
    // :"`" => ":`", # for emacs, and justice!
    // :~ => ":~",
    // :| => ":|",
    //
    // :"!" => [":\"!\"", ":!" ],
    // :"!=" => [":\"!=\"", ":!="],
    // :"!~" => [":\"!~\"", ":!~"],
    // :"\$" => ":\"$\"", # for justice!
    // :"&&" => ":\"&&\"",
    // :"'" => ":\"\'\"",
    // :"," => ":\",\"",
    // :"." => ":\".\"",
    // :".." => ":\"..\"",
    // :"..." => ":\"...\"",
    // :":" => ":\":\"",
    // :"::" => ":\"::\"",
    // :";" => ":\";\"",
    // :"=" => ":\"=\"",
    // :"=>" => ":\"=>\"",
    // :"\?" => ":\"?\"", # rawr!
    // :"@" => ":\"@\"",
    // :"||" => ":\"||\"",
    // :"|||" => ":\"|||\"",
    // :"++" => ":\"++\"",
    //
    // :"\"" => ":\"\\\"\"",
    // :"\"\"" => ":\"\\\"\\\"\"",
    //
    // :"9" => ":\"9\"",
    // :"foo bar" => ":\"foo bar\"",
    // :"*foo" => ":\"*foo\"",
    // :"foo " => ":\"foo \"",
    // :" foo" => ":\" foo\"",
    // :" " => ":\" \"",
    // }
    // ```

    /// Walks the spec table above in order, asserting only whether each
    /// symbol name parses, not which `IdentifierType` it classifies as.
    #[test]
    fn specs() {
        // idents
        assert!("fred".parse::<IdentifierType>().is_ok());
        assert!("fred?".parse::<IdentifierType>().is_ok());
        assert!("fred!".parse::<IdentifierType>().is_ok());
        assert!("$ruby".parse::<IdentifierType>().is_ok());
        assert!("@ruby".parse::<IdentifierType>().is_ok());
        assert!("@@ruby".parse::<IdentifierType>().is_ok());

        // sigil-prefixed idents (global, instance, class) can't end in bang
        // or question, unlike the bare `fred?` and `fred!` above
        assert!("$ruby!".parse::<IdentifierType>().is_err());
        assert!("$ruby?".parse::<IdentifierType>().is_err());
        assert!("@ruby!".parse::<IdentifierType>().is_err());
        assert!("@ruby?".parse::<IdentifierType>().is_err());
        assert!("@@ruby!".parse::<IdentifierType>().is_err());
        assert!("@@ruby?".parse::<IdentifierType>().is_err());

        // globals
        assert!("$-w".parse::<IdentifierType>().is_ok());
        assert!("$-ww".parse::<IdentifierType>().is_err());
        assert!("$+".parse::<IdentifierType>().is_ok());
        assert!("$~".parse::<IdentifierType>().is_ok());
        assert!("$:".parse::<IdentifierType>().is_ok());
        assert!("$?".parse::<IdentifierType>().is_ok());
        assert!("$<".parse::<IdentifierType>().is_ok());
        assert!("$_".parse::<IdentifierType>().is_ok());
        assert!("$/".parse::<IdentifierType>().is_ok());
        // `$'` is listed in the spec table; assert it like the other rows.
        assert!("$'".parse::<IdentifierType>().is_ok());
        assert!("$\"".parse::<IdentifierType>().is_ok());
        assert!("$$".parse::<IdentifierType>().is_ok());
        assert!("$.".parse::<IdentifierType>().is_ok());
        assert!("$,".parse::<IdentifierType>().is_ok());
        assert!("$`".parse::<IdentifierType>().is_ok());
        assert!("$!".parse::<IdentifierType>().is_ok());
        assert!("$;".parse::<IdentifierType>().is_ok());
        assert!("$\\".parse::<IdentifierType>().is_ok());
        assert!("$=".parse::<IdentifierType>().is_ok());
        assert!("$*".parse::<IdentifierType>().is_ok());
        assert!("$>".parse::<IdentifierType>().is_ok());
        assert!("$&".parse::<IdentifierType>().is_ok());
        assert!("$@".parse::<IdentifierType>().is_ok());
        assert!("$1234".parse::<IdentifierType>().is_ok());

        // symbolic methods
        assert!("-@".parse::<IdentifierType>().is_ok());
        assert!("+@".parse::<IdentifierType>().is_ok());
        assert!("%".parse::<IdentifierType>().is_ok());
        assert!("&".parse::<IdentifierType>().is_ok());
        assert!("*".parse::<IdentifierType>().is_ok());
        assert!("**".parse::<IdentifierType>().is_ok());
        assert!("/".parse::<IdentifierType>().is_ok());
        assert!("<".parse::<IdentifierType>().is_ok());
        assert!("<=".parse::<IdentifierType>().is_ok());
        assert!("<=>".parse::<IdentifierType>().is_ok());
        assert!("==".parse::<IdentifierType>().is_ok());
        assert!("===".parse::<IdentifierType>().is_ok());
        assert!("=~".parse::<IdentifierType>().is_ok());
        assert!(">".parse::<IdentifierType>().is_ok());
        assert!(">=".parse::<IdentifierType>().is_ok());
        assert!(">>".parse::<IdentifierType>().is_ok());
        assert!("[]".parse::<IdentifierType>().is_ok());
        assert!("[]=".parse::<IdentifierType>().is_ok());
        assert!("<<".parse::<IdentifierType>().is_ok());
        assert!("^".parse::<IdentifierType>().is_ok());
        assert!("`".parse::<IdentifierType>().is_ok());
        assert!("~".parse::<IdentifierType>().is_ok());
        assert!("|".parse::<IdentifierType>().is_ok());

        // negation operators: the spec table allows both the quoted and the
        // unquoted inspect form for these; this test expects them to parse
        assert!("!".parse::<IdentifierType>().is_ok());
        assert!("!=".parse::<IdentifierType>().is_ok());
        assert!("!~".parse::<IdentifierType>().is_ok());

        // non-symbol symbolics
        assert!("$".parse::<IdentifierType>().is_err());
        assert!("&&".parse::<IdentifierType>().is_err());
        assert!("'".parse::<IdentifierType>().is_err());
        assert!(",".parse::<IdentifierType>().is_err());
        assert!(".".parse::<IdentifierType>().is_err());
        assert!("..".parse::<IdentifierType>().is_err());
        assert!("...".parse::<IdentifierType>().is_err());
        assert!(":".parse::<IdentifierType>().is_err());
        assert!("::".parse::<IdentifierType>().is_err());
        assert!(";".parse::<IdentifierType>().is_err());
        assert!("=".parse::<IdentifierType>().is_err());
        assert!("=>".parse::<IdentifierType>().is_err());
        assert!("?".parse::<IdentifierType>().is_err());
        assert!("@".parse::<IdentifierType>().is_err());
        assert!("||".parse::<IdentifierType>().is_err());
        assert!("|||".parse::<IdentifierType>().is_err());
        assert!("++".parse::<IdentifierType>().is_err());

        // quotes: a single `"` and a pair `""`
        assert!(r#"""#.parse::<IdentifierType>().is_err());
        assert!(r#""""#.parse::<IdentifierType>().is_err());

        // leading digit, embedded or surrounding whitespace, operator prefix
        assert!("9".parse::<IdentifierType>().is_err());
        assert!("foo bar".parse::<IdentifierType>().is_err());
        assert!("*foo".parse::<IdentifierType>().is_err());
        assert!("foo ".parse::<IdentifierType>().is_err());
        assert!(" foo".parse::<IdentifierType>().is_err());
        assert!(" ".parse::<IdentifierType>().is_err());
    }
}
1314
/// Functional tests driven by the symbols that exist when an MRI interpreter
/// boots.
///
/// # Generation
///
/// The fixture list is produced by the following script, which drops every
/// symbol whose `inspect` output has to be wrapped in quotes:
///
/// ```shell
/// cat <<EOF | ruby --disable-gems --disable-did_you_mean
/// def boot_identifier_symbols
///   syms = Symbol.all_symbols.map(&:inspect)
///   # remove symbols that must be debug wrapped in quotes
///   syms = syms.reject { |s| s[0..1] == ':"' }
///
///   fixture = syms.map { |s| "r##\"#{s[1..]}\"##" }
///   puts fixture.join(",\n")
/// end
///
/// boot_identifier_symbols
/// EOF
/// ```
#[cfg(test)]
mod functionals {
    use super::IdentifierType;
    use crate::fixtures::IDENTS;

    /// Every entry in the `IDENTS` fixture must parse as some identifier
    /// type; the failure message names the offending entry.
    #[test]
    fn mri_symbol_idents() {
        IDENTS.iter().copied().for_each(|sym| {
            let parsed = sym.parse::<IdentifierType>();
            assert!(
                parsed.is_ok(),
                "'{sym}' should parse as a valid identifier, but did not."
            );
        });
    }
}