proc_macro2/
parse.rs

1use crate::fallback::{
2    self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream,
3    TokenStreamBuilder,
4};
5use crate::{Delimiter, Punct, Spacing, TokenTree};
6use core::char;
7use core::str::{Bytes, CharIndices, Chars};
8
/// Lightweight, copyable view of the input that still has to be lexed.
#[derive(Clone, Copy, PartialEq, Eq)]
pub(crate) struct Cursor<'a> {
    /// The not-yet-consumed tail of the source text.
    pub(crate) rest: &'a str,
    /// Position of `rest`, counted in `char`s (see `advance`). Only present
    /// when span locations are compiled in.
    #[cfg(span_locations)]
    pub(crate) off: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor positioned `bytes` bytes further into the input.
    ///
    /// The split is performed with `str::split_at`, so `bytes` has to lie on
    /// a `char` boundary within the remaining text or the call panics.
    pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> {
        let (_consumed, remaining) = self.rest.split_at(bytes);
        Cursor {
            rest: remaining,
            // The offset advances by characters, not by bytes.
            #[cfg(span_locations)]
            off: self.off + _consumed.chars().count() as u32,
        }
    }

    /// Whether the remaining input begins with the string `s`.
    pub(crate) fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// Whether the remaining input begins with the character `ch`.
    pub(crate) fn starts_with_char(&self, ch: char) -> bool {
        self.rest.starts_with(ch)
    }

    /// Whether the first remaining character satisfies the predicate `f`.
    pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
    where
        Pattern: FnMut(char) -> bool,
    {
        self.rest.starts_with(f)
    }

    /// Whether all input has been consumed.
    pub(crate) fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Number of bytes left.
    fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining input as raw bytes.
    fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the remaining bytes.
    fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the remaining characters.
    fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over the remaining characters with their byte offsets.
    fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }

    /// Consumes the literal prefix `tag`, or rejects if the input does not
    /// start with it.
    fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
        if !self.starts_with(tag) {
            return Err(Reject);
        }
        Ok(self.advance(tag.len()))
    }
}

/// Unit error signalling that a parser did not match at the given position.
pub(crate) struct Reject;

/// Outcome of a parser: the cursor after the match together with the parsed
/// value `O`, or `Reject`.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
76
77fn skip_whitespace(input: Cursor) -> Cursor {
78    let mut s = input;
79
80    while !s.is_empty() {
81        let byte = s.as_bytes()[0];
82        if byte == b'/' {
83            if s.starts_with("//")
84                && (!s.starts_with("///") || s.starts_with("////"))
85                && !s.starts_with("//!")
86            {
87                let (cursor, _) = take_until_newline_or_eof(s);
88                s = cursor;
89                continue;
90            } else if s.starts_with("/**/") {
91                s = s.advance(4);
92                continue;
93            } else if s.starts_with("/*")
94                && (!s.starts_with("/**") || s.starts_with("/***"))
95                && !s.starts_with("/*!")
96            {
97                match block_comment(s) {
98                    Ok((rest, _)) => {
99                        s = rest;
100                        continue;
101                    }
102                    Err(Reject) => return s,
103                }
104            }
105        }
106        match byte {
107            b' ' | 0x09..=0x0d => {
108                s = s.advance(1);
109                continue;
110            }
111            b if b.is_ascii() => {}
112            _ => {
113                let ch = s.chars().next().unwrap();
114                if is_whitespace(ch) {
115                    s = s.advance(ch.len_utf8());
116                    continue;
117                }
118            }
119        }
120        return s;
121    }
122    s
123}
124
125fn block_comment(input: Cursor) -> PResult<&str> {
126    if !input.starts_with("/*") {
127        return Err(Reject);
128    }
129
130    let mut depth = 0usize;
131    let bytes = input.as_bytes();
132    let mut i = 0usize;
133    let upper = bytes.len() - 1;
134
135    while i < upper {
136        if bytes[i] == b'/' && bytes[i + 1] == b'*' {
137            depth += 1;
138            i += 1; // eat '*'
139        } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
140            depth -= 1;
141            if depth == 0 {
142                return Ok((input.advance(i + 2), &input.rest[..i + 2]));
143            }
144            i += 1; // eat '/'
145        }
146        i += 1;
147    }
148
149    Err(Reject)
150}
151
/// Whether `ch` counts as whitespace for the lexer: anything
/// `char::is_whitespace` accepts, plus the left-to-right and right-to-left
/// marks, which Rust also treats as whitespace.
fn is_whitespace(ch: char) -> bool {
    matches!(ch, '\u{200e}' | '\u{200f}') || ch.is_whitespace()
}
156
157fn word_break(input: Cursor) -> Result<Cursor, Reject> {
158    match input.chars().next() {
159        Some(ch) if is_ident_continue(ch) => Err(Reject),
160        Some(_) | None => Ok(input),
161    }
162}
163
164// Rustc's representation of a macro expansion error in expression position or
165// type position.
166const ERROR: &str = "(/*ERROR*/)";
167
168pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
169    let mut trees = TokenStreamBuilder::new();
170    let mut stack = Vec::new();
171
172    loop {
173        input = skip_whitespace(input);
174
175        if let Ok((rest, ())) = doc_comment(input, &mut trees) {
176            input = rest;
177            continue;
178        }
179
180        #[cfg(span_locations)]
181        let lo = input.off;
182
183        let first = match input.bytes().next() {
184            Some(first) => first,
185            None => match stack.last() {
186                None => return Ok(trees.build()),
187                #[cfg(span_locations)]
188                Some((lo, _frame)) => {
189                    return Err(LexError {
190                        span: Span { lo: *lo, hi: *lo },
191                    })
192                }
193                #[cfg(not(span_locations))]
194                Some(_frame) => return Err(LexError { span: Span {} }),
195            },
196        };
197
198        if let Some(open_delimiter) = match first {
199            b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
200            b'[' => Some(Delimiter::Bracket),
201            b'{' => Some(Delimiter::Brace),
202            _ => None,
203        } {
204            input = input.advance(1);
205            let frame = (open_delimiter, trees);
206            #[cfg(span_locations)]
207            let frame = (lo, frame);
208            stack.push(frame);
209            trees = TokenStreamBuilder::new();
210        } else if let Some(close_delimiter) = match first {
211            b')' => Some(Delimiter::Parenthesis),
212            b']' => Some(Delimiter::Bracket),
213            b'}' => Some(Delimiter::Brace),
214            _ => None,
215        } {
216            let frame = match stack.pop() {
217                Some(frame) => frame,
218                None => return Err(lex_error(input)),
219            };
220            #[cfg(span_locations)]
221            let (lo, frame) = frame;
222            let (open_delimiter, outer) = frame;
223            if open_delimiter != close_delimiter {
224                return Err(lex_error(input));
225            }
226            input = input.advance(1);
227            let mut g = Group::new(open_delimiter, trees.build());
228            g.set_span(Span {
229                #[cfg(span_locations)]
230                lo,
231                #[cfg(span_locations)]
232                hi: input.off,
233            });
234            trees = outer;
235            trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
236        } else {
237            let (rest, mut tt) = match leaf_token(input) {
238                Ok((rest, tt)) => (rest, tt),
239                Err(Reject) => return Err(lex_error(input)),
240            };
241            tt.set_span(crate::Span::_new_fallback(Span {
242                #[cfg(span_locations)]
243                lo,
244                #[cfg(span_locations)]
245                hi: rest.off,
246            }));
247            trees.push_token_from_parser(tt);
248            input = rest;
249        }
250    }
251}
252
253fn lex_error(cursor: Cursor) -> LexError {
254    #[cfg(not(span_locations))]
255    let _ = cursor;
256    LexError {
257        span: Span {
258            #[cfg(span_locations)]
259            lo: cursor.off,
260            #[cfg(span_locations)]
261            hi: cursor.off,
262        },
263    }
264}
265
266fn leaf_token(input: Cursor) -> PResult<TokenTree> {
267    if let Ok((input, l)) = literal(input) {
268        // must be parsed before ident
269        Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
270    } else if let Ok((input, p)) = punct(input) {
271        Ok((input, TokenTree::Punct(p)))
272    } else if let Ok((input, i)) = ident(input) {
273        Ok((input, TokenTree::Ident(i)))
274    } else if input.starts_with(ERROR) {
275        let rest = input.advance(ERROR.len());
276        let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
277        Ok((rest, TokenTree::Literal(repr)))
278    } else {
279        Err(Reject)
280    }
281}
282
283fn ident(input: Cursor) -> PResult<crate::Ident> {
284    if [
285        "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
286    ]
287    .iter()
288    .any(|prefix| input.starts_with(prefix))
289    {
290        Err(Reject)
291    } else {
292        ident_any(input)
293    }
294}
295
296fn ident_any(input: Cursor) -> PResult<crate::Ident> {
297    let raw = input.starts_with("r#");
298    let rest = input.advance((raw as usize) << 1);
299
300    let (rest, sym) = ident_not_raw(rest)?;
301
302    if !raw {
303        let ident =
304            crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site()));
305        return Ok((rest, ident));
306    }
307
308    match sym {
309        "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
310        _ => {}
311    }
312
313    let ident =
314        crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site()));
315    Ok((rest, ident))
316}
317
318fn ident_not_raw(input: Cursor) -> PResult<&str> {
319    let mut chars = input.char_indices();
320
321    match chars.next() {
322        Some((_, ch)) if is_ident_start(ch) => {}
323        _ => return Err(Reject),
324    }
325
326    let mut end = input.len();
327    for (i, ch) in chars {
328        if !is_ident_continue(ch) {
329            end = i;
330            break;
331        }
332    }
333
334    Ok((input.advance(end), &input.rest[..end]))
335}
336
337pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
338    let rest = literal_nocapture(input)?;
339    let end = input.len() - rest.len();
340    Ok((rest, Literal::_new(input.rest[..end].to_string())))
341}
342
343fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
344    if let Ok(ok) = string(input) {
345        Ok(ok)
346    } else if let Ok(ok) = byte_string(input) {
347        Ok(ok)
348    } else if let Ok(ok) = c_string(input) {
349        Ok(ok)
350    } else if let Ok(ok) = byte(input) {
351        Ok(ok)
352    } else if let Ok(ok) = character(input) {
353        Ok(ok)
354    } else if let Ok(ok) = float(input) {
355        Ok(ok)
356    } else if let Ok(ok) = int(input) {
357        Ok(ok)
358    } else {
359        Err(Reject)
360    }
361}
362
363fn literal_suffix(input: Cursor) -> Cursor {
364    match ident_not_raw(input) {
365        Ok((input, _)) => input,
366        Err(Reject) => input,
367    }
368}
369
370fn string(input: Cursor) -> Result<Cursor, Reject> {
371    if let Ok(input) = input.parse("\"") {
372        cooked_string(input)
373    } else if let Ok(input) = input.parse("r") {
374        raw_string(input)
375    } else {
376        Err(Reject)
377    }
378}
379
380fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
381    let mut chars = input.char_indices();
382
383    while let Some((i, ch)) = chars.next() {
384        match ch {
385            '"' => {
386                let input = input.advance(i + 1);
387                return Ok(literal_suffix(input));
388            }
389            '\r' => match chars.next() {
390                Some((_, '\n')) => {}
391                _ => break,
392            },
393            '\\' => match chars.next() {
394                Some((_, 'x')) => {
395                    backslash_x_char(&mut chars)?;
396                }
397                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
398                Some((_, 'u')) => {
399                    backslash_u(&mut chars)?;
400                }
401                Some((newline, ch @ ('\n' | '\r'))) => {
402                    input = input.advance(newline + 1);
403                    trailing_backslash(&mut input, ch as u8)?;
404                    chars = input.char_indices();
405                }
406                _ => break,
407            },
408            _ch => {}
409        }
410    }
411    Err(Reject)
412}
413
414fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
415    let (input, delimiter) = delimiter_of_raw_string(input)?;
416    let mut bytes = input.bytes().enumerate();
417    while let Some((i, byte)) = bytes.next() {
418        match byte {
419            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
420                let rest = input.advance(i + 1 + delimiter.len());
421                return Ok(literal_suffix(rest));
422            }
423            b'\r' => match bytes.next() {
424                Some((_, b'\n')) => {}
425                _ => break,
426            },
427            _ => {}
428        }
429    }
430    Err(Reject)
431}
432
433fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
434    if let Ok(input) = input.parse("b\"") {
435        cooked_byte_string(input)
436    } else if let Ok(input) = input.parse("br") {
437        raw_byte_string(input)
438    } else {
439        Err(Reject)
440    }
441}
442
443fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
444    let mut bytes = input.bytes().enumerate();
445    while let Some((offset, b)) = bytes.next() {
446        match b {
447            b'"' => {
448                let input = input.advance(offset + 1);
449                return Ok(literal_suffix(input));
450            }
451            b'\r' => match bytes.next() {
452                Some((_, b'\n')) => {}
453                _ => break,
454            },
455            b'\\' => match bytes.next() {
456                Some((_, b'x')) => {
457                    backslash_x_byte(&mut bytes)?;
458                }
459                Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
460                Some((newline, b @ (b'\n' | b'\r'))) => {
461                    input = input.advance(newline + 1);
462                    trailing_backslash(&mut input, b)?;
463                    bytes = input.bytes().enumerate();
464                }
465                _ => break,
466            },
467            b if b.is_ascii() => {}
468            _ => break,
469        }
470    }
471    Err(Reject)
472}
473
474fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
475    for (i, byte) in input.bytes().enumerate() {
476        match byte {
477            b'"' => {
478                if i > 255 {
479                    // https://github.com/rust-lang/rust/pull/95251
480                    return Err(Reject);
481                }
482                return Ok((input.advance(i + 1), &input.rest[..i]));
483            }
484            b'#' => {}
485            _ => break,
486        }
487    }
488    Err(Reject)
489}
490
491fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
492    let (input, delimiter) = delimiter_of_raw_string(input)?;
493    let mut bytes = input.bytes().enumerate();
494    while let Some((i, byte)) = bytes.next() {
495        match byte {
496            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
497                let rest = input.advance(i + 1 + delimiter.len());
498                return Ok(literal_suffix(rest));
499            }
500            b'\r' => match bytes.next() {
501                Some((_, b'\n')) => {}
502                _ => break,
503            },
504            other => {
505                if !other.is_ascii() {
506                    break;
507                }
508            }
509        }
510    }
511    Err(Reject)
512}
513
514fn c_string(input: Cursor) -> Result<Cursor, Reject> {
515    if let Ok(input) = input.parse("c\"") {
516        cooked_c_string(input)
517    } else if let Ok(input) = input.parse("cr") {
518        raw_c_string(input)
519    } else {
520        Err(Reject)
521    }
522}
523
524fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
525    let (input, delimiter) = delimiter_of_raw_string(input)?;
526    let mut bytes = input.bytes().enumerate();
527    while let Some((i, byte)) = bytes.next() {
528        match byte {
529            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
530                let rest = input.advance(i + 1 + delimiter.len());
531                return Ok(literal_suffix(rest));
532            }
533            b'\r' => match bytes.next() {
534                Some((_, b'\n')) => {}
535                _ => break,
536            },
537            b'\0' => break,
538            _ => {}
539        }
540    }
541    Err(Reject)
542}
543
544fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
545    let mut chars = input.char_indices();
546
547    while let Some((i, ch)) = chars.next() {
548        match ch {
549            '"' => {
550                let input = input.advance(i + 1);
551                return Ok(literal_suffix(input));
552            }
553            '\r' => match chars.next() {
554                Some((_, '\n')) => {}
555                _ => break,
556            },
557            '\\' => match chars.next() {
558                Some((_, 'x')) => {
559                    backslash_x_nonzero(&mut chars)?;
560                }
561                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
562                Some((_, 'u')) => {
563                    if backslash_u(&mut chars)? == '\0' {
564                        break;
565                    }
566                }
567                Some((newline, ch @ ('\n' | '\r'))) => {
568                    input = input.advance(newline + 1);
569                    trailing_backslash(&mut input, ch as u8)?;
570                    chars = input.char_indices();
571                }
572                _ => break,
573            },
574            '\0' => break,
575            _ch => {}
576        }
577    }
578    Err(Reject)
579}
580
581fn byte(input: Cursor) -> Result<Cursor, Reject> {
582    let input = input.parse("b'")?;
583    let mut bytes = input.bytes().enumerate();
584    let ok = match bytes.next().map(|(_, b)| b) {
585        Some(b'\\') => match bytes.next().map(|(_, b)| b) {
586            Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
587            Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
588            _ => false,
589        },
590        b => b.is_some(),
591    };
592    if !ok {
593        return Err(Reject);
594    }
595    let (offset, _) = bytes.next().ok_or(Reject)?;
596    if !input.chars().as_str().is_char_boundary(offset) {
597        return Err(Reject);
598    }
599    let input = input.advance(offset).parse("'")?;
600    Ok(literal_suffix(input))
601}
602
603fn character(input: Cursor) -> Result<Cursor, Reject> {
604    let input = input.parse("'")?;
605    let mut chars = input.char_indices();
606    let ok = match chars.next().map(|(_, ch)| ch) {
607        Some('\\') => match chars.next().map(|(_, ch)| ch) {
608            Some('x') => backslash_x_char(&mut chars).is_ok(),
609            Some('u') => backslash_u(&mut chars).is_ok(),
610            Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
611            _ => false,
612        },
613        ch => ch.is_some(),
614    };
615    if !ok {
616        return Err(Reject);
617    }
618    let (idx, _) = chars.next().ok_or(Reject)?;
619    let input = input.advance(idx).parse("'")?;
620    Ok(literal_suffix(input))
621}
622
/// Pulls the next `(index, item)` pair from `$chars` and evaluates to the
/// item if it matches `$pat`. Otherwise, including when the iterator is
/// exhausted, returns `Err(Reject)` from the enclosing function.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            Some((_, ch)) if matches!(ch, $pat) => ch,
            _ => return Err(Reject),
        }
    };
}
634
635fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
636where
637    I: Iterator<Item = (usize, char)>,
638{
639    next_ch!(chars @ '0'..='7');
640    next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
641    Ok(())
642}
643
644fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
645where
646    I: Iterator<Item = (usize, u8)>,
647{
648    next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
649    next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
650    Ok(())
651}
652
653fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
654where
655    I: Iterator<Item = (usize, char)>,
656{
657    let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
658    let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
659    if first == '0' && second == '0' {
660        Err(Reject)
661    } else {
662        Ok(())
663    }
664}
665
666fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
667where
668    I: Iterator<Item = (usize, char)>,
669{
670    next_ch!(chars @ '{');
671    let mut value = 0;
672    let mut len = 0;
673    for (_, ch) in chars {
674        let digit = match ch {
675            '0'..='9' => ch as u8 - b'0',
676            'a'..='f' => 10 + ch as u8 - b'a',
677            'A'..='F' => 10 + ch as u8 - b'A',
678            '_' if len > 0 => continue,
679            '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
680            _ => break,
681        };
682        if len == 6 {
683            break;
684        }
685        value *= 0x10;
686        value += u32::from(digit);
687        len += 1;
688    }
689    Err(Reject)
690}
691
692fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
693    let mut whitespace = input.bytes().enumerate();
694    loop {
695        if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
696            return Err(Reject);
697        }
698        match whitespace.next() {
699            Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
700                last = b;
701            }
702            Some((offset, _)) => {
703                *input = input.advance(offset);
704                return Ok(());
705            }
706            None => return Err(Reject),
707        }
708    }
709}
710
711fn float(input: Cursor) -> Result<Cursor, Reject> {
712    let mut rest = float_digits(input)?;
713    if let Some(ch) = rest.chars().next() {
714        if is_ident_start(ch) {
715            rest = ident_not_raw(rest)?.0;
716        }
717    }
718    word_break(rest)
719}
720
721fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
722    let mut chars = input.chars().peekable();
723    match chars.next() {
724        Some(ch) if '0' <= ch && ch <= '9' => {}
725        _ => return Err(Reject),
726    }
727
728    let mut len = 1;
729    let mut has_dot = false;
730    let mut has_exp = false;
731    while let Some(&ch) = chars.peek() {
732        match ch {
733            '0'..='9' | '_' => {
734                chars.next();
735                len += 1;
736            }
737            '.' => {
738                if has_dot {
739                    break;
740                }
741                chars.next();
742                if chars
743                    .peek()
744                    .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
745                {
746                    return Err(Reject);
747                }
748                len += 1;
749                has_dot = true;
750            }
751            'e' | 'E' => {
752                chars.next();
753                len += 1;
754                has_exp = true;
755                break;
756            }
757            _ => break,
758        }
759    }
760
761    if !(has_dot || has_exp) {
762        return Err(Reject);
763    }
764
765    if has_exp {
766        let token_before_exp = if has_dot {
767            Ok(input.advance(len - 1))
768        } else {
769            Err(Reject)
770        };
771        let mut has_sign = false;
772        let mut has_exp_value = false;
773        while let Some(&ch) = chars.peek() {
774            match ch {
775                '+' | '-' => {
776                    if has_exp_value {
777                        break;
778                    }
779                    if has_sign {
780                        return token_before_exp;
781                    }
782                    chars.next();
783                    len += 1;
784                    has_sign = true;
785                }
786                '0'..='9' => {
787                    chars.next();
788                    len += 1;
789                    has_exp_value = true;
790                }
791                '_' => {
792                    chars.next();
793                    len += 1;
794                }
795                _ => break,
796            }
797        }
798        if !has_exp_value {
799            return token_before_exp;
800        }
801    }
802
803    Ok(input.advance(len))
804}
805
806fn int(input: Cursor) -> Result<Cursor, Reject> {
807    let mut rest = digits(input)?;
808    if let Some(ch) = rest.chars().next() {
809        if is_ident_start(ch) {
810            rest = ident_not_raw(rest)?.0;
811        }
812    }
813    word_break(rest)
814}
815
816fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
817    let base = if input.starts_with("0x") {
818        input = input.advance(2);
819        16
820    } else if input.starts_with("0o") {
821        input = input.advance(2);
822        8
823    } else if input.starts_with("0b") {
824        input = input.advance(2);
825        2
826    } else {
827        10
828    };
829
830    let mut len = 0;
831    let mut empty = true;
832    for b in input.bytes() {
833        match b {
834            b'0'..=b'9' => {
835                let digit = (b - b'0') as u64;
836                if digit >= base {
837                    return Err(Reject);
838                }
839            }
840            b'a'..=b'f' => {
841                let digit = 10 + (b - b'a') as u64;
842                if digit >= base {
843                    break;
844                }
845            }
846            b'A'..=b'F' => {
847                let digit = 10 + (b - b'A') as u64;
848                if digit >= base {
849                    break;
850                }
851            }
852            b'_' => {
853                if empty && base == 10 {
854                    return Err(Reject);
855                }
856                len += 1;
857                continue;
858            }
859            _ => break,
860        };
861        len += 1;
862        empty = false;
863    }
864    if empty {
865        Err(Reject)
866    } else {
867        Ok(input.advance(len))
868    }
869}
870
871fn punct(input: Cursor) -> PResult<Punct> {
872    let (rest, ch) = punct_char(input)?;
873    if ch == '\'' {
874        if ident_any(rest)?.0.starts_with_char('\'') {
875            Err(Reject)
876        } else {
877            Ok((rest, Punct::new('\'', Spacing::Joint)))
878        }
879    } else {
880        let kind = match punct_char(rest) {
881            Ok(_) => Spacing::Joint,
882            Err(Reject) => Spacing::Alone,
883        };
884        Ok((rest, Punct::new(ch, kind)))
885    }
886}
887
888fn punct_char(input: Cursor) -> PResult<char> {
889    if input.starts_with("//") || input.starts_with("/*") {
890        // Do not accept `/` of a comment as a punct.
891        return Err(Reject);
892    }
893
894    let mut chars = input.chars();
895    let first = match chars.next() {
896        Some(ch) => ch,
897        None => {
898            return Err(Reject);
899        }
900    };
901    let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
902    if recognized.contains(first) {
903        Ok((input.advance(first.len_utf8()), first))
904    } else {
905        Err(Reject)
906    }
907}
908
909fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
910    #[cfg(span_locations)]
911    let lo = input.off;
912    let (rest, (comment, inner)) = doc_comment_contents(input)?;
913    let fallback_span = Span {
914        #[cfg(span_locations)]
915        lo,
916        #[cfg(span_locations)]
917        hi: rest.off,
918    };
919    let span = crate::Span::_new_fallback(fallback_span);
920
921    let mut scan_for_bare_cr = comment;
922    while let Some(cr) = scan_for_bare_cr.find('\r') {
923        let rest = &scan_for_bare_cr[cr + 1..];
924        if !rest.starts_with('\n') {
925            return Err(Reject);
926        }
927        scan_for_bare_cr = rest;
928    }
929
930    let mut pound = Punct::new('#', Spacing::Alone);
931    pound.set_span(span);
932    trees.push_token_from_parser(TokenTree::Punct(pound));
933
934    if inner {
935        let mut bang = Punct::new('!', Spacing::Alone);
936        bang.set_span(span);
937        trees.push_token_from_parser(TokenTree::Punct(bang));
938    }
939
940    let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span));
941    let mut equal = Punct::new('=', Spacing::Alone);
942    equal.set_span(span);
943    let mut literal = crate::Literal::_new_fallback(Literal::string(comment));
944    literal.set_span(span);
945    let mut bracketed = TokenStreamBuilder::with_capacity(3);
946    bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
947    bracketed.push_token_from_parser(TokenTree::Punct(equal));
948    bracketed.push_token_from_parser(TokenTree::Literal(literal));
949    let group = Group::new(Delimiter::Bracket, bracketed.build());
950    let mut group = crate::Group::_new_fallback(group);
951    group.set_span(span);
952    trees.push_token_from_parser(TokenTree::Group(group));
953
954    Ok((rest, ()))
955}
956
957fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
958    if input.starts_with("//!") {
959        let input = input.advance(3);
960        let (input, s) = take_until_newline_or_eof(input);
961        Ok((input, (s, true)))
962    } else if input.starts_with("/*!") {
963        let (input, s) = block_comment(input)?;
964        Ok((input, (&s[3..s.len() - 2], true)))
965    } else if input.starts_with("///") {
966        let input = input.advance(3);
967        if input.starts_with_char('/') {
968            return Err(Reject);
969        }
970        let (input, s) = take_until_newline_or_eof(input);
971        Ok((input, (s, false)))
972    } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
973        let (input, s) = block_comment(input)?;
974        Ok((input, (&s[3..s.len() - 2], false)))
975    } else {
976        Err(Reject)
977    }
978}
979
980fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
981    let chars = input.char_indices();
982
983    for (i, ch) in chars {
984        if ch == '\n' {
985            return (input.advance(i), &input.rest[..i]);
986        } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
987            return (input.advance(i + 1), &input.rest[..i]);
988        }
989    }
990
991    (input.advance(input.len()), input.rest)
992}