1use crate::fallback::{
2 self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream,
3 TokenStreamBuilder,
4};
5use crate::{Delimiter, Punct, Spacing, TokenTree};
6use core::char;
7use core::str::{Bytes, CharIndices, Chars};
8
/// A lightweight, copyable view of the input that has not been lexed yet.
///
/// Parsing functions take a `Cursor` by value and hand back a new one that
/// points further into the same string.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) struct Cursor<'a> {
    /// The remaining, unconsumed input.
    pub(crate) rest: &'a str,
    /// Position of `rest`, counted in `char`s rather than bytes (`advance`
    /// bumps it by the number of chars it consumes). Only present when the
    /// `span_locations` cfg is enabled.
    #[cfg(span_locations)]
    pub(crate) off: u32,
}
15
16impl<'a> Cursor<'a> {
17 pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> {
18 let (_front, rest) = self.rest.split_at(bytes);
19 Cursor {
20 rest,
21 #[cfg(span_locations)]
22 off: self.off + _front.chars().count() as u32,
23 }
24 }
25
26 pub(crate) fn starts_with(&self, s: &str) -> bool {
27 self.rest.starts_with(s)
28 }
29
30 pub(crate) fn starts_with_char(&self, ch: char) -> bool {
31 self.rest.starts_with(ch)
32 }
33
34 pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
35 where
36 Pattern: FnMut(char) -> bool,
37 {
38 self.rest.starts_with(f)
39 }
40
41 pub(crate) fn is_empty(&self) -> bool {
42 self.rest.is_empty()
43 }
44
45 fn len(&self) -> usize {
46 self.rest.len()
47 }
48
49 fn as_bytes(&self) -> &'a [u8] {
50 self.rest.as_bytes()
51 }
52
53 fn bytes(&self) -> Bytes<'a> {
54 self.rest.bytes()
55 }
56
57 fn chars(&self) -> Chars<'a> {
58 self.rest.chars()
59 }
60
61 fn char_indices(&self) -> CharIndices<'a> {
62 self.rest.char_indices()
63 }
64
65 fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
66 if self.starts_with(tag) {
67 Ok(self.advance(tag.len()))
68 } else {
69 Err(Reject)
70 }
71 }
72}
73
/// Marker error: the parser that returned it did not match at the given
/// position. It carries no further information.
pub(crate) struct Reject;
/// Result of a parser: on success, the cursor after the consumed input plus
/// the parsed value `O`.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
76
77fn skip_whitespace(input: Cursor) -> Cursor {
78 let mut s = input;
79
80 while !s.is_empty() {
81 let byte = s.as_bytes()[0];
82 if byte == b'/' {
83 if s.starts_with("//")
84 && (!s.starts_with("///") || s.starts_with("////"))
85 && !s.starts_with("//!")
86 {
87 let (cursor, _) = take_until_newline_or_eof(s);
88 s = cursor;
89 continue;
90 } else if s.starts_with("/**/") {
91 s = s.advance(4);
92 continue;
93 } else if s.starts_with("/*")
94 && (!s.starts_with("/**") || s.starts_with("/***"))
95 && !s.starts_with("/*!")
96 {
97 match block_comment(s) {
98 Ok((rest, _)) => {
99 s = rest;
100 continue;
101 }
102 Err(Reject) => return s,
103 }
104 }
105 }
106 match byte {
107 b' ' | 0x09..=0x0d => {
108 s = s.advance(1);
109 continue;
110 }
111 b if b.is_ascii() => {}
112 _ => {
113 let ch = s.chars().next().unwrap();
114 if is_whitespace(ch) {
115 s = s.advance(ch.len_utf8());
116 continue;
117 }
118 }
119 }
120 return s;
121 }
122 s
123}
124
125fn block_comment(input: Cursor) -> PResult<&str> {
126 if !input.starts_with("/*") {
127 return Err(Reject);
128 }
129
130 let mut depth = 0usize;
131 let bytes = input.as_bytes();
132 let mut i = 0usize;
133 let upper = bytes.len() - 1;
134
135 while i < upper {
136 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
137 depth += 1;
138 i += 1; } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
140 depth -= 1;
141 if depth == 0 {
142 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
143 }
144 i += 1; }
146 i += 1;
147 }
148
149 Err(Reject)
150}
151
/// Whether `ch` counts as whitespace for the lexer: anything
/// `char::is_whitespace` accepts, plus U+200E and U+200F (the left-to-right
/// and right-to-left marks).
fn is_whitespace(ch: char) -> bool {
    matches!(ch, '\u{200e}' | '\u{200f}') || ch.is_whitespace()
}
156
157fn word_break(input: Cursor) -> Result<Cursor, Reject> {
158 match input.chars().next() {
159 Some(ch) if is_ident_continue(ch) => Err(Reject),
160 Some(_) | None => Ok(input),
161 }
162}
163
// Special marker text: `token_stream` refuses to treat its leading `(` as a
// group opener, and `leaf_token` turns the whole string into a single literal.
// NOTE(review): presumably the text rustc prints for an erroneous expansion;
// confirm against upstream history.
const ERROR: &str = "(/*ERROR*/)";
167
/// Lexes all of `input` into a `TokenStream`.
///
/// Nesting is handled with an explicit stack instead of recursion: every open
/// delimiter pushes the token builder of the enclosing level, and the matching
/// close delimiter pops it and wraps the inner tokens in a `Group`.
///
/// Returns a `LexError` on an unmatched or mismatched delimiter, on end of
/// input while a group is still open, or when no token matches.
pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
    // Tokens of the innermost group currently being lexed.
    let mut trees = TokenStreamBuilder::new();
    // One frame per open group: its delimiter and the parent's builder
    // (prefixed with the opening offset when span locations are enabled).
    let mut stack = Vec::new();

    loop {
        input = skip_whitespace(input);

        // Doc comments expand to several tokens, pushed directly into `trees`.
        if let Ok((rest, ())) = doc_comment(input, &mut trees) {
            input = rest;
            continue;
        }

        #[cfg(span_locations)]
        let lo = input.off;

        let first = match input.bytes().next() {
            Some(first) => first,
            // End of input: fine at top level, an error inside an open group.
            None => match stack.last() {
                None => return Ok(trees.build()),
                #[cfg(span_locations)]
                Some((lo, _frame)) => {
                    // Point the error at the unclosed opening delimiter.
                    return Err(LexError {
                        span: Span { lo: *lo, hi: *lo },
                    })
                }
                #[cfg(not(span_locations))]
                Some(_frame) => return Err(LexError { span: Span {} }),
            },
        };

        if let Some(open_delimiter) = match first {
            // The ERROR marker begins with `(` but is lexed as a leaf token.
            b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
            b'[' => Some(Delimiter::Bracket),
            b'{' => Some(Delimiter::Brace),
            _ => None,
        } {
            input = input.advance(1);
            // Save the enclosing builder and start a fresh one for the group.
            let frame = (open_delimiter, trees);
            #[cfg(span_locations)]
            let frame = (lo, frame);
            stack.push(frame);
            trees = TokenStreamBuilder::new();
        } else if let Some(close_delimiter) = match first {
            b')' => Some(Delimiter::Parenthesis),
            b']' => Some(Delimiter::Bracket),
            b'}' => Some(Delimiter::Brace),
            _ => None,
        } {
            // A closer with nothing open is an error.
            let frame = match stack.pop() {
                Some(frame) => frame,
                None => return Err(lex_error(input)),
            };
            #[cfg(span_locations)]
            let (lo, frame) = frame;
            let (open_delimiter, outer) = frame;
            if open_delimiter != close_delimiter {
                return Err(lex_error(input));
            }
            input = input.advance(1);
            // The group spans from its opening delimiter through the closer.
            let mut g = Group::new(open_delimiter, trees.build());
            g.set_span(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: input.off,
            });
            // Resume the enclosing level and append the finished group to it.
            trees = outer;
            trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
        } else {
            let (rest, mut tt) = match leaf_token(input) {
                Ok((rest, tt)) => (rest, tt),
                Err(Reject) => return Err(lex_error(input)),
            };
            tt.set_span(crate::Span::_new_fallback(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: rest.off,
            }));
            trees.push_token_from_parser(tt);
            input = rest;
        }
    }
}
252
253fn lex_error(cursor: Cursor) -> LexError {
254 #[cfg(not(span_locations))]
255 let _ = cursor;
256 LexError {
257 span: Span {
258 #[cfg(span_locations)]
259 lo: cursor.off,
260 #[cfg(span_locations)]
261 hi: cursor.off,
262 },
263 }
264}
265
266fn leaf_token(input: Cursor) -> PResult<TokenTree> {
267 if let Ok((input, l)) = literal(input) {
268 Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
270 } else if let Ok((input, p)) = punct(input) {
271 Ok((input, TokenTree::Punct(p)))
272 } else if let Ok((input, i)) = ident(input) {
273 Ok((input, TokenTree::Ident(i)))
274 } else if input.starts_with(ERROR) {
275 let rest = input.advance(ERROR.len());
276 let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
277 Ok((rest, TokenTree::Literal(repr)))
278 } else {
279 Err(Reject)
280 }
281}
282
283fn ident(input: Cursor) -> PResult<crate::Ident> {
284 if [
285 "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
286 ]
287 .iter()
288 .any(|prefix| input.starts_with(prefix))
289 {
290 Err(Reject)
291 } else {
292 ident_any(input)
293 }
294}
295
296fn ident_any(input: Cursor) -> PResult<crate::Ident> {
297 let raw = input.starts_with("r#");
298 let rest = input.advance((raw as usize) << 1);
299
300 let (rest, sym) = ident_not_raw(rest)?;
301
302 if !raw {
303 let ident =
304 crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site()));
305 return Ok((rest, ident));
306 }
307
308 match sym {
309 "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
310 _ => {}
311 }
312
313 let ident =
314 crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site()));
315 Ok((rest, ident))
316}
317
318fn ident_not_raw(input: Cursor) -> PResult<&str> {
319 let mut chars = input.char_indices();
320
321 match chars.next() {
322 Some((_, ch)) if is_ident_start(ch) => {}
323 _ => return Err(Reject),
324 }
325
326 let mut end = input.len();
327 for (i, ch) in chars {
328 if !is_ident_continue(ch) {
329 end = i;
330 break;
331 }
332 }
333
334 Ok((input.advance(end), &input.rest[..end]))
335}
336
337pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
338 let rest = literal_nocapture(input)?;
339 let end = input.len() - rest.len();
340 Ok((rest, Literal::_new(input.rest[..end].to_string())))
341}
342
343fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
344 if let Ok(ok) = string(input) {
345 Ok(ok)
346 } else if let Ok(ok) = byte_string(input) {
347 Ok(ok)
348 } else if let Ok(ok) = c_string(input) {
349 Ok(ok)
350 } else if let Ok(ok) = byte(input) {
351 Ok(ok)
352 } else if let Ok(ok) = character(input) {
353 Ok(ok)
354 } else if let Ok(ok) = float(input) {
355 Ok(ok)
356 } else if let Ok(ok) = int(input) {
357 Ok(ok)
358 } else {
359 Err(Reject)
360 }
361}
362
363fn literal_suffix(input: Cursor) -> Cursor {
364 match ident_not_raw(input) {
365 Ok((input, _)) => input,
366 Err(Reject) => input,
367 }
368}
369
370fn string(input: Cursor) -> Result<Cursor, Reject> {
371 if let Ok(input) = input.parse("\"") {
372 cooked_string(input)
373 } else if let Ok(input) = input.parse("r") {
374 raw_string(input)
375 } else {
376 Err(Reject)
377 }
378}
379
/// Parses the remainder of a cooked string literal; `input` starts just after
/// the opening `"`.
///
/// Returns the cursor after the closing quote and any literal suffix. Rejects
/// on a bare `\r`, an unknown or malformed escape, or a missing closing quote.
fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.char_indices();

    while let Some((i, ch)) = chars.next() {
        match ch {
            '"' => {
                let input = input.advance(i + 1);
                return Ok(literal_suffix(input));
            }
            // A carriage return is only accepted as part of `\r\n`.
            '\r' => match chars.next() {
                Some((_, '\n')) => {}
                _ => break,
            },
            '\\' => match chars.next() {
                Some((_, 'x')) => {
                    backslash_x_char(&mut chars)?;
                }
                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
                Some((_, 'u')) => {
                    backslash_u(&mut chars)?;
                }
                // Backslash followed by a newline: skip the newline and the
                // whitespace that follows it.
                Some((newline, ch @ ('\n' | '\r'))) => {
                    input = input.advance(newline + 1);
                    trailing_backslash(&mut input, ch as u8)?;
                    // `input` moved, so offsets must restart from the new position.
                    chars = input.char_indices();
                }
                _ => break,
            },
            _ch => {}
        }
    }
    Err(Reject)
}
413
414fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
415 let (input, delimiter) = delimiter_of_raw_string(input)?;
416 let mut bytes = input.bytes().enumerate();
417 while let Some((i, byte)) = bytes.next() {
418 match byte {
419 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
420 let rest = input.advance(i + 1 + delimiter.len());
421 return Ok(literal_suffix(rest));
422 }
423 b'\r' => match bytes.next() {
424 Some((_, b'\n')) => {}
425 _ => break,
426 },
427 _ => {}
428 }
429 }
430 Err(Reject)
431}
432
433fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
434 if let Ok(input) = input.parse("b\"") {
435 cooked_byte_string(input)
436 } else if let Ok(input) = input.parse("br") {
437 raw_byte_string(input)
438 } else {
439 Err(Reject)
440 }
441}
442
/// Parses the remainder of a cooked byte string literal; `input` starts just
/// after the opening `b"`.
///
/// Returns the cursor after the closing quote and any literal suffix. Rejects
/// on a bare `\r`, an unknown or malformed escape, any non-ASCII byte, or a
/// missing closing quote.
fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
    let mut bytes = input.bytes().enumerate();
    while let Some((offset, b)) = bytes.next() {
        match b {
            b'"' => {
                let input = input.advance(offset + 1);
                return Ok(literal_suffix(input));
            }
            // A carriage return is only accepted as part of `\r\n`.
            b'\r' => match bytes.next() {
                Some((_, b'\n')) => {}
                _ => break,
            },
            b'\\' => match bytes.next() {
                Some((_, b'x')) => {
                    backslash_x_byte(&mut bytes)?;
                }
                Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
                // Backslash followed by a newline: skip the newline and the
                // whitespace that follows it.
                Some((newline, b @ (b'\n' | b'\r'))) => {
                    input = input.advance(newline + 1);
                    trailing_backslash(&mut input, b)?;
                    // `input` moved, so offsets must restart from the new position.
                    bytes = input.bytes().enumerate();
                }
                _ => break,
            },
            b if b.is_ascii() => {}
            // Non-ASCII content is not allowed in a byte string.
            _ => break,
        }
    }
    Err(Reject)
}
473
474fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
475 for (i, byte) in input.bytes().enumerate() {
476 match byte {
477 b'"' => {
478 if i > 255 {
479 return Err(Reject);
481 }
482 return Ok((input.advance(i + 1), &input.rest[..i]));
483 }
484 b'#' => {}
485 _ => break,
486 }
487 }
488 Err(Reject)
489}
490
491fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
492 let (input, delimiter) = delimiter_of_raw_string(input)?;
493 let mut bytes = input.bytes().enumerate();
494 while let Some((i, byte)) = bytes.next() {
495 match byte {
496 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
497 let rest = input.advance(i + 1 + delimiter.len());
498 return Ok(literal_suffix(rest));
499 }
500 b'\r' => match bytes.next() {
501 Some((_, b'\n')) => {}
502 _ => break,
503 },
504 other => {
505 if !other.is_ascii() {
506 break;
507 }
508 }
509 }
510 }
511 Err(Reject)
512}
513
514fn c_string(input: Cursor) -> Result<Cursor, Reject> {
515 if let Ok(input) = input.parse("c\"") {
516 cooked_c_string(input)
517 } else if let Ok(input) = input.parse("cr") {
518 raw_c_string(input)
519 } else {
520 Err(Reject)
521 }
522}
523
524fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
525 let (input, delimiter) = delimiter_of_raw_string(input)?;
526 let mut bytes = input.bytes().enumerate();
527 while let Some((i, byte)) = bytes.next() {
528 match byte {
529 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
530 let rest = input.advance(i + 1 + delimiter.len());
531 return Ok(literal_suffix(rest));
532 }
533 b'\r' => match bytes.next() {
534 Some((_, b'\n')) => {}
535 _ => break,
536 },
537 b'\0' => break,
538 _ => {}
539 }
540 }
541 Err(Reject)
542}
543
/// Parses the remainder of a cooked C string literal; `input` starts just
/// after the opening `c"`.
///
/// Like `cooked_string`, except that NUL may not appear in any form: a literal
/// `\0` char, the `\0` escape, `\x00` and `\u{0}` are all rejected.
fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.char_indices();

    while let Some((i, ch)) = chars.next() {
        match ch {
            '"' => {
                let input = input.advance(i + 1);
                return Ok(literal_suffix(input));
            }
            // A carriage return is only accepted as part of `\r\n`.
            '\r' => match chars.next() {
                Some((_, '\n')) => {}
                _ => break,
            },
            '\\' => match chars.next() {
                Some((_, 'x')) => {
                    backslash_x_nonzero(&mut chars)?;
                }
                // Note: `0` is absent from this list, so `\0` is rejected.
                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
                Some((_, 'u')) => {
                    if backslash_u(&mut chars)? == '\0' {
                        break;
                    }
                }
                // Backslash followed by a newline: skip the newline and the
                // whitespace that follows it.
                Some((newline, ch @ ('\n' | '\r'))) => {
                    input = input.advance(newline + 1);
                    trailing_backslash(&mut input, ch as u8)?;
                    // `input` moved, so offsets must restart from the new position.
                    chars = input.char_indices();
                }
                _ => break,
            },
            '\0' => break,
            _ch => {}
        }
    }
    Err(Reject)
}
580
581fn byte(input: Cursor) -> Result<Cursor, Reject> {
582 let input = input.parse("b'")?;
583 let mut bytes = input.bytes().enumerate();
584 let ok = match bytes.next().map(|(_, b)| b) {
585 Some(b'\\') => match bytes.next().map(|(_, b)| b) {
586 Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
587 Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
588 _ => false,
589 },
590 b => b.is_some(),
591 };
592 if !ok {
593 return Err(Reject);
594 }
595 let (offset, _) = bytes.next().ok_or(Reject)?;
596 if !input.chars().as_str().is_char_boundary(offset) {
597 return Err(Reject);
598 }
599 let input = input.advance(offset).parse("'")?;
600 Ok(literal_suffix(input))
601}
602
603fn character(input: Cursor) -> Result<Cursor, Reject> {
604 let input = input.parse("'")?;
605 let mut chars = input.char_indices();
606 let ok = match chars.next().map(|(_, ch)| ch) {
607 Some('\\') => match chars.next().map(|(_, ch)| ch) {
608 Some('x') => backslash_x_char(&mut chars).is_ok(),
609 Some('u') => backslash_u(&mut chars).is_ok(),
610 Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
611 _ => false,
612 },
613 ch => ch.is_some(),
614 };
615 if !ok {
616 return Err(Reject);
617 }
618 let (idx, _) = chars.next().ok_or(Reject)?;
619 let input = input.advance(idx).parse("'")?;
620 Ok(literal_suffix(input))
621}
622
// Pulls the next `(index, item)` pair from `$chars` and evaluates to the item
// if it matches `$pat`. Otherwise (no match, or iterator exhausted) returns
// `Err(Reject)` from the enclosing function.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            Some((_, ch)) if matches!(ch, $pat) => ch,
            _ => return Err(Reject),
        }
    };
}
634
635fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
636where
637 I: Iterator<Item = (usize, char)>,
638{
639 next_ch!(chars @ '0'..='7');
640 next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
641 Ok(())
642}
643
644fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
645where
646 I: Iterator<Item = (usize, u8)>,
647{
648 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
649 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
650 Ok(())
651}
652
653fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
654where
655 I: Iterator<Item = (usize, char)>,
656{
657 let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
658 let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
659 if first == '0' && second == '0' {
660 Err(Reject)
661 } else {
662 Ok(())
663 }
664}
665
666fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
667where
668 I: Iterator<Item = (usize, char)>,
669{
670 next_ch!(chars @ '{');
671 let mut value = 0;
672 let mut len = 0;
673 for (_, ch) in chars {
674 let digit = match ch {
675 '0'..='9' => ch as u8 - b'0',
676 'a'..='f' => 10 + ch as u8 - b'a',
677 'A'..='F' => 10 + ch as u8 - b'A',
678 '_' if len > 0 => continue,
679 '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
680 _ => break,
681 };
682 if len == 6 {
683 break;
684 }
685 value *= 0x10;
686 value += u32::from(digit);
687 len += 1;
688 }
689 Err(Reject)
690}
691
/// Handles a line continuation inside a cooked string: `input` is positioned
/// just after the newline byte `last` that followed a backslash.
///
/// Advances `input` past the whitespace (space, tab, `\n`, `\r`) that follows,
/// stopping at the first other byte. Rejects if a `\r` is not followed by
/// `\n`, or if the input ends before a non-whitespace byte is found.
fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
    let mut whitespace = input.bytes().enumerate();
    loop {
        // A carriage return must be paired with a line feed. Note that this
        // check consumes the `\n` from the iterator when it is present.
        if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
            return Err(Reject);
        }
        match whitespace.next() {
            Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
                last = b;
            }
            // First non-whitespace byte: the string content resumes here.
            Some((offset, _)) => {
                *input = input.advance(offset);
                return Ok(());
            }
            None => return Err(Reject),
        }
    }
}
710
711fn float(input: Cursor) -> Result<Cursor, Reject> {
712 let mut rest = float_digits(input)?;
713 if let Some(ch) = rest.chars().next() {
714 if is_ident_start(ch) {
715 rest = ident_not_raw(rest)?.0;
716 }
717 }
718 word_break(rest)
719}
720
/// Parses the numeric part of a float literal (no suffix): digits with a
/// decimal point and/or an exponent.
///
/// Rejects input that has neither a `.` nor an exponent, since that is an
/// integer. If an `e`/`E` turns out not to introduce a valid exponent, the
/// part before it is returned when it already contained a `.`; otherwise the
/// input is rejected.
fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.chars().peekable();
    // Must start with a decimal digit.
    match chars.next() {
        Some(ch) if '0' <= ch && ch <= '9' => {}
        _ => return Err(Reject),
    }

    // Number of chars accepted so far. All of them are ASCII, so this is also
    // the byte length handed to `advance`.
    let mut len = 1;
    let mut has_dot = false;
    let mut has_exp = false;
    while let Some(&ch) = chars.peek() {
        match ch {
            '0'..='9' | '_' => {
                chars.next();
                len += 1;
            }
            '.' => {
                // A second `.` ends the literal.
                if has_dot {
                    break;
                }
                chars.next();
                // `1..2` is a range and `1.foo` is a field/method access, so
                // neither is a float.
                if chars
                    .peek()
                    .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
                {
                    return Err(Reject);
                }
                len += 1;
                has_dot = true;
            }
            'e' | 'E' => {
                chars.next();
                len += 1;
                has_exp = true;
                break;
            }
            _ => break,
        }
    }

    if !(has_dot || has_exp) {
        return Err(Reject);
    }

    if has_exp {
        // Fallback result if the exponent is malformed: stop just before the
        // `e`, which is only a float if it contained a `.`.
        let token_before_exp = if has_dot {
            Ok(input.advance(len - 1))
        } else {
            Err(Reject)
        };
        let mut has_sign = false;
        let mut has_exp_value = false;
        while let Some(&ch) = chars.peek() {
            match ch {
                '+' | '-' => {
                    // A sign after exponent digits belongs to the next token.
                    if has_exp_value {
                        break;
                    }
                    if has_sign {
                        return token_before_exp;
                    }
                    chars.next();
                    len += 1;
                    has_sign = true;
                }
                '0'..='9' => {
                    chars.next();
                    len += 1;
                    has_exp_value = true;
                }
                '_' => {
                    chars.next();
                    len += 1;
                }
                _ => break,
            }
        }
        // An exponent needs at least one digit.
        if !has_exp_value {
            return token_before_exp;
        }
    }

    Ok(input.advance(len))
}
805
806fn int(input: Cursor) -> Result<Cursor, Reject> {
807 let mut rest = digits(input)?;
808 if let Some(ch) = rest.chars().next() {
809 if is_ident_start(ch) {
810 rest = ident_not_raw(rest)?.0;
811 }
812 }
813 word_break(rest)
814}
815
816fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
817 let base = if input.starts_with("0x") {
818 input = input.advance(2);
819 16
820 } else if input.starts_with("0o") {
821 input = input.advance(2);
822 8
823 } else if input.starts_with("0b") {
824 input = input.advance(2);
825 2
826 } else {
827 10
828 };
829
830 let mut len = 0;
831 let mut empty = true;
832 for b in input.bytes() {
833 match b {
834 b'0'..=b'9' => {
835 let digit = (b - b'0') as u64;
836 if digit >= base {
837 return Err(Reject);
838 }
839 }
840 b'a'..=b'f' => {
841 let digit = 10 + (b - b'a') as u64;
842 if digit >= base {
843 break;
844 }
845 }
846 b'A'..=b'F' => {
847 let digit = 10 + (b - b'A') as u64;
848 if digit >= base {
849 break;
850 }
851 }
852 b'_' => {
853 if empty && base == 10 {
854 return Err(Reject);
855 }
856 len += 1;
857 continue;
858 }
859 _ => break,
860 };
861 len += 1;
862 empty = false;
863 }
864 if empty {
865 Err(Reject)
866 } else {
867 Ok(input.advance(len))
868 }
869}
870
871fn punct(input: Cursor) -> PResult<Punct> {
872 let (rest, ch) = punct_char(input)?;
873 if ch == '\'' {
874 if ident_any(rest)?.0.starts_with_char('\'') {
875 Err(Reject)
876 } else {
877 Ok((rest, Punct::new('\'', Spacing::Joint)))
878 }
879 } else {
880 let kind = match punct_char(rest) {
881 Ok(_) => Spacing::Joint,
882 Err(Reject) => Spacing::Alone,
883 };
884 Ok((rest, Punct::new(ch, kind)))
885 }
886}
887
888fn punct_char(input: Cursor) -> PResult<char> {
889 if input.starts_with("//") || input.starts_with("/*") {
890 return Err(Reject);
892 }
893
894 let mut chars = input.chars();
895 let first = match chars.next() {
896 Some(ch) => ch,
897 None => {
898 return Err(Reject);
899 }
900 };
901 let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
902 if recognized.contains(first) {
903 Ok((input.advance(first.len_utf8()), first))
904 } else {
905 Err(Reject)
906 }
907}
908
/// Parses a doc comment at the start of `input` and pushes its attribute form
/// onto `trees`: `#`, then `!` for an inner comment, then the bracketed group
/// `[doc = "<text>"]`.
///
/// Every emitted token gets the span of the whole comment. Rejects if there
/// is no doc comment here, or if its text contains a `\r` not followed by
/// `\n`.
fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
    #[cfg(span_locations)]
    let lo = input.off;
    let (rest, (comment, inner)) = doc_comment_contents(input)?;
    let fallback_span = Span {
        #[cfg(span_locations)]
        lo,
        #[cfg(span_locations)]
        hi: rest.off,
    };
    let span = crate::Span::_new_fallback(fallback_span);

    // Reject any carriage return that is not part of a `\r\n` pair. This runs
    // before anything is pushed, so `trees` is untouched on failure.
    let mut scan_for_bare_cr = comment;
    while let Some(cr) = scan_for_bare_cr.find('\r') {
        let rest = &scan_for_bare_cr[cr + 1..];
        if !rest.starts_with('\n') {
            return Err(Reject);
        }
        scan_for_bare_cr = rest;
    }

    let mut pound = Punct::new('#', Spacing::Alone);
    pound.set_span(span);
    trees.push_token_from_parser(TokenTree::Punct(pound));

    // Inner doc comments (`//!`, `/*!`) become `#![doc = ...]`.
    if inner {
        let mut bang = Punct::new('!', Spacing::Alone);
        bang.set_span(span);
        trees.push_token_from_parser(TokenTree::Punct(bang));
    }

    // Build the `[doc = "<text>"]` group.
    let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span));
    let mut equal = Punct::new('=', Spacing::Alone);
    equal.set_span(span);
    let mut literal = crate::Literal::_new_fallback(Literal::string(comment));
    literal.set_span(span);
    let mut bracketed = TokenStreamBuilder::with_capacity(3);
    bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
    bracketed.push_token_from_parser(TokenTree::Punct(equal));
    bracketed.push_token_from_parser(TokenTree::Literal(literal));
    let group = Group::new(Delimiter::Bracket, bracketed.build());
    let mut group = crate::Group::_new_fallback(group);
    group.set_span(span);
    trees.push_token_from_parser(TokenTree::Group(group));

    Ok((rest, ()))
}
956
957fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
958 if input.starts_with("//!") {
959 let input = input.advance(3);
960 let (input, s) = take_until_newline_or_eof(input);
961 Ok((input, (s, true)))
962 } else if input.starts_with("/*!") {
963 let (input, s) = block_comment(input)?;
964 Ok((input, (&s[3..s.len() - 2], true)))
965 } else if input.starts_with("///") {
966 let input = input.advance(3);
967 if input.starts_with_char('/') {
968 return Err(Reject);
969 }
970 let (input, s) = take_until_newline_or_eof(input);
971 Ok((input, (s, false)))
972 } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
973 let (input, s) = block_comment(input)?;
974 Ok((input, (&s[3..s.len() - 2], false)))
975 } else {
976 Err(Reject)
977 }
978}
979
980fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
981 let chars = input.char_indices();
982
983 for (i, ch) in chars {
984 if ch == '\n' {
985 return (input.advance(i), &input.rest[..i]);
986 } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
987 return (input.advance(i + 1), &input.rest[..i]);
988 }
989 }
990
991 (input.advance(input.len()), input.rest)
992}