artichoke/
parser.rs

1//! Detect if Ruby code parses successfully.
2//!
3//! The REPL needs to check if code is valid to determine whether it should
4//! enter multiline editing mode.
5
6use std::ffi::{CStr, c_char};
7use std::ptr::NonNull;
8
9use crate::backend::sys;
10use crate::backend::{Artichoke, Error};
11
12#[cfg(feature = "cli")]
13pub(crate) mod repl;
14
/// Outcome of asking artichoke to parse a chunk of code.
///
/// A `State` only reports whether the parser accepted the code or, if it did
/// not, why. Code that parses may still have syntactic or semantic errors.
///
/// The variant order is significant: the derived `PartialOrd` and `Ord`
/// implementations compare variants by declaration order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum State {
    /// The parser failed internally. This state is fatal.
    ParseError,
    /// The code is too long; it must be fewer than [`isize::MAX`] bytes.
    CodeTooLong,
    /// The code contains too many `end` statements.
    UnexpectedEnd,
    /// The code ended while blocks were still unclosed.
    UnexpectedProgramEnd,
    /// The current expression is a `Regexp` that was never terminated.
    UnexpectedRegexpBegin,
    /// The current expression is a block that was never terminated.
    UnterminatedBlock,
    /// The current expression is a heredoc that was never terminated.
    UnterminatedHeredoc,
    /// The current expression is a `String` that was never terminated.
    UnterminatedString,
    /// The code parsed and is fit to eval.
    Valid,
}
40
41impl State {
42    /// Construct a new, default `State`.
43    #[must_use]
44    pub const fn new() -> Self {
45        Self::Valid
46    }
47
48    /// Whether this variant indicates a code block is open.
49    ///
50    /// This method can be used by a REPL to check whether to buffer code or
51    /// begin a multi-line editing session before attempting to eval the code on
52    /// an interpreter.
53    #[must_use]
54    pub fn is_code_block_open(self) -> bool {
55        !matches!(
56            self,
57            Self::Valid | Self::UnexpectedEnd | Self::UnexpectedRegexpBegin | Self::CodeTooLong
58        )
59    }
60
61    /// Whether this variant is a recoverable error.
62    ///
63    /// Recoverable errors should be handled by resetting the parser and input
64    /// buffer.
65    #[must_use]
66    pub fn is_recoverable_error(self) -> bool {
67        matches!(self, Self::CodeTooLong)
68    }
69
70    /// Whether this variant is a fatal parse error.
71    ///
72    /// Fatal parser states indicate the parser is corrupted and cannot be used
73    /// again.
74    #[must_use]
75    pub fn is_fatal(self) -> bool {
76        matches!(self, Self::ParseError)
77    }
78}
79
80impl Default for State {
81    fn default() -> Self {
82        Self::new()
83    }
84}
85
86/// Wraps a [`artichoke_backend`] mruby parser.
87#[derive(Debug)]
88pub struct Parser<'a> {
89    interp: &'a mut Artichoke,
90    parser: NonNull<sys::mrb_parser_state>,
91    context: NonNull<sys::mrbc_context>,
92}
93
94impl<'a> Parser<'a> {
95    /// Create a new parser from an interpreter instance.
96    #[must_use]
97    pub fn new(interp: &'a mut Artichoke) -> Option<Self> {
98        let state = interp.state.as_deref_mut()?;
99        let context = state.parser.as_mut()?.context_mut();
100        let context = NonNull::new(context)?;
101        // SAFETY: `mrb_parser_new` requires an initialized mruby interpreter,
102        // which is guaranteed by the `Artichoke` type.
103        let parser = unsafe { interp.with_ffi_boundary(|mrb| sys::mrb_parser_new(mrb)).ok()? };
104        let parser = NonNull::new(parser)?;
105        Some(Self {
106            interp,
107            parser,
108            context,
109        })
110    }
111
112    /// Return a reference to the wrapped interpreter.
113    #[must_use]
114    pub fn interp(&mut self) -> &mut Artichoke {
115        self.interp
116    }
117
118    /// Parse the code buffer to determine if the code is a complete expression
119    /// that could be evaluated even though it may not be syntactically or
120    /// semantically valid.
121    ///
122    /// # Errors
123    ///
124    /// If the supplied code is more than `isize::MAX` bytes long, an error is
125    /// returned,
126    ///
127    /// If the underlying parser returns a UTF-8 invalid error message, an error
128    /// is returned.
129    pub fn parse(&mut self, code: &[u8]) -> Result<State, Error> {
130        use sys::mrb_lex_state_enum::{
131            EXPR_ARG, EXPR_BEG, EXPR_CLASS, EXPR_CMDARG, EXPR_DOT, EXPR_END, EXPR_ENDARG, EXPR_ENDFN, EXPR_FNAME,
132            EXPR_MAX_STATE, EXPR_MID, EXPR_VALUE,
133        };
134
135        // SAFETY: The parser is already initialized and the context is owned by
136        // the Artichoke state.
137        let parser = unsafe { self.parser.as_mut() };
138        // SAFETY: The context is already initialized and the context is owned
139        // by the Artichoke state.
140        let context = unsafe { self.context.as_mut() };
141
142        let ptr = code.as_ptr().cast::<c_char>();
143        parser.s = ptr;
144        // SAFETY: the resulting pointer is within the bounds of the given
145        // `code` slice.
146        parser.send = unsafe { ptr.add(code.len()) };
147        parser.lineno = context.lineno;
148        // SAFETY: `mrb_parser_parser` requires an initialized mruby
149        // interpreter, and calling `interp.with_ffi_boundary` ensures the
150        // interpreter is initialized and packed for foreign code.
151        unsafe {
152            self.interp.with_ffi_boundary(|_| {
153                sys::mrb_parser_parse(parser, context);
154            })?;
155        }
156
157        if !parser.parsing_heredoc.is_null() {
158            return Ok(State::UnterminatedHeredoc);
159        }
160        if !parser.lex_strterm.is_null() {
161            return Ok(State::UnterminatedString);
162        }
163        let state = if parser.nerr > 0 {
164            let errmsg = parser.error_buffer[0].message;
165            if errmsg.is_null() {
166                return Ok(State::ParseError);
167            }
168            // SAFETY: `errmsg` is a pointer to a NUL-terminated C string.
169            let cstring = unsafe { CStr::from_ptr(errmsg) };
170            if let Ok(message) = cstring.to_str() {
171                match message {
172                    "syntax error, unexpected $end" => State::UnexpectedProgramEnd,
173                    "syntax error, unexpected keyword_end" => State::UnexpectedEnd,
174                    "syntax error, unexpected tREGEXP_BEG" => State::UnexpectedRegexpBegin,
175                    _ => State::ParseError,
176                }
177            } else {
178                State::ParseError
179            }
180        } else {
181            #[expect(clippy::match_same_arms, reason = "documentation on each arm")]
182            let code_has_unterminated_expression = match parser.lstate {
183                // beginning of a statement, that means previous line ended
184                EXPR_BEG => false,
185                // a message dot was the last token, there has to come more
186                EXPR_DOT => true,
187                // class keyword is not enough! we need also a name of the class
188                EXPR_CLASS => true,
189                // a method name is necessary
190                EXPR_FNAME => true,
191                // if, elsif, etc. without condition
192                EXPR_VALUE => true,
193                // an argument is the last token
194                EXPR_ARG => false,
195                // a block/proc/lambda argument is the last token
196                EXPR_CMDARG => false,
197                // an expression was ended
198                EXPR_END => false,
199                // closing parenthesis
200                EXPR_ENDARG => false,
201                // definition end
202                EXPR_ENDFN => false,
203                // jump keyword like break, return, ...
204                EXPR_MID => false,
205                // this token is unreachable and is used to do integer math on the
206                // values of `mrb_lex_state_enum`.
207                EXPR_MAX_STATE => false,
208            };
209            if code_has_unterminated_expression {
210                State::UnterminatedBlock
211            } else {
212                State::Valid
213            }
214        };
215        Ok(state)
216    }
217}
218
219impl Drop for Parser<'_> {
220    fn drop(&mut self) {
221        let Self { interp, parser, .. } = self;
222
223        // SAFETY: `mrb_parser_free` requires an initialized mruby interpreter,
224        // and calling `interp.with_ffi_boundary` ensures the interpreter is
225        // initialized and packed for foreign code.
226        unsafe {
227            let _ignored = interp.with_ffi_boundary(|_| {
228                sys::mrb_parser_free(parser.as_mut());
229            });
230        }
231        // There is no need to free `context` since it is owned by the
232        // Artichoke state.
233    }
234}