artichoke/parser.rs
1//! Detect if Ruby code parses successfully.
2//!
3//! The REPL needs to check if code is valid to determine whether it should
4//! enter multiline editing mode.
5
6use std::ffi::{CStr, c_char};
7use std::ptr::NonNull;
8
9use crate::backend::sys;
10use crate::backend::{Artichoke, Error};
11
12#[cfg(feature = "cli")]
13pub(crate) mod repl;
14
/// Outcome of asking the parser whether a chunk of Ruby code can be parsed.
///
/// A `State` only records whether artichoke was able to parse the code or, if
/// it was not, the reason why. Code that parses may still contain syntactic or
/// semantic errors.
///
/// The derived `PartialOrd` and `Ord` implementations compare variants in
/// declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum State {
    /// The parser reported an internal error. This is a fatal error.
    ParseError,
    /// The code is too long: it must be fewer than [`isize::MAX`] bytes.
    CodeTooLong,
    /// There are more end statements than the code can account for.
    UnexpectedEnd,
    /// The program ended while blocks were still unclosed.
    UnexpectedProgramEnd,
    /// The expression being parsed is a `Regexp` that has not been terminated.
    UnexpectedRegexpBegin,
    /// The expression being parsed is a block that has not been terminated.
    UnterminatedBlock,
    /// The expression being parsed is a heredoc that has not been terminated.
    UnterminatedHeredoc,
    /// The expression being parsed is a `String` that has not been terminated.
    UnterminatedString,
    /// The code is valid and fit to eval.
    Valid,
}
40
41impl State {
42 /// Construct a new, default `State`.
43 #[must_use]
44 pub const fn new() -> Self {
45 Self::Valid
46 }
47
48 /// Whether this variant indicates a code block is open.
49 ///
50 /// This method can be used by a REPL to check whether to buffer code or
51 /// begin a multi-line editing session before attempting to eval the code on
52 /// an interpreter.
53 #[must_use]
54 pub fn is_code_block_open(self) -> bool {
55 !matches!(
56 self,
57 Self::Valid | Self::UnexpectedEnd | Self::UnexpectedRegexpBegin | Self::CodeTooLong
58 )
59 }
60
61 /// Whether this variant is a recoverable error.
62 ///
63 /// Recoverable errors should be handled by resetting the parser and input
64 /// buffer.
65 #[must_use]
66 pub fn is_recoverable_error(self) -> bool {
67 matches!(self, Self::CodeTooLong)
68 }
69
70 /// Whether this variant is a fatal parse error.
71 ///
72 /// Fatal parser states indicate the parser is corrupted and cannot be used
73 /// again.
74 #[must_use]
75 pub fn is_fatal(self) -> bool {
76 matches!(self, Self::ParseError)
77 }
78}
79
80impl Default for State {
81 fn default() -> Self {
82 Self::new()
83 }
84}
85
86/// Wraps a [`artichoke_backend`] mruby parser.
87#[derive(Debug)]
88pub struct Parser<'a> {
89 interp: &'a mut Artichoke,
90 parser: NonNull<sys::mrb_parser_state>,
91 context: NonNull<sys::mrbc_context>,
92}
93
94impl<'a> Parser<'a> {
95 /// Create a new parser from an interpreter instance.
96 #[must_use]
97 pub fn new(interp: &'a mut Artichoke) -> Option<Self> {
98 let state = interp.state.as_deref_mut()?;
99 let context = state.parser.as_mut()?.context_mut();
100 let context = NonNull::new(context)?;
101 // SAFETY: `mrb_parser_new` requires an initialized mruby interpreter,
102 // which is guaranteed by the `Artichoke` type.
103 let parser = unsafe { interp.with_ffi_boundary(|mrb| sys::mrb_parser_new(mrb)).ok()? };
104 let parser = NonNull::new(parser)?;
105 Some(Self {
106 interp,
107 parser,
108 context,
109 })
110 }
111
112 /// Return a reference to the wrapped interpreter.
113 #[must_use]
114 pub fn interp(&mut self) -> &mut Artichoke {
115 self.interp
116 }
117
118 /// Parse the code buffer to determine if the code is a complete expression
119 /// that could be evaluated even though it may not be syntactically or
120 /// semantically valid.
121 ///
122 /// # Errors
123 ///
124 /// If the supplied code is more than `isize::MAX` bytes long, an error is
125 /// returned,
126 ///
127 /// If the underlying parser returns a UTF-8 invalid error message, an error
128 /// is returned.
129 pub fn parse(&mut self, code: &[u8]) -> Result<State, Error> {
130 use sys::mrb_lex_state_enum::{
131 EXPR_ARG, EXPR_BEG, EXPR_CLASS, EXPR_CMDARG, EXPR_DOT, EXPR_END, EXPR_ENDARG, EXPR_ENDFN, EXPR_FNAME,
132 EXPR_MAX_STATE, EXPR_MID, EXPR_VALUE,
133 };
134
135 // SAFETY: The parser is already initialized and the context is owned by
136 // the Artichoke state.
137 let parser = unsafe { self.parser.as_mut() };
138 // SAFETY: The context is already initialized and the context is owned
139 // by the Artichoke state.
140 let context = unsafe { self.context.as_mut() };
141
142 let ptr = code.as_ptr().cast::<c_char>();
143 parser.s = ptr;
144 // SAFETY: the resulting pointer is within the bounds of the given
145 // `code` slice.
146 parser.send = unsafe { ptr.add(code.len()) };
147 parser.lineno = context.lineno;
148 // SAFETY: `mrb_parser_parser` requires an initialized mruby
149 // interpreter, and calling `interp.with_ffi_boundary` ensures the
150 // interpreter is initialized and packed for foreign code.
151 unsafe {
152 self.interp.with_ffi_boundary(|_| {
153 sys::mrb_parser_parse(parser, context);
154 })?;
155 }
156
157 if !parser.parsing_heredoc.is_null() {
158 return Ok(State::UnterminatedHeredoc);
159 }
160 if !parser.lex_strterm.is_null() {
161 return Ok(State::UnterminatedString);
162 }
163 let state = if parser.nerr > 0 {
164 let errmsg = parser.error_buffer[0].message;
165 if errmsg.is_null() {
166 return Ok(State::ParseError);
167 }
168 // SAFETY: `errmsg` is a pointer to a NUL-terminated C string.
169 let cstring = unsafe { CStr::from_ptr(errmsg) };
170 if let Ok(message) = cstring.to_str() {
171 match message {
172 "syntax error, unexpected $end" => State::UnexpectedProgramEnd,
173 "syntax error, unexpected keyword_end" => State::UnexpectedEnd,
174 "syntax error, unexpected tREGEXP_BEG" => State::UnexpectedRegexpBegin,
175 _ => State::ParseError,
176 }
177 } else {
178 State::ParseError
179 }
180 } else {
181 #[expect(clippy::match_same_arms, reason = "documentation on each arm")]
182 let code_has_unterminated_expression = match parser.lstate {
183 // beginning of a statement, that means previous line ended
184 EXPR_BEG => false,
185 // a message dot was the last token, there has to come more
186 EXPR_DOT => true,
187 // class keyword is not enough! we need also a name of the class
188 EXPR_CLASS => true,
189 // a method name is necessary
190 EXPR_FNAME => true,
191 // if, elsif, etc. without condition
192 EXPR_VALUE => true,
193 // an argument is the last token
194 EXPR_ARG => false,
195 // a block/proc/lambda argument is the last token
196 EXPR_CMDARG => false,
197 // an expression was ended
198 EXPR_END => false,
199 // closing parenthesis
200 EXPR_ENDARG => false,
201 // definition end
202 EXPR_ENDFN => false,
203 // jump keyword like break, return, ...
204 EXPR_MID => false,
205 // this token is unreachable and is used to do integer math on the
206 // values of `mrb_lex_state_enum`.
207 EXPR_MAX_STATE => false,
208 };
209 if code_has_unterminated_expression {
210 State::UnterminatedBlock
211 } else {
212 State::Valid
213 }
214 };
215 Ok(state)
216 }
217}
218
219impl Drop for Parser<'_> {
220 fn drop(&mut self) {
221 let Self { interp, parser, .. } = self;
222
223 // SAFETY: `mrb_parser_free` requires an initialized mruby interpreter,
224 // and calling `interp.with_ffi_boundary` ensures the interpreter is
225 // initialized and packed for foreign code.
226 unsafe {
227 let _ignored = interp.with_ffi_boundary(|_| {
228 sys::mrb_parser_free(parser.as_mut());
229 });
230 }
231 // There is no need to free `context` since it is owned by the
232 // Artichoke state.
233 }
234}