artichoke_backend/extn/core/matchdata/
mod.rs1use std::collections::HashMap;
13use std::fmt::Write as _;
14use std::ops::{Bound, RangeBounds};
15use std::str;
16
17use bstr::BString;
18use scolapasta_string_escape::format_debug_escape_into;
19
20use crate::convert::{implicitly_convert_to_int, implicitly_convert_to_string};
21use crate::extn::core::regexp::Regexp;
22use crate::extn::core::regexp::backend::NilableString;
23use crate::extn::core::symbol::Symbol;
24use crate::extn::prelude::*;
25use crate::fmt::WriteError;
26
27mod boxing;
28pub(in crate::extn) mod mruby;
29pub(super) mod trampoline;
30
/// A span of byte indexes into a haystack, kept as an explicit pair of start
/// and end [`Bound`]s so that any `RangeBounds<usize>` value can be stored.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Region {
    /// Lower bound of the span.
    start: Bound<usize>,
    /// Upper bound of the span.
    end: Bound<usize>,
}
36
37impl Region {
38 fn from_range<R>(bounds: R) -> Self
39 where
40 R: RangeBounds<usize>,
41 {
42 let start = bounds.start_bound().cloned();
43 let end = bounds.end_bound().cloned();
44 Region { start, end }
45 }
46
47 fn offset(&self) -> usize {
48 match self.start {
49 Bound::Included(bound) => bound,
50 Bound::Excluded(bound) => bound.checked_sub(1).unwrap_or_default(),
51 Bound::Unbounded => 0,
52 }
53 }
54}
55
56impl RangeBounds<usize> for Region {
57 fn start_bound(&self) -> Bound<&usize> {
58 self.start.as_ref()
59 }
60
61 fn end_bound(&self) -> Bound<&usize> {
62 self.end.as_ref()
63 }
64}
65
/// Selects a single capture group, either by position or by name.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Capture<'a> {
    /// Capture group identified by its integer index.
    GroupIndex(i64),
    /// Capture group identified by its name, given as raw bytes.
    GroupName(&'a [u8]),
}
71
72#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
73pub enum CaptureExtract<'a> {
74 GroupIndex(i64),
75 GroupName(&'a [u8]),
76 Symbol(Symbol),
77}
78
79impl<'a> TryConvertMut<&'a mut Value, CaptureExtract<'a>> for Artichoke {
80 type Error = Error;
81
82 fn try_convert_mut(&mut self, value: &'a mut Value) -> Result<CaptureExtract<'a>, Self::Error> {
83 if let Ok(idx) = implicitly_convert_to_int(self, *value) {
84 Ok(CaptureExtract::GroupIndex(idx))
85 } else if let Ok(symbol) = unsafe { Symbol::unbox_from_value(value, self) } {
86 let sym = symbol.id();
87 Ok(CaptureExtract::Symbol(sym.into()))
88 } else {
89 let name = unsafe { implicitly_convert_to_string(self, value)? };
90 Ok(CaptureExtract::GroupName(name))
91 }
92 }
93}
94
/// Argument forms accepted by `MatchData::capture_at`.
///
/// Variant order is significant: it defines the derived `PartialOrd`/`Ord`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CaptureAt<'a> {
    /// A single capture group selected by integer index.
    GroupIndex(i64),
    /// A single capture group selected by name, given as raw bytes.
    GroupName(&'a [u8]),
    /// A run of capture groups: `(start, len)`.
    StartLen(i64, i64),
}
101
/// Result of looking up captures with `MatchData::capture_at`.
///
/// Variant order is significant: it defines the derived `PartialOrd`/`Ord`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CaptureMatch {
    /// Nothing to return; converted to `nil`.
    None,
    /// A single capture group, which may itself be absent (`None`).
    Single(Option<Vec<u8>>),
    /// A run of capture groups, each of which may be absent.
    Range(Vec<Option<Vec<u8>>>),
}
108
109impl TryConvertMut<CaptureMatch, Value> for Artichoke {
110 type Error = Error;
111
112 fn try_convert_mut(&mut self, value: CaptureMatch) -> Result<Value, Self::Error> {
113 match value {
114 CaptureMatch::None => Ok(Value::nil()),
115 CaptureMatch::Single(capture) => self.try_convert_mut(capture),
116 CaptureMatch::Range(captures) => self.try_convert_mut(captures),
117 }
118 }
119}
120
121#[derive(Debug, Clone, Hash, PartialEq, Eq)]
122pub struct MatchData {
123 haystack: BString,
124 regexp: Regexp,
125 region: Region,
126}
127
128impl MatchData {
129 #[must_use]
130 pub fn new<R>(haystack: Vec<u8>, regexp: Regexp, bounds: R) -> Self
131 where
132 R: RangeBounds<usize>,
133 {
134 let region = Region::from_range(bounds);
135 Self {
136 haystack: haystack.into(),
137 regexp,
138 region,
139 }
140 }
141
142 pub fn set_region<R>(&mut self, bounds: R)
143 where
144 R: RangeBounds<usize>,
145 {
146 self.region = Region::from_range(bounds);
147 }
148
149 #[must_use]
150 pub fn matched_region(&self) -> &[u8] {
151 let matched = match (self.region.start, self.region.end) {
152 (Bound::Included(start), Bound::Included(end)) => self.haystack.get(start..=end),
153 (Bound::Included(start), Bound::Excluded(end)) => self.haystack.get(start..end),
154 (Bound::Included(start), Bound::Unbounded) => self.haystack.get(start..),
155 (Bound::Excluded(start), Bound::Included(end)) => self.haystack.get((start + 1)..=end),
156 (Bound::Excluded(start), Bound::Excluded(end)) => self.haystack.get((start + 1)..end),
157 (Bound::Excluded(start), Bound::Unbounded) => self.haystack.get(start + 1..),
158 (Bound::Unbounded, Bound::Included(end)) => self.haystack.get(..=end),
159 (Bound::Unbounded, Bound::Excluded(end)) => self.haystack.get(..end),
160 (Bound::Unbounded, Bound::Unbounded) => self.haystack.get(..),
161 };
162 matched.unwrap_or_default()
163 }
164
165 #[inline]
166 pub fn begin(&self, capture: Capture<'_>) -> Result<Option<usize>, Error> {
167 if let Some([begin, _]) = self.offset(capture)? {
168 Ok(Some(begin))
169 } else {
170 Ok(None)
171 }
172 }
173
174 pub fn capture_at(&self, at: CaptureAt<'_>) -> Result<CaptureMatch, Error> {
175 let haystack = self.matched_region();
176 let captures = if let Some(captures) = self.regexp.inner().captures(haystack)? {
177 captures
178 } else {
179 return Ok(CaptureMatch::None);
180 };
181 match at {
182 CaptureAt::GroupIndex(index) => match aref::offset_to_index(index, captures.len()) {
183 None => Ok(CaptureMatch::None),
184 Some(idx) => {
185 if let Some(capture) = captures.into_iter().nth(idx) {
186 Ok(CaptureMatch::Single(capture))
187 } else {
188 Ok(CaptureMatch::None)
189 }
190 }
191 },
192 CaptureAt::GroupName(name) => {
193 let indexes = self.regexp.inner().capture_indexes_for_name(name)?;
194 if let Some(indexes) = indexes {
195 let capture = indexes
196 .iter()
197 .copied()
198 .filter_map(|index| captures.get(index).and_then(Option::as_deref))
199 .last();
200 Ok(CaptureMatch::Single(capture.map(<[_]>::to_vec)))
201 } else {
202 let mut message = String::from("undefined group name reference: \"");
203 format_debug_escape_into(&mut message, name).map_err(WriteError::from)?;
204 message.push('"');
205 Err(IndexError::from(message).into())
206 }
207 }
208 CaptureAt::StartLen(start, len) => {
209 if let Ok(len) = usize::try_from(len) {
210 let start = if let Some(start) = aref::offset_to_index(start, captures.len()) {
211 start
212 } else {
213 return Ok(CaptureMatch::None);
214 };
215 let matches = captures.into_iter().skip(start).take(len).collect::<Vec<_>>();
216 Ok(CaptureMatch::Range(matches))
217 } else {
218 Ok(CaptureMatch::None)
219 }
220 }
221 }
222 }
223
224 pub fn captures(&self) -> Result<Option<Vec<Option<Vec<u8>>>>, Error> {
225 let haystack = self.matched_region();
226 let captures = self.regexp.inner().captures(haystack)?;
227 if let Some(mut captures) = captures {
228 captures.remove(0);
232 Ok(Some(captures))
233 } else {
234 Ok(None)
235 }
236 }
237
238 #[inline]
239 pub fn end(&self, capture: Capture<'_>) -> Result<Option<usize>, Error> {
240 if let Some([_, end]) = self.offset(capture)? {
241 Ok(Some(end))
242 } else {
243 Ok(None)
244 }
245 }
246
247 #[inline]
248 pub fn len(&self) -> Result<usize, Error> {
249 let haystack = self.matched_region();
250 self.regexp.inner().captures_len(Some(haystack))
251 }
252
253 #[inline]
254 pub fn named_captures(&self) -> Result<Option<HashMap<Vec<u8>, NilableString>>, Error> {
255 let haystack = self.matched_region();
256 self.regexp.inner().named_captures_for_haystack(haystack)
257 }
258
259 #[inline]
260 #[must_use]
261 pub fn names(&self) -> Vec<Vec<u8>> {
262 self.regexp.names()
263 }
264
265 pub fn offset(&self, capture: Capture<'_>) -> Result<Option<[usize; 2]>, Error> {
266 let haystack = self.matched_region();
267 let index = match capture {
268 Capture::GroupIndex(index) => {
269 let captures_len = self.regexp.inner().captures_len(Some(haystack))?;
270 match usize::try_from(index) {
271 Ok(idx) if idx < captures_len => idx,
272 _ => {
273 let mut message = String::new();
274 write!(&mut message, "index {index} out of matches").map_err(WriteError::from)?;
275 return Err(IndexError::from(message).into());
276 }
277 }
278 }
279 Capture::GroupName(name) => {
280 let indexes = self.regexp.inner().capture_indexes_for_name(name)?;
281 if let Some(index) = indexes.and_then(|indexes| indexes.last().copied()) {
282 index
283 } else {
284 return Ok(None);
285 }
286 }
287 };
288 if let Some((begin, end)) = self.regexp.inner().pos(haystack, index)? {
289 let begin = if let Some(Ok(haystack)) = haystack.get(..begin).map(str::from_utf8) {
290 haystack.chars().count()
291 } else {
292 haystack.len()
293 };
294 let end = if let Some(Ok(haystack)) = haystack.get(..end).map(str::from_utf8) {
295 haystack.chars().count()
296 } else {
297 haystack.len()
298 };
299 let offset = self.region.offset();
300 Ok(Some([offset + begin, offset + end]))
301 } else {
302 Ok(None)
303 }
304 }
305
306 #[must_use]
307 pub fn pre(&self) -> &[u8] {
308 let pre = match self.region.start {
309 Bound::Included(start) => self.haystack.get(..start),
310 Bound::Excluded(start) => self.haystack.get(..=start),
311 Bound::Unbounded => return &[],
312 };
313 pre.unwrap_or_else(|| {
314 self.haystack.as_slice()
316 })
317 }
318
319 #[must_use]
320 pub fn post(&self) -> &[u8] {
321 let post = match self.region.end {
322 Bound::Included(end) => self.haystack.get(end + 1..),
323 Bound::Excluded(end) => self.haystack.get(end..),
324 Bound::Unbounded => return &[],
325 };
326 post.unwrap_or_default()
328 }
329
330 #[inline]
331 #[must_use]
332 pub fn regexp(&self) -> &Regexp {
333 &self.regexp
334 }
335
336 #[inline]
337 #[must_use]
338 pub fn string(&self) -> &[u8] {
339 self.haystack.as_slice()
340 }
341
342 #[inline]
343 pub fn to_a(&self) -> Result<Option<Vec<NilableString>>, Error> {
344 let haystack = self.matched_region();
345 self.regexp.inner().captures(haystack)
346 }
347
348 #[inline]
349 pub fn to_s(&self) -> Result<Option<&[u8]>, Error> {
350 let haystack = self.matched_region();
351 self.regexp.inner().capture0(haystack)
352 }
353}
354
#[cfg(test)]
mod tests {
    use crate::test::prelude::*;

    /// Label passed to the panic helper to identify this suite.
    const SUBJECT: &str = "MatchData";
    /// Ruby source of the functional test, embedded at compile time.
    const FUNCTIONAL_TEST: &[u8] = include_bytes!("matchdata_functional_test.rb");

    /// Evaluates the embedded Ruby test file, then evaluates `spec`; either
    /// step failing panics with a backtrace.
    #[test]
    fn functional() {
        let mut interp = interpreter();

        let loaded = interp.eval(FUNCTIONAL_TEST);
        unwrap_or_panic_with_backtrace(&mut interp, SUBJECT, loaded);

        let executed = interp.eval(b"spec");
        unwrap_or_panic_with_backtrace(&mut interp, SUBJECT, executed);
    }
}