onig/
region.rs

1#![allow(clippy::transmute_ptr_to_ref)]
2
3use std::iter::FusedIterator;
4use std::mem::transmute;
5use std::os::raw::{c_int, c_void};
6use std::ptr::null_mut;
7
8use super::flags::TraverseCallbackAt;
9use super::CaptureTreeNode;
10
11/// Represents a set of capture groups found in a search or match.
12#[derive(Debug, Eq, PartialEq)]
13#[repr(transparent)]
14pub struct Region {
15    pub(crate) raw: onig_sys::OnigRegion,
16}
17
18impl Region {
19    /// Create a new empty `Region`
20    pub fn new() -> Region {
21        Region {
22            raw: onig_sys::OnigRegion {
23                allocated: 0,
24                num_regs: 0,
25                beg: null_mut(),
26                end: null_mut(),
27                history_root: null_mut(),
28            },
29        }
30    }
31
32    /// Create a new region with a given capacity. This function allocates
33    /// a new region object as in `Region::new` and resizes it to
34    /// contain at least `capacity` regions.
35    ///
36    /// # Arguments
37    ///
38    /// * `capacity` - the number of captures this region should be
39    /// capable of storing without allocation.
40    pub fn with_capacity(capacity: usize) -> Region {
41        let mut region = Self::new();
42        region.reserve(capacity);
43        region
44    }
45
46    /// Clone From Raw
47    ///
48    /// Construct a new region based on an existing raw
49    /// `*onig_sys::OnigRegion` pointer by copying.
50    pub unsafe fn clone_from_raw(ptr: *mut onig_sys::OnigRegion) -> Self {
51        let mut region = Self::new();
52        onig_sys::onig_region_copy(&mut region.raw, ptr);
53        region
54    }
55
56    /// This can be used to clear out a region so it can be used
57    /// again. See [`onig_sys::onig_region_clear`][region_clear]
58    ///
59    /// [region_clear]: ./onig_sys/fn.onig_region_clear.html
60    pub fn clear(&mut self) {
61        unsafe {
62            onig_sys::onig_region_clear(&mut self.raw);
63        }
64    }
65
66    /// Get the current capacity of the region.
67    pub fn capacity(&self) -> usize {
68        self.raw.allocated as usize
69    }
70
71    /// Updates the region to contain `new_capacity` slots. See
72    /// [`onig_sys::onig_region_resize`][region_resize] for mor
73    /// information.
74    ///
75    /// [region_resize]: ./onig_sys/fn.onig_region_resize.html
76    ///
77    /// # Arguments
78    ///
79    ///  * `new_capacity` - The new number of groups in the region.
80    pub fn reserve(&mut self, new_capacity: usize) {
81        let r = unsafe { onig_sys::onig_region_resize(&mut self.raw, new_capacity as c_int) };
82        if r != onig_sys::ONIG_NORMAL as i32 {
83            panic!("Onig: fail to memory allocation during region resize")
84        }
85    }
86
87    /// Get the size of the region.
88    ///
89    /// Returns the number of registers in the region.
90    pub fn len(&self) -> usize {
91        self.raw.num_regs as usize
92    }
93
94    /// Check if the region is empty.
95    ///
96    /// Returns true if there are no registers in the region.
97    pub fn is_empty(&self) -> bool {
98        self.len() == 0
99    }
100
101    /// Returns the start and end positions of the Nth capture group.
102    ///
103    /// Returns `None` if `pos` is not a valid capture group or if the
104    /// capture group did not match anything. The positions returned
105    /// are always byte indices with respect to the original string
106    /// matched.
107    pub fn pos(&self, pos: usize) -> Option<(usize, usize)> {
108        if pos >= self.len() {
109            return None;
110        }
111        let pos = pos as isize;
112        let (beg, end) = unsafe { (*self.raw.beg.offset(pos), *self.raw.end.offset(pos)) };
113        if beg != onig_sys::ONIG_REGION_NOTPOS {
114            Some((beg as usize, end as usize))
115        } else {
116            None
117        }
118    }
119
120    /// Get Capture Tree
121    ///
122    /// Returns the capture tree for this region, if there is one.
123    pub fn tree(&self) -> Option<&CaptureTreeNode> {
124        let tree = unsafe { onig_sys::onig_get_capture_tree(self.raw_mut()) };
125        if tree.is_null() {
126            None
127        } else {
128            Some(unsafe { transmute(tree) })
129        }
130    }
131
132    /// Get an iterator over the captures in the region.
133    pub fn iter(&self) -> RegionIter<'_> {
134        RegionIter {
135            region: self,
136            pos: 0,
137        }
138    }
139
140    /// Walk the Tree of Captures
141    ///
142    /// The given callback is invoked for each node in the capture
143    /// tree. Each node is passed to the callback before any children.
144    pub fn tree_traverse<F>(&self, callback: F) -> i32
145    where
146        F: Fn(u32, (usize, usize), u32) -> bool,
147    {
148        self.tree_traverse_at(TraverseCallbackAt::CALLBACK_AT_FIRST, callback)
149    }
150
151    /// Walk the Tree of Captures in a Given Order
152    ///
153    /// The given callback is invoked for each node in the capture
154    /// tree. The order in which the callback is invoked can be
155    /// chosen.
156    pub fn tree_traverse_at<F>(&self, at: TraverseCallbackAt, mut callback: F) -> i32
157    where
158        F: Fn(u32, (usize, usize), u32) -> bool,
159    {
160        use onig_sys::onig_capture_tree_traverse;
161
162        extern "C" fn traverse_cb<F>(
163            group: c_int,
164            beg: c_int,
165            end: c_int,
166            level: c_int,
167            _at: c_int,
168            ud: *mut c_void,
169        ) -> c_int
170        where
171            F: Fn(u32, (usize, usize), u32) -> bool,
172        {
173            let callback = unsafe { &*(ud as *mut F) };
174            if callback(group as u32, (beg as usize, end as usize), level as u32) {
175                0
176            } else {
177                -1
178            }
179        }
180
181        unsafe {
182            onig_capture_tree_traverse(
183                self.raw_mut(),
184                at.bits() as c_int,
185                Some(traverse_cb::<F>),
186                &mut callback as *mut F as *mut c_void,
187            )
188        }
189    }
190
191    /// Convert a reference to self to a mutable pointer. This
192    /// shouldn't ever actually be used to mutate the underlying
193    /// region. It's needed to match the bindgened types though.
194    fn raw_mut(&self) -> *mut onig_sys::OnigRegion {
195        &self.raw as *const onig_sys::OnigRegion as *mut onig_sys::OnigRegion
196    }
197}
198
199impl Default for Region {
200    fn default() -> Self {
201        Region::new()
202    }
203}
204
205impl Drop for Region {
206    fn drop(&mut self) {
207        unsafe {
208            onig_sys::onig_region_free(&mut self.raw, 0);
209        }
210    }
211}
212
213impl Clone for Region {
214    fn clone(&self) -> Self {
215        unsafe { Self::clone_from_raw(self.raw_mut()) }
216    }
217}
218
219impl<'a> IntoIterator for &'a Region {
220    type Item = (usize, usize);
221    type IntoIter = RegionIter<'a>;
222    fn into_iter(self) -> Self::IntoIter {
223        self.iter()
224    }
225}
226
227/// Region Iterator
228///
229/// This struct is responsible for holding iteration state over a
230/// given region.
231pub struct RegionIter<'a> {
232    region: &'a Region,
233    pos: usize,
234}
235
236impl<'a> Iterator for RegionIter<'a> {
237    type Item = (usize, usize);
238
239    fn next(&mut self) -> Option<Self::Item> {
240        let next = self.region.pos(self.pos);
241        self.pos += 1;
242        next
243    }
244
245    fn size_hint(&self) -> (usize, Option<usize>) {
246        let len = self.region.len();
247        (len, Some(len))
248    }
249
250    fn count(self) -> usize {
251        self.region.len()
252    }
253}
254
255impl<'a> FusedIterator for RegionIter<'a> {}
256
257impl<'a> ExactSizeIterator for RegionIter<'a> {}
258
#[cfg(test)]
mod tests {
    use super::super::{Regex, SearchOptions};
    use super::*;

    /// Constructing and dropping an empty region must not crash.
    #[test]
    fn test_region_create() {
        Region::new();
    }

    /// Clearing a freshly created region must not crash.
    #[test]
    fn test_region_clear() {
        let mut region = Region::new();
        region.clear();
    }

    /// A clone reports the same number of registers as its source.
    #[test]
    fn test_region_copy() {
        let region = Region::new();
        let new_region = region.clone();
        assert_eq!(new_region.len(), region.len());
    }

    /// `reserve` and `with_capacity` are reflected by `capacity`.
    #[test]
    fn test_region_resize() {
        {
            let mut region = Region::new();
            assert_eq!(region.capacity(), 0);
            region.reserve(100);
            {
                // can still get the capacity without a mutable borrow
                let region_borrowed = &region;
                assert_eq!(region_borrowed.capacity(), 100);
            }
        }

        {
            let region = Region::with_capacity(10);
            assert_eq!(region.capacity(), 10);
        }
    }

    /// Iterating an empty region by reference yields nothing.
    #[test]
    fn test_region_empty_iterate() {
        let region = Region::new();
        for _ in &region {
            panic!("region should not contain any elements");
        }
    }

    /// `iter` on an empty region collects to an empty vector.
    #[test]
    fn test_region_iter_returns_iterator() {
        let region = Region::new();
        let all = region.iter().collect::<Vec<_>>();
        assert_eq!(all, Vec::new());
    }

    /// After a successful search the iterator yields the whole match
    /// followed by each capture group.
    #[test]
    fn test_region_iterate_with_captures() {
        let mut region = Region::new();
        let reg = Regex::new("(a+)(b+)(c+)").unwrap();
        let res = reg.search_with_options(
            "aaaabbbbc",
            0,
            9,
            SearchOptions::SEARCH_OPTION_NONE,
            Some(&mut region),
        );
        assert!(res.is_some());
        let all = region.iter().collect::<Vec<_>>();
        assert_eq!(all, vec![(0, 9), (0, 4), (4, 8), (8, 9)]);
    }

    /// A `for` loop, `iter().collect()` and `iter().count()` all agree.
    #[test]
    fn test_region_all_iteration_options() {
        let mut region = Region::new();
        let reg = Regex::new("a(b)").unwrap();
        let res = reg.search_with_options(
            "habitat",
            0,
            7,
            SearchOptions::SEARCH_OPTION_NONE,
            Some(&mut region),
        );
        assert!(res.is_some());

        // collect into a vector by iterating with a for loop
        let mut a = Vec::<(usize, usize)>::new();
        for pos in &region {
            a.push(pos)
        }

        // collect into a vector by using `iter` and collect
        let b = region.iter().collect::<Vec<_>>();

        let expected = vec![(1, 3), (2, 3)];
        assert_eq!(expected, a);
        assert_eq!(expected, b);

        assert_eq!(2, region.iter().count());
    }
}