onig/region.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
#![allow(clippy::transmute_ptr_to_ref)]
use std::iter::FusedIterator;
use std::mem::transmute;
use std::os::raw::{c_int, c_void};
use std::ptr::null_mut;
use super::flags::TraverseCallbackAt;
use super::CaptureTreeNode;
/// Represents a set of capture groups found in a search or match.
///
/// This is a transparent wrapper around the raw `onig_sys::OnigRegion`
/// structure, whose fields (`allocated`, `num_regs`, `beg`, `end` and
/// `history_root`) are filled in by the Oniguruma C library.
///
/// NOTE(review): `Eq`/`PartialEq` are derived, so equality is delegated to
/// `onig_sys::OnigRegion`. If that comparison looks at the raw `beg`/`end`
/// pointers rather than the registers they point to, a region and its clone
/// will not compare equal -- confirm before relying on `==`.
#[derive(Debug, Eq, PartialEq)]
#[repr(transparent)]
pub struct Region {
// The wrapped C structure; crate-visible so sibling modules can hand it to
// the raw `onig_sys` functions.
pub(crate) raw: onig_sys::OnigRegion,
}
impl Region {
/// Build a `Region` that holds no capture registers.
///
/// Both counters of the wrapped `OnigRegion` start at zero and all of its
/// pointers start out null.
pub fn new() -> Region {
    let raw = onig_sys::OnigRegion {
        allocated: 0,
        num_regs: 0,
        beg: null_mut(),
        end: null_mut(),
        history_root: null_mut(),
    };
    Region { raw }
}
/// Create a new region with a given capacity. This function allocates
/// a new region object as in `Region::new` and resizes it to
/// contain at least `capacity` regions.
///
/// # Arguments
///
/// * `capacity` - the number of captures this region should be
/// capable of storing without allocation.
pub fn with_capacity(capacity: usize) -> Region {
let mut region = Self::new();
region.reserve(capacity);
region
}
/// Clone From Raw
///
/// Construct a new region based on an existing raw
/// `*onig_sys::OnigRegion` pointer by copying.
pub unsafe fn clone_from_raw(ptr: *mut onig_sys::OnigRegion) -> Self {
let mut region = Self::new();
onig_sys::onig_region_copy(&mut region.raw, ptr);
region
}
/// Reset the region so that it can be reused for another search.
///
/// This simply forwards to
/// [`onig_sys::onig_region_clear`][region_clear] on the wrapped raw region.
///
/// [region_clear]: ./onig_sys/fn.onig_region_clear.html
pub fn clear(&mut self) {
    let raw: *mut onig_sys::OnigRegion = &mut self.raw;
    unsafe { onig_sys::onig_region_clear(raw) };
}
/// Report how many register slots are currently allocated.
///
/// The value is read from the `allocated` field of the raw region.
pub fn capacity(&self) -> usize {
    let allocated = self.raw.allocated;
    allocated as usize
}
/// Updates the region to contain `new_capacity` slots. See
/// [`onig_sys::onig_region_resize`][region_resize] for more
/// information.
///
/// [region_resize]: ./onig_sys/fn.onig_region_resize.html
///
/// # Arguments
///
///  * `new_capacity` - The new number of groups in the region.
///
/// # Panics
///
/// Panics if `new_capacity` cannot be represented as a C `int`, or if the
/// underlying resize call returns anything other than `ONIG_NORMAL`.
pub fn reserve(&mut self, new_capacity: usize) {
    // `onig_region_resize` takes a C `int`. A plain `as` cast would silently
    // wrap an oversized request into a small or even negative register
    // count, so the conversion is checked instead.
    let slots = match <c_int as std::convert::TryFrom<usize>>::try_from(new_capacity) {
        Ok(slots) => slots,
        Err(_) => panic!(
            "Onig: region capacity {} does not fit in a C int",
            new_capacity
        ),
    };
    let r = unsafe { onig_sys::onig_region_resize(&mut self.raw, slots) };
    if r != onig_sys::ONIG_NORMAL as i32 {
        panic!("Onig: fail to memory allocation during region resize")
    }
}
/// Number of registers held by the region.
///
/// The value is read from the `num_regs` field of the raw region.
pub fn len(&self) -> usize {
    let registers = self.raw.num_regs;
    registers as usize
}
/// Tell whether the region holds no registers at all.
///
/// Returns `true` exactly when `len()` would return zero.
pub fn is_empty(&self) -> bool {
    // `len()` is `num_regs` cast to `usize`, which is zero only when
    // `num_regs` itself is zero.
    self.raw.num_regs == 0
}
/// Look up the span of the Nth capture group.
///
/// Returns `None` when `pos` is not below `len()` or when the group's start
/// register holds `ONIG_REGION_NOTPOS`, i.e. the group did not match
/// anything. Otherwise the `(start, end)` pair is returned; the positions
/// are byte indices with respect to the original string matched.
pub fn pos(&self, pos: usize) -> Option<(usize, usize)> {
    if pos < self.len() {
        let index = pos as isize;
        // `index` is below `num_regs` here. This assumes Oniguruma keeps
        // `beg` and `end` pointing at arrays of at least `num_regs`
        // entries -- TODO confirm.
        let start = unsafe { *self.raw.beg.offset(index) };
        let stop = unsafe { *self.raw.end.offset(index) };
        if start == onig_sys::ONIG_REGION_NOTPOS {
            None
        } else {
            Some((start as usize, stop as usize))
        }
    } else {
        None
    }
}
/// Access the root of the capture tree.
///
/// Asks `onig_sys::onig_get_capture_tree` for the tree attached to this
/// region and returns `None` when that pointer is null.
pub fn tree(&self) -> Option<&CaptureTreeNode> {
    let root = unsafe { onig_sys::onig_get_capture_tree(self.raw_mut()) };
    if root.is_null() {
        return None;
    }
    // `root` is non-null here. Reinterpreting the raw pointer as a reference
    // presumably relies on `CaptureTreeNode` having the same layout as the
    // raw node type -- verify against its definition.
    let node: &CaptureTreeNode = unsafe { transmute(root) };
    Some(node)
}
/// Create an iterator over the capture spans of this region.
///
/// The iterator borrows the region and starts at register 0.
pub fn iter(&self) -> RegionIter<'_> {
    let pos = 0;
    RegionIter { region: self, pos }
}
/// Visit every node of the capture tree, parents first.
///
/// This is a shorthand for `tree_traverse_at` with
/// `TraverseCallbackAt::CALLBACK_AT_FIRST`, so each node is passed to the
/// callback before any of its children. The return value is whatever
/// `tree_traverse_at` returns.
pub fn tree_traverse<F>(&self, callback: F) -> i32
where
    F: Fn(u32, (usize, usize), u32) -> bool,
{
    let order = TraverseCallbackAt::CALLBACK_AT_FIRST;
    self.tree_traverse_at(order, callback)
}
/// Walk the Tree of Captures in a Given Order
///
/// The given callback is invoked for each node in the capture
/// tree. The order in which the callback is invoked can be
/// chosen.
pub fn tree_traverse_at<F>(&self, at: TraverseCallbackAt, mut callback: F) -> i32
where
F: Fn(u32, (usize, usize), u32) -> bool,
{
use onig_sys::onig_capture_tree_traverse;
extern "C" fn traverse_cb<F>(
group: c_int,
beg: c_int,
end: c_int,
level: c_int,
_at: c_int,
ud: *mut c_void,
) -> c_int
where
F: Fn(u32, (usize, usize), u32) -> bool,
{
let callback = unsafe { &*(ud as *mut F) };
if callback(group as u32, (beg as usize, end as usize), level as u32) {
0
} else {
-1
}
}
unsafe {
onig_capture_tree_traverse(
self.raw_mut(),
at.bits() as c_int,
Some(traverse_cb::<F>),
&mut callback as *mut F as *mut c_void,
)
}
}
/// Produce a `*mut` pointer to the wrapped raw region from `&self`.
///
/// The pointer must never actually be used to mutate the region; it only
/// exists because the generated bindings take mutable pointers.
fn raw_mut(&self) -> *mut onig_sys::OnigRegion {
    let shared: *const onig_sys::OnigRegion = &self.raw;
    shared as *mut onig_sys::OnigRegion
}
}
impl Default for Region {
fn default() -> Self {
Region::new()
}
}
/// Releases the region through Oniguruma when the wrapper goes away.
impl Drop for Region {
    fn drop(&mut self) {
        let raw: *mut onig_sys::OnigRegion = &mut self.raw;
        // The second argument is `0`; presumably this asks Oniguruma to
        // release only what the region owns and not the `OnigRegion` struct
        // itself, which is stored inline in `self` -- confirm against the
        // Oniguruma API documentation.
        unsafe { onig_sys::onig_region_free(raw, 0) };
    }
}
impl Clone for Region {
fn clone(&self) -> Self {
unsafe { Self::clone_from_raw(self.raw_mut()) }
}
}
impl<'a> IntoIterator for &'a Region {
type Item = (usize, usize);
type IntoIter = RegionIter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
/// Region Iterator
///
/// This struct is responsible for holding iteration state over a
/// given region. It borrows the region for as long as it lives and can be
/// cloned to remember a position.
#[derive(Debug, Clone)]
pub struct RegionIter<'a> {
    /// The region whose registers are being walked.
    region: &'a Region,
    /// Index of the register the next call to `next` will look at.
    pos: usize,
}
/// Yields the `(start, end)` span of each capture group in register order.
///
/// Iteration ends at the end of the region or at the first group that did
/// not match anything, whichever comes first, and the iterator then stays
/// exhausted.
impl<'a> Iterator for RegionIter<'a> {
    type Item = (usize, usize);

    /// Return the span of the next register, or `None` once iteration is over.
    fn next(&mut self) -> Option<Self::Item> {
        let len = self.region.len();
        if self.pos >= len {
            // Already exhausted: do not keep bumping `pos`, so it can never
            // overflow and the remaining count never underflows.
            return None;
        }
        match self.region.pos(self.pos) {
            Some(span) => {
                self.pos += 1;
                Some(span)
            }
            None => {
                // An unmatched group ends the iteration. Jump to the end so
                // that later calls keep returning `None`; previously a
                // following matched group would be yielded after a `None`.
                self.pos = len;
                None
            }
        }
    }

    /// Number of registers that have not been visited yet.
    ///
    /// Registers of unmatched groups are included in this number even though
    /// they are never yielded, so the hint can over-report for such regions.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.region.len().saturating_sub(self.pos);
        (remaining, Some(remaining))
    }

    /// Number of registers left to visit, consistent with `size_hint`.
    fn count(self) -> usize {
        self.region.len().saturating_sub(self.pos)
    }
}
// Marker: declares that `next` keeps returning `None` once it has done so.
// NOTE(review): this depends on the `Iterator` impl never yielding `Some`
// after a `None`; verify that for regions with unmatched groups.
impl FusedIterator for RegionIter<'_> {}

// Marker: declares that `size_hint` reports the exact remaining length.
// NOTE(review): verify that `size_hint` accounts for items already consumed.
impl ExactSizeIterator for RegionIter<'_> {}
#[cfg(test)]
mod tests {
    use super::super::{Regex, SearchOptions};
    use super::*;

    /// An empty region can be created and dropped.
    #[test]
    fn test_region_create() {
        Region::new();
    }

    /// Clearing an empty region is allowed.
    #[test]
    fn test_region_clear() {
        let mut region = Region::new();
        region.clear();
    }

    /// A clone reports the same number of registers as its source.
    #[test]
    fn test_region_copy() {
        let region = Region::new();
        let new_region = region.clone();
        assert_eq!(new_region.len(), region.len());
    }

    /// `reserve` and `with_capacity` both set the reported capacity.
    #[test]
    fn test_region_resize() {
        {
            let mut region = Region::new();
            assert_eq!(region.capacity(), 0);
            region.reserve(100);
            {
                // can still get the capacity without a mutable borrow
                let region_borrowed = &region;
                assert_eq!(region_borrowed.capacity(), 100);
            }
        }
        {
            let region = Region::with_capacity(10);
            assert_eq!(region.capacity(), 10);
        }
    }

    /// Iterating an empty region by reference yields nothing.
    #[test]
    fn test_region_empty_iterate() {
        let region = Region::new();
        for _ in &region {
            panic!("region should not contain any elements");
        }
    }

    /// `iter` on an empty region collects into an empty vector.
    #[test]
    fn test_region_iter_returns_iterator() {
        let region = Region::new();
        let all = region.iter().collect::<Vec<_>>();
        assert_eq!(all, Vec::new());
    }

    /// After a search, the iterator yields the whole match followed by each
    /// capture group.
    #[test]
    fn test_region_iterate_with_captures() {
        let mut region = Region::new();
        let reg = Regex::new("(a+)(b+)(c+)").unwrap();
        let res = reg.search_with_options(
            "aaaabbbbc",
            0,
            9,
            SearchOptions::SEARCH_OPTION_NONE,
            Some(&mut region),
        );
        assert!(res.is_some());
        let all = region.iter().collect::<Vec<_>>();
        assert_eq!(all, vec![(0, 9), (0, 4), (4, 8), (8, 9)]);
    }

    /// The `for` loop, `iter().collect()` and `iter().count()` all agree.
    #[test]
    fn test_region_all_iteration_options() {
        let mut region = Region::new();
        let reg = Regex::new("a(b)").unwrap();
        let res = reg.search_with_options(
            "habitat",
            0,
            7,
            SearchOptions::SEARCH_OPTION_NONE,
            Some(&mut region),
        );
        assert!(res.is_some());

        // collect into a vector by iterating with a for loop
        let mut a = Vec::<(usize, usize)>::new();
        for pos in &region {
            a.push(pos)
        }

        // collect into a vector by using `iter` and collect
        let b = region.iter().collect::<Vec<_>>();

        let expected = vec![(1, 3), (2, 3)];
        assert_eq!(expected, a);
        assert_eq!(expected, b);

        assert_eq!(2, region.iter().count());
    }
}