onig/lib.rs
1//! This crate provides a safe wrapper around the
2//! [Oniguruma](https://github.com/kkos/oniguruma) regular expression library.
3//!
4//! # Examples
5//!
6//! ```rust
7//! use onig::Regex;
8//!
9//! let regex = Regex::new("e(l+)").unwrap();
10//! for (i, pos) in regex.captures("hello").unwrap().iter_pos().enumerate() {
11//! match pos {
12//! Some((beg, end)) =>
13//! println!("Group {} captured in position {}:{}", i, beg, end),
14//! None =>
15//! println!("Group {} is not captured", i)
16//! }
17//! }
18//! ```
19//!
20//! # Match vs Search
21//!
//! There are two basic things you can do with a `Regex` pattern: test
//! if the pattern matches the whole of a given string, and search for
//! occurrences of the pattern within a string. Oniguruma exposes these
//! two concepts with the *match* and *search* APIs.
26//!
//! In addition to these two base Oniguruma APIs, this crate exposes a
//! third *find* API, built on top of the *search* API.
29//!
30//! ```
31//! # use onig::Regex;
32//! let pattern = Regex::new("hello").unwrap();
33//! assert_eq!(true, pattern.find("hello world").is_some());
34//! assert_eq!(false, pattern.is_match("hello world"));
35//! ```
36//!
37//! ## The *Match* API
38//!
//! Functions in the match API check if a pattern matches the entire
//! string. The simplest of these is `Regex::is_match`. This returns
//! `true` if the pattern matches the string. For more complex usage,
//! `Regex::match_with_options` and `Regex::match_with_encoding`
//! can be used. These allow the capture groups to be inspected,
//! matching with different options, and matching sub-sections of a
//! given text.
46//!
47//! ## The *Search* API
48//!
//! Functions in the search API search for a pattern anywhere within a
//! string. The simplest of these is `Regex::find`. This returns the
//! start and end offsets of the first occurrence of the pattern within
//! the string. For more complex usage, `Regex::search_with_options` and
//! `Regex::search_with_encoding` can be used. These allow capture
//! groups to be inspected, searching with different options and
//! searching within subsections of a given text.
56//!
57//! ## The *Find* API
58//!
59//! The find API is built on top of the search API. Functions in this
60//! API allow iteration across all matches of the pattern within a
61//! string, not just the first one. The functions deal with some of
62//! the complexities of this, such as zero-length matches.
63//!
64//! The simplest step-up from the basic search API `Regex::find` is
65//! getting the captures relating to a match with the
66//! `Regex::captures` method. To find capture information for all
67//! matches within a string `Regex::find_iter` and
68//! `Regex::captures_iter` can be used. The former exposes the start
69//! and end of the match as `Regex::find` does, the latter exposes the
70//! whole capture group information as `Regex::captures` does.
71//!
72//! # The `std::pattern` API
73//!
74//! In addition to the main Oniguruma API it is possible to use the
75//! `Regex` object with the
76//! [`std::pattern`](https://doc.rust-lang.org/std/str/pattern/)
77//! API. To enable support compile with the `std-pattern` feature. If
78//! you're using Cargo you can do this by adding the following to your
79//! Cargo.toml:
80//!
81//! ```toml
82//! [dependencies.onig]
83//! version = "1.2"
84//! features = ["std-pattern"]
85//! ```
86
87#![cfg_attr(feature = "std-pattern", feature(pattern))]
88#![deny(missing_docs)]
89
90use once_cell::sync::Lazy;
91
92mod buffers;
93mod find;
94mod flags;
95mod match_param;
96mod names;
97mod region;
98mod replace;
99mod syntax;
100mod tree;
101mod utils;
102
103#[cfg(feature = "std-pattern")]
104mod pattern;
105
// re-export the onig types publicly
107pub use crate::buffers::{EncodedBytes, EncodedChars};
108pub use crate::find::{
109 Captures, FindCaptures, FindMatches, RegexSplits, RegexSplitsN, SubCaptures, SubCapturesPos,
110};
111pub use crate::flags::*;
112pub use crate::match_param::MatchParam;
113pub use crate::region::Region;
114pub use crate::replace::Replacer;
115pub use crate::syntax::{MetaChar, Syntax};
116pub use crate::tree::{CaptureTreeNode, CaptureTreeNodeIter};
117pub use crate::utils::{copyright, define_user_property, version};
118
119use std::os::raw::c_int;
120use std::ptr::{null, null_mut};
121use std::sync::Mutex;
122use std::{error, fmt, str};
123
/// Where an `Error` came from.
#[derive(Debug)]
enum ErrorData {
    /// An error reported by Oniguruma; carries the raw error code.
    OnigError(c_int),
    /// An error raised by this wrapper itself; it has no Oniguruma code.
    Custom,
}
129
/// This structure represents an error from the underlying Oniguruma library.
pub struct Error {
    // Origin of the error: an Oniguruma error code, or a wrapper-generated one.
    data: ErrorData,
    // Human-readable message; returned by `description()` and used by `Display`.
    description: String,
}
135
/// This struct is a wrapper around an Oniguruma regular expression
/// pointer. This represents a compiled regex which can be used in
/// search and match operations.
///
/// Equality is derived from the raw handle, so two `Regex` values are
/// equal only when they wrap the same underlying Oniguruma object.
#[derive(Debug, Eq, PartialEq)]
pub struct Regex {
    // Handle filled in by `onig_new`; released with `onig_free` on drop.
    raw: onig_sys::OnigRegex,
}
143
// SAFETY: NOTE(review) - nothing in this file proves these impls sound. They
// presumably rely on Oniguruma allowing a compiled regex to be moved to, and
// used from, several threads at once; confirm against the Oniguruma docs.
unsafe impl Send for Regex {}
unsafe impl Sync for Regex {}
146
147impl Error {
148 fn from_code_and_info(code: c_int, info: &onig_sys::OnigErrorInfo) -> Self {
149 Error::new(code, info)
150 }
151
152 fn from_code(code: c_int) -> Self {
153 Error::new(code, null())
154 }
155
156 fn custom<T: Into<String>>(message: T) -> Self {
157 Error {
158 data: ErrorData::Custom,
159 description: message.into(),
160 }
161 }
162
163 fn new(code: c_int, info: *const onig_sys::OnigErrorInfo) -> Self {
164 let buff = &mut [0; onig_sys::ONIG_MAX_ERROR_MESSAGE_LEN as usize];
165 let len = unsafe { onig_sys::onig_error_code_to_str(buff.as_mut_ptr(), code, info) };
166 let description = if let Ok(description) = str::from_utf8(&buff[..len as usize]) {
167 description
168 } else {
169 return Self::custom("Onig error string was invalid UTF-8");
170 };
171 Error {
172 data: ErrorData::OnigError(code),
173 description: description.to_owned(),
174 }
175 }
176
177 /// Return Oniguruma engine error code.
178 pub fn code(&self) -> i32 {
179 match self.data {
180 ErrorData::OnigError(code) => code,
181 _ => -1,
182 }
183 }
184
185 /// Return error description provided by Oniguruma engine.
186 pub fn description(&self) -> &str {
187 &self.description
188 }
189}
190
191impl error::Error for Error {
192 fn description(&self) -> &str {
193 &self.description
194 }
195}
196
197impl fmt::Display for Error {
198 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
199 write!(f, "Oniguruma error: {}", self.description())
200 }
201}
202
203impl fmt::Debug for Error {
204 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
205 write!(f, "Error({:?}, {})", self.data, self.description())
206 }
207}
208
// Global lock taken around the `onig_new` call in
// `Regex::with_options_and_encoding`, so that regex compilation never runs
// on more than one thread at a time. It guards no data of its own.
static REGEX_NEW_MUTEX: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
210
211impl Regex {
212 /// Create a Regex
213 ///
214 /// Simple regular expression constructor. Compiles a new regular
215 /// expression with the default options using the ruby syntax.
216 /// Once compiled, it can be used repeatedly to search in a string. If an
217 /// invalid expression is given, then an error is returned.
218 ///
219 /// # Arguments
220 ///
221 /// * `pattern` - The regex pattern to compile
222 ///
223 /// # Examples
224 ///
225 /// ```
226 /// use onig::Regex;
227 /// let r = Regex::new(r#"hello (\w+)"#);
228 /// assert!(r.is_ok());
229 /// ```
230 pub fn new(pattern: &str) -> Result<Self, Error> {
231 Regex::with_encoding(pattern)
232 }
233
234 /// Create a Regex, Specifying an Encoding
235 ///
236 /// Attempts to compile `pattern` into a new `Regex`
237 /// instance. Instead of assuming UTF-8 as the encoding scheme the
238 /// encoding is inferred from the `pattern` buffer.
239 ///
240 /// # Arguments
241 ///
242 /// * `pattern` - The regex pattern to compile
243 ///
244 /// # Examples
245 ///
246 /// ```
247 /// use onig::{Regex, EncodedBytes};
248 /// let utf8 = Regex::with_encoding("hello");
249 /// assert!(utf8.is_ok());
250 /// let ascii = Regex::with_encoding(EncodedBytes::ascii(b"world"));
251 /// assert!(ascii.is_ok());
252 /// ```
253 pub fn with_encoding<T>(pattern: T) -> Result<Regex, Error>
254 where
255 T: EncodedChars,
256 {
257 Regex::with_options_and_encoding(
258 pattern,
259 RegexOptions::REGEX_OPTION_NONE,
260 Syntax::default(),
261 )
262 }
263
264 /// Create a new Regex
265 ///
266 /// Attempts to compile a pattern into a new `Regex` instance.
267 /// Once compiled, it can be used repeatedly to search in a string. If an
268 /// invalid expression is given, then an error is returned.
269 /// See [`onig_sys::onig_new`][regex_new] for more information.
270 ///
271 /// # Arguments
272 ///
273 /// * `pattern` - The regex pattern to compile.
    /// * `option` - The regex compilation options.
275 /// * `syntax` - The syntax which the regex is written in.
276 ///
277 /// # Examples
278 ///
279 /// ```
280 /// use onig::{Regex, Syntax, RegexOptions};
281 /// let r = Regex::with_options("hello.*world",
282 /// RegexOptions::REGEX_OPTION_NONE,
283 /// Syntax::default());
284 /// assert!(r.is_ok());
285 /// ```
286 ///
287 /// [regex_new]: ./onig_sys/fn.onig_new.html
288 pub fn with_options(
289 pattern: &str,
290 option: RegexOptions,
291 syntax: &Syntax,
292 ) -> Result<Regex, Error> {
293 Regex::with_options_and_encoding(pattern, option, syntax)
294 }
295
    /// Create a new Regex, Specifying Options and Encoding
    ///
    /// Attempts to compile the given `pattern` into a new `Regex`
299 /// instance. Instead of assuming UTF-8 as the encoding scheme the
300 /// encoding is inferred from the `pattern` buffer. If the regex
301 /// fails to compile the returned `Error` value from
302 /// [`onig_new`][regex_new] contains more information.
303 ///
304 /// [regex_new]: ./onig_sys/fn.onig_new.html
305 ///
306 /// # Arguments
307 ///
308 /// * `pattern` - The regex pattern to compile.
    /// * `option` - The regex compilation options.
310 /// * `syntax` - The syntax which the regex is written in.
311 ///
312 /// # Examples
313 /// ```
314 /// use onig::{Regex, Syntax, EncodedBytes, RegexOptions};
315 /// let pattern = EncodedBytes::ascii(b"hello");
316 /// let r = Regex::with_options_and_encoding(pattern,
317 /// RegexOptions::REGEX_OPTION_SINGLELINE,
318 /// Syntax::default());
319 /// assert!(r.is_ok());
320 /// ```
321 pub fn with_options_and_encoding<T>(
322 pattern: T,
323 option: RegexOptions,
324 syntax: &Syntax,
325 ) -> Result<Self, Error>
326 where
327 T: EncodedChars,
328 {
329 // Convert the rust types to those required for the call to
330 // `onig_new`.
331 let mut reg: onig_sys::OnigRegex = null_mut();
332 let reg_ptr = &mut reg as *mut onig_sys::OnigRegex;
333
334 // We can use this later to get an error message to pass back
335 // if regex creation fails.
336 let mut error = onig_sys::OnigErrorInfo {
337 enc: null_mut(),
338 par: null_mut(),
339 par_end: null_mut(),
340 };
341
342 let err = unsafe {
343 // Grab a lock to make sure that `onig_new` isn't called by
344 // more than one thread at a time.
345 let _guard = REGEX_NEW_MUTEX.lock().unwrap();
346 onig_sys::onig_new(
347 reg_ptr,
348 pattern.start_ptr(),
349 pattern.limit_ptr(),
350 option.bits(),
351 pattern.encoding(),
352 syntax as *const Syntax as *mut Syntax as *mut onig_sys::OnigSyntaxType,
353 &mut error,
354 )
355 };
356
357 if err == onig_sys::ONIG_NORMAL as i32 {
358 Ok(Regex { raw: reg })
359 } else {
360 Err(Error::from_code_and_info(err, &error))
361 }
362 }
363
364 /// Match String
365 ///
    /// Try to match the regex against the given string slice,
    /// starting at a given offset. This method works the same way as
    /// `match_with_encoding`, but the encoding is always utf-8. Like
    /// `match_with_encoding`, it panics if the match reports an error.
369 ///
370 /// For more information see [Match vs
371 /// Search](index.html#match-vs-search)
372 ///
373 /// # Arguments
374 ///
375 /// * `str` - The string slice to match against.
376 /// * `at` - The byte index in the passed slice to start matching
377 /// * `options` - The regex match options.
378 /// * `region` - The region for return group match range info
379 ///
380 /// # Returns
381 ///
382 /// `Some(len)` if the regex matched, with `len` being the number
383 /// of bytes matched. `None` if the regex doesn't match.
384 ///
385 /// # Examples
386 ///
387 /// ```
388 /// use onig::{Regex, SearchOptions};
389 ///
390 /// let r = Regex::new(".*").unwrap();
391 /// let res = r.match_with_options("hello", 0, SearchOptions::SEARCH_OPTION_NONE, None);
392 /// assert!(res.is_some()); // it matches
393 /// assert!(res.unwrap() == 5); // 5 characters matched
394 /// ```
395 pub fn match_with_options(
396 &self,
397 str: &str,
398 at: usize,
399 options: SearchOptions,
400 region: Option<&mut Region>,
401 ) -> Option<usize> {
402 self.match_with_encoding(str, at, options, region)
403 }
404
405 /// Match String with Encoding
406 ///
    /// Match the regex against a string. This method will start at
    /// the offset `at` into the string and try and match the
    /// regex. If the regex matches then the return value is the
    /// number of bytes which matched. If the regex doesn't match
    /// the return is `None`. This method panics if the match reports
    /// an error; use `match_with_param` to receive the error instead.
412 ///
413 /// For more information see [Match vs
414 /// Search](index.html#match-vs-search)
415 ///
416 /// The contents of `chars` must have the same encoding that was
417 /// used to construct the regex.
418 ///
419 /// # Arguments
420 ///
421 /// * `chars` - The buffer to match against.
422 /// * `at` - The byte index in the passed buffer to start matching
423 /// * `options` - The regex match options.
424 /// * `region` - The region for return group match range info
425 ///
426 /// # Returns
427 ///
428 /// `Some(len)` if the regex matched, with `len` being the number
429 /// of bytes matched. `None` if the regex doesn't match.
430 ///
431 /// # Examples
432 ///
433 /// ```
434 /// use onig::{Regex, EncodedBytes, SearchOptions};
435 ///
436 /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
437 /// let res = r.match_with_encoding(EncodedBytes::ascii(b"world"),
438 /// 0, SearchOptions::SEARCH_OPTION_NONE, None);
439 /// assert!(res.is_some()); // it matches
440 /// assert!(res.unwrap() == 5); // 5 characters matched
441 /// ```
442 pub fn match_with_encoding<T>(
443 &self,
444 chars: T,
445 at: usize,
446 options: SearchOptions,
447 region: Option<&mut Region>,
448 ) -> Option<usize>
449 where
450 T: EncodedChars,
451 {
452 let match_param = MatchParam::default();
453 let result = self.match_with_param(chars, at, options, region, match_param);
454
455 match result {
456 Ok(r) => r,
457 Err(e) => panic!("Onig: Regex match error: {}", e.description()),
458 }
459 }
460
461 /// Match string with encoding and match param
462 ///
    /// Match the regex against a string. This method will start at
    /// the offset `at` into the string and try and match the
    /// regex. If the regex matches then the return value is the
    /// number of bytes which matched. If the regex doesn't match
    /// the return is `Ok(None)`.
468 ///
469 /// For more information see [Match vs
470 /// Search](index.html#match-vs-search)
471 ///
472 /// The contents of `chars` must have the same encoding that was
473 /// used to construct the regex.
474 ///
475 /// # Arguments
476 ///
477 /// * `chars` - The buffer to match against.
478 /// * `at` - The byte index in the passed buffer to start matching
479 /// * `options` - The regex match options.
480 /// * `region` - The region for return group match range info
481 /// * `match_param` - The match parameters
482 ///
483 /// # Returns
484 ///
485 /// `Ok(Some(len))` if the regex matched, with `len` being the number
486 /// of bytes matched. `Ok(None)` if the regex doesn't match. `Err` with an
487 /// `Error` if an error occurred (e.g. retry-limit-in-match exceeded).
488 ///
489 /// # Examples
490 ///
491 /// ```
492 /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
493 ///
494 /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
495 /// let res = r.match_with_param(EncodedBytes::ascii(b"world"),
496 /// 0, SearchOptions::SEARCH_OPTION_NONE,
497 /// None, MatchParam::default());
498 /// assert!(res.is_ok()); // matching did not error
499 /// assert!(res.unwrap() == Some(5)); // 5 characters matched
500 /// ```
501 pub fn match_with_param<T>(
502 &self,
503 chars: T,
504 at: usize,
505 options: SearchOptions,
506 region: Option<&mut Region>,
507 match_param: MatchParam,
508 ) -> Result<Option<usize>, Error>
509 where
510 T: EncodedChars,
511 {
512 if chars.encoding() != self.encoding() {
513 return Err(Error::custom(format!(
514 "Regex encoding does not match haystack encoding ({0:?}, {1:?})",
515 chars.encoding(),
516 self.encoding()
517 )));
518 }
519 let r = unsafe {
520 let offset = chars.start_ptr().add(at);
521 if offset > chars.limit_ptr() {
522 return Err(Error::custom(format!("Offset {} is too large", at)));
523 }
524 onig_sys::onig_match_with_param(
525 self.raw,
526 chars.start_ptr(),
527 chars.limit_ptr(),
528 offset,
529 match region {
530 Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
531 None => std::ptr::null_mut(),
532 },
533 options.bits(),
534 match_param.as_raw(),
535 )
536 };
537
538 if r >= 0 {
539 Ok(Some(r as usize))
540 } else if r == onig_sys::ONIG_MISMATCH {
541 Ok(None)
542 } else {
543 Err(Error::from_code(r))
544 }
545 }
546
547 /// Search pattern in string
548 ///
    /// Search for a match of the regex in a string. This method will return
    /// the index of the first match of the regex within the string, if
    /// there is one. If `from` is less than `to`, then search is performed
    /// in forward order, otherwise – in backward order. This method panics
    /// if the search reports an error.
553 ///
554 /// For more information see [Match vs
555 /// Search](index.html#match-vs-search)
556 ///
557 /// # Arguments
558 ///
559 /// * `str` - The string to search in.
560 /// * `from` - The byte index in the passed slice to start search
561 /// * `to` - The byte index in the passed slice to finish search
562 /// * `options` - The options for the search.
563 /// * `region` - The region for return group match range info
564 ///
565 /// # Returns
566 ///
567 /// `Some(pos)` if the regex matches, where `pos` is the
568 /// byte-position of the start of the match. `None` if the regex
569 /// doesn't match anywhere in `str`.
570 ///
571 /// # Examples
572 ///
573 /// ```
574 /// use onig::{Regex, SearchOptions};
575 ///
576 /// let r = Regex::new("l{1,2}").unwrap();
577 /// let res = r.search_with_options("hello", 0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
578 /// assert!(res.is_some()); // it matches
579 /// assert!(res.unwrap() == 2); // match starts at character 3
580 /// ```
581 pub fn search_with_options(
582 &self,
583 str: &str,
584 from: usize,
585 to: usize,
586 options: SearchOptions,
587 region: Option<&mut Region>,
588 ) -> Option<usize> {
589 self.search_with_encoding(str, from, to, options, region)
590 }
591
592 /// Search for a Pattern in a String with an Encoding
593 ///
    /// Search for a match of the regex in a string. This method will
    /// return the index of the first match of the regex within the
    /// string, if there is one. If `from` is less than `to`, then
    /// search is performed in forward order, otherwise – in backward
    /// order. This method panics if the search reports an error; use
    /// `search_with_param` to receive the error instead.
599 ///
600 /// For more information see [Match vs
601 /// Search](index.html#match-vs-search)
602 ///
603 /// The encoding of the buffer passed to search in must match the
604 /// encoding of the regex.
605 ///
606 /// # Arguments
607 ///
608 /// * `chars` - The character buffer to search in.
609 /// * `from` - The byte index in the passed slice to start search
610 /// * `to` - The byte index in the passed slice to finish search
611 /// * `options` - The options for the search.
612 /// * `region` - The region for return group match range info
613 ///
614 /// # Returns
615 ///
616 /// `Some(pos)` if the regex matches, where `pos` is the
617 /// byte-position of the start of the match. `None` if the regex
618 /// doesn't match anywhere in `chars`.
619 ///
620 /// # Examples
621 ///
622 /// ```
623 /// use onig::{Regex, EncodedBytes, SearchOptions};
624 ///
625 /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
626 /// let res = r.search_with_encoding(EncodedBytes::ascii(b"hello"),
627 /// 0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
628 /// assert!(res.is_some()); // it matches
629 /// assert!(res.unwrap() == 2); // match starts at character 3
630 /// ```
631 pub fn search_with_encoding<T>(
632 &self,
633 chars: T,
634 from: usize,
635 to: usize,
636 options: SearchOptions,
637 region: Option<&mut Region>,
638 ) -> Option<usize>
639 where
640 T: EncodedChars,
641 {
642 let match_param = MatchParam::default();
643 let result = self.search_with_param(chars, from, to, options, region, match_param);
644
645 match result {
646 Ok(r) => r,
647 Err(e) => panic!("Onig: Regex search error: {}", e.description()),
648 }
649 }
650
651 /// Search pattern in string with encoding and match param
652 ///
    /// Search for a match of the regex in a string. This method will
    /// return the index of the first match of the regex within the
    /// string, if there is one. If `from` is less than `to`, then
    /// search is performed in forward order, otherwise – in backward
    /// order.
658 ///
659 /// For more information see [Match vs
660 /// Search](index.html#match-vs-search)
661 ///
662 /// The encoding of the buffer passed to search in must match the
663 /// encoding of the regex.
664 ///
665 /// # Arguments
666 ///
667 /// * `chars` - The character buffer to search in.
668 /// * `from` - The byte index in the passed slice to start search
669 /// * `to` - The byte index in the passed slice to finish search
670 /// * `options` - The options for the search.
671 /// * `region` - The region for return group match range info
672 /// * `match_param` - The match parameters
673 ///
674 /// # Returns
675 ///
676 /// `Ok(Some(pos))` if the regex matches, where `pos` is the
677 /// byte-position of the start of the match. `Ok(None)` if the regex
678 /// doesn't match anywhere in `chars`. `Err` with an `Error` if an error
679 /// occurred (e.g. retry-limit-in-match exceeded).
680 ///
681 /// # Examples
682 ///
683 /// ```
684 /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
685 ///
686 /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
687 /// let res = r.search_with_param(EncodedBytes::ascii(b"hello"),
688 /// 0, 5, SearchOptions::SEARCH_OPTION_NONE,
689 /// None, MatchParam::default());
690 /// assert!(res.is_ok()); // matching did not error
691 /// assert!(res.unwrap() == Some(2)); // match starts at character 3
692 /// ```
693 pub fn search_with_param<T>(
694 &self,
695 chars: T,
696 from: usize,
697 to: usize,
698 options: SearchOptions,
699 region: Option<&mut Region>,
700 match_param: MatchParam,
701 ) -> Result<Option<usize>, Error>
702 where
703 T: EncodedChars,
704 {
705 let (beg, end) = (chars.start_ptr(), chars.limit_ptr());
706 if chars.encoding() != self.encoding() {
707 return Err(Error::custom(format!(
708 "Regex encoding does not match haystack encoding ({0:?}, {1:?})",
709 chars.encoding(),
710 self.encoding()
711 )));
712 }
713 let r = unsafe {
714 let start = beg.add(from);
715 let range = beg.add(to);
716 if start > end {
717 return Err(Error::custom("Start of match should be before end"));
718 }
719 if range > end {
720 return Err(Error::custom("Limit of match should be before end"));
721 }
722 onig_sys::onig_search_with_param(
723 self.raw,
724 beg,
725 end,
726 start,
727 range,
728 match region {
729 Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
730 None => std::ptr::null_mut(),
731 },
732 options.bits(),
733 match_param.as_raw(),
734 )
735 };
736
737 if r >= 0 {
738 Ok(Some(r as usize))
739 } else if r == onig_sys::ONIG_MISMATCH {
740 Ok(None)
741 } else {
742 Err(Error::from_code(r))
743 }
744 }
745
746 /// Returns true if and only if the regex matches the string given.
747 ///
748 /// For more information see [Match vs
749 /// Search](index.html#match-vs-search)
750 ///
751 /// # Arguments
752 /// * `text` - The string slice to test against the pattern.
753 ///
754 /// # Returns
755 ///
756 /// `true` if the pattern matches the whole of `text`, `false` otherwise.
757 pub fn is_match(&self, text: &str) -> bool {
758 self.match_with_options(text, 0, SearchOptions::SEARCH_OPTION_WHOLE_STRING, None)
759 .map(|r| r == text.len())
760 .unwrap_or(false)
761 }
762
    /// Find a Match in a String
764 ///
765 /// Finds the first match of the regular expression within the
766 /// buffer.
767 ///
768 /// Note that this should only be used if you want to discover the
769 /// position of the match within a string. Testing if a pattern
770 /// matches the whole string is faster if you use `is_match`. For
771 /// more information see [Match vs
772 /// Search](index.html#match-vs-search)
773 ///
774 /// # Arguments
775 /// * `text` - The text to search in.
776 ///
777 /// # Returns
778 ///
779 /// The offset of the start and end of the first match. If no
780 /// match exists `None` is returned.
781 pub fn find(&self, text: &str) -> Option<(usize, usize)> {
782 self.find_with_encoding(text)
783 }
784
785 /// Find a Match in a Buffer, With Encoding
786 ///
787 /// Finds the first match of the regular expression within the
788 /// buffer.
789 ///
790 /// For more information see [Match vs
791 /// Search](index.html#match-vs-search)
792 ///
793 /// # Arguments
794 /// * `text` - The text to search in.
795 ///
796 /// # Returns
797 ///
798 /// The offset of the start and end of the first match. If no
799 /// match exists `None` is returned.
800 pub fn find_with_encoding<T>(&self, text: T) -> Option<(usize, usize)>
801 where
802 T: EncodedChars,
803 {
804 let mut region = Region::new();
805 let len = text.len();
806 self.search_with_encoding(
807 text,
808 0,
809 len,
810 SearchOptions::SEARCH_OPTION_NONE,
811 Some(&mut region),
812 )
813 .and_then(|_| region.pos(0))
814 }
815
816 /// Get the Encoding of the Regex
817 ///
818 /// # Returns
819 ///
820 /// Returns a reference to an oniguruma encoding which was used
821 /// when this regex was created.
822 pub fn encoding(&self) -> onig_sys::OnigEncoding {
823 unsafe { onig_sys::onig_get_encoding(self.raw) }
824 }
825
826 /// Get the Number of Capture Groups in this Pattern
827 pub fn captures_len(&self) -> usize {
828 unsafe { onig_sys::onig_number_of_captures(self.raw) as usize }
829 }
830
831 /// Get the Size of the Capture Histories for this Pattern
832 pub fn capture_histories_len(&self) -> usize {
833 unsafe { onig_sys::onig_number_of_capture_histories(self.raw) as usize }
834 }
835}
836
837impl Drop for Regex {
838 fn drop(&mut self) {
839 unsafe {
840 onig_sys::onig_free(self.raw);
841 }
842 }
843}
844
#[cfg(test)]
mod tests {
    use super::*;
    use std::panic;

    // Pattern/input pair for which the tests below expect Oniguruma to
    // report "retry-limit-in-match over".
    const RETRY_LIMIT_PATTERN: &str = "(a|b|ab)*bc";
    const RETRY_LIMIT_INPUT: &str =
        "ababababababababababababababababababababababababababababacbc";

    // Compiles the `R...` regex shared by the invalid-range tests.
    fn ruby_regex() -> Regex {
        Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
            .expect("regex")
    }

    // Extracts the `String` payload from the result of a caught panic.
    fn panic_message<R>(outcome: std::thread::Result<R>) -> String {
        let payload = outcome.err().unwrap();
        payload.downcast_ref::<String>().unwrap().clone()
    }

    #[test]
    fn test_regex_create() {
        let with_options =
            Regex::with_options(".*", RegexOptions::REGEX_OPTION_NONE, Syntax::default());
        with_options.unwrap();

        let plain = Regex::new(r#"a \w+ word"#);
        plain.unwrap();
    }

    #[test]
    fn test_regex_invalid() {
        let err = Regex::new("\\p{foo}").unwrap_err();
        assert_eq!(err.code(), -223);
        assert_eq!(err.description(), "invalid character property name {foo}");
    }

    #[test]
    fn test_failed_match() {
        let regex = Regex::new("foo").unwrap();
        let outcome = regex.match_with_options("bar", 0, SearchOptions::SEARCH_OPTION_NONE, None);
        assert!(outcome.is_none());
    }

    #[test]
    fn test_regex_search_with_options() {
        let regex = Regex::new("e(l+)").unwrap();
        let mut region = Region::new();

        let found = regex.search_with_options(
            "hello",
            0,
            5,
            SearchOptions::SEARCH_OPTION_NONE,
            Some(&mut region),
        );

        assert!(region.tree().is_none());
        assert_eq!(found, Some(1));
        assert_eq!(region.len(), 2);
        let whole = region.pos(0).unwrap();
        let group = region.pos(1).unwrap();
        assert_eq!(whole, (1, 4));
        assert_eq!(group, (2, 4));

        // The region is filled at this point, so exercise cloning as well.
        let copy = region.clone();
        assert_eq!(copy.pos(0).unwrap(), whole);
    }

    #[test]
    fn test_regex_match_with_options() {
        let regex = Regex::new("he(l+)").unwrap();
        let mut region = Region::new();

        let matched = regex.match_with_options(
            "hello",
            0,
            SearchOptions::SEARCH_OPTION_NONE,
            Some(&mut region),
        );

        assert!(region.tree().is_none());
        assert_eq!(matched, Some(4));
        assert_eq!(region.len(), 2);
        let whole = region.pos(0).unwrap();
        let group = region.pos(1).unwrap();
        assert_eq!(whole, (0, 4));
        assert_eq!(group, (2, 4));
    }

    #[test]
    fn test_regex_is_match() {
        let regex = Regex::new("he(l+)o").unwrap();
        assert!(regex.is_match("hello"));
        assert!(!regex.is_match("hello 2.0"));
    }

    #[test]
    fn test_is_match_chooses_longest_alternation() {
        let regex = Regex::new("Greater|GreaterOrEqual").unwrap();
        assert!(regex.is_match("Greater"));
        assert!(regex.is_match("GreaterOrEqual"));
    }

    #[test]
    fn test_regex_find() {
        let regex = Regex::new("he(l+)o").unwrap();
        assert_eq!(regex.find("hey, hello!"), Some((5, 10)));
        assert_eq!(regex.find("hey, honey!"), None);
    }

    #[test]
    fn test_regex_captures_len() {
        let regex = Regex::new("(he)(l+)(o)").unwrap();
        assert_eq!(regex.captures_len(), 3);
    }

    #[test]
    fn test_regex_error_is_match() {
        let regex = Regex::new(RETRY_LIMIT_PATTERN).unwrap();
        let outcome = regex.match_with_param(
            RETRY_LIMIT_INPUT,
            0,
            SearchOptions::SEARCH_OPTION_NONE,
            None,
            MatchParam::default(),
        );

        let err = outcome.err().unwrap();
        assert_eq!("retry-limit-in-match over", err.description());
    }

    #[test]
    fn test_regex_panic_is_match() {
        let regex = Regex::new(RETRY_LIMIT_PATTERN).unwrap();
        let outcome = panic::catch_unwind(|| regex.is_match(RETRY_LIMIT_INPUT));
        assert_eq!(
            panic_message(outcome).as_str(),
            "Onig: Regex match error: retry-limit-in-match over"
        );
    }

    #[test]
    fn test_regex_error_find() {
        let regex = Regex::new(RETRY_LIMIT_PATTERN).unwrap();
        let outcome = regex.search_with_param(
            RETRY_LIMIT_INPUT,
            0,
            RETRY_LIMIT_INPUT.len(),
            SearchOptions::SEARCH_OPTION_NONE,
            None,
            MatchParam::default(),
        );

        let err = outcome.err().unwrap();
        assert_eq!("retry-limit-in-match over", err.description());
    }

    #[test]
    fn test_regex_panic_find() {
        let regex = Regex::new(RETRY_LIMIT_PATTERN).unwrap();
        let outcome = panic::catch_unwind(|| regex.find(RETRY_LIMIT_INPUT));
        assert_eq!(
            panic_message(outcome).as_str(),
            "Onig: Regex search error: retry-limit-in-match over"
        );
    }

    #[test]
    fn test_search_with_invalid_range() {
        let regex = ruby_regex();
        let string = "Ruby";

        // First pair: start past the end. Second pair: limit past the end.
        let bad_ranges = [(5, string.len()), (2, string.len() + 1)];
        for &(from, to) in &bad_ranges {
            let outcome = regex.search_with_param(
                string,
                from,
                to,
                SearchOptions::SEARCH_OPTION_NONE,
                None,
                MatchParam::default(),
            );
            assert!(outcome.is_err());
        }
    }

    #[test]
    fn test_search_with_invalid_range_panic() {
        let regex = ruby_regex();
        let string = "Ruby";
        let outcome = panic::catch_unwind(|| {
            regex.search_with_encoding(
                string,
                5,
                string.len(),
                SearchOptions::SEARCH_OPTION_NONE,
                None,
            )
        });
        assert!(outcome.is_err());
    }

    #[test]
    fn test_match_with_invalid_range() {
        let regex = ruby_regex();
        let outcome = regex.match_with_param(
            "Ruby",
            5,
            SearchOptions::SEARCH_OPTION_NONE,
            None,
            MatchParam::default(),
        );
        assert!(outcome.is_err());
    }

    #[test]
    fn test_match_with_invalid_range_panic() {
        let regex = ruby_regex();
        let string = "Ruby";
        let outcome = panic::catch_unwind(|| {
            regex.match_with_encoding(string, 5, SearchOptions::SEARCH_OPTION_NONE, None)
        });
        assert!(outcome.is_err());
    }
}