onig/
lib.rs

1//! This crate provides a safe wrapper around the
2//! [Oniguruma](https://github.com/kkos/oniguruma) regular expression library.
3//!
4//! # Examples
5//!
6//! ```rust
7//! use onig::Regex;
8//!
9//! let regex = Regex::new("e(l+)").unwrap();
10//! for (i, pos) in regex.captures("hello").unwrap().iter_pos().enumerate() {
11//!     match pos {
12//!          Some((beg, end)) =>
13//!              println!("Group {} captured in position {}:{}", i, beg, end),
14//!          None =>
15//!              println!("Group {} is not captured", i)
16//!     }
17//! }
18//! ```
19//!
20//! # Match vs Search
21//!
//! There are two basic things you can do with a `Regex` pattern; test
//! if the pattern matches the whole of a given string, and search for
//! occurrences of the pattern within a string. Oniguruma exposes these
//! two concepts with the *match* and *search* APIs.
26//!
//! In addition to these two base Oniguruma APIs this crate exposes a
//! third *find* API, built on top of the *search* API.
29//!
30//! ```
31//! # use onig::Regex;
32//! let pattern = Regex::new("hello").unwrap();
33//! assert_eq!(true, pattern.find("hello world").is_some());
34//! assert_eq!(false, pattern.is_match("hello world"));
35//! ```
36//!
37//! ## The *Match* API
38//!
//! Functions in the match API check if a pattern matches the entire
//! string. The simplest of these is `Regex::is_match`. This returns
//! `true` if the pattern matches the string. For more complex usage
//! `Regex::match_with_options` and `Regex::match_with_encoding`
//! can be used. These allow the capture groups to be inspected,
//! matching with different options, and matching sub-sections of a
//! given text.
46//!
47//! ## The *Search* API
48//!
//! Functions in the search API search for a pattern anywhere within a
//! string. The simplest of these is `Regex::find`. This returns the
//! offset of the first occurrence of the pattern within the string.
//! For more complex usage `Regex::search_with_options` and
//! `Regex::search_with_encoding` can be used. These allow capture
//! groups to be inspected, searching with different options and
//! searching within subsections of a given text.
56//!
57//! ## The *Find* API
58//!
59//! The find API is built on top of the search API. Functions in this
60//! API allow iteration across all matches of the pattern within a
61//! string, not just the first one. The functions deal with some of
62//! the complexities of this, such as zero-length matches.
63//!
64//! The simplest step-up from the basic search API `Regex::find` is
65//! getting the captures relating to a match with the
66//! `Regex::captures` method. To find capture information for all
67//! matches within a string `Regex::find_iter` and
68//! `Regex::captures_iter` can be used. The former exposes the start
69//! and end of the match as `Regex::find` does, the latter exposes the
70//! whole capture group information as `Regex::captures` does.
71//!
72//! # The `std::pattern` API
73//!
74//! In addition to the main Oniguruma API it is possible to use the
75//! `Regex` object with the
76//! [`std::pattern`](https://doc.rust-lang.org/std/str/pattern/)
77//! API. To enable support compile with the `std-pattern` feature. If
78//! you're using Cargo you can do this by adding the following to your
79//! Cargo.toml:
80//!
81//! ```toml
82//! [dependencies.onig]
83//! version = "1.2"
84//! features = ["std-pattern"]
85//! ```
86
87#![cfg_attr(feature = "std-pattern", feature(pattern))]
88#![deny(missing_docs)]
89
90use once_cell::sync::Lazy;
91
92mod buffers;
93mod find;
94mod flags;
95mod match_param;
96mod names;
97mod region;
98mod replace;
99mod syntax;
100mod tree;
101mod utils;
102
103#[cfg(feature = "std-pattern")]
104mod pattern;
105
// re-export the onig types publicly
107pub use crate::buffers::{EncodedBytes, EncodedChars};
108pub use crate::find::{
109    Captures, FindCaptures, FindMatches, RegexSplits, RegexSplitsN, SubCaptures, SubCapturesPos,
110};
111pub use crate::flags::*;
112pub use crate::match_param::MatchParam;
113pub use crate::region::Region;
114pub use crate::replace::Replacer;
115pub use crate::syntax::{MetaChar, Syntax};
116pub use crate::tree::{CaptureTreeNode, CaptureTreeNodeIter};
117pub use crate::utils::{copyright, define_user_property, version};
118
119use std::os::raw::c_int;
120use std::ptr::{null, null_mut};
121use std::sync::Mutex;
122use std::{error, fmt, str};
123
/// Where an `Error` originated.
#[derive(Debug)]
enum ErrorData {
    /// An error code reported by Oniguruma.
    OnigError(c_int),
    /// An error raised by this wrapper itself; it carries no Oniguruma code.
    Custom,
}
129
/// This structure represents an error from the underlying Oniguruma library.
pub struct Error {
    // Origin of the error: an Oniguruma error code, or a wrapper-generated error.
    data: ErrorData,
    // Human-readable message, exposed through `Error::description`.
    description: String,
}
135
/// This struct is a wrapper around an Oniguruma regular expression
/// pointer. This represents a compiled regex which can be used in
/// search and match operations.
///
/// Equality is derived from the single `raw` field, so two `Regex` values
/// compare equal only when they wrap the same underlying handle.
#[derive(Debug, Eq, PartialEq)]
pub struct Regex {
    // Handle filled in by `onig_new` and released with `onig_free` on drop.
    raw: onig_sys::OnigRegex,
}
143
// NOTE(review): these impls assert that the raw Oniguruma handle may be moved
// to, and shared between, threads. Nothing in this file establishes that; the
// guarantee presumably comes from Oniguruma itself — confirm against upstream.
unsafe impl Send for Regex {}
unsafe impl Sync for Regex {}
146
147impl Error {
148    fn from_code_and_info(code: c_int, info: &onig_sys::OnigErrorInfo) -> Self {
149        Error::new(code, info)
150    }
151
152    fn from_code(code: c_int) -> Self {
153        Error::new(code, null())
154    }
155
156    fn custom<T: Into<String>>(message: T) -> Self {
157        Error {
158            data: ErrorData::Custom,
159            description: message.into(),
160        }
161    }
162
163    fn new(code: c_int, info: *const onig_sys::OnigErrorInfo) -> Self {
164        let buff = &mut [0; onig_sys::ONIG_MAX_ERROR_MESSAGE_LEN as usize];
165        let len = unsafe { onig_sys::onig_error_code_to_str(buff.as_mut_ptr(), code, info) };
166        let description = if let Ok(description) = str::from_utf8(&buff[..len as usize]) {
167            description
168        } else {
169            return Self::custom("Onig error string was invalid UTF-8");
170        };
171        Error {
172            data: ErrorData::OnigError(code),
173            description: description.to_owned(),
174        }
175    }
176
177    /// Return Oniguruma engine error code.
178    pub fn code(&self) -> i32 {
179        match self.data {
180            ErrorData::OnigError(code) => code,
181            _ => -1,
182        }
183    }
184
185    /// Return error description provided by Oniguruma engine.
186    pub fn description(&self) -> &str {
187        &self.description
188    }
189}
190
191impl error::Error for Error {
192    fn description(&self) -> &str {
193        &self.description
194    }
195}
196
197impl fmt::Display for Error {
198    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
199        write!(f, "Oniguruma error: {}", self.description())
200    }
201}
202
203impl fmt::Debug for Error {
204    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
205        write!(f, "Error({:?}, {})", self.data, self.description())
206    }
207}
208
// Global lock taken around `onig_new` in `Regex::with_options_and_encoding` so
// that the call is never made by more than one thread at a time.
static REGEX_NEW_MUTEX: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
210
211impl Regex {
212    /// Create a Regex
213    ///
214    /// Simple regular expression constructor. Compiles a new regular
215    /// expression with the default options using the ruby syntax.
216    /// Once compiled, it can be used repeatedly to search in a string. If an
217    /// invalid expression is given, then an error is returned.
218    ///
219    /// # Arguments
220    ///
221    /// * `pattern` - The regex pattern to compile
222    ///
223    /// # Examples
224    ///
225    /// ```
226    /// use onig::Regex;
227    /// let r = Regex::new(r#"hello (\w+)"#);
228    /// assert!(r.is_ok());
229    /// ```
230    pub fn new(pattern: &str) -> Result<Self, Error> {
231        Regex::with_encoding(pattern)
232    }
233
234    /// Create a Regex, Specifying an Encoding
235    ///
236    /// Attempts to compile `pattern` into a new `Regex`
237    /// instance. Instead of assuming UTF-8 as the encoding scheme the
238    /// encoding is inferred from the `pattern` buffer.
239    ///
240    /// # Arguments
241    ///
242    /// * `pattern` - The regex pattern to compile
243    ///
244    /// # Examples
245    ///
246    /// ```
247    /// use onig::{Regex, EncodedBytes};
248    /// let utf8 = Regex::with_encoding("hello");
249    /// assert!(utf8.is_ok());
250    /// let ascii = Regex::with_encoding(EncodedBytes::ascii(b"world"));
251    /// assert!(ascii.is_ok());
252    /// ```
253    pub fn with_encoding<T>(pattern: T) -> Result<Regex, Error>
254    where
255        T: EncodedChars,
256    {
257        Regex::with_options_and_encoding(
258            pattern,
259            RegexOptions::REGEX_OPTION_NONE,
260            Syntax::default(),
261        )
262    }
263
264    /// Create a new Regex
265    ///
266    /// Attempts to compile a pattern into a new `Regex` instance.
267    /// Once compiled, it can be used repeatedly to search in a string. If an
268    /// invalid expression is given, then an error is returned.
269    /// See [`onig_sys::onig_new`][regex_new] for more information.
270    ///
271    /// # Arguments
272    ///
273    ///  * `pattern` - The regex pattern to compile.
    ///  * `option`  - The regex compilation options.
275    ///  * `syntax`  - The syntax which the regex is written in.
276    ///
277    /// # Examples
278    ///
279    /// ```
280    /// use onig::{Regex, Syntax, RegexOptions};
281    /// let r = Regex::with_options("hello.*world",
282    ///                             RegexOptions::REGEX_OPTION_NONE,
283    ///                             Syntax::default());
284    /// assert!(r.is_ok());
285    /// ```
286    ///
287    /// [regex_new]: ./onig_sys/fn.onig_new.html
288    pub fn with_options(
289        pattern: &str,
290        option: RegexOptions,
291        syntax: &Syntax,
292    ) -> Result<Regex, Error> {
293        Regex::with_options_and_encoding(pattern, option, syntax)
294    }
295
    /// Create a new Regex, Specifying Options and Encoding
    ///
    /// Attempts to compile the given `pattern` into a new `Regex`
    /// instance. Instead of assuming UTF-8 as the encoding scheme the
    /// encoding is inferred from the `pattern` buffer. If the regex
    /// fails to compile the returned `Error` value from
    /// [`onig_new`][regex_new] contains more information.
303    ///
304    /// [regex_new]: ./onig_sys/fn.onig_new.html
305    ///
306    /// # Arguments
307    ///
308    ///  * `pattern` - The regex pattern to compile.
    ///  * `option`  - The regex compilation options.
310    ///  * `syntax`  - The syntax which the regex is written in.
311    ///
312    /// # Examples
313    /// ```
314    /// use onig::{Regex, Syntax, EncodedBytes, RegexOptions};
315    /// let pattern = EncodedBytes::ascii(b"hello");
316    /// let r = Regex::with_options_and_encoding(pattern,
317    ///                                          RegexOptions::REGEX_OPTION_SINGLELINE,
318    ///                                          Syntax::default());
319    /// assert!(r.is_ok());
320    /// ```
321    pub fn with_options_and_encoding<T>(
322        pattern: T,
323        option: RegexOptions,
324        syntax: &Syntax,
325    ) -> Result<Self, Error>
326    where
327        T: EncodedChars,
328    {
329        // Convert the rust types to those required for the call to
330        // `onig_new`.
331        let mut reg: onig_sys::OnigRegex = null_mut();
332        let reg_ptr = &mut reg as *mut onig_sys::OnigRegex;
333
334        // We can use this later to get an error message to pass back
335        // if regex creation fails.
336        let mut error = onig_sys::OnigErrorInfo {
337            enc: null_mut(),
338            par: null_mut(),
339            par_end: null_mut(),
340        };
341
342        let err = unsafe {
343            // Grab a lock to make sure that `onig_new` isn't called by
344            // more than one thread at a time.
345            let _guard = REGEX_NEW_MUTEX.lock().unwrap();
346            onig_sys::onig_new(
347                reg_ptr,
348                pattern.start_ptr(),
349                pattern.limit_ptr(),
350                option.bits(),
351                pattern.encoding(),
352                syntax as *const Syntax as *mut Syntax as *mut onig_sys::OnigSyntaxType,
353                &mut error,
354            )
355        };
356
357        if err == onig_sys::ONIG_NORMAL as i32 {
358            Ok(Regex { raw: reg })
359        } else {
360            Err(Error::from_code_and_info(err, &error))
361        }
362    }
363
364    /// Match String
365    ///
366    /// Try to match the regex against the given string slice,
367    /// starting at a given offset. This method works the same way as
368    /// `match_with_encoding`, but the encoding is always utf-8.
369    ///
370    /// For more information see [Match vs
371    /// Search](index.html#match-vs-search)
372    ///
373    /// # Arguments
374    ///
375    /// * `str` - The string slice to match against.
376    /// * `at` - The byte index in the passed slice to start matching
377    /// * `options` - The regex match options.
378    /// * `region` - The region for return group match range info
379    ///
380    /// # Returns
381    ///
382    /// `Some(len)` if the regex matched, with `len` being the number
383    /// of bytes matched. `None` if the regex doesn't match.
384    ///
385    /// # Examples
386    ///
387    /// ```
388    /// use onig::{Regex, SearchOptions};
389    ///
390    /// let r = Regex::new(".*").unwrap();
391    /// let res = r.match_with_options("hello", 0, SearchOptions::SEARCH_OPTION_NONE, None);
392    /// assert!(res.is_some()); // it matches
393    /// assert!(res.unwrap() == 5); // 5 characters matched
394    /// ```
395    pub fn match_with_options(
396        &self,
397        str: &str,
398        at: usize,
399        options: SearchOptions,
400        region: Option<&mut Region>,
401    ) -> Option<usize> {
402        self.match_with_encoding(str, at, options, region)
403    }
404
405    /// Match String with Encoding
406    ///
407    /// Match the regex against a string. This method will start at
408    /// the offset `at` into the string and try and match the
409    /// regex. If the regex matches then the return value is the
410    /// number of characters which matched. If the regex doesn't match
411    /// the return is `None`.
412    ///
413    /// For more information see [Match vs
414    /// Search](index.html#match-vs-search)
415    ///
416    /// The contents of `chars` must have the same encoding that was
417    /// used to construct the regex.
418    ///
419    /// # Arguments
420    ///
421    /// * `chars` - The buffer to match against.
422    /// * `at` - The byte index in the passed buffer to start matching
423    /// * `options` - The regex match options.
424    /// * `region` - The region for return group match range info
425    ///
426    /// # Returns
427    ///
428    /// `Some(len)` if the regex matched, with `len` being the number
429    /// of bytes matched. `None` if the regex doesn't match.
430    ///
431    /// # Examples
432    ///
433    /// ```
434    /// use onig::{Regex, EncodedBytes, SearchOptions};
435    ///
436    /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
437    /// let res = r.match_with_encoding(EncodedBytes::ascii(b"world"),
438    ///                                 0, SearchOptions::SEARCH_OPTION_NONE, None);
439    /// assert!(res.is_some()); // it matches
440    /// assert!(res.unwrap() == 5); // 5 characters matched
441    /// ```
442    pub fn match_with_encoding<T>(
443        &self,
444        chars: T,
445        at: usize,
446        options: SearchOptions,
447        region: Option<&mut Region>,
448    ) -> Option<usize>
449    where
450        T: EncodedChars,
451    {
452        let match_param = MatchParam::default();
453        let result = self.match_with_param(chars, at, options, region, match_param);
454
455        match result {
456            Ok(r) => r,
457            Err(e) => panic!("Onig: Regex match error: {}", e.description()),
458        }
459    }
460
461    /// Match string with encoding and match param
462    ///
463    /// Match the regex against a string. This method will start at
464    /// the offset `at` into the string and try and match the
465    /// regex. If the regex matches then the return value is the
466    /// number of characters which matched. If the regex doesn't match
467    /// the return is `None`.
468    ///
469    /// For more information see [Match vs
470    /// Search](index.html#match-vs-search)
471    ///
472    /// The contents of `chars` must have the same encoding that was
473    /// used to construct the regex.
474    ///
475    /// # Arguments
476    ///
477    /// * `chars` - The buffer to match against.
478    /// * `at` - The byte index in the passed buffer to start matching
479    /// * `options` - The regex match options.
480    /// * `region` - The region for return group match range info
481    /// * `match_param` - The match parameters
482    ///
483    /// # Returns
484    ///
485    /// `Ok(Some(len))` if the regex matched, with `len` being the number
486    /// of bytes matched. `Ok(None)` if the regex doesn't match. `Err` with an
487    /// `Error` if an error occurred (e.g. retry-limit-in-match exceeded).
488    ///
489    /// # Examples
490    ///
491    /// ```
492    /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
493    ///
494    /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
495    /// let res = r.match_with_param(EncodedBytes::ascii(b"world"),
496    ///                              0, SearchOptions::SEARCH_OPTION_NONE,
497    ///                              None, MatchParam::default());
498    /// assert!(res.is_ok()); // matching did not error
499    /// assert!(res.unwrap() == Some(5)); // 5 characters matched
500    /// ```
501    pub fn match_with_param<T>(
502        &self,
503        chars: T,
504        at: usize,
505        options: SearchOptions,
506        region: Option<&mut Region>,
507        match_param: MatchParam,
508    ) -> Result<Option<usize>, Error>
509    where
510        T: EncodedChars,
511    {
512        if chars.encoding() != self.encoding() {
513            return Err(Error::custom(format!(
514                "Regex encoding does not match haystack encoding ({0:?}, {1:?})",
515                chars.encoding(),
516                self.encoding()
517            )));
518        }
519        let r = unsafe {
520            let offset = chars.start_ptr().add(at);
521            if offset > chars.limit_ptr() {
522                return Err(Error::custom(format!("Offset {} is too large", at)));
523            }
524            onig_sys::onig_match_with_param(
525                self.raw,
526                chars.start_ptr(),
527                chars.limit_ptr(),
528                offset,
529                match region {
530                    Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
531                    None => std::ptr::null_mut(),
532                },
533                options.bits(),
534                match_param.as_raw(),
535            )
536        };
537
538        if r >= 0 {
539            Ok(Some(r as usize))
540        } else if r == onig_sys::ONIG_MISMATCH {
541            Ok(None)
542        } else {
543            Err(Error::from_code(r))
544        }
545    }
546
547    /// Search pattern in string
548    ///
    /// Search for matches of the regex in a string. This method will return the
550    /// index of the first match of the regex within the string, if
551    /// there is one. If `from` is less than `to`, then search is performed
552    /// in forward order, otherwise – in backward order.
553    ///
554    /// For more information see [Match vs
555    /// Search](index.html#match-vs-search)
556    ///
557    /// # Arguments
558    ///
559    ///  * `str` - The string to search in.
560    ///  * `from` - The byte index in the passed slice to start search
561    ///  * `to` - The byte index in the passed slice to finish search
562    ///  * `options` - The options for the search.
563    ///  * `region` - The region for return group match range info
564    ///
565    /// # Returns
566    ///
567    /// `Some(pos)` if the regex matches, where `pos` is the
568    /// byte-position of the start of the match. `None` if the regex
569    /// doesn't match anywhere in `str`.
570    ///
571    /// # Examples
572    ///
573    /// ```
574    /// use onig::{Regex, SearchOptions};
575    ///
576    /// let r = Regex::new("l{1,2}").unwrap();
577    /// let res = r.search_with_options("hello", 0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
578    /// assert!(res.is_some()); // it matches
579    /// assert!(res.unwrap() == 2); // match starts at character 3
580    /// ```
581    pub fn search_with_options(
582        &self,
583        str: &str,
584        from: usize,
585        to: usize,
586        options: SearchOptions,
587        region: Option<&mut Region>,
588    ) -> Option<usize> {
589        self.search_with_encoding(str, from, to, options, region)
590    }
591
592    /// Search for a Pattern in a String with an Encoding
593    ///
    /// Search for matches of the regex in a string. This method will
595    /// return the index of the first match of the regex within the
596    /// string, if there is one. If `from` is less than `to`, then
597    /// search is performed in forward order, otherwise – in backward
598    /// order.
599    ///
600    /// For more information see [Match vs
601    /// Search](index.html#match-vs-search)
602    ///
603    /// The encoding of the buffer passed to search in must match the
604    /// encoding of the regex.
605    ///
606    /// # Arguments
607    ///
608    ///  * `chars` - The character buffer to search in.
609    ///  * `from` - The byte index in the passed slice to start search
610    ///  * `to` - The byte index in the passed slice to finish search
611    ///  * `options` - The options for the search.
612    ///  * `region` - The region for return group match range info
613    ///
614    /// # Returns
615    ///
616    /// `Some(pos)` if the regex matches, where `pos` is the
617    /// byte-position of the start of the match. `None` if the regex
618    /// doesn't match anywhere in `chars`.
619    ///
620    /// # Examples
621    ///
622    /// ```
623    /// use onig::{Regex, EncodedBytes, SearchOptions};
624    ///
625    /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
626    /// let res = r.search_with_encoding(EncodedBytes::ascii(b"hello"),
627    ///                                  0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
628    /// assert!(res.is_some()); // it matches
629    /// assert!(res.unwrap() == 2); // match starts at character 3
630    /// ```
631    pub fn search_with_encoding<T>(
632        &self,
633        chars: T,
634        from: usize,
635        to: usize,
636        options: SearchOptions,
637        region: Option<&mut Region>,
638    ) -> Option<usize>
639    where
640        T: EncodedChars,
641    {
642        let match_param = MatchParam::default();
643        let result = self.search_with_param(chars, from, to, options, region, match_param);
644
645        match result {
646            Ok(r) => r,
647            Err(e) => panic!("Onig: Regex search error: {}", e.description()),
648        }
649    }
650
651    /// Search pattern in string with encoding and match param
652    ///
    /// Search for matches of the regex in a string. This method will
654    /// return the index of the first match of the regex within the
655    /// string, if there is one. If `from` is less than `to`, then
656    /// search is performed in forward order, otherwise – in backward
657    /// order.
658    ///
659    /// For more information see [Match vs
660    /// Search](index.html#match-vs-search)
661    ///
662    /// The encoding of the buffer passed to search in must match the
663    /// encoding of the regex.
664    ///
665    /// # Arguments
666    ///
667    ///  * `chars` - The character buffer to search in.
668    ///  * `from` - The byte index in the passed slice to start search
669    ///  * `to` - The byte index in the passed slice to finish search
670    ///  * `options` - The options for the search.
671    ///  * `region` - The region for return group match range info
672    ///  * `match_param` - The match parameters
673    ///
674    /// # Returns
675    ///
676    /// `Ok(Some(pos))` if the regex matches, where `pos` is the
677    /// byte-position of the start of the match. `Ok(None)` if the regex
678    /// doesn't match anywhere in `chars`. `Err` with an `Error` if an error
679    /// occurred (e.g. retry-limit-in-match exceeded).
680    ///
681    /// # Examples
682    ///
683    /// ```
684    /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
685    ///
686    /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
687    /// let res = r.search_with_param(EncodedBytes::ascii(b"hello"),
688    ///                               0, 5, SearchOptions::SEARCH_OPTION_NONE,
689    ///                               None, MatchParam::default());
690    /// assert!(res.is_ok()); // matching did not error
691    /// assert!(res.unwrap() == Some(2)); // match starts at character 3
692    /// ```
693    pub fn search_with_param<T>(
694        &self,
695        chars: T,
696        from: usize,
697        to: usize,
698        options: SearchOptions,
699        region: Option<&mut Region>,
700        match_param: MatchParam,
701    ) -> Result<Option<usize>, Error>
702    where
703        T: EncodedChars,
704    {
705        let (beg, end) = (chars.start_ptr(), chars.limit_ptr());
706        if chars.encoding() != self.encoding() {
707            return Err(Error::custom(format!(
708                "Regex encoding does not match haystack encoding ({0:?}, {1:?})",
709                chars.encoding(),
710                self.encoding()
711            )));
712        }
713        let r = unsafe {
714            let start = beg.add(from);
715            let range = beg.add(to);
716            if start > end {
717                return Err(Error::custom("Start of match should be before end"));
718            }
719            if range > end {
720                return Err(Error::custom("Limit of match should be before end"));
721            }
722            onig_sys::onig_search_with_param(
723                self.raw,
724                beg,
725                end,
726                start,
727                range,
728                match region {
729                    Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
730                    None => std::ptr::null_mut(),
731                },
732                options.bits(),
733                match_param.as_raw(),
734            )
735        };
736
737        if r >= 0 {
738            Ok(Some(r as usize))
739        } else if r == onig_sys::ONIG_MISMATCH {
740            Ok(None)
741        } else {
742            Err(Error::from_code(r))
743        }
744    }
745
746    /// Returns true if and only if the regex matches the string given.
747    ///
748    /// For more information see [Match vs
749    /// Search](index.html#match-vs-search)
750    ///
751    /// # Arguments
752    ///  * `text` - The string slice to test against the pattern.
753    ///
754    /// # Returns
755    ///
756    /// `true` if the pattern matches the whole of `text`, `false` otherwise.
757    pub fn is_match(&self, text: &str) -> bool {
758        self.match_with_options(text, 0, SearchOptions::SEARCH_OPTION_WHOLE_STRING, None)
759            .map(|r| r == text.len())
760            .unwrap_or(false)
761    }
762
    /// Find a Match in a String
764    ///
765    /// Finds the first match of the regular expression within the
766    /// buffer.
767    ///
768    /// Note that this should only be used if you want to discover the
769    /// position of the match within a string. Testing if a pattern
770    /// matches the whole string is faster if you use `is_match`.  For
771    /// more information see [Match vs
772    /// Search](index.html#match-vs-search)
773    ///
774    /// # Arguments
775    ///  * `text` - The text to search in.
776    ///
777    /// # Returns
778    ///
779    ///  The offset of the start and end of the first match. If no
780    ///  match exists `None` is returned.
781    pub fn find(&self, text: &str) -> Option<(usize, usize)> {
782        self.find_with_encoding(text)
783    }
784
785    /// Find a Match in a Buffer, With Encoding
786    ///
787    /// Finds the first match of the regular expression within the
788    /// buffer.
789    ///
790    /// For more information see [Match vs
791    /// Search](index.html#match-vs-search)
792    ///
793    /// # Arguments
794    ///  * `text` - The text to search in.
795    ///
796    /// # Returns
797    ///
798    ///  The offset of the start and end of the first match. If no
799    ///  match exists `None` is returned.
800    pub fn find_with_encoding<T>(&self, text: T) -> Option<(usize, usize)>
801    where
802        T: EncodedChars,
803    {
804        let mut region = Region::new();
805        let len = text.len();
806        self.search_with_encoding(
807            text,
808            0,
809            len,
810            SearchOptions::SEARCH_OPTION_NONE,
811            Some(&mut region),
812        )
813        .and_then(|_| region.pos(0))
814    }
815
816    /// Get the Encoding of the Regex
817    ///
818    /// # Returns
819    ///
820    /// Returns a reference to an oniguruma encoding which was used
821    /// when this regex was created.
822    pub fn encoding(&self) -> onig_sys::OnigEncoding {
823        unsafe { onig_sys::onig_get_encoding(self.raw) }
824    }
825
826    /// Get the Number of Capture Groups in this Pattern
827    pub fn captures_len(&self) -> usize {
828        unsafe { onig_sys::onig_number_of_captures(self.raw) as usize }
829    }
830
831    /// Get the Size of the Capture Histories for this Pattern
832    pub fn capture_histories_len(&self) -> usize {
833        unsafe { onig_sys::onig_number_of_capture_histories(self.raw) as usize }
834    }
835}
836
837impl Drop for Regex {
838    fn drop(&mut self) {
839        unsafe {
840            onig_sys::onig_free(self.raw);
841        }
842    }
843}
844
845#[cfg(test)]
846mod tests {
847    use super::*;
848    use std::panic;
849
850    #[test]
851    fn test_regex_create() {
852        Regex::with_options(".*", RegexOptions::REGEX_OPTION_NONE, Syntax::default()).unwrap();
853
854        Regex::new(r#"a \w+ word"#).unwrap();
855    }
856
857    #[test]
858    fn test_regex_invalid() {
859        let e = Regex::new("\\p{foo}").unwrap_err();
860        assert_eq!(e.code(), -223);
861        assert_eq!(e.description(), "invalid character property name {foo}");
862    }
863
864    #[test]
865    fn test_failed_match() {
866        let regex = Regex::new("foo").unwrap();
867        let res = regex.match_with_options("bar", 0, SearchOptions::SEARCH_OPTION_NONE, None);
868        assert!(res.is_none());
869    }
870
871    #[test]
872    fn test_regex_search_with_options() {
873        let mut region = Region::new();
874        let regex = Regex::new("e(l+)").unwrap();
875
876        let r = regex.search_with_options(
877            "hello",
878            0,
879            5,
880            SearchOptions::SEARCH_OPTION_NONE,
881            Some(&mut region),
882        );
883
884        assert!(region.tree().is_none());
885        assert_eq!(r, Some(1));
886        assert_eq!(region.len(), 2);
887        let pos1 = region.pos(0).unwrap();
888        let pos2 = region.pos(1).unwrap();
889        assert_eq!(pos1, (1, 4));
890        assert_eq!(pos2, (2, 4));
891
892        // test cloning here since we already have a filled region
893        let cloned_region = region.clone();
894        let pos1_clone = cloned_region.pos(0).unwrap();
895        assert_eq!(pos1_clone, pos1);
896    }
897
898    #[test]
899    fn test_regex_match_with_options() {
900        let mut region = Region::new();
901        let regex = Regex::new("he(l+)").unwrap();
902
903        let r = regex.match_with_options(
904            "hello",
905            0,
906            SearchOptions::SEARCH_OPTION_NONE,
907            Some(&mut region),
908        );
909
910        assert!(region.tree().is_none());
911        assert_eq!(r, Some(4));
912        assert_eq!(region.len(), 2);
913        let pos1 = region.pos(0).unwrap();
914        let pos2 = region.pos(1).unwrap();
915        assert_eq!(pos1, (0, 4));
916        assert_eq!(pos2, (2, 4));
917    }
918
919    #[test]
920    fn test_regex_is_match() {
921        let regex = Regex::new("he(l+)o").unwrap();
922        assert!(regex.is_match("hello"));
923        assert!(!regex.is_match("hello 2.0"));
924    }
925
926    #[test]
927    fn test_is_match_chooses_longest_alternation() {
928        let regex = Regex::new("Greater|GreaterOrEqual").unwrap();
929        assert!(regex.is_match("Greater"));
930        assert!(regex.is_match("GreaterOrEqual"));
931    }
932
933    #[test]
934    fn test_regex_find() {
935        let regex = Regex::new("he(l+)o").unwrap();
936        assert_eq!(regex.find("hey, hello!"), Some((5, 10)));
937        assert_eq!(regex.find("hey, honey!"), None);
938    }
939
940    #[test]
941    fn test_regex_captures_len() {
942        let regex = Regex::new("(he)(l+)(o)").unwrap();
943        assert_eq!(regex.captures_len(), 3);
944    }
945
946    #[test]
947    fn test_regex_error_is_match() {
948        let regex = Regex::new("(a|b|ab)*bc").unwrap();
949        let result = regex.match_with_param(
950            "ababababababababababababababababababababababababababababacbc",
951            0,
952            SearchOptions::SEARCH_OPTION_NONE,
953            None,
954            MatchParam::default(),
955        );
956
957        let e = result.err().unwrap();
958        assert_eq!("retry-limit-in-match over", e.description());
959    }
960
961    #[test]
962    fn test_regex_panic_is_match() {
963        let regex = Regex::new("(a|b|ab)*bc").unwrap();
964        let result = panic::catch_unwind(|| {
965            regex.is_match("ababababababababababababababababababababababababababababacbc")
966        });
967        let e = result.err().unwrap();
968        let message = e.downcast_ref::<String>().unwrap();
969        assert_eq!(
970            message.as_str(),
971            "Onig: Regex match error: retry-limit-in-match over"
972        );
973    }
974
975    #[test]
976    fn test_regex_error_find() {
977        let regex = Regex::new("(a|b|ab)*bc").unwrap();
978        let s = "ababababababababababababababababababababababababababababacbc";
979        let result = regex.search_with_param(
980            s,
981            0,
982            s.len(),
983            SearchOptions::SEARCH_OPTION_NONE,
984            None,
985            MatchParam::default(),
986        );
987
988        let e = result.err().unwrap();
989        assert_eq!("retry-limit-in-match over", e.description());
990    }
991
992    #[test]
993    fn test_regex_panic_find() {
994        let regex = Regex::new("(a|b|ab)*bc").unwrap();
995        let result = panic::catch_unwind(|| {
996            regex.find("ababababababababababababababababababababababababababababacbc")
997        });
998        let e = result.err().unwrap();
999        let message = e.downcast_ref::<String>().unwrap();
1000        assert_eq!(
1001            message.as_str(),
1002            "Onig: Regex search error: retry-limit-in-match over"
1003        );
1004    }
1005
1006    #[test]
1007    fn test_search_with_invalid_range() {
1008        let regex = Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
1009            .expect("regex");
1010        let string = "Ruby";
1011        let is_match = regex.search_with_param(
1012            string,
1013            5,
1014            string.len(),
1015            SearchOptions::SEARCH_OPTION_NONE,
1016            None,
1017            MatchParam::default(),
1018        );
1019        assert!(is_match.is_err());
1020
1021        let is_match = regex.search_with_param(
1022            string,
1023            2,
1024            string.len() + 1,
1025            SearchOptions::SEARCH_OPTION_NONE,
1026            None,
1027            MatchParam::default(),
1028        );
1029        assert!(is_match.is_err());
1030    }
1031
1032    #[test]
1033    fn test_search_with_invalid_range_panic() {
1034        let regex = Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
1035            .expect("regex");
1036        let string = "Ruby";
1037        let is_match = panic::catch_unwind(|| {
1038            regex.search_with_encoding(
1039                string,
1040                5,
1041                string.len(),
1042                SearchOptions::SEARCH_OPTION_NONE,
1043                None,
1044            )
1045        });
1046        assert!(is_match.is_err());
1047    }
1048
1049    #[test]
1050    fn test_match_with_invalid_range() {
1051        let regex = Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
1052            .expect("regex");
1053        let string = "Ruby";
1054        let is_match = regex.match_with_param(
1055            string,
1056            5,
1057            SearchOptions::SEARCH_OPTION_NONE,
1058            None,
1059            MatchParam::default(),
1060        );
1061        assert!(is_match.is_err());
1062    }
1063
1064    #[test]
1065    fn test_match_with_invalid_range_panic() {
1066        let regex = Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
1067            .expect("regex");
1068        let string = "Ruby";
1069        let is_match = panic::catch_unwind(|| {
1070            regex.match_with_encoding(string, 5, SearchOptions::SEARCH_OPTION_NONE, None)
1071        });
1072        assert!(is_match.is_err());
1073    }
1074}