onig/
lib.rs

1//! This crate provides a safe wrapper around the
2//! [Oniguruma](https://github.com/kkos/oniguruma) regular expression library.
3//!
4//! # Examples
5//!
6//! ```rust
7//! use onig::Regex;
8//!
9//! let regex = Regex::new("e(l+)").unwrap();
10//! for (i, pos) in regex.captures("hello").unwrap().iter_pos().enumerate() {
11//!     match pos {
12//!          Some((beg, end)) =>
13//!              println!("Group {} captured in position {}:{}", i, beg, end),
14//!          None =>
15//!              println!("Group {} is not captured", i)
16//!     }
17//! }
18//! ```
19//!
20//! # Match vs Search
21//!
22//! There are two basic things you can do with a `Regex` pattern; test
23//! if the pattern matches the whole of a given string, and search for
//! occurrences of the pattern within a string. Oniguruma exposes these
25//! two concepts with the *match* and *search* APIs.
26//!
//! In addition to these two base Oniguruma APIs this crate exposes a
28//! third *find* API, built on top of the *search* API.
29//!
30//! ```
31//! # use onig::Regex;
32//! let pattern = Regex::new("hello").unwrap();
33//! assert_eq!(true, pattern.find("hello world").is_some());
34//! assert_eq!(false, pattern.is_match("hello world"));
35//! ```
36//!
37//! ## The *Match* API
38//!
39//! Functions in the match API check if a pattern matches the entire
//! string. The simplest of these is `Regex::is_match`. This returns
//! `true` if the pattern matches the string. For more complex usage
//! `Regex::match_with_options` and `Regex::match_with_encoding`
43//! can be used. These allow the capture groups to be inspected,
44//! matching with different options, and matching sub-sections of a
45//! given text.
46//!
47//! ## The *Search* API
48//!
//! Functions in the search API search for a pattern anywhere within a
//! string. The simplest of these is `Regex::find`. This returns the
//! offset of the first occurrence of the pattern within the string.
//! For more complex usage `Regex::search_with_options` and
53//! `Regex::search_with_encoding` can be used. These allow capture
54//! groups to be inspected, searching with different options and
55//! searching within subsections of a given text.
56//!
57//! ## The *Find* API
58//!
59//! The find API is built on top of the search API. Functions in this
60//! API allow iteration across all matches of the pattern within a
61//! string, not just the first one. The functions deal with some of
62//! the complexities of this, such as zero-length matches.
63//!
64//! The simplest step-up from the basic search API `Regex::find` is
65//! getting the captures relating to a match with the
66//! `Regex::captures` method. To find capture information for all
67//! matches within a string `Regex::find_iter` and
68//! `Regex::captures_iter` can be used. The former exposes the start
69//! and end of the match as `Regex::find` does, the latter exposes the
70//! whole capture group information as `Regex::captures` does.
71//!
72//! # The `std::pattern` API
73//!
74//! In addition to the main Oniguruma API it is possible to use the
75//! `Regex` object with the
76//! [`std::pattern`](https://doc.rust-lang.org/std/str/pattern/)
77//! API. To enable support compile with the `std-pattern` feature. If
78//! you're using Cargo you can do this by adding the following to your
79//! Cargo.toml:
80//!
81//! ```toml
82//! [dependencies.onig]
83//! version = "1.2"
84//! features = ["std-pattern"]
85//! ```
86
87#![cfg_attr(not(feature = "cargo-clippy"), allow(unknown_lints))]
88#![cfg_attr(feature = "std-pattern", feature(pattern))]
89#![deny(missing_docs)]
90
91use once_cell::sync::Lazy;
92
93mod buffers;
94mod find;
95mod flags;
96mod match_param;
97mod names;
98mod region;
99mod replace;
100mod syntax;
101mod tree;
102mod utils;
103
104#[cfg(feature = "std-pattern")]
105mod pattern;
106
// re-export the onig types publicly
108pub use crate::buffers::{EncodedBytes, EncodedChars};
109pub use crate::find::{
110    Captures, FindCaptures, FindMatches, RegexSplits, RegexSplitsN, SubCaptures, SubCapturesPos,
111};
112pub use crate::flags::*;
113pub use crate::match_param::MatchParam;
114pub use crate::region::Region;
115pub use crate::replace::Replacer;
116pub use crate::syntax::{MetaChar, Syntax};
117pub use crate::tree::{CaptureTreeNode, CaptureTreeNodeIter};
118pub use crate::utils::{copyright, define_user_property, version};
119
120use std::os::raw::c_int;
121use std::ptr::{null, null_mut};
122use std::sync::Mutex;
123use std::{error, fmt, str};
124
/// Internal record of where an [`Error`] came from.
#[derive(Debug)]
enum ErrorData {
    /// An error reported by Oniguruma, carrying the raw error code.
    OnigError(c_int),
    /// An error raised by this wrapper itself; it has no Oniguruma code.
    Custom,
}
130
/// This structure represents an error from the underlying Oniguruma library.
pub struct Error {
    // Origin of the error: an Oniguruma error code, or a wrapper-generated error.
    data: ErrorData,
    // Human-readable message describing the error.
    description: String,
}
136
/// This struct is a wrapper around an Oniguruma regular expression
/// pointer. This represents a compiled regex which can be used in
/// search and match operations.
///
/// The derived equality compares the wrapped raw pointer, so two
/// `Regex` values are equal only when they wrap the same Oniguruma
/// object; the pattern text is not compared.
#[derive(Debug, Eq, PartialEq)]
pub struct Regex {
    // Raw handle produced by `onig_new`; released with `onig_free` on drop.
    raw: onig_sys::OnigRegex,
}
144
// NOTE(review): these impls assume a compiled Oniguruma regex may be moved
// between threads and used concurrently through `&Regex` — TODO confirm
// against the Oniguruma documentation.
unsafe impl Send for Regex {}
unsafe impl Sync for Regex {}
147
148impl Error {
149    fn from_code_and_info(code: c_int, info: &onig_sys::OnigErrorInfo) -> Self {
150        Error::new(code, info)
151    }
152
153    fn from_code(code: c_int) -> Self {
154        Error::new(code, null())
155    }
156
157    fn custom<T: Into<String>>(message: T) -> Self {
158        Error {
159            data: ErrorData::Custom,
160            description: message.into(),
161        }
162    }
163
164    fn new(code: c_int, info: *const onig_sys::OnigErrorInfo) -> Self {
165        let buff = &mut [0; onig_sys::ONIG_MAX_ERROR_MESSAGE_LEN as usize];
166        let len = unsafe { onig_sys::onig_error_code_to_str(buff.as_mut_ptr(), code, info) };
167        let description = if let Ok(description) = str::from_utf8(&buff[..len as usize]) {
168            description
169        } else {
170            return Self::custom("Onig error string was invalid UTF-8");
171        };
172        Error {
173            data: ErrorData::OnigError(code),
174            description: description.to_owned(),
175        }
176    }
177
178    /// Return Oniguruma engine error code.
179    pub fn code(&self) -> i32 {
180        match self.data {
181            ErrorData::OnigError(code) => code,
182            _ => -1,
183        }
184    }
185
186    /// Return error description provided by Oniguruma engine.
187    pub fn description(&self) -> &str {
188        &self.description
189    }
190}
191
192impl error::Error for Error {
193    fn description(&self) -> &str {
194        &self.description
195    }
196}
197
198impl fmt::Display for Error {
199    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
200        write!(f, "Oniguruma error: {}", self.description())
201    }
202}
203
204impl fmt::Debug for Error {
205    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
206        write!(f, "Error({:?}, {})", self.data, self.description())
207    }
208}
209
// Global lock held around every call to `onig_new`, so that regex
// compilation is never entered by more than one thread at a time.
static REGEX_NEW_MUTEX: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
211
212impl Regex {
213    /// Create a Regex
214    ///
215    /// Simple regular expression constructor. Compiles a new regular
216    /// expression with the default options using the ruby syntax.
217    /// Once compiled, it can be used repeatedly to search in a string. If an
218    /// invalid expression is given, then an error is returned.
219    ///
220    /// # Arguments
221    ///
222    /// * `pattern` - The regex pattern to compile
223    ///
224    /// # Examples
225    ///
226    /// ```
227    /// use onig::Regex;
228    /// let r = Regex::new(r#"hello (\w+)"#);
229    /// assert!(r.is_ok());
230    /// ```
231    pub fn new(pattern: &str) -> Result<Self, Error> {
232        Regex::with_encoding(pattern)
233    }
234
235    /// Create a Regex, Specifying an Encoding
236    ///
237    /// Attempts to compile `pattern` into a new `Regex`
238    /// instance. Instead of assuming UTF-8 as the encoding scheme the
239    /// encoding is inferred from the `pattern` buffer.
240    ///
241    /// # Arguments
242    ///
243    /// * `pattern` - The regex pattern to compile
244    ///
245    /// # Examples
246    ///
247    /// ```
248    /// use onig::{Regex, EncodedBytes};
249    /// let utf8 = Regex::with_encoding("hello");
250    /// assert!(utf8.is_ok());
251    /// let ascii = Regex::with_encoding(EncodedBytes::ascii(b"world"));
252    /// assert!(ascii.is_ok());
253    /// ```
254    pub fn with_encoding<T>(pattern: T) -> Result<Regex, Error>
255    where
256        T: EncodedChars,
257    {
258        Regex::with_options_and_encoding(
259            pattern,
260            RegexOptions::REGEX_OPTION_NONE,
261            Syntax::default(),
262        )
263    }
264
265    /// Create a new Regex
266    ///
267    /// Attempts to compile a pattern into a new `Regex` instance.
268    /// Once compiled, it can be used repeatedly to search in a string. If an
269    /// invalid expression is given, then an error is returned.
270    /// See [`onig_sys::onig_new`][regex_new] for more information.
271    ///
272    /// # Arguments
273    ///
274    ///  * `pattern` - The regex pattern to compile.
275    ///  * `options` - The regex compilation options.
276    ///  * `syntax`  - The syntax which the regex is written in.
277    ///
278    /// # Examples
279    ///
280    /// ```
281    /// use onig::{Regex, Syntax, RegexOptions};
282    /// let r = Regex::with_options("hello.*world",
283    ///                             RegexOptions::REGEX_OPTION_NONE,
284    ///                             Syntax::default());
285    /// assert!(r.is_ok());
286    /// ```
287    ///
288    /// [regex_new]: ./onig_sys/fn.onig_new.html
289    pub fn with_options(
290        pattern: &str,
291        option: RegexOptions,
292        syntax: &Syntax,
293    ) -> Result<Regex, Error> {
294        Regex::with_options_and_encoding(pattern, option, syntax)
295    }
296
297    /// Create a new Regex, Specifying Options and Ecoding
298    ///
299    /// Attempts to comile the given `pattern` into a new `Regex`
300    /// instance. Instead of assuming UTF-8 as the encoding scheme the
301    /// encoding is inferred from the `pattern` buffer. If the regex
302    /// fails to compile the returned `Error` value from
303    /// [`onig_new`][regex_new] contains more information.
304    ///
305    /// [regex_new]: ./onig_sys/fn.onig_new.html
306    ///
307    /// # Arguments
308    ///
309    ///  * `pattern` - The regex pattern to compile.
310    ///  * `options` - The regex compilation options.
311    ///  * `syntax`  - The syntax which the regex is written in.
312    ///
313    /// # Examples
314    /// ```
315    /// use onig::{Regex, Syntax, EncodedBytes, RegexOptions};
316    /// let pattern = EncodedBytes::ascii(b"hello");
317    /// let r = Regex::with_options_and_encoding(pattern,
318    ///                                          RegexOptions::REGEX_OPTION_SINGLELINE,
319    ///                                          Syntax::default());
320    /// assert!(r.is_ok());
321    /// ```
322    pub fn with_options_and_encoding<T>(
323        pattern: T,
324        option: RegexOptions,
325        syntax: &Syntax,
326    ) -> Result<Self, Error>
327    where
328        T: EncodedChars,
329    {
330        // Convert the rust types to those required for the call to
331        // `onig_new`.
332        let mut reg: onig_sys::OnigRegex = null_mut();
333        let reg_ptr = &mut reg as *mut onig_sys::OnigRegex;
334
335        // We can use this later to get an error message to pass back
336        // if regex creation fails.
337        let mut error = onig_sys::OnigErrorInfo {
338            enc: null_mut(),
339            par: null_mut(),
340            par_end: null_mut(),
341        };
342
343        let err = unsafe {
344            // Grab a lock to make sure that `onig_new` isn't called by
345            // more than one thread at a time.
346            let _guard = REGEX_NEW_MUTEX.lock().unwrap();
347            onig_sys::onig_new(
348                reg_ptr,
349                pattern.start_ptr(),
350                pattern.limit_ptr(),
351                option.bits(),
352                pattern.encoding(),
353                syntax as *const Syntax as *mut Syntax as *mut onig_sys::OnigSyntaxType,
354                &mut error,
355            )
356        };
357
358        if err == onig_sys::ONIG_NORMAL as i32 {
359            Ok(Regex { raw: reg })
360        } else {
361            Err(Error::from_code_and_info(err, &error))
362        }
363    }
364
365    /// Match String
366    ///
367    /// Try to match the regex against the given string slice,
368    /// starting at a given offset. This method works the same way as
369    /// `match_with_encoding`, but the encoding is always utf-8.
370    ///
371    /// For more information see [Match vs
372    /// Search](index.html#match-vs-search)
373    ///
374    /// # Arguments
375    ///
376    /// * `str` - The string slice to match against.
377    /// * `at` - The byte index in the passed slice to start matching
378    /// * `options` - The regex match options.
379    /// * `region` - The region for return group match range info
380    ///
381    /// # Returns
382    ///
383    /// `Some(len)` if the regex matched, with `len` being the number
384    /// of bytes matched. `None` if the regex doesn't match.
385    ///
386    /// # Examples
387    ///
388    /// ```
389    /// use onig::{Regex, SearchOptions};
390    ///
391    /// let r = Regex::new(".*").unwrap();
392    /// let res = r.match_with_options("hello", 0, SearchOptions::SEARCH_OPTION_NONE, None);
393    /// assert!(res.is_some()); // it matches
394    /// assert!(res.unwrap() == 5); // 5 characters matched
395    /// ```
396    pub fn match_with_options(
397        &self,
398        str: &str,
399        at: usize,
400        options: SearchOptions,
401        region: Option<&mut Region>,
402    ) -> Option<usize> {
403        self.match_with_encoding(str, at, options, region)
404    }
405
406    /// Match String with Encoding
407    ///
408    /// Match the regex against a string. This method will start at
409    /// the offset `at` into the string and try and match the
410    /// regex. If the regex matches then the return value is the
411    /// number of characters which matched. If the regex doesn't match
412    /// the return is `None`.
413    ///
414    /// For more information see [Match vs
415    /// Search](index.html#match-vs-search)
416    ///
417    /// The contents of `chars` must have the same encoding that was
418    /// used to construct the regex.
419    ///
420    /// # Arguments
421    ///
422    /// * `chars` - The buffer to match against.
423    /// * `at` - The byte index in the passed buffer to start matching
424    /// * `options` - The regex match options.
425    /// * `region` - The region for return group match range info
426    ///
427    /// # Returns
428    ///
429    /// `Some(len)` if the regex matched, with `len` being the number
430    /// of bytes matched. `None` if the regex doesn't match.
431    ///
432    /// # Examples
433    ///
434    /// ```
435    /// use onig::{Regex, EncodedBytes, SearchOptions};
436    ///
437    /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
438    /// let res = r.match_with_encoding(EncodedBytes::ascii(b"world"),
439    ///                                 0, SearchOptions::SEARCH_OPTION_NONE, None);
440    /// assert!(res.is_some()); // it matches
441    /// assert!(res.unwrap() == 5); // 5 characters matched
442    /// ```
443    pub fn match_with_encoding<T>(
444        &self,
445        chars: T,
446        at: usize,
447        options: SearchOptions,
448        region: Option<&mut Region>,
449    ) -> Option<usize>
450    where
451        T: EncodedChars,
452    {
453        let match_param = MatchParam::default();
454        let result = self.match_with_param(chars, at, options, region, match_param);
455
456        match result {
457            Ok(r) => r,
458            Err(e) => panic!("Onig: Regex match error: {}", e.description()),
459        }
460    }
461
462    /// Match string with encoding and match param
463    ///
464    /// Match the regex against a string. This method will start at
465    /// the offset `at` into the string and try and match the
466    /// regex. If the regex matches then the return value is the
467    /// number of characters which matched. If the regex doesn't match
468    /// the return is `None`.
469    ///
470    /// For more information see [Match vs
471    /// Search](index.html#match-vs-search)
472    ///
473    /// The contents of `chars` must have the same encoding that was
474    /// used to construct the regex.
475    ///
476    /// # Arguments
477    ///
478    /// * `chars` - The buffer to match against.
479    /// * `at` - The byte index in the passed buffer to start matching
480    /// * `options` - The regex match options.
481    /// * `region` - The region for return group match range info
482    /// * `match_param` - The match parameters
483    ///
484    /// # Returns
485    ///
486    /// `Ok(Some(len))` if the regex matched, with `len` being the number
487    /// of bytes matched. `Ok(None)` if the regex doesn't match. `Err` with an
488    /// `Error` if an error occurred (e.g. retry-limit-in-match exceeded).
489    ///
490    /// # Examples
491    ///
492    /// ```
493    /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
494    ///
495    /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
496    /// let res = r.match_with_param(EncodedBytes::ascii(b"world"),
497    ///                              0, SearchOptions::SEARCH_OPTION_NONE,
498    ///                              None, MatchParam::default());
499    /// assert!(res.is_ok()); // matching did not error
500    /// assert!(res.unwrap() == Some(5)); // 5 characters matched
501    /// ```
502    pub fn match_with_param<T>(
503        &self,
504        chars: T,
505        at: usize,
506        options: SearchOptions,
507        region: Option<&mut Region>,
508        match_param: MatchParam,
509    ) -> Result<Option<usize>, Error>
510    where
511        T: EncodedChars,
512    {
513        if chars.encoding() != self.encoding() {
514            return Err(Error::custom(format!(
515                "Regex encoding does not match haystack encoding ({0:?}, {1:?})",
516                chars.encoding(),
517                self.encoding()
518            )));
519        }
520        let r = unsafe {
521            let offset = chars.start_ptr().add(at);
522            if offset > chars.limit_ptr() {
523                return Err(Error::custom(format!("Offset {} is too large", at)));
524            }
525            onig_sys::onig_match_with_param(
526                self.raw,
527                chars.start_ptr(),
528                chars.limit_ptr(),
529                offset,
530                match region {
531                    Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
532                    None => std::ptr::null_mut(),
533                },
534                options.bits(),
535                match_param.as_raw(),
536            )
537        };
538
539        if r >= 0 {
540            Ok(Some(r as usize))
541        } else if r == onig_sys::ONIG_MISMATCH {
542            Ok(None)
543        } else {
544            Err(Error::from_code(r))
545        }
546    }
547
548    /// Search pattern in string
549    ///
550    /// Search for matches the regex in a string. This method will return the
551    /// index of the first match of the regex within the string, if
552    /// there is one. If `from` is less than `to`, then search is performed
553    /// in forward order, otherwise – in backward order.
554    ///
555    /// For more information see [Match vs
556    /// Search](index.html#match-vs-search)
557    ///
558    /// # Arguments
559    ///
560    ///  * `str` - The string to search in.
561    ///  * `from` - The byte index in the passed slice to start search
562    ///  * `to` - The byte index in the passed slice to finish search
563    ///  * `options` - The options for the search.
564    ///  * `region` - The region for return group match range info
565    ///
566    /// # Returns
567    ///
568    /// `Some(pos)` if the regex matches, where `pos` is the
569    /// byte-position of the start of the match. `None` if the regex
570    /// doesn't match anywhere in `str`.
571    ///
572    /// # Examples
573    ///
574    /// ```
575    /// use onig::{Regex, SearchOptions};
576    ///
577    /// let r = Regex::new("l{1,2}").unwrap();
578    /// let res = r.search_with_options("hello", 0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
579    /// assert!(res.is_some()); // it matches
580    /// assert!(res.unwrap() == 2); // match starts at character 3
581    /// ```
582    pub fn search_with_options(
583        &self,
584        str: &str,
585        from: usize,
586        to: usize,
587        options: SearchOptions,
588        region: Option<&mut Region>,
589    ) -> Option<usize> {
590        self.search_with_encoding(str, from, to, options, region)
591    }
592
593    /// Search for a Pattern in a String with an Encoding
594    ///
595    /// Search for matches the regex in a string. This method will
596    /// return the index of the first match of the regex within the
597    /// string, if there is one. If `from` is less than `to`, then
598    /// search is performed in forward order, otherwise – in backward
599    /// order.
600    ///
601    /// For more information see [Match vs
602    /// Search](index.html#match-vs-search)
603    ///
604    /// The encoding of the buffer passed to search in must match the
605    /// encoding of the regex.
606    ///
607    /// # Arguments
608    ///
609    ///  * `chars` - The character buffer to search in.
610    ///  * `from` - The byte index in the passed slice to start search
611    ///  * `to` - The byte index in the passed slice to finish search
612    ///  * `options` - The options for the search.
613    ///  * `region` - The region for return group match range info
614    ///
615    /// # Returns
616    ///
617    /// `Some(pos)` if the regex matches, where `pos` is the
618    /// byte-position of the start of the match. `None` if the regex
619    /// doesn't match anywhere in `chars`.
620    ///
621    /// # Examples
622    ///
623    /// ```
624    /// use onig::{Regex, EncodedBytes, SearchOptions};
625    ///
626    /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
627    /// let res = r.search_with_encoding(EncodedBytes::ascii(b"hello"),
628    ///                                  0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
629    /// assert!(res.is_some()); // it matches
630    /// assert!(res.unwrap() == 2); // match starts at character 3
631    /// ```
632    pub fn search_with_encoding<T>(
633        &self,
634        chars: T,
635        from: usize,
636        to: usize,
637        options: SearchOptions,
638        region: Option<&mut Region>,
639    ) -> Option<usize>
640    where
641        T: EncodedChars,
642    {
643        let match_param = MatchParam::default();
644        let result = self.search_with_param(chars, from, to, options, region, match_param);
645
646        match result {
647            Ok(r) => r,
648            Err(e) => panic!("Onig: Regex search error: {}", e.description()),
649        }
650    }
651
652    /// Search pattern in string with encoding and match param
653    ///
654    /// Search for matches the regex in a string. This method will
655    /// return the index of the first match of the regex within the
656    /// string, if there is one. If `from` is less than `to`, then
657    /// search is performed in forward order, otherwise – in backward
658    /// order.
659    ///
660    /// For more information see [Match vs
661    /// Search](index.html#match-vs-search)
662    ///
663    /// The encoding of the buffer passed to search in must match the
664    /// encoding of the regex.
665    ///
666    /// # Arguments
667    ///
668    ///  * `chars` - The character buffer to search in.
669    ///  * `from` - The byte index in the passed slice to start search
670    ///  * `to` - The byte index in the passed slice to finish search
671    ///  * `options` - The options for the search.
672    ///  * `region` - The region for return group match range info
673    ///  * `match_param` - The match parameters
674    ///
675    /// # Returns
676    ///
677    /// `Ok(Some(pos))` if the regex matches, where `pos` is the
678    /// byte-position of the start of the match. `Ok(None)` if the regex
679    /// doesn't match anywhere in `chars`. `Err` with an `Error` if an error
680    /// occurred (e.g. retry-limit-in-match exceeded).
681    ///
682    /// # Examples
683    ///
684    /// ```
685    /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
686    ///
687    /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
688    /// let res = r.search_with_param(EncodedBytes::ascii(b"hello"),
689    ///                               0, 5, SearchOptions::SEARCH_OPTION_NONE,
690    ///                               None, MatchParam::default());
691    /// assert!(res.is_ok()); // matching did not error
692    /// assert!(res.unwrap() == Some(2)); // match starts at character 3
693    /// ```
694    pub fn search_with_param<T>(
695        &self,
696        chars: T,
697        from: usize,
698        to: usize,
699        options: SearchOptions,
700        region: Option<&mut Region>,
701        match_param: MatchParam,
702    ) -> Result<Option<usize>, Error>
703    where
704        T: EncodedChars,
705    {
706        let (beg, end) = (chars.start_ptr(), chars.limit_ptr());
707        if chars.encoding() != self.encoding() {
708            return Err(Error::custom(format!(
709                "Regex encoding does not match haystack encoding ({0:?}, {1:?})",
710                chars.encoding(),
711                self.encoding()
712            )));
713        }
714        let r = unsafe {
715            let start = beg.add(from);
716            let range = beg.add(to);
717            if start > end {
718                return Err(Error::custom("Start of match should be before end"));
719            }
720            if range > end {
721                return Err(Error::custom("Limit of match should be before end"));
722            }
723            onig_sys::onig_search_with_param(
724                self.raw,
725                beg,
726                end,
727                start,
728                range,
729                match region {
730                    Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
731                    None => std::ptr::null_mut(),
732                },
733                options.bits(),
734                match_param.as_raw(),
735            )
736        };
737
738        if r >= 0 {
739            Ok(Some(r as usize))
740        } else if r == onig_sys::ONIG_MISMATCH {
741            Ok(None)
742        } else {
743            Err(Error::from_code(r))
744        }
745    }
746
747    /// Returns true if and only if the regex matches the string given.
748    ///
749    /// For more information see [Match vs
750    /// Search](index.html#match-vs-search)
751    ///
752    /// # Arguments
753    ///  * `text` - The string slice to test against the pattern.
754    ///
755    /// # Returns
756    ///
757    /// `true` if the pattern matches the whole of `text`, `false` otherwise.
758    pub fn is_match(&self, text: &str) -> bool {
759        self.match_with_options(text, 0, SearchOptions::SEARCH_OPTION_NONE, None)
760            .map(|r| r == text.len())
761            .unwrap_or(false)
762    }
763
764    /// Find a Match in a Buffer, With Encoding
765    ///
766    /// Finds the first match of the regular expression within the
767    /// buffer.
768    ///
769    /// Note that this should only be used if you want to discover the
770    /// position of the match within a string. Testing if a pattern
771    /// matches the whole string is faster if you use `is_match`.  For
772    /// more information see [Match vs
773    /// Search](index.html#match-vs-search)
774    ///
775    /// # Arguments
776    ///  * `text` - The text to search in.
777    ///
778    /// # Returns
779    ///
780    ///  The offset of the start and end of the first match. If no
781    ///  match exists `None` is returned.
782    pub fn find(&self, text: &str) -> Option<(usize, usize)> {
783        self.find_with_encoding(text)
784    }
785
786    /// Find a Match in a Buffer, With Encoding
787    ///
788    /// Finds the first match of the regular expression within the
789    /// buffer.
790    ///
791    /// For more information see [Match vs
792    /// Search](index.html#match-vs-search)
793    ///
794    /// # Arguments
795    ///  * `text` - The text to search in.
796    ///
797    /// # Returns
798    ///
799    ///  The offset of the start and end of the first match. If no
800    ///  match exists `None` is returned.
801    pub fn find_with_encoding<T>(&self, text: T) -> Option<(usize, usize)>
802    where
803        T: EncodedChars,
804    {
805        let mut region = Region::new();
806        let len = text.len();
807        self.search_with_encoding(
808            text,
809            0,
810            len,
811            SearchOptions::SEARCH_OPTION_NONE,
812            Some(&mut region),
813        )
814        .and_then(|_| region.pos(0))
815    }
816
817    /// Get the Encoding of the Regex
818    ///
819    /// # Returns
820    ///
821    /// Returns a reference to an oniguruma encoding which was used
822    /// when this regex was created.
823    pub fn encoding(&self) -> onig_sys::OnigEncoding {
824        unsafe { onig_sys::onig_get_encoding(self.raw) }
825    }
826
827    /// Get the Number of Capture Groups in this Pattern
828    pub fn captures_len(&self) -> usize {
829        unsafe { onig_sys::onig_number_of_captures(self.raw) as usize }
830    }
831
832    /// Get the Size of the Capture Histories for this Pattern
833    pub fn capture_histories_len(&self) -> usize {
834        unsafe { onig_sys::onig_number_of_capture_histories(self.raw) as usize }
835    }
836}
837
impl Drop for Regex {
    /// Release the underlying Oniguruma regex when the wrapper is dropped.
    fn drop(&mut self) {
        // Hand the raw regex handle back to Oniguruma via `onig_free`.
        unsafe {
            onig_sys::onig_free(self.raw);
        }
    }
}
845
846#[cfg(test)]
847mod tests {
848    use super::*;
849    use std::panic;
850
851    #[test]
852    fn test_regex_create() {
853        Regex::with_options(".*", RegexOptions::REGEX_OPTION_NONE, Syntax::default()).unwrap();
854
855        Regex::new(r#"a \w+ word"#).unwrap();
856    }
857
858    #[test]
859    fn test_regex_invalid() {
860        let e = Regex::new("\\p{foo}").unwrap_err();
861        assert_eq!(e.code(), -223);
862        assert_eq!(e.description(), "invalid character property name {foo}");
863    }
864
865    #[test]
866    fn test_failed_match() {
867        let regex = Regex::new("foo").unwrap();
868        let res = regex.match_with_options("bar", 0, SearchOptions::SEARCH_OPTION_NONE, None);
869        assert!(res.is_none());
870    }
871
872    #[test]
873    fn test_regex_search_with_options() {
874        let mut region = Region::new();
875        let regex = Regex::new("e(l+)").unwrap();
876
877        let r = regex.search_with_options(
878            "hello",
879            0,
880            5,
881            SearchOptions::SEARCH_OPTION_NONE,
882            Some(&mut region),
883        );
884
885        assert!(region.tree().is_none());
886        assert_eq!(r, Some(1));
887        assert_eq!(region.len(), 2);
888        let pos1 = region.pos(0).unwrap();
889        let pos2 = region.pos(1).unwrap();
890        assert_eq!(pos1, (1, 4));
891        assert_eq!(pos2, (2, 4));
892
893        // test cloning here since we already have a filled region
894        let cloned_region = region.clone();
895        let pos1_clone = cloned_region.pos(0).unwrap();
896        assert_eq!(pos1_clone, pos1);
897    }
898
899    #[test]
900    fn test_regex_match_with_options() {
901        let mut region = Region::new();
902        let regex = Regex::new("he(l+)").unwrap();
903
904        let r = regex.match_with_options(
905            "hello",
906            0,
907            SearchOptions::SEARCH_OPTION_NONE,
908            Some(&mut region),
909        );
910
911        assert!(region.tree().is_none());
912        assert_eq!(r, Some(4));
913        assert_eq!(region.len(), 2);
914        let pos1 = region.pos(0).unwrap();
915        let pos2 = region.pos(1).unwrap();
916        assert_eq!(pos1, (0, 4));
917        assert_eq!(pos2, (2, 4));
918    }
919
920    #[test]
921    fn test_regex_is_match() {
922        let regex = Regex::new("he(l+)o").unwrap();
923        assert!(regex.is_match("hello"));
924        assert!(!regex.is_match("hello 2.0"));
925    }
926
927    #[test]
928    fn test_regex_find() {
929        let regex = Regex::new("he(l+)o").unwrap();
930        assert_eq!(regex.find("hey, hello!"), Some((5, 10)));
931        assert_eq!(regex.find("hey, honey!"), None);
932    }
933
934    #[test]
935    fn test_regex_captures_len() {
936        let regex = Regex::new("(he)(l+)(o)").unwrap();
937        assert_eq!(regex.captures_len(), 3);
938    }
939
940    #[test]
941    fn test_regex_error_is_match() {
942        let regex = Regex::new("(a|b|ab)*bc").unwrap();
943        let result = regex.match_with_param(
944            "ababababababababababababababababababababababababababababacbc",
945            0,
946            SearchOptions::SEARCH_OPTION_NONE,
947            None,
948            MatchParam::default(),
949        );
950
951        let e = result.err().unwrap();
952        assert_eq!("retry-limit-in-match over", e.description());
953    }
954
955    #[test]
956    fn test_regex_panic_is_match() {
957        let regex = Regex::new("(a|b|ab)*bc").unwrap();
958        let result = panic::catch_unwind(|| {
959            regex.is_match("ababababababababababababababababababababababababababababacbc")
960        });
961        let e = result.err().unwrap();
962        let message = e.downcast_ref::<String>().unwrap();
963        assert_eq!(
964            message.as_str(),
965            "Onig: Regex match error: retry-limit-in-match over"
966        );
967    }
968
969    #[test]
970    fn test_regex_error_find() {
971        let regex = Regex::new("(a|b|ab)*bc").unwrap();
972        let s = "ababababababababababababababababababababababababababababacbc";
973        let result = regex.search_with_param(
974            s,
975            0,
976            s.len(),
977            SearchOptions::SEARCH_OPTION_NONE,
978            None,
979            MatchParam::default(),
980        );
981
982        let e = result.err().unwrap();
983        assert_eq!("retry-limit-in-match over", e.description());
984    }
985
986    #[test]
987    fn test_regex_panic_find() {
988        let regex = Regex::new("(a|b|ab)*bc").unwrap();
989        let result = panic::catch_unwind(|| {
990            regex.find("ababababababababababababababababababababababababababababacbc")
991        });
992        let e = result.err().unwrap();
993        let message = e.downcast_ref::<String>().unwrap();
994        assert_eq!(
995            message.as_str(),
996            "Onig: Regex search error: retry-limit-in-match over"
997        );
998    }
999
1000    #[test]
1001    fn test_search_with_invalid_range() {
1002        let regex = Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
1003            .expect("regex");
1004        let string = "Ruby";
1005        let is_match = regex.search_with_param(
1006            string,
1007            5,
1008            string.len(),
1009            SearchOptions::SEARCH_OPTION_NONE,
1010            None,
1011            MatchParam::default(),
1012        );
1013        assert!(is_match.is_err());
1014
1015        let is_match = regex.search_with_param(
1016            string,
1017            2,
1018            string.len() + 1,
1019            SearchOptions::SEARCH_OPTION_NONE,
1020            None,
1021            MatchParam::default(),
1022        );
1023        assert!(is_match.is_err());
1024    }
1025
1026    #[test]
1027    fn test_search_with_invalid_range_panic() {
1028        let regex = Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
1029            .expect("regex");
1030        let string = "Ruby";
1031        let is_match = panic::catch_unwind(|| {
1032            regex.search_with_encoding(
1033                string,
1034                5,
1035                string.len(),
1036                SearchOptions::SEARCH_OPTION_NONE,
1037                None,
1038            )
1039        });
1040        assert!(is_match.is_err());
1041    }
1042
1043    #[test]
1044    fn test_match_with_invalid_range() {
1045        let regex = Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
1046            .expect("regex");
1047        let string = "Ruby";
1048        let is_match = regex.match_with_param(
1049            string,
1050            5,
1051            SearchOptions::SEARCH_OPTION_NONE,
1052            None,
1053            MatchParam::default(),
1054        );
1055        assert!(is_match.is_err());
1056    }
1057
1058    #[test]
1059    fn test_match_with_invalid_range_panic() {
1060        let regex = Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
1061            .expect("regex");
1062        let string = "Ruby";
1063        let is_match = panic::catch_unwind(|| {
1064            regex.match_with_encoding(string, 5, SearchOptions::SEARCH_OPTION_NONE, None)
1065        });
1066        assert!(is_match.is_err());
1067    }
1068}