onig/lib.rs
1//! This crate provides a safe wrapper around the
2//! [Oniguruma](https://github.com/kkos/oniguruma) regular expression library.
3//!
4//! # Examples
5//!
6//! ```rust
7//! use onig::Regex;
8//!
9//! let regex = Regex::new("e(l+)").unwrap();
10//! for (i, pos) in regex.captures("hello").unwrap().iter_pos().enumerate() {
11//! match pos {
12//! Some((beg, end)) =>
13//! println!("Group {} captured in position {}:{}", i, beg, end),
14//! None =>
15//! println!("Group {} is not captured", i)
16//! }
17//! }
18//! ```
19//!
20//! # Match vs Search
21//!
//! There are two basic things you can do with a `Regex` pattern: test
//! if the pattern matches the whole of a given string, and search for
//! occurrences of the pattern within a string. Oniguruma exposes these
//! two concepts with the *match* and *search* APIs.
26//!
//! In addition to these two base Oniguruma APIs this crate exposes a
//! third *find* API, built on top of the *search* API.
29//!
30//! ```
31//! # use onig::Regex;
32//! let pattern = Regex::new("hello").unwrap();
33//! assert_eq!(true, pattern.find("hello world").is_some());
34//! assert_eq!(false, pattern.is_match("hello world"));
35//! ```
36//!
37//! ## The *Match* API
38//!
//! Functions in the match API check if a pattern matches the entire
//! string. The simplest of these is `Regex::is_match`. This returns
//! `true` if the pattern matches the string. For more complex usage,
//! `Regex::match_with_options` and `Regex::match_with_encoding`
//! can be used. These allow the capture groups to be inspected,
//! matching with different options, and matching sub-sections of a
//! given text.
46//!
47//! ## The *Search* API
48//!
//! Functions in the search API search for a pattern anywhere within a
//! string. The simplest of these is `Regex::find`. This returns the
//! offset of the first occurrence of the pattern within the string.
//! For more complex usage, `Regex::search_with_options` and
//! `Regex::search_with_encoding` can be used. These allow capture
//! groups to be inspected, searching with different options and
//! searching within subsections of a given text.
56//!
57//! ## The *Find* API
58//!
59//! The find API is built on top of the search API. Functions in this
60//! API allow iteration across all matches of the pattern within a
61//! string, not just the first one. The functions deal with some of
62//! the complexities of this, such as zero-length matches.
63//!
64//! The simplest step-up from the basic search API `Regex::find` is
65//! getting the captures relating to a match with the
66//! `Regex::captures` method. To find capture information for all
67//! matches within a string `Regex::find_iter` and
68//! `Regex::captures_iter` can be used. The former exposes the start
69//! and end of the match as `Regex::find` does, the latter exposes the
70//! whole capture group information as `Regex::captures` does.
71//!
72//! # The `std::pattern` API
73//!
74//! In addition to the main Oniguruma API it is possible to use the
75//! `Regex` object with the
76//! [`std::pattern`](https://doc.rust-lang.org/std/str/pattern/)
77//! API. To enable support compile with the `std-pattern` feature. If
78//! you're using Cargo you can do this by adding the following to your
79//! Cargo.toml:
80//!
81//! ```toml
82//! [dependencies.onig]
83//! version = "1.2"
84//! features = ["std-pattern"]
85//! ```
86
87#![cfg_attr(not(feature = "cargo-clippy"), allow(unknown_lints))]
88#![cfg_attr(feature = "std-pattern", feature(pattern))]
89#![deny(missing_docs)]
90
91use once_cell::sync::Lazy;
92
93mod buffers;
94mod find;
95mod flags;
96mod match_param;
97mod names;
98mod region;
99mod replace;
100mod syntax;
101mod tree;
102mod utils;
103
104#[cfg(feature = "std-pattern")]
105mod pattern;
106
// re-export the onig types publicly
108pub use crate::buffers::{EncodedBytes, EncodedChars};
109pub use crate::find::{
110 Captures, FindCaptures, FindMatches, RegexSplits, RegexSplitsN, SubCaptures, SubCapturesPos,
111};
112pub use crate::flags::*;
113pub use crate::match_param::MatchParam;
114pub use crate::region::Region;
115pub use crate::replace::Replacer;
116pub use crate::syntax::{MetaChar, Syntax};
117pub use crate::tree::{CaptureTreeNode, CaptureTreeNodeIter};
118pub use crate::utils::{copyright, define_user_property, version};
119
120use std::os::raw::c_int;
121use std::ptr::{null, null_mut};
122use std::sync::Mutex;
123use std::{error, fmt, str};
124
/// Origin of an [`Error`]: either the Oniguruma engine or this wrapper.
#[derive(Debug)]
enum ErrorData {
    /// Failure reported by Oniguruma; carries the raw engine error code.
    OnigError(c_int),
    /// Failure raised by the wrapper itself, so there is no engine code.
    Custom,
}
130
131/// This struture represents an error from the underlying Oniguruma libray.
132pub struct Error {
133 data: ErrorData,
134 description: String,
135}
136
137/// This struct is a wrapper around an Oniguruma regular expression
138/// pointer. This represents a compiled regex which can be used in
139/// search and match operations.
140#[derive(Debug, Eq, PartialEq)]
141pub struct Regex {
142 raw: onig_sys::OnigRegex,
143}
144
145unsafe impl Send for Regex {}
146unsafe impl Sync for Regex {}
147
148impl Error {
149 fn from_code_and_info(code: c_int, info: &onig_sys::OnigErrorInfo) -> Self {
150 Error::new(code, info)
151 }
152
153 fn from_code(code: c_int) -> Self {
154 Error::new(code, null())
155 }
156
157 fn custom<T: Into<String>>(message: T) -> Self {
158 Error {
159 data: ErrorData::Custom,
160 description: message.into(),
161 }
162 }
163
164 fn new(code: c_int, info: *const onig_sys::OnigErrorInfo) -> Self {
165 let buff = &mut [0; onig_sys::ONIG_MAX_ERROR_MESSAGE_LEN as usize];
166 let len = unsafe { onig_sys::onig_error_code_to_str(buff.as_mut_ptr(), code, info) };
167 let description = if let Ok(description) = str::from_utf8(&buff[..len as usize]) {
168 description
169 } else {
170 return Self::custom("Onig error string was invalid UTF-8");
171 };
172 Error {
173 data: ErrorData::OnigError(code),
174 description: description.to_owned(),
175 }
176 }
177
178 /// Return Oniguruma engine error code.
179 pub fn code(&self) -> i32 {
180 match self.data {
181 ErrorData::OnigError(code) => code,
182 _ => -1,
183 }
184 }
185
186 /// Return error description provided by Oniguruma engine.
187 pub fn description(&self) -> &str {
188 &self.description
189 }
190}
191
192impl error::Error for Error {
193 fn description(&self) -> &str {
194 &self.description
195 }
196}
197
198impl fmt::Display for Error {
199 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
200 write!(f, "Oniguruma error: {}", self.description())
201 }
202}
203
204impl fmt::Debug for Error {
205 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
206 write!(f, "Error({:?}, {})", self.data, self.description())
207 }
208}
209
210static REGEX_NEW_MUTEX: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
211
212impl Regex {
213 /// Create a Regex
214 ///
215 /// Simple regular expression constructor. Compiles a new regular
216 /// expression with the default options using the ruby syntax.
217 /// Once compiled, it can be used repeatedly to search in a string. If an
218 /// invalid expression is given, then an error is returned.
219 ///
220 /// # Arguments
221 ///
222 /// * `pattern` - The regex pattern to compile
223 ///
224 /// # Examples
225 ///
226 /// ```
227 /// use onig::Regex;
228 /// let r = Regex::new(r#"hello (\w+)"#);
229 /// assert!(r.is_ok());
230 /// ```
231 pub fn new(pattern: &str) -> Result<Self, Error> {
232 Regex::with_encoding(pattern)
233 }
234
235 /// Create a Regex, Specifying an Encoding
236 ///
237 /// Attempts to compile `pattern` into a new `Regex`
238 /// instance. Instead of assuming UTF-8 as the encoding scheme the
239 /// encoding is inferred from the `pattern` buffer.
240 ///
241 /// # Arguments
242 ///
243 /// * `pattern` - The regex pattern to compile
244 ///
245 /// # Examples
246 ///
247 /// ```
248 /// use onig::{Regex, EncodedBytes};
249 /// let utf8 = Regex::with_encoding("hello");
250 /// assert!(utf8.is_ok());
251 /// let ascii = Regex::with_encoding(EncodedBytes::ascii(b"world"));
252 /// assert!(ascii.is_ok());
253 /// ```
254 pub fn with_encoding<T>(pattern: T) -> Result<Regex, Error>
255 where
256 T: EncodedChars,
257 {
258 Regex::with_options_and_encoding(
259 pattern,
260 RegexOptions::REGEX_OPTION_NONE,
261 Syntax::default(),
262 )
263 }
264
265 /// Create a new Regex
266 ///
267 /// Attempts to compile a pattern into a new `Regex` instance.
268 /// Once compiled, it can be used repeatedly to search in a string. If an
269 /// invalid expression is given, then an error is returned.
270 /// See [`onig_sys::onig_new`][regex_new] for more information.
271 ///
272 /// # Arguments
273 ///
274 /// * `pattern` - The regex pattern to compile.
275 /// * `options` - The regex compilation options.
276 /// * `syntax` - The syntax which the regex is written in.
277 ///
278 /// # Examples
279 ///
280 /// ```
281 /// use onig::{Regex, Syntax, RegexOptions};
282 /// let r = Regex::with_options("hello.*world",
283 /// RegexOptions::REGEX_OPTION_NONE,
284 /// Syntax::default());
285 /// assert!(r.is_ok());
286 /// ```
287 ///
288 /// [regex_new]: ./onig_sys/fn.onig_new.html
289 pub fn with_options(
290 pattern: &str,
291 option: RegexOptions,
292 syntax: &Syntax,
293 ) -> Result<Regex, Error> {
294 Regex::with_options_and_encoding(pattern, option, syntax)
295 }
296
297 /// Create a new Regex, Specifying Options and Ecoding
298 ///
299 /// Attempts to comile the given `pattern` into a new `Regex`
300 /// instance. Instead of assuming UTF-8 as the encoding scheme the
301 /// encoding is inferred from the `pattern` buffer. If the regex
302 /// fails to compile the returned `Error` value from
303 /// [`onig_new`][regex_new] contains more information.
304 ///
305 /// [regex_new]: ./onig_sys/fn.onig_new.html
306 ///
307 /// # Arguments
308 ///
309 /// * `pattern` - The regex pattern to compile.
310 /// * `options` - The regex compilation options.
311 /// * `syntax` - The syntax which the regex is written in.
312 ///
313 /// # Examples
314 /// ```
315 /// use onig::{Regex, Syntax, EncodedBytes, RegexOptions};
316 /// let pattern = EncodedBytes::ascii(b"hello");
317 /// let r = Regex::with_options_and_encoding(pattern,
318 /// RegexOptions::REGEX_OPTION_SINGLELINE,
319 /// Syntax::default());
320 /// assert!(r.is_ok());
321 /// ```
322 pub fn with_options_and_encoding<T>(
323 pattern: T,
324 option: RegexOptions,
325 syntax: &Syntax,
326 ) -> Result<Self, Error>
327 where
328 T: EncodedChars,
329 {
330 // Convert the rust types to those required for the call to
331 // `onig_new`.
332 let mut reg: onig_sys::OnigRegex = null_mut();
333 let reg_ptr = &mut reg as *mut onig_sys::OnigRegex;
334
335 // We can use this later to get an error message to pass back
336 // if regex creation fails.
337 let mut error = onig_sys::OnigErrorInfo {
338 enc: null_mut(),
339 par: null_mut(),
340 par_end: null_mut(),
341 };
342
343 let err = unsafe {
344 // Grab a lock to make sure that `onig_new` isn't called by
345 // more than one thread at a time.
346 let _guard = REGEX_NEW_MUTEX.lock().unwrap();
347 onig_sys::onig_new(
348 reg_ptr,
349 pattern.start_ptr(),
350 pattern.limit_ptr(),
351 option.bits(),
352 pattern.encoding(),
353 syntax as *const Syntax as *mut Syntax as *mut onig_sys::OnigSyntaxType,
354 &mut error,
355 )
356 };
357
358 if err == onig_sys::ONIG_NORMAL as i32 {
359 Ok(Regex { raw: reg })
360 } else {
361 Err(Error::from_code_and_info(err, &error))
362 }
363 }
364
365 /// Match String
366 ///
367 /// Try to match the regex against the given string slice,
368 /// starting at a given offset. This method works the same way as
369 /// `match_with_encoding`, but the encoding is always utf-8.
370 ///
371 /// For more information see [Match vs
372 /// Search](index.html#match-vs-search)
373 ///
374 /// # Arguments
375 ///
376 /// * `str` - The string slice to match against.
377 /// * `at` - The byte index in the passed slice to start matching
378 /// * `options` - The regex match options.
379 /// * `region` - The region for return group match range info
380 ///
381 /// # Returns
382 ///
383 /// `Some(len)` if the regex matched, with `len` being the number
384 /// of bytes matched. `None` if the regex doesn't match.
385 ///
386 /// # Examples
387 ///
388 /// ```
389 /// use onig::{Regex, SearchOptions};
390 ///
391 /// let r = Regex::new(".*").unwrap();
392 /// let res = r.match_with_options("hello", 0, SearchOptions::SEARCH_OPTION_NONE, None);
393 /// assert!(res.is_some()); // it matches
394 /// assert!(res.unwrap() == 5); // 5 characters matched
395 /// ```
396 pub fn match_with_options(
397 &self,
398 str: &str,
399 at: usize,
400 options: SearchOptions,
401 region: Option<&mut Region>,
402 ) -> Option<usize> {
403 self.match_with_encoding(str, at, options, region)
404 }
405
406 /// Match String with Encoding
407 ///
408 /// Match the regex against a string. This method will start at
409 /// the offset `at` into the string and try and match the
410 /// regex. If the regex matches then the return value is the
411 /// number of characters which matched. If the regex doesn't match
412 /// the return is `None`.
413 ///
414 /// For more information see [Match vs
415 /// Search](index.html#match-vs-search)
416 ///
417 /// The contents of `chars` must have the same encoding that was
418 /// used to construct the regex.
419 ///
420 /// # Arguments
421 ///
422 /// * `chars` - The buffer to match against.
423 /// * `at` - The byte index in the passed buffer to start matching
424 /// * `options` - The regex match options.
425 /// * `region` - The region for return group match range info
426 ///
427 /// # Returns
428 ///
429 /// `Some(len)` if the regex matched, with `len` being the number
430 /// of bytes matched. `None` if the regex doesn't match.
431 ///
432 /// # Examples
433 ///
434 /// ```
435 /// use onig::{Regex, EncodedBytes, SearchOptions};
436 ///
437 /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
438 /// let res = r.match_with_encoding(EncodedBytes::ascii(b"world"),
439 /// 0, SearchOptions::SEARCH_OPTION_NONE, None);
440 /// assert!(res.is_some()); // it matches
441 /// assert!(res.unwrap() == 5); // 5 characters matched
442 /// ```
443 pub fn match_with_encoding<T>(
444 &self,
445 chars: T,
446 at: usize,
447 options: SearchOptions,
448 region: Option<&mut Region>,
449 ) -> Option<usize>
450 where
451 T: EncodedChars,
452 {
453 let match_param = MatchParam::default();
454 let result = self.match_with_param(chars, at, options, region, match_param);
455
456 match result {
457 Ok(r) => r,
458 Err(e) => panic!("Onig: Regex match error: {}", e.description()),
459 }
460 }
461
462 /// Match string with encoding and match param
463 ///
464 /// Match the regex against a string. This method will start at
465 /// the offset `at` into the string and try and match the
466 /// regex. If the regex matches then the return value is the
467 /// number of characters which matched. If the regex doesn't match
468 /// the return is `None`.
469 ///
470 /// For more information see [Match vs
471 /// Search](index.html#match-vs-search)
472 ///
473 /// The contents of `chars` must have the same encoding that was
474 /// used to construct the regex.
475 ///
476 /// # Arguments
477 ///
478 /// * `chars` - The buffer to match against.
479 /// * `at` - The byte index in the passed buffer to start matching
480 /// * `options` - The regex match options.
481 /// * `region` - The region for return group match range info
482 /// * `match_param` - The match parameters
483 ///
484 /// # Returns
485 ///
486 /// `Ok(Some(len))` if the regex matched, with `len` being the number
487 /// of bytes matched. `Ok(None)` if the regex doesn't match. `Err` with an
488 /// `Error` if an error occurred (e.g. retry-limit-in-match exceeded).
489 ///
490 /// # Examples
491 ///
492 /// ```
493 /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
494 ///
495 /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
496 /// let res = r.match_with_param(EncodedBytes::ascii(b"world"),
497 /// 0, SearchOptions::SEARCH_OPTION_NONE,
498 /// None, MatchParam::default());
499 /// assert!(res.is_ok()); // matching did not error
500 /// assert!(res.unwrap() == Some(5)); // 5 characters matched
501 /// ```
502 pub fn match_with_param<T>(
503 &self,
504 chars: T,
505 at: usize,
506 options: SearchOptions,
507 region: Option<&mut Region>,
508 match_param: MatchParam,
509 ) -> Result<Option<usize>, Error>
510 where
511 T: EncodedChars,
512 {
513 if chars.encoding() != self.encoding() {
514 return Err(Error::custom(format!(
515 "Regex encoding does not match haystack encoding ({0:?}, {1:?})",
516 chars.encoding(),
517 self.encoding()
518 )));
519 }
520 let r = unsafe {
521 let offset = chars.start_ptr().add(at);
522 if offset > chars.limit_ptr() {
523 return Err(Error::custom(format!("Offset {} is too large", at)));
524 }
525 onig_sys::onig_match_with_param(
526 self.raw,
527 chars.start_ptr(),
528 chars.limit_ptr(),
529 offset,
530 match region {
531 Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
532 None => std::ptr::null_mut(),
533 },
534 options.bits(),
535 match_param.as_raw(),
536 )
537 };
538
539 if r >= 0 {
540 Ok(Some(r as usize))
541 } else if r == onig_sys::ONIG_MISMATCH {
542 Ok(None)
543 } else {
544 Err(Error::from_code(r))
545 }
546 }
547
548 /// Search pattern in string
549 ///
550 /// Search for matches the regex in a string. This method will return the
551 /// index of the first match of the regex within the string, if
552 /// there is one. If `from` is less than `to`, then search is performed
553 /// in forward order, otherwise – in backward order.
554 ///
555 /// For more information see [Match vs
556 /// Search](index.html#match-vs-search)
557 ///
558 /// # Arguments
559 ///
560 /// * `str` - The string to search in.
561 /// * `from` - The byte index in the passed slice to start search
562 /// * `to` - The byte index in the passed slice to finish search
563 /// * `options` - The options for the search.
564 /// * `region` - The region for return group match range info
565 ///
566 /// # Returns
567 ///
568 /// `Some(pos)` if the regex matches, where `pos` is the
569 /// byte-position of the start of the match. `None` if the regex
570 /// doesn't match anywhere in `str`.
571 ///
572 /// # Examples
573 ///
574 /// ```
575 /// use onig::{Regex, SearchOptions};
576 ///
577 /// let r = Regex::new("l{1,2}").unwrap();
578 /// let res = r.search_with_options("hello", 0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
579 /// assert!(res.is_some()); // it matches
580 /// assert!(res.unwrap() == 2); // match starts at character 3
581 /// ```
582 pub fn search_with_options(
583 &self,
584 str: &str,
585 from: usize,
586 to: usize,
587 options: SearchOptions,
588 region: Option<&mut Region>,
589 ) -> Option<usize> {
590 self.search_with_encoding(str, from, to, options, region)
591 }
592
593 /// Search for a Pattern in a String with an Encoding
594 ///
595 /// Search for matches the regex in a string. This method will
596 /// return the index of the first match of the regex within the
597 /// string, if there is one. If `from` is less than `to`, then
598 /// search is performed in forward order, otherwise – in backward
599 /// order.
600 ///
601 /// For more information see [Match vs
602 /// Search](index.html#match-vs-search)
603 ///
604 /// The encoding of the buffer passed to search in must match the
605 /// encoding of the regex.
606 ///
607 /// # Arguments
608 ///
609 /// * `chars` - The character buffer to search in.
610 /// * `from` - The byte index in the passed slice to start search
611 /// * `to` - The byte index in the passed slice to finish search
612 /// * `options` - The options for the search.
613 /// * `region` - The region for return group match range info
614 ///
615 /// # Returns
616 ///
617 /// `Some(pos)` if the regex matches, where `pos` is the
618 /// byte-position of the start of the match. `None` if the regex
619 /// doesn't match anywhere in `chars`.
620 ///
621 /// # Examples
622 ///
623 /// ```
624 /// use onig::{Regex, EncodedBytes, SearchOptions};
625 ///
626 /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
627 /// let res = r.search_with_encoding(EncodedBytes::ascii(b"hello"),
628 /// 0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
629 /// assert!(res.is_some()); // it matches
630 /// assert!(res.unwrap() == 2); // match starts at character 3
631 /// ```
632 pub fn search_with_encoding<T>(
633 &self,
634 chars: T,
635 from: usize,
636 to: usize,
637 options: SearchOptions,
638 region: Option<&mut Region>,
639 ) -> Option<usize>
640 where
641 T: EncodedChars,
642 {
643 let match_param = MatchParam::default();
644 let result = self.search_with_param(chars, from, to, options, region, match_param);
645
646 match result {
647 Ok(r) => r,
648 Err(e) => panic!("Onig: Regex search error: {}", e.description()),
649 }
650 }
651
652 /// Search pattern in string with encoding and match param
653 ///
654 /// Search for matches the regex in a string. This method will
655 /// return the index of the first match of the regex within the
656 /// string, if there is one. If `from` is less than `to`, then
657 /// search is performed in forward order, otherwise – in backward
658 /// order.
659 ///
660 /// For more information see [Match vs
661 /// Search](index.html#match-vs-search)
662 ///
663 /// The encoding of the buffer passed to search in must match the
664 /// encoding of the regex.
665 ///
666 /// # Arguments
667 ///
668 /// * `chars` - The character buffer to search in.
669 /// * `from` - The byte index in the passed slice to start search
670 /// * `to` - The byte index in the passed slice to finish search
671 /// * `options` - The options for the search.
672 /// * `region` - The region for return group match range info
673 /// * `match_param` - The match parameters
674 ///
675 /// # Returns
676 ///
677 /// `Ok(Some(pos))` if the regex matches, where `pos` is the
678 /// byte-position of the start of the match. `Ok(None)` if the regex
679 /// doesn't match anywhere in `chars`. `Err` with an `Error` if an error
680 /// occurred (e.g. retry-limit-in-match exceeded).
681 ///
682 /// # Examples
683 ///
684 /// ```
685 /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
686 ///
687 /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
688 /// let res = r.search_with_param(EncodedBytes::ascii(b"hello"),
689 /// 0, 5, SearchOptions::SEARCH_OPTION_NONE,
690 /// None, MatchParam::default());
691 /// assert!(res.is_ok()); // matching did not error
692 /// assert!(res.unwrap() == Some(2)); // match starts at character 3
693 /// ```
694 pub fn search_with_param<T>(
695 &self,
696 chars: T,
697 from: usize,
698 to: usize,
699 options: SearchOptions,
700 region: Option<&mut Region>,
701 match_param: MatchParam,
702 ) -> Result<Option<usize>, Error>
703 where
704 T: EncodedChars,
705 {
706 let (beg, end) = (chars.start_ptr(), chars.limit_ptr());
707 if chars.encoding() != self.encoding() {
708 return Err(Error::custom(format!(
709 "Regex encoding does not match haystack encoding ({0:?}, {1:?})",
710 chars.encoding(),
711 self.encoding()
712 )));
713 }
714 let r = unsafe {
715 let start = beg.add(from);
716 let range = beg.add(to);
717 if start > end {
718 return Err(Error::custom("Start of match should be before end"));
719 }
720 if range > end {
721 return Err(Error::custom("Limit of match should be before end"));
722 }
723 onig_sys::onig_search_with_param(
724 self.raw,
725 beg,
726 end,
727 start,
728 range,
729 match region {
730 Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
731 None => std::ptr::null_mut(),
732 },
733 options.bits(),
734 match_param.as_raw(),
735 )
736 };
737
738 if r >= 0 {
739 Ok(Some(r as usize))
740 } else if r == onig_sys::ONIG_MISMATCH {
741 Ok(None)
742 } else {
743 Err(Error::from_code(r))
744 }
745 }
746
747 /// Returns true if and only if the regex matches the string given.
748 ///
749 /// For more information see [Match vs
750 /// Search](index.html#match-vs-search)
751 ///
752 /// # Arguments
753 /// * `text` - The string slice to test against the pattern.
754 ///
755 /// # Returns
756 ///
757 /// `true` if the pattern matches the whole of `text`, `false` otherwise.
758 pub fn is_match(&self, text: &str) -> bool {
759 self.match_with_options(text, 0, SearchOptions::SEARCH_OPTION_NONE, None)
760 .map(|r| r == text.len())
761 .unwrap_or(false)
762 }
763
764 /// Find a Match in a Buffer, With Encoding
765 ///
766 /// Finds the first match of the regular expression within the
767 /// buffer.
768 ///
769 /// Note that this should only be used if you want to discover the
770 /// position of the match within a string. Testing if a pattern
771 /// matches the whole string is faster if you use `is_match`. For
772 /// more information see [Match vs
773 /// Search](index.html#match-vs-search)
774 ///
775 /// # Arguments
776 /// * `text` - The text to search in.
777 ///
778 /// # Returns
779 ///
780 /// The offset of the start and end of the first match. If no
781 /// match exists `None` is returned.
782 pub fn find(&self, text: &str) -> Option<(usize, usize)> {
783 self.find_with_encoding(text)
784 }
785
786 /// Find a Match in a Buffer, With Encoding
787 ///
788 /// Finds the first match of the regular expression within the
789 /// buffer.
790 ///
791 /// For more information see [Match vs
792 /// Search](index.html#match-vs-search)
793 ///
794 /// # Arguments
795 /// * `text` - The text to search in.
796 ///
797 /// # Returns
798 ///
799 /// The offset of the start and end of the first match. If no
800 /// match exists `None` is returned.
801 pub fn find_with_encoding<T>(&self, text: T) -> Option<(usize, usize)>
802 where
803 T: EncodedChars,
804 {
805 let mut region = Region::new();
806 let len = text.len();
807 self.search_with_encoding(
808 text,
809 0,
810 len,
811 SearchOptions::SEARCH_OPTION_NONE,
812 Some(&mut region),
813 )
814 .and_then(|_| region.pos(0))
815 }
816
817 /// Get the Encoding of the Regex
818 ///
819 /// # Returns
820 ///
821 /// Returns a reference to an oniguruma encoding which was used
822 /// when this regex was created.
823 pub fn encoding(&self) -> onig_sys::OnigEncoding {
824 unsafe { onig_sys::onig_get_encoding(self.raw) }
825 }
826
827 /// Get the Number of Capture Groups in this Pattern
828 pub fn captures_len(&self) -> usize {
829 unsafe { onig_sys::onig_number_of_captures(self.raw) as usize }
830 }
831
832 /// Get the Size of the Capture Histories for this Pattern
833 pub fn capture_histories_len(&self) -> usize {
834 unsafe { onig_sys::onig_number_of_capture_histories(self.raw) as usize }
835 }
836}
837
838impl Drop for Regex {
839 fn drop(&mut self) {
840 unsafe {
841 onig_sys::onig_free(self.raw);
842 }
843 }
844}
845
#[cfg(test)]
mod tests {
    use super::*;
    use std::panic;

    /// Pattern and input that the tests below expect to fail with
    /// "retry-limit-in-match over" under `MatchParam::default()`.
    const RETRY_LIMIT_PATTERN: &str = "(a|b|ab)*bc";
    const RETRY_LIMIT_INPUT: &str = "ababababababababababababababababababababababababababababacbc";

    /// Message the engine is expected to report for the pair above.
    const RETRY_LIMIT_MESSAGE: &str = "retry-limit-in-match over";

    /// Haystack shared by the invalid-range tests.
    const RUBY: &str = "Ruby";

    /// Compiles the pattern used by the retry-limit tests.
    fn retry_limit_regex() -> Regex {
        Regex::new(RETRY_LIMIT_PATTERN).unwrap()
    }

    /// Compiles the pattern used by the invalid-range tests.
    fn ruby_regex() -> Regex {
        Regex::with_options("R...", RegexOptions::REGEX_OPTION_NONE, Syntax::default())
            .expect("regex")
    }

    #[test]
    fn test_regex_create() {
        Regex::with_options(".*", RegexOptions::REGEX_OPTION_NONE, Syntax::default()).unwrap();

        Regex::new(r#"a \w+ word"#).unwrap();
    }

    #[test]
    fn test_regex_invalid() {
        let err = Regex::new("\\p{foo}").unwrap_err();
        assert_eq!(err.code(), -223);
        assert_eq!(err.description(), "invalid character property name {foo}");
    }

    #[test]
    fn test_failed_match() {
        let regex = Regex::new("foo").unwrap();
        let outcome = regex.match_with_options("bar", 0, SearchOptions::SEARCH_OPTION_NONE, None);
        assert!(outcome.is_none());
    }

    #[test]
    fn test_regex_search_with_options() {
        let regex = Regex::new("e(l+)").unwrap();
        let mut region = Region::new();

        let found = regex.search_with_options(
            "hello",
            0,
            5,
            SearchOptions::SEARCH_OPTION_NONE,
            Some(&mut region),
        );

        assert!(region.tree().is_none());
        assert_eq!(found, Some(1));
        assert_eq!(region.len(), 2);
        let whole = region.pos(0).unwrap();
        let group = region.pos(1).unwrap();
        assert_eq!(whole, (1, 4));
        assert_eq!(group, (2, 4));

        // test cloning here since we already have a filled region
        let copy = region.clone();
        assert_eq!(copy.pos(0).unwrap(), whole);
    }

    #[test]
    fn test_regex_match_with_options() {
        let regex = Regex::new("he(l+)").unwrap();
        let mut region = Region::new();

        let matched = regex.match_with_options(
            "hello",
            0,
            SearchOptions::SEARCH_OPTION_NONE,
            Some(&mut region),
        );

        assert!(region.tree().is_none());
        assert_eq!(matched, Some(4));
        assert_eq!(region.len(), 2);
        let whole = region.pos(0).unwrap();
        let group = region.pos(1).unwrap();
        assert_eq!(whole, (0, 4));
        assert_eq!(group, (2, 4));
    }

    #[test]
    fn test_regex_is_match() {
        let regex = Regex::new("he(l+)o").unwrap();
        assert!(regex.is_match("hello"));
        assert!(!regex.is_match("hello 2.0"));
    }

    #[test]
    fn test_regex_find() {
        let regex = Regex::new("he(l+)o").unwrap();
        assert_eq!(regex.find("hey, hello!"), Some((5, 10)));
        assert_eq!(regex.find("hey, honey!"), None);
    }

    #[test]
    fn test_regex_captures_len() {
        let regex = Regex::new("(he)(l+)(o)").unwrap();
        assert_eq!(regex.captures_len(), 3);
    }

    #[test]
    fn test_regex_error_is_match() {
        let regex = retry_limit_regex();
        let result = regex.match_with_param(
            RETRY_LIMIT_INPUT,
            0,
            SearchOptions::SEARCH_OPTION_NONE,
            None,
            MatchParam::default(),
        );

        let err = result.err().unwrap();
        assert_eq!(RETRY_LIMIT_MESSAGE, err.description());
    }

    #[test]
    fn test_regex_panic_is_match() {
        let regex = retry_limit_regex();
        let result = panic::catch_unwind(|| regex.is_match(RETRY_LIMIT_INPUT));
        let payload = result.err().unwrap();
        let message = payload.downcast_ref::<String>().unwrap();
        assert_eq!(
            message.as_str(),
            "Onig: Regex match error: retry-limit-in-match over"
        );
    }

    #[test]
    fn test_regex_error_find() {
        let regex = retry_limit_regex();
        let result = regex.search_with_param(
            RETRY_LIMIT_INPUT,
            0,
            RETRY_LIMIT_INPUT.len(),
            SearchOptions::SEARCH_OPTION_NONE,
            None,
            MatchParam::default(),
        );

        let err = result.err().unwrap();
        assert_eq!(RETRY_LIMIT_MESSAGE, err.description());
    }

    #[test]
    fn test_regex_panic_find() {
        let regex = retry_limit_regex();
        let result = panic::catch_unwind(|| regex.find(RETRY_LIMIT_INPUT));
        let payload = result.err().unwrap();
        let message = payload.downcast_ref::<String>().unwrap();
        assert_eq!(
            message.as_str(),
            "Onig: Regex search error: retry-limit-in-match over"
        );
    }

    #[test]
    fn test_search_with_invalid_range() {
        let regex = ruby_regex();

        // Start offset beyond the end of the haystack.
        let start_past_end = regex.search_with_param(
            RUBY,
            5,
            RUBY.len(),
            SearchOptions::SEARCH_OPTION_NONE,
            None,
            MatchParam::default(),
        );
        assert!(start_past_end.is_err());

        // Range limit beyond the end of the haystack.
        let limit_past_end = regex.search_with_param(
            RUBY,
            2,
            RUBY.len() + 1,
            SearchOptions::SEARCH_OPTION_NONE,
            None,
            MatchParam::default(),
        );
        assert!(limit_past_end.is_err());
    }

    #[test]
    fn test_search_with_invalid_range_panic() {
        let regex = ruby_regex();
        let outcome = panic::catch_unwind(|| {
            regex.search_with_encoding(
                RUBY,
                5,
                RUBY.len(),
                SearchOptions::SEARCH_OPTION_NONE,
                None,
            )
        });
        assert!(outcome.is_err());
    }

    #[test]
    fn test_match_with_invalid_range() {
        let regex = ruby_regex();
        let outcome = regex.match_with_param(
            RUBY,
            5,
            SearchOptions::SEARCH_OPTION_NONE,
            None,
            MatchParam::default(),
        );
        assert!(outcome.is_err());
    }

    #[test]
    fn test_match_with_invalid_range_panic() {
        let regex = ruby_regex();
        let outcome = panic::catch_unwind(|| {
            regex.match_with_encoding(RUBY, 5, SearchOptions::SEARCH_OPTION_NONE, None)
        });
        assert!(outcome.is_err());
    }
}