spinoso_symbol/casecmp/unicode.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
//! Unicode case folding comparisons for byte content resolved from `Symbol`s.
use core::str;
use artichoke_core::intern::Intern;
use focaccia::CaseFold;
/// Test two symbols for equality under Unicode case folding.
///
/// Each symbol is resolved to its byte content through `interner`; a symbol
/// the interner does not know about is treated as the empty byte slice `&[]`.
///
/// Both byte slices are then tentatively decoded as UTF-8 [`str`](prim@str)s,
/// which leads to one of three outcomes:
///
/// - Both decode successfully: the strings are compared with the supplied
///   [Unicode case folding] scheme `fold` and `Ok(Some(bool))` is returned.
/// - Neither decodes successfully: the raw bytes are compared with
///   [`focaccia::ascii_case_eq`] and `Ok(Some(bool))` is returned.
/// - Exactly one decodes successfully: the symbols are considered to have
///   incompatible encodings and `Ok(None)` is returned.
///
/// This function is a building block for [`Symbol#casecmp?`] on the
/// [`Symbol`] type defined in Ruby Core.
///
/// # Errors
///
/// Any error produced by the interner while looking up either symbol is
/// propagated to the caller unchanged. See [`Intern::lookup_symbol`].
///
/// [Unicode case folding]: https://www.w3.org/International/wiki/Case_folding
/// [`Symbol#casecmp?`]: https://ruby-doc.org/core-3.1.2/Symbol.html#method-i-casecmp-3F
/// [`Symbol`]: https://ruby-doc.org/core-3.1.2/Symbol.html
#[inline]
#[cfg_attr(docsrs, doc(cfg(feature = "artichoke")))]
pub fn case_eq<T, U>(interner: &T, left: U, right: U, fold: CaseFold) -> Result<Option<bool>, T::Error>
where
    T: Intern<Symbol = U>,
    U: Copy,
{
    // Resolve both symbols up front (left first, then right) so that an
    // interner failure short-circuits before any comparison work happens.
    let left_bytes = interner.lookup_symbol(left)?.unwrap_or_default();
    let right_bytes = interner.lookup_symbol(right)?.unwrap_or_default();

    // Only the success or failure of decoding matters here, so the UTF-8
    // error details are discarded.
    let left_str = str::from_utf8(left_bytes).ok();
    let right_str = str::from_utf8(right_bytes).ok();

    let verdict = match (left_str, right_str) {
        // Both sides are valid UTF-8: apply the requested folding scheme.
        (Some(lhs), Some(rhs)) => Some(fold.case_eq(lhs, rhs)),
        // Both sides are binary: compare the raw bytes ASCII
        // case-insensitively.
        (None, None) => Some(focaccia::ascii_case_eq(left_bytes, right_bytes)),
        // Mixed encodings are not comparable with Unicode case folding.
        //
        // > `nil` is returned if the two symbols have incompatible encodings,
        // > or if `other_symbol` is not a symbol.
        // > <https://ruby-doc.org/core-3.1.2/Symbol.html#method-i-casecmp-3F>
        (Some(_), None) | (None, Some(_)) => None,
    };
    Ok(verdict)
}