spinoso_symbol/
lib.rs

1#![warn(clippy::all, clippy::pedantic, clippy::undocumented_unsafe_blocks)]
2#![allow(
3    clippy::let_underscore_untyped,
4    reason = "https://github.com/rust-lang/rust-clippy/pull/10442#issuecomment-1516570154"
5)]
6#![allow(
7    clippy::question_mark,
8    reason = "https://github.com/rust-lang/rust-clippy/issues/8281"
9)]
10#![allow(clippy::manual_let_else, reason = "manual_let_else was very buggy on release")]
11#![allow(clippy::missing_errors_doc, reason = "A lot of existing code fails this lint")]
12#![allow(
13    clippy::module_name_repetitions,
14    reason = "incompatible with how code is organized in private modules"
15)]
16#![allow(
17    clippy::unnecessary_lazy_evaluations,
18    reason = "https://github.com/rust-lang/rust-clippy/issues/8109"
19)]
20#![cfg_attr(
21    test,
22    allow(clippy::non_ascii_literal, reason = "tests sometimes require UTF-8 string content")
23)]
24#![allow(unknown_lints)]
25#![warn(
26    missing_copy_implementations,
27    missing_debug_implementations,
28    missing_docs,
29    rust_2024_compatibility,
30    trivial_casts,
31    trivial_numeric_casts,
32    unused_qualifications,
33    variant_size_differences
34)]
35// Enable feature callouts in generated documentation:
36// https://doc.rust-lang.org/beta/unstable-book/language-features/doc-cfg.html
37//
38// This approach is borrowed from tokio.
39#![cfg_attr(docsrs, feature(doc_cfg))]
40#![cfg_attr(docsrs, feature(doc_alias))]
41
42//! Identifier for interned byte strings and routines for manipulating the
43//! underlying byte strings.
44//!
45//! `Symbol` is a `Copy` type based on `u32`. `Symbol` is cheap to copy, store,
46//! and compare. It is suitable for representing indexes into a string interner.
47//!
48//! # Artichoke integration
49//!
50//! This crate has an `artichoke` Cargo feature. When this feature is active,
51//! this crate implements [the `Symbol` API from Ruby Core]. These APIs require
52//! resolving the underlying bytes associated with the `Symbol` via a type that
53//! implements `Intern` from `artichoke-core`.
54//!
55//! APIs that require this feature to be active are highlighted in the
56//! documentation.
57//!
58//! This crate provides an `AllSymbols` iterator for walking all symbols stored
59//! in an [`Intern`]er and an extension trait for constructing it which is
60//! suitable for implementing [`Symbol::all_symbols`] from Ruby Core.
61//!
62//! This crate provides an `Inspect` iterator for converting `Symbol` byte
63//! content to a debug representation suitable for implementing
64//! [`Symbol#inspect`] from Ruby Core.
65//!
66//! # `no_std`
67//!
68//! This crate is `no_std` compatible when built without the `std` feature. This
69//! crate does not depend on [`alloc`].
70//!
71//! # Crate features
72//!
73//! All features are enabled by default.
74//!
75//! - **artichoke** - Enables additional methods, functions, and types for
76//!   implementing APIs from Ruby Core. Dropping this feature removes the
77//!   `artichoke-core` and `focaccia` dependencies. Activating this feature also
78//!   activates the **inspect** feature.
79//! - **inspect** - Enables an iterator for generating debug output of a symbol
80//!   byte string. Activating this feature also activates the **ident-parser**
81//!   feature.
82//! - **ident-parser** - Enables a parser to determine the Ruby identifier type,
83//!   if any, for a byte string. Dropping this feature removes the `bstr` and
84//!   `scolapasta-string-escape` dependencies.
85//! - **std** - Enables a dependency on the Rust Standard Library. Activating
86//!   this feature enables [`std::error::Error`] impls on error types in this
87//!   crate.
88//!
89//! [the `Symbol` API from Ruby Core]: https://ruby-doc.org/core-3.1.2/Symbol.html
90//! [`Symbol::all_symbols`]: https://ruby-doc.org/core-3.1.2/Symbol.html#method-c-all_symbols
91//! [`Symbol#inspect`]: https://ruby-doc.org/core-3.1.2/Symbol.html#method-i-inspect
92//! [`alloc`]: https://doc.rust-lang.org/alloc/
93//! [`std::error::Error`]: https://doc.rust-lang.org/std/error/trait.Error.html
94
95#![no_std]
96
97// Ensure code blocks in `README.md` compile
98#[cfg(doctest)]
99#[doc = include_str!("../README.md")]
100mod readme {}
101
102#[cfg(any(feature = "std", test, doctest))]
103extern crate std;
104
105use core::fmt;
106use core::num::TryFromIntError;
107
108#[cfg(feature = "artichoke")]
109use artichoke_core::intern::Intern;
110#[doc(inline)]
111#[cfg(feature = "artichoke")]
112#[cfg_attr(docsrs, doc(cfg(feature = "artichoke")))]
113pub use focaccia::{CaseFold, NoSuchCaseFoldingScheme};
114
115#[cfg(feature = "artichoke")]
116mod all_symbols;
117#[cfg(feature = "artichoke")]
118mod casecmp;
119mod convert;
120mod eq;
121#[cfg(feature = "ident-parser")]
122mod ident;
123#[cfg(feature = "inspect")]
124mod inspect;
125
126#[cfg(test)]
127#[expect(clippy::needless_raw_string_hashes, reason = "generated test data")]
128mod fixtures;
129
130#[cfg(feature = "artichoke")]
131pub use all_symbols::{AllSymbols, InternerAllSymbols};
132#[cfg(feature = "artichoke")]
133pub use casecmp::{ascii_casecmp, unicode_case_eq};
134#[cfg(feature = "ident-parser")]
135pub use ident::{IdentifierType, ParseIdentifierError};
136#[cfg(feature = "inspect")]
137pub use inspect::Inspect;
138
/// Error indicating that a symbol identifier has overflowed.
///
/// Spinoso symbol identifies each symbol with a `u32` to keep symbols small.
/// Once the underlying table stores more than `u32::MAX` symbols, there are no
/// identifiers left to generate.
#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct SymbolOverflowError {
    _private: (),
}

impl SymbolOverflowError {
    /// The largest identifier a `Symbol` can hold, expressed as a `usize`.
    pub const MAX_IDENTIFIER: usize = u32::MAX as usize;

    /// Create a `SymbolOverflowError`.
    ///
    /// The returned value compares equal to the one produced by `Default`.
    #[inline]
    #[must_use]
    pub const fn new() -> Self {
        Self { _private: () }
    }
}

impl From<TryFromIntError> for SymbolOverflowError {
    /// Convert a failed integer conversion into a symbol overflow.
    ///
    /// The source error is discarded.
    #[inline]
    fn from(_: TryFromIntError) -> Self {
        Self::new()
    }
}

impl fmt::Display for SymbolOverflowError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Written with `write_str` so the message is emitted verbatim.
        const MESSAGE: &str = "Symbol overflow";
        f.write_str(MESSAGE)
    }
}

// The error carries no source; the default trait methods are sufficient.
impl core::error::Error for SymbolOverflowError {}
175
/// Handle that identifies an interned byte string.
///
/// A `Symbol` can be used to retrieve a reference to the byte string that was
/// originally interned. Equivalent `Symbol`s resolve to an identical byte
/// string.
///
/// A `Symbol` wraps a `u32` index, which makes it cheap to copy and cheap to
/// compare.
///
/// A `Symbol` is not constrained to the interner which created it.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Symbol(u32);

impl Symbol {
    /// Wrap the given `u32` identifier in a `Symbol`.
    ///
    /// A manually constructed `Symbol` may fail to resolve to an underlying
    /// byte string.
    ///
    /// A `Symbol` is not constrained to the interner which created it. No
    /// runtime checks ensure that an interner is only called with `Symbol`s
    /// that the interner itself issued.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::Symbol;
    /// let sym = Symbol::new(263);
    /// assert_eq!(sym.id(), 263);
    /// ```
    #[inline]
    #[must_use]
    pub const fn new(id: u32) -> Self {
        Self(id)
    }

    /// Extract the `u32` identifier wrapped by this `Symbol`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::Symbol;
    /// let sym = Symbol::new(263);
    /// assert_eq!(sym.id(), 263);
    /// assert_eq!(u32::from(sym), 263);
    /// ```
    #[inline]
    #[must_use]
    pub const fn id(self) -> u32 {
        let Self(id) = self;
        id
    }

    /// Returns whether this symbol resolves to the empty byte slice `b""` in
    /// the given interner.
    ///
    /// If the symbol does not exist in the interner, or looking it up results
    /// in an error, `true` is returned.
    #[inline]
    #[must_use]
    #[cfg(feature = "artichoke")]
    #[cfg_attr(docsrs, doc(cfg(feature = "artichoke")))]
    pub fn is_empty<T, U>(self, interner: &T) -> bool
    where
        T: Intern<Symbol = U>,
        U: Copy + From<Symbol>,
    {
        // `bytes` yields `&[]` on a missing symbol or a failed lookup, which
        // maps to `true` here.
        self.bytes(interner).is_empty()
    }

    /// Returns the length, in bytes, of the byte slice this symbol resolves to
    /// in the given interner.
    ///
    /// If the symbol does not exist in the interner, or looking it up results
    /// in an error, `0` is returned.
    #[inline]
    #[must_use]
    #[cfg(feature = "artichoke")]
    #[cfg_attr(docsrs, doc(cfg(feature = "artichoke")))]
    pub fn len<T, U>(self, interner: &T) -> usize
    where
        T: Intern<Symbol = U>,
        U: Copy + From<Symbol>,
    {
        // `bytes` yields `&[]` on a missing symbol or a failed lookup, which
        // maps to `0` here.
        self.bytes(interner).len()
    }

    /// Returns the interned byte slice this symbol resolves to in the given
    /// interner.
    ///
    /// If the symbol does not exist in the interner, or looking it up results
    /// in an error, `&[]` is returned.
    #[inline]
    #[must_use]
    #[cfg(feature = "artichoke")]
    #[cfg_attr(docsrs, doc(cfg(feature = "artichoke")))]
    pub fn bytes<T, U>(self, interner: &T) -> &[u8]
    where
        T: Intern<Symbol = U>,
        U: Copy + From<Symbol>,
    {
        match interner.lookup_symbol(U::from(self)) {
            Ok(Some(bytes)) => bytes,
            Ok(None) | Err(_) => &[],
        }
    }

    /// Returns an iterator over a debug representation of the interned byte
    /// slice this symbol resolves to in the given interner.
    ///
    /// The iterator produces [`char`] sequences like `:spinoso` and
    /// `:"invalid-\xFF-utf8"`.
    ///
    /// This function can be used to implement the Ruby method
    /// [`Symbol#inspect`].
    ///
    /// If the symbol does not exist in the interner, or looking it up results
    /// in an error, a default iterator is returned.
    ///
    /// [`Symbol#inspect`]: https://ruby-doc.org/core-3.1.2/Symbol.html#method-i-inspect
    #[inline]
    #[cfg(feature = "artichoke")]
    #[cfg_attr(docsrs, doc(cfg(feature = "artichoke")))]
    pub fn inspect<T, U>(self, interner: &T) -> Inspect<'_>
    where
        T: Intern<Symbol = U>,
        U: Copy + From<Symbol>,
    {
        match interner.lookup_symbol(U::from(self)) {
            Ok(Some(bytes)) => Inspect::from(bytes),
            Ok(None) | Err(_) => Inspect::default(),
        }
    }
}