spinoso_symbol/lib.rs
1#![warn(clippy::all, clippy::pedantic, clippy::undocumented_unsafe_blocks)]
2#![allow(
3 clippy::let_underscore_untyped,
4 reason = "https://github.com/rust-lang/rust-clippy/pull/10442#issuecomment-1516570154"
5)]
6#![allow(
7 clippy::question_mark,
8 reason = "https://github.com/rust-lang/rust-clippy/issues/8281"
9)]
10#![allow(clippy::manual_let_else, reason = "manual_let_else was very buggy on release")]
11#![allow(clippy::missing_errors_doc, reason = "A lot of existing code fails this lint")]
12#![allow(
13 clippy::module_name_repetitions,
14 reason = "incompatible with how code is organized in private modules"
15)]
16#![allow(
17 clippy::unnecessary_lazy_evaluations,
18 reason = "https://github.com/rust-lang/rust-clippy/issues/8109"
19)]
20#![cfg_attr(
21 test,
22 allow(clippy::non_ascii_literal, reason = "tests sometimes require UTF-8 string content")
23)]
24#![allow(unknown_lints)]
25#![warn(
26 missing_copy_implementations,
27 missing_debug_implementations,
28 missing_docs,
29 rust_2024_compatibility,
30 trivial_casts,
31 trivial_numeric_casts,
32 unused_qualifications,
33 variant_size_differences
34)]
35// Enable feature callouts in generated documentation:
36// https://doc.rust-lang.org/beta/unstable-book/language-features/doc-cfg.html
37//
38// This approach is borrowed from tokio.
39#![cfg_attr(docsrs, feature(doc_cfg))]
40#![cfg_attr(docsrs, feature(doc_alias))]
41
42//! Identifier for interned byte strings and routines for manipulating the
43//! underlying byte strings.
44//!
45//! `Symbol` is a `Copy` type based on `u32`. `Symbol` is cheap to copy, store,
46//! and compare. It is suitable for representing indexes into a string interner.
47//!
48//! # Artichoke integration
49//!
50//! This crate has an `artichoke` Cargo feature. When this feature is active,
51//! this crate implements [the `Symbol` API from Ruby Core]. These APIs require
52//! resolving the underlying bytes associated with the `Symbol` via a type that
53//! implements `Intern` from `artichoke-core`.
54//!
55//! APIs that require this feature to be active are highlighted in the
56//! documentation.
57//!
58//! This crate provides an `AllSymbols` iterator for walking all symbols stored
59//! in an [`Intern`]er and an extension trait for constructing it which is
60//! suitable for implementing [`Symbol::all_symbols`] from Ruby Core.
61//!
62//! This crate provides an `Inspect` iterator for converting `Symbol` byte
63//! content to a debug representation suitable for implementing
64//! [`Symbol#inspect`] from Ruby Core.
65//!
66//! # `no_std`
67//!
68//! This crate is `no_std` compatible when built without the `std` feature. This
69//! crate does not depend on [`alloc`].
70//!
71//! # Crate features
72//!
73//! All features are enabled by default.
74//!
75//! - **artichoke** - Enables additional methods, functions, and types for
76//! implementing APIs from Ruby Core. Dropping this feature removes the
77//! `artichoke-core` and `focaccia` dependencies. Activating this feature also
78//! activates the **inspect** feature.
79//! - **inspect** - Enables an iterator for generating debug output of a symbol
80//! byte string. Activating this feature also activates the **ident-parser**
81//! feature.
82//! - **ident-parser** - Enables a parser to determine the Ruby identifier type,
83//! if any, for a byte string. Dropping this feature removes the `bstr` and
84//! `scolapasta-string-escape` dependencies.
85//! - **std** - Enables a dependency on the Rust Standard Library. Activating
86//! this feature enables [`std::error::Error`] impls on error types in this
87//! crate.
88//!
89//! [the `Symbol` API from Ruby Core]: https://ruby-doc.org/core-3.1.2/Symbol.html
90//! [`Symbol::all_symbols`]: https://ruby-doc.org/core-3.1.2/Symbol.html#method-c-all_symbols
91//! [`Symbol#inspect`]: https://ruby-doc.org/core-3.1.2/Symbol.html#method-i-inspect
92//! [`alloc`]: https://doc.rust-lang.org/alloc/
93//! [`std::error::Error`]: https://doc.rust-lang.org/std/error/trait.Error.html
94
95#![no_std]
96
97// Ensure code blocks in `README.md` compile
98#[cfg(doctest)]
99#[doc = include_str!("../README.md")]
100mod readme {}
101
102#[cfg(any(feature = "std", test, doctest))]
103extern crate std;
104
105use core::fmt;
106use core::num::TryFromIntError;
107
108#[cfg(feature = "artichoke")]
109use artichoke_core::intern::Intern;
110#[doc(inline)]
111#[cfg(feature = "artichoke")]
112#[cfg_attr(docsrs, doc(cfg(feature = "artichoke")))]
113pub use focaccia::{CaseFold, NoSuchCaseFoldingScheme};
114
115#[cfg(feature = "artichoke")]
116mod all_symbols;
117#[cfg(feature = "artichoke")]
118mod casecmp;
119mod convert;
120mod eq;
121#[cfg(feature = "ident-parser")]
122mod ident;
123#[cfg(feature = "inspect")]
124mod inspect;
125
126#[cfg(test)]
127#[expect(clippy::needless_raw_string_hashes, reason = "generated test data")]
128mod fixtures;
129
130#[cfg(feature = "artichoke")]
131pub use all_symbols::{AllSymbols, InternerAllSymbols};
132#[cfg(feature = "artichoke")]
133pub use casecmp::{ascii_casecmp, unicode_case_eq};
134#[cfg(feature = "ident-parser")]
135pub use ident::{IdentifierType, ParseIdentifierError};
136#[cfg(feature = "inspect")]
137pub use inspect::Inspect;
138
/// Error signalling that the space of symbol identifiers has been exhausted.
///
/// Symbols are identified by a `u32` to keep them small. Once the backing
/// table would need to hold more than `u32::MAX` symbols there is no
/// identifier left to hand out, and this error is reported instead.
#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct SymbolOverflowError {
    // Private unit field: keeps the struct from being built with a struct
    // literal outside of this module.
    _private: (),
}

impl SymbolOverflowError {
    /// Largest identifier a `Symbol` can carry, expressed as a `usize`.
    pub const MAX_IDENTIFIER: usize = u32::MAX as usize;

    /// Create a `SymbolOverflowError`.
    ///
    /// The value is indistinguishable from the one produced by `Default`.
    #[inline]
    #[must_use]
    pub const fn new() -> Self {
        SymbolOverflowError { _private: () }
    }
}

/// A failed integer conversion is reported as a symbol overflow; the details
/// of the conversion error are discarded.
impl From<TryFromIntError> for SymbolOverflowError {
    #[inline]
    fn from(_: TryFromIntError) -> Self {
        Self { _private: () }
    }
}

impl fmt::Display for SymbolOverflowError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const MESSAGE: &str = "Symbol overflow";
        // Written directly to the formatter, so width and padding flags are
        // not applied to the message.
        f.write_str(MESSAGE)
    }
}

impl core::error::Error for SymbolOverflowError {}
175
/// A compact handle that identifies a byte string stored in an interner.
///
/// A `Symbol` can be used to get back a reference to the byte string that was
/// interned. `Symbol`s that compare equal resolve to the same byte string.
///
/// The handle wraps a single `u32` index, which makes it inexpensive both to
/// copy and to compare.
///
/// Nothing ties a `Symbol` to the interner that produced it.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Symbol(u32);

impl Symbol {
    /// Wrap the given `u32` identifier in a `Symbol`.
    ///
    /// A `Symbol` built by hand is not guaranteed to resolve to a byte string
    /// in any particular interner.
    ///
    /// `Symbol`s carry no reference to the interner that issued them, and no
    /// check is made at runtime that a `Symbol` is handed back to the interner
    /// it came from.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::Symbol;
    /// let sym = Symbol::new(263);
    /// assert_eq!(sym.id(), 263);
    /// ```
    #[inline]
    #[must_use]
    pub const fn new(id: u32) -> Self {
        Symbol(id)
    }

    /// Extract the `u32` identifier wrapped by this `Symbol`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use spinoso_symbol::Symbol;
    /// let sym = Symbol::new(263);
    /// assert_eq!(sym.id(), 263);
    /// assert_eq!(u32::from(sym), 263);
    /// ```
    #[inline]
    #[must_use]
    pub const fn id(self) -> u32 {
        let Symbol(id) = self;
        id
    }

    /// Reports whether the interner resolves this symbol to the empty byte
    /// slice `b""`.
    ///
    /// If the symbol does not exist in the given interner, or the lookup
    /// itself fails, `true` is returned.
    #[inline]
    #[must_use]
    #[cfg(feature = "artichoke")]
    #[cfg_attr(docsrs, doc(cfg(feature = "artichoke")))]
    pub fn is_empty<T, U>(self, interner: &T) -> bool
    where
        T: Intern<Symbol = U>,
        U: Copy + From<Symbol>,
    {
        match interner.lookup_symbol(U::from(self)) {
            Ok(Some(contents)) => contents.is_empty(),
            // Unknown symbols and lookup failures are treated as empty.
            Ok(None) | Err(_) => true,
        }
    }

    /// Reports the number of bytes in the byte slice the interner associates
    /// with this symbol.
    ///
    /// If the symbol does not exist in the given interner, or the lookup
    /// itself fails, `0` is returned.
    #[inline]
    #[must_use]
    #[cfg(feature = "artichoke")]
    #[cfg_attr(docsrs, doc(cfg(feature = "artichoke")))]
    pub fn len<T, U>(self, interner: &T) -> usize
    where
        T: Intern<Symbol = U>,
        U: Copy + From<Symbol>,
    {
        match interner.lookup_symbol(U::from(self)) {
            Ok(Some(contents)) => contents.len(),
            // Unknown symbols and lookup failures have no length.
            Ok(None) | Err(_) => 0_usize,
        }
    }

    /// Borrows the interned byte slice the interner associates with this
    /// symbol.
    ///
    /// If the symbol does not exist in the given interner, or the lookup
    /// itself fails, `&[]` is returned.
    #[inline]
    #[must_use]
    #[cfg(feature = "artichoke")]
    #[cfg_attr(docsrs, doc(cfg(feature = "artichoke")))]
    pub fn bytes<T, U>(self, interner: &T) -> &[u8]
    where
        T: Intern<Symbol = U>,
        U: Copy + From<Symbol>,
    {
        match interner.lookup_symbol(U::from(self)) {
            Ok(Some(contents)) => contents,
            // Unknown symbols and lookup failures resolve to no bytes.
            Ok(None) | Err(_) => &[],
        }
    }

    /// Builds an iterator over a debug rendering of the byte slice the
    /// interner associates with this symbol.
    ///
    /// The iterator yields [`char`] sequences such as `:spinoso` and
    /// `:"invalid-\xFF-utf8"`.
    ///
    /// This is a building block for the Ruby method [`Symbol#inspect`].
    ///
    /// If the symbol does not exist in the given interner, or the lookup
    /// itself fails, a default iterator is returned.
    ///
    /// [`Symbol#inspect`]: https://ruby-doc.org/core-3.1.2/Symbol.html#method-i-inspect
    #[inline]
    #[cfg(feature = "artichoke")]
    #[cfg_attr(docsrs, doc(cfg(feature = "artichoke")))]
    pub fn inspect<T, U>(self, interner: &T) -> Inspect<'_>
    where
        T: Intern<Symbol = U>,
        U: Copy + From<Symbol>,
    {
        match interner.lookup_symbol(U::from(self)) {
            Ok(Some(contents)) => Inspect::from(contents),
            // Unknown symbols and lookup failures fall back to the default
            // iterator.
            Ok(None) | Err(_) => Inspect::default(),
        }
    }
}