intaglio/
lib.rs

1#![warn(clippy::all)]
2#![warn(clippy::pedantic)]
3#![warn(clippy::cargo)]
4#![warn(clippy::undocumented_unsafe_blocks)]
5#![allow(clippy::cast_possible_truncation)]
6#![allow(unknown_lints)]
7#![warn(missing_copy_implementations)]
8#![warn(missing_debug_implementations)]
9#![warn(missing_docs)]
10#![warn(rust_2018_idioms)]
11#![warn(trivial_casts, trivial_numeric_casts)]
12#![warn(unsafe_op_in_unsafe_fn)]
13#![warn(unused_qualifications)]
14#![warn(variant_size_differences)]
15// Enable feature callouts in generated documentation:
16// https://doc.rust-lang.org/beta/unstable-book/language-features/doc-cfg.html
17//
18// This approach is borrowed from tokio.
19#![cfg_attr(docsrs, feature(doc_cfg))]
20#![cfg_attr(docsrs, feature(doc_alias))]
21
22//! This crate provides a library for interning strings.
23//!
24//! The primary API is a symbol table. Its API is similar to a bimap in that
25//! symbols can resolve an underlying string and a string slice can retrieve
26//! its associated symbol.
27//!
28//! For more specific details on the API for interning strings into a symbol
29//! table, please see the documentation for the [`SymbolTable`] type.
30//!
31//! # Examples
32//!
33//! ```
34//! # use intaglio::SymbolTable;
35//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
36//! let mut table = SymbolTable::new();
37//! let sym_id = table.intern("abc")?;
38//! assert_eq!(sym_id, table.intern("abc".to_string())?);
39//! assert!(table.contains(sym_id));
40//! assert!(table.is_interned("abc"));
41//! # Ok(())
42//! # }
43//! # example().unwrap();
44//! ```
45//!
46//! # String interning
47//!
48//! Intaglio `SymbolTable`s store at most one copy of a string. All requests to
49//! intern a string that is already present in the table, regardless of whether
50//! the string is an owned `String` or borrowed `&'static str`, will return the
51//! same immutable [`Symbol`].
52//!
53//! [`Symbol`]s are `u32` indexes into a `SymbolTable` that are cheap to
54//! compare, copy, store, and send.
55//!
56//! # Allocations
57//!
58//! `SymbolTable` exposes several constructors for tuning the initial allocated
59//! size of the table. It also exposes several APIs for tuning the table's
60//! memory usage such as [`SymbolTable::reserve`] and [`SymbolTable::shrink_to_fit`].
61//!
62//! [`SymbolTable::intern`] does not clone or copy interned strings. It takes
63//! ownership of the string contents with no additional allocations.
64//!
65//! # Types of Interners
66//!
67//! Intaglio includes multiple symbol tables which differ in the types of strings
68//! they allow you to intern.
69//!
70//! - [`SymbolTable`] interns UTF-8 strings: [`String`] and [`&str`](prim@str).
71#![cfg_attr(
72    feature = "bytes",
73    doc = "- [`bytes::SymbolTable`] interns binary strings: [`Vec<u8>`] and `&[u8]`."
74)]
75#![cfg_attr(
76    feature = "cstr",
77    doc = "- [`cstr::SymbolTable`] interns C strings: [`CString`] and [`&CStr`]."
78)]
79#![cfg_attr(
80    feature = "osstr",
81    doc = "- [`osstr::SymbolTable`] interns platform strings: [`OsString`] and [`&OsStr`]."
82)]
83#![cfg_attr(
84    feature = "path",
85    doc = "- [`path::SymbolTable`] interns path strings: [`PathBuf`] and [`&Path`]."
86)]
87//!
88//! # Crate features
89//!
90//! All features are enabled by default.
91//!
92//! - **bytes** - Enables an additional symbol table implementation for interning
93//!   byte strings ([`Vec<u8>`] and `&'static [u8]`).
94//! - **cstr** - Enables an additional symbol table implementation for interning
95//!   C strings ([`CString`] and [`&'static CStr`]).
96//! - **osstr** - Enables an additional symbol table implementation for interning
97//!   platform strings ([`OsString`] and [`&'static OsStr`]).
98//! - **path** - Enables an additional symbol table implementation for interning
99//!   path strings ([`PathBuf`] and [`&'static Path`]).
100//!
101//! [`Vec<u8>`]: std::vec::Vec
102//! [`CString`]: std::ffi::CString
103//! [`&CStr`]: std::ffi::CStr
104//! [`&'static CStr`]: std::ffi::CStr
105//! [`OsString`]: std::ffi::OsString
106//! [`&OsStr`]: std::ffi::OsStr
107//! [`&'static OsStr`]: std::ffi::OsStr
108//! [`PathBuf`]: std::path::PathBuf
109//! [`&Path`]: std::path::Path
110//! [`&'static Path`]: std::path::Path
111
112#![doc(html_root_url = "https://docs.rs/intaglio/1.10.0")]
113
114use core::fmt;
115use core::num::TryFromIntError;
116use std::error;
117
// Assert at compile time that a boolean constant expression is `true`.
//
// The expansion is an anonymous `const` item whose initializer panics when the
// condition does not hold, so a violated assertion stops compilation with an
// "assertion failed" diagnostic that quotes the offending expression instead
// of an unrelated arithmetic-overflow error.
//
// The condition must be evaluable in a `const` context. A trailing comma after
// the condition is accepted.
macro_rules! const_assert {
    ($x:expr $(,)?) => {
        // Callers may compare constants that clippy can prove are equal, which
        // would otherwise trip `clippy::eq_op`.
        #[allow(unknown_lints, clippy::eq_op)]
        const _: () = assert!($x);
    };
}
127
128#[cfg(feature = "bytes")]
129#[cfg_attr(docsrs, doc(cfg(feature = "bytes")))]
130pub mod bytes;
131mod convert;
132#[cfg(feature = "cstr")]
133#[cfg_attr(docsrs, doc(cfg(feature = "cstr")))]
134pub mod cstr;
135mod eq;
136mod internal;
137#[cfg(feature = "osstr")]
138#[cfg_attr(docsrs, doc(cfg(feature = "osstr")))]
139pub mod osstr;
140#[cfg(feature = "path")]
141#[cfg_attr(docsrs, doc(cfg(feature = "path")))]
142pub mod path;
143mod str;
144
145pub use crate::str::*;
146
147// To prevent overflows when indexing into the backing `Vec`, `intaglio`
148// requires `usize` to be at least as big as `u32`.
149const_assert!(usize::BITS >= u32::BITS);
150
/// Capacity used by default when a [`SymbolTable`] is constructed with
/// [`SymbolTable::new`].
pub const DEFAULT_SYMBOL_TABLE_CAPACITY: usize = 4_096;
154
/// Error reported when a [`SymbolTable`] or symbol identifier overflows.
///
/// To save space, `SymbolTable` identifies each symbol with a `u32`. Once more
/// than `u32::MAX` symbols are stored in the table, no further identifiers can
/// be generated, and every later insert into the table fails with this error.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SymbolOverflowError {
    // Private zero-sized field: the struct cannot be built with a struct
    // literal from outside this module.
    _private: (),
}
164
165impl SymbolOverflowError {
166    /// Construct a new `SymbolOverflowError` with no source.
167    #[inline]
168    #[must_use]
169    pub const fn new() -> Self {
170        Self { _private: () }
171    }
172
173    /// Return the maximum capacity of the [`SymbolTable`] that returned this
174    /// error.
175    #[inline]
176    #[must_use]
177    #[allow(clippy::unused_self)]
178    pub const fn max_capacity(self) -> usize {
179        // The valid representation of `Symbol` is:
180        //
181        // ```
182        // Symbol(0_u32)..=Symbol(u32::MAX)
183        // ```
184        //
185        // The length of a range from `0..uX::MAX` is `uX::MAX + 1`.
186        //
187        // On 32-bit architectures, `usize` cannot hold `u32::MAX + 1`, but a
188        // `SymbolTable` will not be able to allocate that much anyway, so
189        // saturate and return `usize::MAX`.
190        let capa = u32::MAX as usize;
191        capa.saturating_add(1)
192    }
193}
194
195impl From<TryFromIntError> for SymbolOverflowError {
196    #[inline]
197    fn from(_err: TryFromIntError) -> Self {
198        Self::new()
199    }
200}
201
202impl fmt::Display for SymbolOverflowError {
203    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
204        f.write_str("Symbol overflow")
205    }
206}
207
208impl error::Error for SymbolOverflowError {}
209
/// Identifier that a [`SymbolTable`] binds to an interned string.
///
/// Interning an equivalent string in a [`SymbolTable`] is guaranteed to return
/// an equivalent `Symbol` every time.
///
/// A reference to the original interned string can be retrieved with a
/// `Symbol`.
///
/// A `Symbol` is backed by a `u32` index.
///
/// A `Symbol` is not constrained to the `SymbolTable` that created it. No
/// runtime check ensures that [`SymbolTable::get`] is called with a `Symbol`
/// issued by that same table.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Symbol(u32);
225
226impl Symbol {
227    /// Construct a new `Symbol` from the given `u32`.
228    ///
229    /// `Symbol`s constructed outside a [`SymbolTable`] may fail to resolve to
230    /// an underlying string using [`SymbolTable::get`].
231    ///
232    /// `Symbol`s are not constrained to the `SymbolTable` which created them.
233    /// No runtime checks ensure that [`SymbolTable::get`] is called with a
234    /// `Symbol` that the table itself issued.
235    ///
236    /// # Examples
237    ///
238    /// ```
239    /// # use intaglio::Symbol;
240    /// let sym = Symbol::new(263);
241    /// assert_eq!(263, sym.id());
242    /// ```
243    #[inline]
244    #[must_use]
245    pub const fn new(sym: u32) -> Self {
246        Self(sym)
247    }
248
249    /// Return the `u32` identifier from this `Symbol`.
250    ///
251    /// # Examples
252    ///
253    /// ```
254    /// # use intaglio::SymbolTable;
255    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
256    /// let mut table = SymbolTable::new();
257    /// let sym = table.intern("intaglio")?;
258    /// assert_eq!(u32::from(sym), sym.id());
259    /// # Ok(())
260    /// # }
261    /// # example().unwrap();
262    /// ```
263    #[inline]
264    #[must_use]
265    pub const fn id(self) -> u32 {
266        self.0
267    }
268}
269
#[cfg(test)]
mod tests {
    use core::cmp::Ordering;
    use core::hash::BuildHasher as _;
    use core::panic::{RefUnwindSafe, UnwindSafe};
    use std::collections::hash_map::RandomState;

    use super::SymbolOverflowError;

    // With a 64-bit `usize`, the maximum capacity must equal the number of
    // values in the inclusive `u32` symbol id range.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn max_capacity_is_length_of_symbol_range_usize_64_bit() {
        let max_capacity = SymbolOverflowError::new().max_capacity();
        let (lower, upper) = (0_u32..=u32::MAX).size_hint();
        assert_eq!(max_capacity, lower);
        assert_eq!(Some(max_capacity), upper);
    }

    // With a 32-bit `usize`, the maximum capacity saturates at `usize::MAX`.
    #[test]
    #[cfg(target_pointer_width = "32")]
    fn max_capacity_is_length_of_symbol_range_usize_32_bit() {
        let max_capacity = SymbolOverflowError::new().max_capacity();
        assert_eq!(max_capacity, usize::MAX);
    }

    #[test]
    fn error_display_is_not_empty() {
        let rendered = SymbolOverflowError::new().to_string();
        assert!(!rendered.is_empty());
    }

    #[test]
    fn error_debug_is_not_empty() {
        let rendered = format!("{:?}", SymbolOverflowError::new());
        assert!(!rendered.is_empty());
    }

    #[test]
    fn error_from_int_conversion_error() {
        // `u8::MAX` does not fit in an `i8`, so the conversion must fail.
        let converted: SymbolOverflowError = i8::try_from(u8::MAX).unwrap_err().into();
        assert_eq!(converted, SymbolOverflowError::new());
    }

    #[test]
    fn error_default_is_error_new() {
        assert_eq!(SymbolOverflowError::default(), SymbolOverflowError::new());
    }

    #[test]
    fn error_clone_is_equal_to_self() {
        let original = SymbolOverflowError::default();
        // The explicit `clone` call is the behavior under test.
        #[allow(clippy::clone_on_copy)]
        let duplicate = original.clone();
        assert_eq!(original, duplicate);
    }

    #[test]
    fn error_ord_is_equal_to_self() {
        let from_default = SymbolOverflowError::default();
        let from_new = SymbolOverflowError::new();
        assert_eq!(from_default.cmp(&from_new), Ordering::Equal);
        assert_eq!(from_new.cmp(&from_default), Ordering::Equal);
    }

    #[test]
    fn error_hash_is_equal_to_self() {
        // Both values are hashed with the same randomly seeded state.
        let state = RandomState::new();
        let hash_of_default = state.hash_one(SymbolOverflowError::default());
        let hash_of_new = state.hash_one(SymbolOverflowError::new());

        assert_eq!(hash_of_default, hash_of_new);
    }

    #[test]
    fn auto_traits_are_implemented() {
        // Compiles only if `T` implements every listed auto trait.
        fn assert_auto_traits<T>(_table: T)
        where
            T: RefUnwindSafe + Send + Sync + Unpin + UnwindSafe,
        {
        }

        assert_auto_traits(crate::SymbolTable::with_capacity(0));
        #[cfg(feature = "bytes")]
        assert_auto_traits(crate::bytes::SymbolTable::with_capacity(0));
        #[cfg(feature = "cstr")]
        assert_auto_traits(crate::cstr::SymbolTable::with_capacity(0));
        #[cfg(feature = "osstr")]
        assert_auto_traits(crate::osstr::SymbolTable::with_capacity(0));
        #[cfg(feature = "path")]
        assert_auto_traits(crate::path::SymbolTable::with_capacity(0));
    }
}
370
// Compile the code blocks in `README.md` as doctests.
//
// The README has examples for every interner, so these doctests are only
// built when all features are enabled.
//
// Keep this module declaration at the end of the file so that it does not
// interfere with code coverage.
#[cfg(doctest)]
#[cfg(all(feature = "bytes", feature = "cstr", feature = "osstr", feature = "path"))]
#[doc = include_str!("../README.md")]
mod readme {}