intaglio/lib.rs
1#![warn(clippy::all)]
2#![warn(clippy::pedantic)]
3#![warn(clippy::cargo)]
4#![warn(clippy::undocumented_unsafe_blocks)]
5#![allow(clippy::cast_possible_truncation)]
6#![allow(unknown_lints)]
7#![warn(missing_copy_implementations)]
8#![warn(missing_debug_implementations)]
9#![warn(missing_docs)]
10#![warn(rust_2018_idioms)]
11#![warn(trivial_casts, trivial_numeric_casts)]
12#![warn(unsafe_op_in_unsafe_fn)]
13#![warn(unused_qualifications)]
14#![warn(variant_size_differences)]
15// Enable feature callouts in generated documentation:
16// https://doc.rust-lang.org/beta/unstable-book/language-features/doc-cfg.html
17//
18// This approach is borrowed from tokio.
19#![cfg_attr(docsrs, feature(doc_cfg))]
20#![cfg_attr(docsrs, feature(doc_alias))]
21
22//! This crate provides a library for interning strings.
23//!
24//! The primary API is a symbol table. Its API is similar to a bimap in that
25//! symbols can resolve an underlying string and a string slice can retrieve
26//! its associated symbol.
27//!
28//! For more specific details on the API for interning strings into a symbol
29//! table, please see the documentation for the [`SymbolTable`] type.
30//!
31//! # Examples
32//!
33//! ```
34//! # use intaglio::SymbolTable;
35//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
36//! let mut table = SymbolTable::new();
37//! let sym_id = table.intern("abc")?;
38//! assert_eq!(sym_id, table.intern("abc".to_string())?);
39//! assert!(table.contains(sym_id));
40//! assert!(table.is_interned("abc"));
41//! # Ok(())
42//! # }
43//! # example().unwrap();
44//! ```
45//!
46//! # String interning
47//!
48//! Intaglio `SymbolTable`s store at most one copy of a string. All requests to
49//! intern a string that is already present in the table, regardless of whether
50//! the string is an owned `String` or borrowed `&'static str`, will return the
51//! same immutable [`Symbol`].
52//!
53//! [`Symbol`]s are `u32` indexes into a `SymbolTable` that are cheap to
54//! compare, copy, store, and send.
55//!
56//! # Allocations
57//!
58//! `SymbolTable` exposes several constructors for tuning the initial allocated
59//! size of the table. It also exposes several APIs for tuning the table's
60//! memory usage such as [`SymbolTable::reserve`] and [`SymbolTable::shrink_to_fit`].
61//!
62//! [`SymbolTable::intern`] does not clone or copy interned strings. It takes
63//! ownership of the string contents with no additional allocations.
64//!
65//! # Types of Interners
66//!
67//! Intaglio includes multiple symbol tables which differ in the types of strings
68//! they allow you to intern.
69//!
70//! - [`SymbolTable`] interns UTF-8 strings: [`String`] and [`&str`](prim@str).
71#![cfg_attr(
72 feature = "bytes",
73 doc = "- [`bytes::SymbolTable`] interns binary strings: [`Vec<u8>`] and `&[u8]`."
74)]
75#![cfg_attr(
76 feature = "cstr",
77 doc = "- [`cstr::SymbolTable`] interns C strings: [`CString`] and [`&CStr`]."
78)]
79#![cfg_attr(
80 feature = "osstr",
81 doc = "- [`osstr::SymbolTable`] interns platform strings: [`OsString`] and [`&OsStr`]."
82)]
83#![cfg_attr(
84 feature = "path",
85 doc = "- [`path::SymbolTable`] interns path strings: [`PathBuf`] and [`&Path`]."
86)]
87//!
88//! # Crate features
89//!
90//! All features are enabled by default.
91//!
92//! - **bytes** - Enables an additional symbol table implementation for interning
93//! byte strings ([`Vec<u8>`] and `&'static [u8]`).
94//! - **cstr** - Enables an additional symbol table implementation for interning
95//! C strings ([`CString`] and [`&'static CStr`]).
96//! - **osstr** - Enables an additional symbol table implementation for interning
97//! platform strings ([`OsString`] and [`&'static OsStr`]).
98//! - **path** - Enables an additional symbol table implementation for interning
99//! path strings ([`PathBuf`] and [`&'static Path`]).
100//!
101//! [`Vec<u8>`]: std::vec::Vec
102//! [`CString`]: std::ffi::CString
103//! [`&CStr`]: std::ffi::CStr
104//! [`&'static CStr`]: std::ffi::CStr
105//! [`OsString`]: std::ffi::OsString
106//! [`&OsStr`]: std::ffi::OsStr
107//! [`&'static OsStr`]: std::ffi::OsStr
108//! [`PathBuf`]: std::path::PathBuf
109//! [`&Path`]: std::path::Path
110//! [`&'static Path`]: std::path::Path
111
112#![doc(html_root_url = "https://docs.rs/intaglio/1.10.0")]
113
114use core::fmt;
115use core::num::TryFromIntError;
116use std::error;
117
// Assert at compile time that a constant boolean expression is `true`.
//
// The expression is evaluated in a `const` context. If it evaluates to
// `false`, compilation fails with an "assertion failed" diagnostic that names
// the offending expression, rather than an arithmetic-overflow error in an
// array length.
//
// A single trailing comma after the expression is accepted.
macro_rules! const_assert {
    ($x:expr $(,)?) => {
        // Callers may compare two constants that are spelled identically, which
        // `clippy::eq_op` would otherwise flag inside the expansion.
        #[allow(unknown_lints, clippy::eq_op)]
        const _: () = assert!($x);
    };
}
127
128#[cfg(feature = "bytes")]
129#[cfg_attr(docsrs, doc(cfg(feature = "bytes")))]
130pub mod bytes;
131mod convert;
132#[cfg(feature = "cstr")]
133#[cfg_attr(docsrs, doc(cfg(feature = "cstr")))]
134pub mod cstr;
135mod eq;
136mod internal;
137#[cfg(feature = "osstr")]
138#[cfg_attr(docsrs, doc(cfg(feature = "osstr")))]
139pub mod osstr;
140#[cfg(feature = "path")]
141#[cfg_attr(docsrs, doc(cfg(feature = "path")))]
142pub mod path;
143mod str;
144
145pub use crate::str::*;
146
147// To prevent overflows when indexing into the backing `Vec`, `intaglio`
148// requires `usize` to be at least as big as `u32`.
149const_assert!(usize::BITS >= u32::BITS);
150
/// Default capacity of a [`SymbolTable`] constructed via
/// [`SymbolTable::new`].
pub const DEFAULT_SYMBOL_TABLE_CAPACITY: usize = 4_096;
154
/// Error returned when a [`SymbolTable`] or symbol identifier overflows.
///
/// To save space, a `SymbolTable` identifies symbols with `u32` values. Once
/// more than `u32::MAX` symbols are stored in the table, no further
/// identifiers can be generated and every subsequent insert fails with this
/// error.
#[derive(Default, Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct SymbolOverflowError {
    // Private unit field: prevents construction with a struct literal from
    // outside this module.
    _private: (),
}

impl SymbolOverflowError {
    /// Create a new `SymbolOverflowError`. The error has no source.
    #[inline]
    #[must_use]
    pub const fn new() -> Self {
        Self { _private: () }
    }

    /// Return the maximum capacity of the [`SymbolTable`] that returned this
    /// error.
    #[inline]
    #[must_use]
    #[allow(clippy::unused_self)]
    pub const fn max_capacity(self) -> usize {
        // Every `u32` is a valid `Symbol`, so the identifier space is the
        // inclusive range
        //
        // ```
        // Symbol(0_u32)..=Symbol(u32::MAX)
        // ```
        //
        // which contains `u32::MAX + 1` values.
        //
        // A 32-bit `usize` cannot represent `u32::MAX + 1`. A `SymbolTable`
        // could not allocate that many entries there anyway, so the addition
        // saturates at `usize::MAX` instead of overflowing.
        (u32::MAX as usize).saturating_add(1)
    }
}

impl From<TryFromIntError> for SymbolOverflowError {
    /// Convert a failed integer conversion into a `SymbolOverflowError`. The
    /// original error is discarded.
    #[inline]
    fn from(_: TryFromIntError) -> Self {
        Self { _private: () }
    }
}

impl fmt::Display for SymbolOverflowError {
    /// Write the fixed, human-readable description of this error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const MESSAGE: &str = "Symbol overflow";
        f.write_str(MESSAGE)
    }
}

impl error::Error for SymbolOverflowError {}
209
/// Identifier bound to a string interned in a [`SymbolTable`].
///
/// Interning equivalent strings in a [`SymbolTable`] is guaranteed to yield
/// equivalent `Symbol`s, and a `Symbol` allows retrieving a reference to the
/// original interned string.
///
/// A `Symbol` is based on a `u32` index.
///
/// A `Symbol` is not tied to the `SymbolTable` that created it: no runtime
/// check ensures that [`SymbolTable::get`] is called with a `Symbol` issued by
/// that same table.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Symbol(u32);

impl Symbol {
    /// Wrap the given `u32` in a new `Symbol`.
    ///
    /// A `Symbol` built outside of a [`SymbolTable`] may fail to resolve to an
    /// underlying string when passed to [`SymbolTable::get`].
    ///
    /// `Symbol`s are not tied to the `SymbolTable` that created them, and no
    /// runtime check ensures that [`SymbolTable::get`] is called with a
    /// `Symbol` the table itself issued.
    ///
    /// # Examples
    ///
    /// ```
    /// # use intaglio::Symbol;
    /// let sym = Symbol::new(263);
    /// assert_eq!(sym.id(), 263);
    /// ```
    #[inline]
    #[must_use]
    pub const fn new(sym: u32) -> Self {
        Symbol(sym)
    }

    /// Return the `u32` identifier wrapped by this `Symbol`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use intaglio::SymbolTable;
    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// let mut table = SymbolTable::new();
    /// let sym = table.intern("intaglio")?;
    /// assert_eq!(sym.id(), u32::from(sym));
    /// # Ok(())
    /// # }
    /// # example().unwrap();
    /// ```
    #[inline]
    #[must_use]
    pub const fn id(self) -> u32 {
        let Symbol(id) = self;
        id
    }
}
269
#[cfg(test)]
mod tests {
    use core::cmp::Ordering;
    use core::hash::BuildHasher as _;
    use core::marker::Unpin;
    use core::panic::{RefUnwindSafe, UnwindSafe};
    use std::collections::hash_map::RandomState;

    use super::SymbolOverflowError;

    // On 64-bit targets the reported capacity must match both bounds of the
    // size hint of the inclusive `u32` symbol range.
    #[test]
    #[cfg(target_pointer_width = "64")]
    fn max_capacity_is_length_of_symbol_range_usize_64_bit() {
        let max_capacity = SymbolOverflowError::new().max_capacity();
        let (lower_bound, upper_bound) = (0_u32..=u32::MAX).size_hint();
        assert_eq!(max_capacity, lower_bound);
        assert_eq!(max_capacity, upper_bound.unwrap());
    }

    // On 32-bit targets the capacity saturates at `usize::MAX`.
    #[test]
    #[cfg(target_pointer_width = "32")]
    fn max_capacity_is_length_of_symbol_range_usize_32_bit() {
        let max_capacity = SymbolOverflowError::new().max_capacity();
        assert_eq!(max_capacity, usize::MAX);
    }

    #[test]
    fn error_display_is_not_empty() {
        let rendered = SymbolOverflowError::new().to_string();
        assert!(!rendered.is_empty());
    }

    #[test]
    fn error_debug_is_not_empty() {
        let rendered = format!("{:?}", SymbolOverflowError::new());
        assert!(!rendered.is_empty());
    }

    #[test]
    fn error_from_int_conversion_error() {
        // `u8::MAX` does not fit in an `i8`, so this conversion always fails.
        let conversion_failure = i8::try_from(u8::MAX).unwrap_err();
        assert_eq!(
            SymbolOverflowError::from(conversion_failure),
            SymbolOverflowError::new()
        );
    }

    #[test]
    fn error_default_is_error_new() {
        assert_eq!(SymbolOverflowError::default(), SymbolOverflowError::new());
    }

    #[test]
    fn error_clone_is_equal_to_self() {
        let original = SymbolOverflowError::default();
        // The explicit `clone` call is the behavior under test.
        #[allow(clippy::clone_on_copy)]
        let duplicate = original.clone();
        assert_eq!(original, duplicate);
    }

    #[test]
    fn error_ord_is_equal_to_self() {
        let from_default = SymbolOverflowError::default();
        let from_new = SymbolOverflowError::new();
        assert_eq!(from_default.cmp(&from_new), Ordering::Equal);
        assert_eq!(from_new.cmp(&from_default), Ordering::Equal);
    }

    #[test]
    fn error_hash_is_equal_to_self() {
        // Both values must be hashed with the same randomly seeded state.
        let state = RandomState::new();
        assert_eq!(
            state.hash_one(SymbolOverflowError::default()),
            state.hash_one(SymbolOverflowError::new())
        );
    }

    #[test]
    fn auto_traits_are_implemented() {
        // Compiles only if `T` implements every listed auto trait.
        fn assert_auto_traits<T: RefUnwindSafe + Send + Sync + Unpin + UnwindSafe>(_table: T) {}

        assert_auto_traits(crate::SymbolTable::with_capacity(0));
        #[cfg(feature = "bytes")]
        assert_auto_traits(crate::bytes::SymbolTable::with_capacity(0));
        #[cfg(feature = "cstr")]
        assert_auto_traits(crate::cstr::SymbolTable::with_capacity(0));
        #[cfg(feature = "osstr")]
        assert_auto_traits(crate::osstr::SymbolTable::with_capacity(0));
        #[cfg(feature = "path")]
        assert_auto_traits(crate::path::SymbolTable::with_capacity(0));
    }
}
370
// Compile the code blocks in `README.md` as doctests.
//
// The README contains examples from all interners, so these doctests only run
// when every optional feature is enabled.
//
// Keep this module declaration as the last item in the file so that it does
// not interfere with code coverage.
#[cfg(all(
    doctest,
    feature = "bytes",
    feature = "cstr",
    feature = "osstr",
    feature = "path"
))]
#[doc = include_str!("../README.md")]
mod readme {}