intaglio/
bytes.rs

1//! Intern arbitrary bytes.
2//!
3//! This module provides a nearly identical API to the one found in the
4//! top-level of this crate. There is one important difference:
5//!
6//! 1. Interned contents are `&[u8]` instead of `&str`. Additionally, `Vec<u8>`
7//!    is used where `String` would have been used.
8//!
9//! # Example: intern byte string
10//!
11//! ```
12//! # use intaglio::bytes::SymbolTable;
13//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
14//! let mut table = SymbolTable::new();
15//! let sym = table.intern(&b"abc"[..])?;
16//! assert_eq!(sym, table.intern(b"abc".to_vec())?);
17//! assert_eq!(Some(&b"abc"[..]), table.get(sym));
18//! # Ok(())
19//! # }
20//! # example().unwrap();
21//! ```
22//!
23//! # Example: symbol iterators
24//!
25//! ```
26//! # use std::collections::HashMap;
27//! # use intaglio::bytes::SymbolTable;
28//! # use intaglio::Symbol;
29//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
30//! let mut table = SymbolTable::new();
31//! let sym = table.intern(&b"abc"[..])?;
32//! // Retrieve set of `Symbol`s.
33//! let all_symbols = table.all_symbols();
34//! assert_eq!(vec![sym], all_symbols.collect::<Vec<_>>());
35//!
36//! table.intern(&b"xyz"[..])?;
37//! let mut map = HashMap::new();
38//! map.insert(Symbol::new(0), &b"abc"[..]);
39//! map.insert(Symbol::new(1), &b"xyz"[..]);
40//! // Retrieve symbol to byte content mappings.
41//! let iter = table.iter();
42//! assert_eq!(map, iter.collect::<HashMap<_, _>>());
43//! # Ok(())
44//! # }
45//! # example().unwrap();
46//! ```
47//!
48//! # Performance
49//!
50//! In general, one should expect this crate's performance on `&[u8]` to be
51//! roughly similar to performance on `&str`.
52
53use core::hash::BuildHasher;
54use core::iter::{FromIterator, FusedIterator, Zip};
55use core::marker::PhantomData;
56use core::mem::ManuallyDrop;
57use core::ops::Range;
58use core::slice;
59use std::borrow::Cow;
60use std::collections::hash_map::{HashMap, RandomState};
61
62use crate::internal::Interned;
63use crate::{Symbol, SymbolOverflowError, DEFAULT_SYMBOL_TABLE_CAPACITY};
64
65/// An iterator over all [`Symbol`]s in a [`SymbolTable`].
66///
67/// See the [`all_symbols`](SymbolTable::all_symbols) method in [`SymbolTable`].
68///
69/// # Usage
70///
71/// ```
72/// # use intaglio::bytes::SymbolTable;
73/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
74/// let mut table = SymbolTable::new();
75/// let sym = table.intern(&b"abc"[..])?;
76/// let all_symbols = table.all_symbols();
77/// assert_eq!(vec![sym], all_symbols.collect::<Vec<_>>());
78/// # Ok(())
79/// # }
80/// # example().unwrap();
81/// ```
82#[derive(Debug, Clone, Hash, PartialEq, Eq)]
83#[cfg_attr(docsrs, doc(cfg(feature = "bytes")))]
84pub struct AllSymbols<'a> {
85    range: Range<usize>,
86    // Hold a shared reference to the underlying `SymbolTable` to ensure the
87    // table is not modified while we are iterating which would make the results
88    // not match the real state.
89    phantom: PhantomData<&'a SymbolTable>,
90}
91
92impl Iterator for AllSymbols<'_> {
93    type Item = Symbol;
94
95    fn next(&mut self) -> Option<Self::Item> {
96        let next = self.range.next()?;
97        debug_assert!(u32::try_from(next).is_ok());
98        Some((next as u32).into())
99    }
100
101    fn size_hint(&self) -> (usize, Option<usize>) {
102        self.range.size_hint()
103    }
104
105    fn count(self) -> usize {
106        self.range.count()
107    }
108
109    fn last(self) -> Option<Self::Item> {
110        let last = self.range.last()?;
111        debug_assert!(u32::try_from(last).is_ok());
112        Some((last as u32).into())
113    }
114
115    fn nth(&mut self, n: usize) -> Option<Self::Item> {
116        let nth = self.range.nth(n)?;
117        debug_assert!(u32::try_from(nth).is_ok());
118        Some((nth as u32).into())
119    }
120
121    fn collect<B: FromIterator<Self::Item>>(self) -> B {
122        self.range.map(|sym| Symbol::from(sym as u32)).collect()
123    }
124}
125
126impl DoubleEndedIterator for AllSymbols<'_> {
127    fn next_back(&mut self) -> Option<Self::Item> {
128        let next = self.range.next_back()?;
129        debug_assert!(u32::try_from(next).is_ok());
130        Some((next as u32).into())
131    }
132
133    fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
134        let nth = self.range.nth_back(n)?;
135        debug_assert!(u32::try_from(nth).is_ok());
136        Some((nth as u32).into())
137    }
138}
139
140impl FusedIterator for AllSymbols<'_> {}
141
142/// An iterator over all interned byte strings in a [`SymbolTable`].
143///
144/// See the [`bytestrings`](SymbolTable::bytestrings) method in [`SymbolTable`].
145///
146/// # Usage
147///
148/// ```
149/// # use intaglio::bytes::SymbolTable;
150/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
151/// let mut table = SymbolTable::new();
152/// let sym = table.intern(b"abc".to_vec())?;
153/// let bytestrings = table.bytestrings();
154/// assert_eq!(vec![&b"abc"[..]], bytestrings.collect::<Vec<_>>());
155/// # Ok(())
156/// # }
157/// # example().unwrap();
158/// ```
159#[derive(Debug, Clone)]
160#[cfg_attr(docsrs, doc(cfg(feature = "bytes")))]
161pub struct Bytestrings<'a>(slice::Iter<'a, Interned<[u8]>>);
162
163impl<'a> Iterator for Bytestrings<'a> {
164    type Item = &'a [u8];
165
166    fn next(&mut self) -> Option<Self::Item> {
167        self.0.next().map(Interned::as_slice)
168    }
169
170    fn size_hint(&self) -> (usize, Option<usize>) {
171        self.0.size_hint()
172    }
173
174    fn count(self) -> usize {
175        self.0.count()
176    }
177
178    fn last(self) -> Option<Self::Item> {
179        self.0.last().map(Interned::as_slice)
180    }
181
182    fn nth(&mut self, n: usize) -> Option<Self::Item> {
183        self.0.nth(n).map(Interned::as_slice)
184    }
185
186    fn collect<B: FromIterator<Self::Item>>(self) -> B {
187        self.0.map(Interned::as_slice).collect()
188    }
189}
190
191impl DoubleEndedIterator for Bytestrings<'_> {
192    fn next_back(&mut self) -> Option<Self::Item> {
193        self.0.next_back().map(Interned::as_slice)
194    }
195
196    fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
197        self.0.nth_back(n).map(Interned::as_slice)
198    }
199
200    fn rfold<B, F>(self, accum: B, f: F) -> B
201    where
202        F: FnMut(B, Self::Item) -> B,
203    {
204        self.0.map(Interned::as_slice).rfold(accum, f)
205    }
206}
207
208impl ExactSizeIterator for Bytestrings<'_> {
209    fn len(&self) -> usize {
210        self.0.len()
211    }
212}
213
214impl FusedIterator for Bytestrings<'_> {}
215
216/// An iterator over all symbols and interned byte strings in a [`SymbolTable`].
217///
218/// See the [`iter`](SymbolTable::iter) method in [`SymbolTable`].
219///
220/// # Usage
221///
222/// ```
223/// # use std::collections::HashMap;
224/// # use intaglio::bytes::SymbolTable;
225/// # use intaglio::Symbol;
226/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
227/// let mut table = SymbolTable::new();
228/// let sym = table.intern(b"abc".to_vec())?;
229/// let iter = table.iter();
230/// let mut map = HashMap::new();
231/// map.insert(Symbol::new(0), &b"abc"[..]);
232/// assert_eq!(map, iter.collect::<HashMap<_, _>>());
233/// # Ok(())
234/// # }
235/// # example().unwrap();
236/// ```
237#[derive(Debug, Clone)]
238#[cfg_attr(docsrs, doc(cfg(feature = "bytes")))]
239pub struct Iter<'a>(Zip<AllSymbols<'a>, Bytestrings<'a>>);
240
241impl<'a> Iterator for Iter<'a> {
242    type Item = (Symbol, &'a [u8]);
243
244    fn next(&mut self) -> Option<Self::Item> {
245        self.0.next()
246    }
247
248    fn size_hint(&self) -> (usize, Option<usize>) {
249        self.0.size_hint()
250    }
251
252    fn count(self) -> usize {
253        self.0.count()
254    }
255
256    fn last(self) -> Option<Self::Item> {
257        self.0.last()
258    }
259
260    fn nth(&mut self, n: usize) -> Option<Self::Item> {
261        self.0.nth(n)
262    }
263
264    fn collect<B: FromIterator<Self::Item>>(self) -> B {
265        self.0.collect()
266    }
267}
268
269impl FusedIterator for Iter<'_> {}
270
271impl<'a, S> IntoIterator for &'a SymbolTable<S> {
272    type Item = (Symbol, &'a [u8]);
273    type IntoIter = Iter<'a>;
274
275    fn into_iter(self) -> Self::IntoIter {
276        self.iter()
277    }
278}
279
280/// Byte string interner.
281///
282/// This symbol table is implemented by storing byte strings with a fast path for
283/// `&[u8]` that are already `'static`.
284///
285/// See module documentation for more.
286///
287/// # Usage
288///
289/// ```
290/// # use intaglio::bytes::SymbolTable;
291/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
292/// let mut table = SymbolTable::new();
293/// let sym = table.intern(&b"abc"[..])?;
294/// assert_eq!(sym, table.intern(b"abc".to_vec())?);
295/// assert!(table.contains(sym));
296/// assert!(table.is_interned(b"abc"));
297/// # Ok(())
298/// # }
299/// # example().unwrap();
300/// ```
301#[derive(Default, Debug)]
302#[cfg_attr(docsrs, doc(cfg(feature = "bytes")))]
303pub struct SymbolTable<S = RandomState> {
304    map: ManuallyDrop<HashMap<&'static [u8], Symbol, S>>,
305    vec: ManuallyDrop<Vec<Interned<[u8]>>>,
306}
307
308impl<S> Drop for SymbolTable<S> {
309    fn drop(&mut self) {
310        // SAFETY: No mutable references to `SymbolTable` internal fields are
311        // given out, which means `ManuallyDrop::drop` can only be invoked in
312        // this `Drop::drop` impl. Interal fields are guaranteed to be
313        // initialized by `SymbolTable` constructors.
314        unsafe {
315            // `Interned` requires that the `'static` references it gives out
316            // are dropped before the owning buffer stored in the `Interned`.
317            ManuallyDrop::drop(&mut self.map);
318            ManuallyDrop::drop(&mut self.vec);
319        }
320    }
321}
322
323impl SymbolTable<RandomState> {
324    /// Constructs a new, empty `SymbolTable` with [default capacity].
325    ///
326    /// This function will always allocate. To construct a symbol table without
327    /// allocating, call [`SymbolTable::with_capacity(0)`].
328    ///
329    /// # Examples
330    ///
331    /// ```
332    /// # use intaglio::bytes::SymbolTable;
333    /// let table = SymbolTable::new();
334    /// assert_eq!(0, table.len());
335    /// assert!(table.capacity() >= 4096);
336    /// ```
337    ///
338    /// [default capacity]: DEFAULT_SYMBOL_TABLE_CAPACITY
339    /// [`SymbolTable::with_capacity(0)`]: Self::with_capacity
340    #[must_use]
341    pub fn new() -> Self {
342        Self::with_capacity(DEFAULT_SYMBOL_TABLE_CAPACITY)
343    }
344
345    /// Constructs a new, empty `SymbolTable` with the specified capacity.
346    ///
347    /// The symbol table will be able to hold at least `capacity` byte strings
348    /// without reallocating. If `capacity` is 0, the symbol table will not
349    /// allocate.
350    ///
351    /// # Examples
352    ///
353    /// ```
354    /// # use intaglio::bytes::SymbolTable;
355    /// let table = SymbolTable::with_capacity(10);
356    /// assert_eq!(0, table.len());
357    /// assert!(table.capacity() >= 10);
358    /// ```
359    #[must_use]
360    pub fn with_capacity(capacity: usize) -> Self {
361        let capacity = capacity.next_power_of_two();
362        Self {
363            map: ManuallyDrop::new(HashMap::with_capacity(capacity)),
364            vec: ManuallyDrop::new(Vec::with_capacity(capacity)),
365        }
366    }
367}
368
369impl<S> SymbolTable<S> {
370    /// Constructs a new, empty `SymbolTable` with
371    /// [default capacity](DEFAULT_SYMBOL_TABLE_CAPACITY) and the given hash
372    /// builder.
373    ///
374    /// # Examples
375    ///
376    /// ```
377    /// # use std::collections::hash_map::RandomState;
378    /// # use intaglio::bytes::SymbolTable;
379    /// let hash_builder = RandomState::new();
380    /// let table = SymbolTable::with_hasher(hash_builder);
381    /// assert_eq!(0, table.len());
382    /// assert!(table.capacity() >= 4096);
383    /// ```
384    pub fn with_hasher(hash_builder: S) -> Self {
385        Self::with_capacity_and_hasher(DEFAULT_SYMBOL_TABLE_CAPACITY, hash_builder)
386    }
387
388    /// Constructs a new, empty `SymbolTable` with the specified capacity and
389    /// the given hash builder.
390    ///
391    /// # Examples
392    ///
393    /// ```
394    /// # use std::collections::hash_map::RandomState;
395    /// # use intaglio::bytes::SymbolTable;
396    /// let hash_builder = RandomState::new();
397    /// let table = SymbolTable::with_capacity_and_hasher(10, hash_builder);
398    /// assert_eq!(0, table.len());
399    /// assert!(table.capacity() >= 10);
400    /// ```
401    pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self {
402        Self {
403            map: ManuallyDrop::new(HashMap::with_capacity_and_hasher(capacity, hash_builder)),
404            vec: ManuallyDrop::new(Vec::with_capacity(capacity)),
405        }
406    }
407
408    /// Returns the number of byte strings the table can hold without
409    /// reallocating.
410    ///
411    /// # Examples
412    ///
413    /// ```
414    /// # use intaglio::bytes::SymbolTable;
415    /// let table = SymbolTable::with_capacity(10);
416    /// assert!(table.capacity() >= 10);
417    /// ```
418    pub fn capacity(&self) -> usize {
419        usize::min(self.vec.capacity(), self.map.capacity())
420    }
421
422    /// Returns the number of interned byte strings in the table.
423    ///
424    /// # Examples
425    ///
426    /// ```
427    /// # use intaglio::bytes::SymbolTable;
428    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
429    /// let mut table = SymbolTable::new();
430    /// assert_eq!(0, table.len());
431    ///
432    /// table.intern(b"abc".to_vec())?;
433    /// // only uniquely interned byte strings grow the symbol table.
434    /// table.intern(b"abc".to_vec())?;
435    /// table.intern(b"xyz".to_vec())?;
436    /// assert_eq!(2, table.len());
437    /// # Ok(())
438    /// # }
439    /// # example().unwrap();
440    /// ```
441    pub fn len(&self) -> usize {
442        self.vec.len()
443    }
444
445    /// Returns `true` if the symbol table contains no interned byte strings.
446    ///
447    /// # Examples
448    ///
449    /// ```
450    /// # use intaglio::bytes::SymbolTable;
451    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
452    /// let mut table = SymbolTable::new();
453    /// assert!(table.is_empty());
454    ///
455    /// table.intern(b"abc".to_vec())?;
456    /// assert!(!table.is_empty());
457    /// # Ok(())
458    /// # }
459    /// # example().unwrap();
460    /// ```
461    pub fn is_empty(&self) -> bool {
462        self.vec.is_empty()
463    }
464
465    /// Returns `true` if the symbol table contains the given symbol.
466    ///
467    /// # Examples
468    ///
469    /// ```
470    /// # use intaglio::bytes::SymbolTable;
471    /// # use intaglio::Symbol;
472    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
473    /// let mut table = SymbolTable::new();
474    /// assert!(!table.contains(Symbol::new(0)));
475    ///
476    /// let sym = table.intern(b"abc".to_vec())?;
477    /// assert!(table.contains(Symbol::new(0)));
478    /// assert!(table.contains(sym));
479    /// # Ok(())
480    /// # }
481    /// # example().unwrap();
482    /// ```
483    #[must_use]
484    pub fn contains(&self, id: Symbol) -> bool {
485        self.get(id).is_some()
486    }
487
488    /// Returns a reference to the byte string associated with the given symbol.
489    ///
490    /// If the given symbol does not exist in the underlying symbol table,
491    /// `None` is returned.
492    ///
493    /// The lifetime of the returned reference is bound to the symbol table.
494    ///
495    /// # Examples
496    ///
497    /// ```
498    /// # use intaglio::bytes::SymbolTable;
499    /// # use intaglio::Symbol;
500    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
501    /// let mut table = SymbolTable::new();
502    /// assert!(table.get(Symbol::new(0)).is_none());
503    ///
504    /// let sym = table.intern(b"abc".to_vec())?;
505    /// assert_eq!(Some(&b"abc"[..]), table.get(Symbol::new(0)));
506    /// assert_eq!(Some(&b"abc"[..]), table.get(sym));
507    /// # Ok(())
508    /// # }
509    /// # example().unwrap();
510    /// ```
511    #[must_use]
512    pub fn get(&self, id: Symbol) -> Option<&[u8]> {
513        let bytes = self.vec.get(usize::from(id))?;
514        Some(bytes.as_slice())
515    }
516
517    /// Returns an iterator over all [`Symbol`]s and byte strings in the
518    /// [`SymbolTable`].
519    ///
520    /// # Examples
521    ///
522    /// ```
523    /// # use std::collections::HashMap;
524    /// # use intaglio::bytes::SymbolTable;
525    /// # use intaglio::Symbol;
526    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
527    /// let mut table = SymbolTable::new();
528    /// table.intern(b"abc".to_vec())?;
529    /// table.intern(b"xyz".to_vec())?;
530    /// table.intern(b"123".to_vec())?;
531    /// table.intern(b"789".to_vec())?;
532    ///
533    /// let iter = table.iter();
534    /// let mut map = HashMap::new();
535    /// map.insert(Symbol::new(0), &b"abc"[..]);
536    /// map.insert(Symbol::new(1), &b"xyz"[..]);
537    /// map.insert(Symbol::new(2), &b"123"[..]);
538    /// map.insert(Symbol::new(3), &b"789"[..]);
539    /// assert_eq!(map, iter.collect::<HashMap<_, _>>());
540    /// # Ok(())
541    /// # }
542    /// # example().unwrap();
543    /// ```
544    ///
545    /// ```
546    /// # use intaglio::bytes::SymbolTable;
547    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
548    /// let mut table = SymbolTable::new();
549    /// table.intern(b"abc".to_vec())?;
550    /// table.intern(b"xyz".to_vec())?;
551    /// table.intern(b"123".to_vec())?;
552    /// table.intern(b"789".to_vec())?;
553    ///
554    /// let iter = table.iter();
555    /// assert_eq!(table.len(), iter.count());
556    /// # Ok(())
557    /// # }
558    /// # example().unwrap();
559    /// ```
560    pub fn iter(&self) -> Iter<'_> {
561        Iter(self.all_symbols().zip(self.bytestrings()))
562    }
563
564    /// Returns an iterator over all [`Symbol`]s in the [`SymbolTable`].
565    ///
566    /// # Examples
567    ///
568    /// ```
569    /// # use intaglio::bytes::SymbolTable;
570    /// # use intaglio::Symbol;
571    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
572    /// let mut table = SymbolTable::new();
573    /// table.intern(b"abc".to_vec())?;
574    /// table.intern(b"xyz".to_vec())?;
575    /// table.intern(b"123".to_vec())?;
576    /// table.intern(b"789".to_vec())?;
577    ///
578    /// let mut all_symbols = table.all_symbols();
579    /// assert_eq!(Some(Symbol::new(0)), all_symbols.next());
580    /// assert_eq!(Some(Symbol::new(1)), all_symbols.nth_back(2));
581    /// assert_eq!(None, all_symbols.next());
582    /// # Ok(())
583    /// # }
584    /// # example().unwrap();
585    /// ```
586    ///
587    /// ```
588    /// # use intaglio::bytes::SymbolTable;
589    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
590    /// let mut table = SymbolTable::new();
591    /// table.intern(b"abc".to_vec())?;
592    /// table.intern(b"xyz".to_vec())?;
593    /// table.intern(b"123".to_vec())?;
594    /// table.intern(b"789".to_vec())?;
595    ///
596    /// let all_symbols = table.all_symbols();
597    /// assert_eq!(table.len(), all_symbols.count());
598    /// # Ok(())
599    /// # }
600    /// # example().unwrap();
601    /// ```
602    pub fn all_symbols(&self) -> AllSymbols<'_> {
603        AllSymbols {
604            range: 0..self.len(),
605            phantom: PhantomData,
606        }
607    }
608
609    /// Returns an iterator over all byte strings in the [`SymbolTable`].
610    ///
611    /// # Examples
612    ///
613    /// ```
614    /// # use intaglio::bytes::SymbolTable;
615    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
616    /// let mut table = SymbolTable::new();
617    /// table.intern(b"abc".to_vec())?;
618    /// table.intern(b"xyz".to_vec())?;
619    /// table.intern(b"123".to_vec())?;
620    /// table.intern(b"789".to_vec())?;
621    ///
622    /// let mut bytestrings = table.bytestrings();
623    /// assert_eq!(Some(&b"abc"[..]), bytestrings.next());
624    /// assert_eq!(Some(&b"xyz"[..]), bytestrings.nth_back(2));
625    /// assert_eq!(None, bytestrings.next());
626    /// # Ok(())
627    /// # }
628    /// # example().unwrap();
629    /// ```
630    ///
631    /// ```
632    /// # use intaglio::bytes::SymbolTable;
633    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
634    /// let mut table = SymbolTable::new();
635    /// table.intern(b"abc".to_vec())?;
636    /// table.intern(b"xyz".to_vec())?;
637    /// table.intern(b"123".to_vec())?;
638    /// table.intern(b"789".to_vec())?;
639    ///
640    /// let bytestrings = table.bytestrings();
641    /// assert_eq!(table.len(), bytestrings.count());
642    /// # Ok(())
643    /// # }
644    /// # example().unwrap();
645    /// ```
646    pub fn bytestrings(&self) -> Bytestrings<'_> {
647        Bytestrings(self.vec.iter())
648    }
649}
650
651impl<S> SymbolTable<S>
652where
653    S: BuildHasher,
654{
655    /// Intern a byte string for the lifetime of the symbol table.
656    ///
657    /// The returned `Symbol` allows retrieving of the underlying bytes.
658    /// Equal byte strings will be inserted into the symbol table exactly once.
659    ///
660    /// This function only allocates if the underlying symbol table has no
661    /// remaining capacity.
662    ///
663    /// # Errors
664    ///
665    /// If the symbol table would grow larger than `u32::MAX` interned
666    /// byte strings, the [`Symbol`] counter would overflow and a
667    /// [`SymbolOverflowError`] is returned.
668    ///
669    /// # Examples
670    ///
671    /// ```
672    /// # use intaglio::bytes::SymbolTable;
673    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
674    /// let mut table = SymbolTable::new();
675    /// let sym = table.intern(b"abc".to_vec())?;
676    /// table.intern(b"xyz".to_vec())?;
677    /// table.intern(&b"123"[..])?;
678    /// table.intern(&b"789"[..])?;
679    ///
680    /// assert_eq!(4, table.len());
681    /// assert_eq!(Some(&b"abc"[..]), table.get(sym));
682    /// # Ok(())
683    /// # }
684    /// # example().unwrap();
685    /// ```
686    pub fn intern<T>(&mut self, contents: T) -> Result<Symbol, SymbolOverflowError>
687    where
688        T: Into<Cow<'static, [u8]>>,
689    {
690        let contents = contents.into();
691        if let Some(&id) = self.map.get(&*contents) {
692            return Ok(id);
693        }
694
695        // The `Interned::Owned` variant is derived from a `Box<T>`. When such
696        // a structure is moved or assigned, as it is below in the call to
697        // `self.vec.push`, the allocation is "retagged" in Miri/stacked borrows.
698        //
699        // Retagging an allocation pops all of the borrows derived from it off
700        // of the stack. This means we need to move the `Interned` into the
701        // `Vec` before calling `Interned::as_static_slice` to ensure the
702        // reference does not get invalidated by retagging.
703        //
704        // However, that alone may be insufficient as the `Interened` may be
705        // moved when the symbol table grows.
706        //
707        // The `SymbolTable` API prevents shared references to the `Interned`
708        // being invalidated by a retag by tying resolved symbol contents,
709        // `&'a T`, to `&'a SymbolTable`, which means the `SymbolTable` cannot
710        // grow, shrink, or otherwise reallocate/move contents while a reference
711        // to the `Interned`'s inner `T` is alive.
712        //
713        // To protect against future updates to stacked borrows or the unsafe
714        // code operational semantics, we can address this additional invariant
715        // with updated `Interned` internals which store the `Box<T>` in a raw
716        // pointer form, which allows moves to be treated as untyped copies.
717        //
718        // See:
719        //
720        // - <https://github.com/artichoke/intaglio/issues/235>
721        // - <https://github.com/artichoke/intaglio/pull/236>
722        let name = Interned::from(contents);
723        let id = Symbol::try_from(self.map.len())?;
724
725        // Move the `Interned` into the `Vec`, causing it to be retagged under
726        // stacked borrows, before taking any references to its inner `T`.
727        self.vec.push(name);
728        // Ensure we grow the map before we take any shared references to the
729        // inner `T`.
730        self.map.reserve(1);
731
732        // SAFETY: `self.vec` is non-empty because the preceding line of code
733        // pushed an entry into it.
734        let name = unsafe { self.vec.last().unwrap_unchecked() };
735
736        // SAFETY: This expression creates a reference with a `'static` lifetime
737        // from an owned and interned buffer, which is permissible because:
738        //
739        // - `Interned` is an internal implementation detail of `SymbolTable`.
740        // - `SymbolTable` never gives out `'static` references to underlying
741        //   contents.
742        // - All slice references given out by the `SymbolTable` have the same
743        //   lifetime as the `SymbolTable`.
744        // - The `map` field of `SymbolTable`, which contains the `'static`
745        //   references, is dropped before the owned buffers stored in this
746        //   `Interned`.
747        // - The shared reference may be derived from a `PinBox` which prevents
748        //   moves from retagging the underlying boxed `T` under stacked borrows.
749        // - The symbol table cannot grow, shrink, or otherwise move its contents
750        //   while this reference is alive.
751        let slice = unsafe { name.as_static_slice() };
752
753        self.map.insert(slice, id);
754
755        debug_assert_eq!(self.get(id), Some(slice));
756        debug_assert_eq!(self.intern(slice), Ok(id));
757
758        Ok(id)
759    }
760
761    /// Returns the `Symbol` identifier for `contents` if it has been interned
762    /// before, `None` otherwise.
763    ///
764    /// This method does not modify the symbol table.
765    ///
766    /// # Examples
767    ///
768    /// ```
769    /// # use intaglio::bytes::SymbolTable;
770    /// # use intaglio::Symbol;
771    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
772    /// let mut table = SymbolTable::new();
773    /// assert!(!table.is_interned(b"abc"));
774    /// assert_eq!(None, table.check_interned(b"abc"));
775    ///
776    /// table.intern(b"abc".to_vec())?;
777    /// assert!(table.is_interned(b"abc"));
778    /// assert_eq!(Some(Symbol::new(0)), table.check_interned(b"abc"));
779    /// # Ok(())
780    /// # }
781    /// # example().unwrap();
782    /// ```
783    #[must_use]
784    pub fn check_interned(&self, contents: &[u8]) -> Option<Symbol> {
785        self.map.get(contents).copied()
786    }
787
788    /// Returns `true` if the given byte string has been interned before.
789    ///
790    /// This method does not modify the symbol table.
791    ///
792    /// # Examples
793    ///
794    /// ```
795    /// # use intaglio::bytes::SymbolTable;
796    /// # use intaglio::Symbol;
797    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
798    /// let mut table = SymbolTable::new();
799    /// assert!(!table.is_interned(b"abc"));
800    /// assert_eq!(None, table.check_interned(b"abc"));
801    ///
802    /// table.intern(b"abc".to_vec())?;
803    /// assert!(table.is_interned(b"abc"));
804    /// assert_eq!(Some(Symbol::new(0)), table.check_interned(b"abc"));
805    /// # Ok(())
806    /// # }
807    /// # example().unwrap();
808    /// ```
809    #[must_use]
810    pub fn is_interned(&self, contents: &[u8]) -> bool {
811        self.map.contains_key(contents)
812    }
813
814    /// Reserves capacity for at least additional more elements to be inserted
815    /// in the given `SymbolTable`. The collection may reserve more space to
816    /// avoid frequent reallocations. After calling reserve, capacity will be
817    /// greater than or equal to `self.len() + additional`. Does nothing if
818    /// capacity is already sufficient.
819    ///
820    /// # Panics
821    ///
822    /// Panics if the new capacity overflows `usize`.
823    ///
824    /// # Examples
825    ///
826    /// ```
827    /// # use intaglio::bytes::SymbolTable;
828    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
829    /// let mut table = SymbolTable::with_capacity(1);
830    /// table.intern(b"abc".to_vec())?;
831    /// table.reserve(10);
832    /// assert!(table.capacity() >= 11);
833    /// # Ok(())
834    /// # }
835    /// # example().unwrap();
836    /// ```
837    pub fn reserve(&mut self, additional: usize) {
838        self.map.reserve(additional);
839        self.vec.reserve(additional);
840    }
841
842    /// Shrinks the capacity of the symbol table as much as possible.
843    ///
844    /// It will drop down as close as possible to the length but the allocator
845    /// may still inform the symbol table that there is space for a few more
846    /// elements.
847    ///
848    /// # Examples
849    ///
850    /// ```
851    /// # use intaglio::bytes::SymbolTable;
852    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
853    /// let mut table = SymbolTable::with_capacity(10);
854    /// table.intern(b"abc".to_vec());
855    /// table.intern(b"xyz".to_vec());
856    /// table.intern(b"123".to_vec());
857    /// table.shrink_to_fit();
858    /// assert!(table.capacity() >= 3);
859    /// # Ok(())
860    /// # }
861    /// # example().unwrap();
862    /// ```
863    pub fn shrink_to_fit(&mut self) {
864        self.map.shrink_to_fit();
865        self.vec.shrink_to_fit();
866    }
867
868    /// Shrinks the capacity of the symbol table with a lower bound.
869    ///
870    /// The capacity will remain at least as large as both the length and the
871    /// supplied value.
872    ///
873    /// If the current capacity is less than the lower limit, this is a no-op.
874    ///
875    /// # Examples
876    ///
877    /// ```
878    /// # use intaglio::bytes::SymbolTable;
879    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
880    /// let mut table = SymbolTable::with_capacity(10);
881    /// table.intern(b"abc".to_vec())?;
882    /// table.intern(b"xyz".to_vec())?;
883    /// table.intern(b"123".to_vec())?;
884    /// table.shrink_to(5);
885    /// assert!(table.capacity() >= 5);
886    /// # Ok(())
887    /// # }
888    /// # example().unwrap();
889    /// ```
890    pub fn shrink_to(&mut self, min_capacity: usize) {
891        self.map.shrink_to(min_capacity);
892        self.vec.shrink_to(min_capacity);
893    }
894}
895
896#[cfg(test)]
897#[allow(clippy::needless_pass_by_value)]
898mod tests {
899    use quickcheck::quickcheck;
900
901    use super::SymbolTable;
902
903    #[test]
904    fn alloc_drop_new() {
905        let table = SymbolTable::new();
906        drop(table);
907    }
908
909    #[test]
910    fn alloc_drop_with_capacity() {
911        let table = SymbolTable::with_capacity(1 << 14);
912        drop(table);
913    }
914
915    #[test]
916    fn drop_with_true_static_data() {
917        let mut table = SymbolTable::new();
918        table.intern(&b"1"[..]).unwrap();
919        table.intern(&b"2"[..]).unwrap();
920        table.intern(&b"3"[..]).unwrap();
921        table.intern(&b"4"[..]).unwrap();
922        table.intern(&b"5"[..]).unwrap();
923        drop(table);
924    }
925
926    #[test]
927    fn drop_with_owned_data() {
928        let mut table = SymbolTable::new();
929        table.intern(b"1".to_vec()).unwrap();
930        table.intern(b"2".to_vec()).unwrap();
931        table.intern(b"3".to_vec()).unwrap();
932        table.intern(b"4".to_vec()).unwrap();
933        table.intern(b"5".to_vec()).unwrap();
934        drop(table);
935    }
936
937    #[test]
938    fn set_owned_value_and_get_with_owned_and_borrowed() {
939        let mut table = SymbolTable::new();
940        // intern an owned value
941        let sym = table.intern(b"abc".to_vec()).unwrap();
942        // retrieve bytes
943        assert_eq!(&b"abc"[..], table.get(sym).unwrap());
944        // intern owned value again
945        assert_eq!(sym, table.intern(b"abc".to_vec()).unwrap());
946        // intern borrowed value
947        assert_eq!(sym, table.intern(&b"abc"[..]).unwrap());
948    }
949
950    #[test]
951    fn set_borrowed_value_and_get_with_owned_and_borrowed() {
952        let mut table = SymbolTable::new();
953        // intern a borrowed value
954        let sym = table.intern(&b"abc"[..]).unwrap();
955        // retrieve bytes
956        assert_eq!(&b"abc"[..], table.get(sym).unwrap());
957        // intern owned value
958        assert_eq!(sym, table.intern(b"abc".to_vec()).unwrap());
959        // intern borrowed value again
960        assert_eq!(sym, table.intern(&b"abc"[..]).unwrap());
961    }
962
963    quickcheck! {
964        fn intern_twice_symbol_equality(bytes: Vec<u8>) -> bool {
965            let mut table = SymbolTable::new();
966            let sym = table.intern(bytes.clone()).unwrap();
967            let sym_again = table.intern(bytes).unwrap();
968            sym == sym_again
969        }
970
971        fn intern_get_roundtrip(bytes: Vec<u8>) -> bool {
972            let mut table = SymbolTable::new();
973            let sym = table.intern(bytes.clone()).unwrap();
974            let retrieved_bytes = table.get(sym).unwrap();
975            bytes == retrieved_bytes
976        }
977
978        fn table_contains_sym(bytes: Vec<u8>) -> bool {
979            let mut table = SymbolTable::new();
980            let sym = table.intern(bytes).unwrap();
981            table.contains(sym)
982        }
983
984        fn table_does_not_contain_missing_symbol_ids(sym: u32) -> bool {
985            let table = SymbolTable::new();
986            !table.contains(sym.into())
987        }
988
989        fn empty_table_does_not_report_any_interned_byte_strings(bytes: Vec<u8>) -> bool {
990            let table = SymbolTable::new();
991            !table.is_interned(bytes.as_slice())
992        }
993
994        fn table_reports_interned_byte_strings_as_interned(bytes: Vec<u8>) -> bool {
995            let mut table = SymbolTable::new();
996            table.intern(bytes.clone()).unwrap();
997            table.is_interned(bytes.as_slice())
998        }
999    }
1000}