intaglio/
str.rs

1//! Intaglio interner for UTF-8 [`String`]s.
2
3use core::hash::BuildHasher;
4use core::iter::{FromIterator, FusedIterator, Zip};
5use core::marker::PhantomData;
6use core::mem::ManuallyDrop;
7use core::ops::Range;
8use core::slice;
9use std::borrow::Cow;
10use std::collections::hash_map::{HashMap, RandomState};
11
12use crate::internal::Interned;
13use crate::{Symbol, SymbolOverflowError, DEFAULT_SYMBOL_TABLE_CAPACITY};
14
15/// An iterator over all [`Symbol`]s in a [`SymbolTable`].
16///
17/// See the [`all_symbols`](SymbolTable::all_symbols) method in [`SymbolTable`].
18///
19/// # Usage
20///
21/// ```
22/// # use intaglio::SymbolTable;
23/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
24/// let mut table = SymbolTable::new();
25/// let sym = table.intern("abc")?;
26/// let all_symbols = table.all_symbols();
27/// assert_eq!(vec![sym], all_symbols.collect::<Vec<_>>());
28/// # Ok(())
29/// # }
30/// # example().unwrap();
31/// ```
32#[derive(Debug, Clone, Hash, PartialEq, Eq)]
33pub struct AllSymbols<'a> {
34    range: Range<usize>,
35    // Hold a shared reference to the underlying `SymbolTable` to ensure the
36    // table is not modified while we are iterating which would make the results
37    // not match the real state.
38    phantom: PhantomData<&'a SymbolTable>,
39}
40
41impl Iterator for AllSymbols<'_> {
42    type Item = Symbol;
43
44    fn next(&mut self) -> Option<Self::Item> {
45        let next = self.range.next()?;
46        debug_assert!(u32::try_from(next).is_ok());
47        Some((next as u32).into())
48    }
49
50    fn size_hint(&self) -> (usize, Option<usize>) {
51        self.range.size_hint()
52    }
53
54    fn count(self) -> usize {
55        self.range.count()
56    }
57
58    fn last(self) -> Option<Self::Item> {
59        let last = self.range.last()?;
60        debug_assert!(u32::try_from(last).is_ok());
61        Some((last as u32).into())
62    }
63
64    fn nth(&mut self, n: usize) -> Option<Self::Item> {
65        let nth = self.range.nth(n)?;
66        debug_assert!(u32::try_from(nth).is_ok());
67        Some((nth as u32).into())
68    }
69
70    fn collect<B: FromIterator<Self::Item>>(self) -> B {
71        self.range.map(|sym| Symbol::from(sym as u32)).collect()
72    }
73}
74
75impl DoubleEndedIterator for AllSymbols<'_> {
76    fn next_back(&mut self) -> Option<Self::Item> {
77        let next = self.range.next_back()?;
78        debug_assert!(u32::try_from(next).is_ok());
79        Some((next as u32).into())
80    }
81
82    fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
83        let nth = self.range.nth_back(n)?;
84        debug_assert!(u32::try_from(nth).is_ok());
85        Some((nth as u32).into())
86    }
87}
88
89impl FusedIterator for AllSymbols<'_> {}
90
91/// An iterator over all interned strings in a [`SymbolTable`].
92///
93/// See the [`strings`](SymbolTable::strings) method in [`SymbolTable`].
94///
95/// # Usage
96///
97/// ```
98/// # use intaglio::SymbolTable;
99/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
100/// let mut table = SymbolTable::new();
101/// let sym = table.intern("abc")?;
102/// let strings = table.strings();
103/// assert_eq!(vec!["abc"], strings.collect::<Vec<_>>());
104/// # Ok(())
105/// # }
106/// # example().unwrap();
107/// ```
108#[derive(Debug, Clone)]
109pub struct Strings<'a>(slice::Iter<'a, Interned<str>>);
110
111impl<'a> Iterator for Strings<'a> {
112    type Item = &'a str;
113
114    fn next(&mut self) -> Option<Self::Item> {
115        self.0.next().map(Interned::as_slice)
116    }
117
118    fn size_hint(&self) -> (usize, Option<usize>) {
119        self.0.size_hint()
120    }
121
122    fn count(self) -> usize {
123        self.0.count()
124    }
125
126    fn last(self) -> Option<Self::Item> {
127        self.0.last().map(Interned::as_slice)
128    }
129
130    fn nth(&mut self, n: usize) -> Option<Self::Item> {
131        self.0.nth(n).map(Interned::as_slice)
132    }
133
134    fn collect<B: FromIterator<Self::Item>>(self) -> B {
135        self.0.map(Interned::as_slice).collect()
136    }
137}
138
139impl DoubleEndedIterator for Strings<'_> {
140    fn next_back(&mut self) -> Option<Self::Item> {
141        self.0.next_back().map(Interned::as_slice)
142    }
143
144    fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
145        self.0.nth_back(n).map(Interned::as_slice)
146    }
147
148    fn rfold<B, F>(self, accum: B, f: F) -> B
149    where
150        F: FnMut(B, Self::Item) -> B,
151    {
152        self.0.map(Interned::as_slice).rfold(accum, f)
153    }
154}
155
156impl ExactSizeIterator for Strings<'_> {
157    fn len(&self) -> usize {
158        self.0.len()
159    }
160}
161
162impl FusedIterator for Strings<'_> {}
163
164/// An iterator over all symbols and interned strings in a [`SymbolTable`].
165///
166/// See the [`iter`](SymbolTable::iter) method in [`SymbolTable`].
167///
168/// # Usage
169///
170/// ```
171/// # use std::collections::HashMap;
172/// # use intaglio::{Symbol, SymbolTable};
173/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
174/// let mut table = SymbolTable::new();
175/// let sym = table.intern("abc")?;
176/// let iter = table.iter();
177/// let mut map = HashMap::new();
178/// map.insert(Symbol::new(0), "abc");
179/// assert_eq!(map, iter.collect::<HashMap<_, _>>());
180/// # Ok(())
181/// # }
182/// # example().unwrap();
183/// ```
184#[derive(Debug, Clone)]
185pub struct Iter<'a>(Zip<AllSymbols<'a>, Strings<'a>>);
186
187impl<'a> Iterator for Iter<'a> {
188    type Item = (Symbol, &'a str);
189
190    fn next(&mut self) -> Option<Self::Item> {
191        self.0.next()
192    }
193
194    fn size_hint(&self) -> (usize, Option<usize>) {
195        self.0.size_hint()
196    }
197
198    fn count(self) -> usize {
199        self.0.count()
200    }
201
202    fn last(self) -> Option<Self::Item> {
203        self.0.last()
204    }
205
206    fn nth(&mut self, n: usize) -> Option<Self::Item> {
207        self.0.nth(n)
208    }
209
210    fn collect<B: FromIterator<Self::Item>>(self) -> B {
211        self.0.collect()
212    }
213}
214
215impl FusedIterator for Iter<'_> {}
216
217impl<'a, S> IntoIterator for &'a SymbolTable<S> {
218    type Item = (Symbol, &'a str);
219    type IntoIter = Iter<'a>;
220
221    fn into_iter(self) -> Self::IntoIter {
222        self.iter()
223    }
224}
225
226/// UTF-8 string interner.
227///
228/// This symbol table is implemented by storing UTF-8 strings with a fast path
229/// for `&str` that are already `'static`.
230///
231/// See crate documentation for more.
232///
233/// # Usage
234///
235/// ```
236/// # use intaglio::SymbolTable;
237/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
238/// let mut table = SymbolTable::new();
239/// let sym = table.intern("abc")?;
240/// assert_eq!(sym, table.intern("abc".to_string())?);
241/// assert!(table.contains(sym));
242/// assert!(table.is_interned("abc"));
243/// # Ok(())
244/// # }
245/// # example().unwrap();
246/// ```
247#[derive(Default, Debug)]
248pub struct SymbolTable<S = RandomState> {
249    map: ManuallyDrop<HashMap<&'static str, Symbol, S>>,
250    vec: ManuallyDrop<Vec<Interned<str>>>,
251}
252
253impl<S> Drop for SymbolTable<S> {
254    fn drop(&mut self) {
255        // SAFETY: No mutable references to `SymbolTable` internal fields are
256        // given out, which means `ManuallyDrop::drop` can only be invoked in
257        // this `Drop::drop` impl. Interal fields are guaranteed to be
258        // initialized by `SymbolTable` constructors.
259        unsafe {
260            // `Interned` requires that the `'static` references it gives out
261            // are dropped before the owning buffer stored in the `Interned`.
262            ManuallyDrop::drop(&mut self.map);
263            ManuallyDrop::drop(&mut self.vec);
264        }
265    }
266}
267
268impl SymbolTable<RandomState> {
269    /// Constructs a new, empty `SymbolTable` with [default capacity].
270    ///
271    /// This function will always allocate. To construct a symbol table without
272    /// allocating, call [`SymbolTable::with_capacity(0)`].
273    ///
274    /// # Examples
275    ///
276    /// ```
277    /// # use intaglio::SymbolTable;
278    /// let table = SymbolTable::new();
279    /// assert_eq!(0, table.len());
280    /// assert!(table.capacity() >= 4096);
281    /// ```
282    ///
283    /// [default capacity]: DEFAULT_SYMBOL_TABLE_CAPACITY
284    /// [`SymbolTable::with_capacity(0)`]: Self::with_capacity
285    #[must_use]
286    pub fn new() -> Self {
287        Self::with_capacity(DEFAULT_SYMBOL_TABLE_CAPACITY)
288    }
289
290    /// Constructs a new, empty `SymbolTable` with the specified capacity.
291    ///
292    /// The symbol table will be able to hold at least `capacity` strings
293    /// without reallocating. If `capacity` is 0, the symbol table will not
294    /// allocate.
295    ///
296    /// # Examples
297    ///
298    /// ```
299    /// # use intaglio::SymbolTable;
300    /// let table = SymbolTable::with_capacity(10);
301    /// assert_eq!(0, table.len());
302    /// assert!(table.capacity() >= 10);
303    /// ```
304    #[must_use]
305    pub fn with_capacity(capacity: usize) -> Self {
306        let capacity = capacity.next_power_of_two();
307        Self {
308            map: ManuallyDrop::new(HashMap::with_capacity(capacity)),
309            vec: ManuallyDrop::new(Vec::with_capacity(capacity)),
310        }
311    }
312}
313
314impl<S> SymbolTable<S> {
315    /// Constructs a new, empty `SymbolTable` with
316    /// [default capacity](DEFAULT_SYMBOL_TABLE_CAPACITY) and the given hash
317    /// builder.
318    ///
319    /// # Examples
320    ///
321    /// ```
322    /// # use std::collections::hash_map::RandomState;
323    /// # use intaglio::SymbolTable;
324    /// let hash_builder = RandomState::new();
325    /// let table = SymbolTable::with_hasher(hash_builder);
326    /// assert_eq!(0, table.len());
327    /// assert!(table.capacity() >= 4096);
328    /// ```
329    pub fn with_hasher(hash_builder: S) -> Self {
330        Self::with_capacity_and_hasher(DEFAULT_SYMBOL_TABLE_CAPACITY, hash_builder)
331    }
332
333    /// Constructs a new, empty `SymbolTable` with the specified capacity and
334    /// the given hash builder.
335    ///
336    /// # Examples
337    ///
338    /// ```
339    /// # use std::collections::hash_map::RandomState;
340    /// # use intaglio::SymbolTable;
341    /// let hash_builder = RandomState::new();
342    /// let table = SymbolTable::with_capacity_and_hasher(10, hash_builder);
343    /// assert_eq!(0, table.len());
344    /// assert!(table.capacity() >= 10);
345    /// ```
346    pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self {
347        Self {
348            map: ManuallyDrop::new(HashMap::with_capacity_and_hasher(capacity, hash_builder)),
349            vec: ManuallyDrop::new(Vec::with_capacity(capacity)),
350        }
351    }
352
353    /// Returns the number of strings the table can hold without reallocating.
354    ///
355    /// # Examples
356    ///
357    /// ```
358    /// # use intaglio::SymbolTable;
359    /// let table = SymbolTable::with_capacity(10);
360    /// assert!(table.capacity() >= 10);
361    /// ```
362    pub fn capacity(&self) -> usize {
363        usize::min(self.vec.capacity(), self.map.capacity())
364    }
365
366    /// Returns the number of interned strings in the table.
367    ///
368    /// # Examples
369    ///
370    /// ```
371    /// # use intaglio::SymbolTable;
372    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
373    /// let mut table = SymbolTable::new();
374    /// assert_eq!(0, table.len());
375    ///
376    /// table.intern("abc")?;
377    /// // only uniquely interned strings grow the symbol table.
378    /// table.intern("abc")?;
379    /// table.intern("xyz")?;
380    /// assert_eq!(2, table.len());
381    /// # Ok(())
382    /// # }
383    /// # example().unwrap();
384    /// ```
385    pub fn len(&self) -> usize {
386        self.vec.len()
387    }
388
389    /// Returns `true` if the symbol table contains no interned strings.
390    ///
391    /// # Examples
392    ///
393    /// ```
394    /// # use intaglio::SymbolTable;
395    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
396    /// let mut table = SymbolTable::new();
397    /// assert!(table.is_empty());
398    ///
399    /// table.intern("abc")?;
400    /// assert!(!table.is_empty());
401    /// # Ok(())
402    /// # }
403    /// # example().unwrap();
404    /// ```
405    pub fn is_empty(&self) -> bool {
406        self.vec.is_empty()
407    }
408
409    /// Returns `true` if the symbol table contains the given symbol.
410    ///
411    /// # Examples
412    ///
413    /// ```
414    /// # use intaglio::{Symbol, SymbolTable};
415    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
416    /// let mut table = SymbolTable::new();
417    /// assert!(!table.contains(Symbol::new(0)));
418    ///
419    /// let sym = table.intern("abc")?;
420    /// assert!(table.contains(Symbol::new(0)));
421    /// assert!(table.contains(sym));
422    /// # Ok(())
423    /// # }
424    /// # example().unwrap();
425    /// ```
426    #[must_use]
427    pub fn contains(&self, id: Symbol) -> bool {
428        self.get(id).is_some()
429    }
430
431    /// Returns a reference to the string associated with the given symbol.
432    ///
433    /// If the given symbol does not exist in the underlying symbol table,
434    /// `None` is returned.
435    ///
436    /// The lifetime of the returned reference is bound to the symbol table.
437    ///
438    /// # Examples
439    ///
440    /// ```
441    /// # use intaglio::{Symbol, SymbolTable};
442    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
443    /// let mut table = SymbolTable::new();
444    /// assert!(table.get(Symbol::new(0)).is_none());
445    ///
446    /// let sym = table.intern("abc".to_string())?;
447    /// assert_eq!(Some("abc"), table.get(Symbol::new(0)));
448    /// assert_eq!(Some("abc"), table.get(sym));
449    /// # Ok(())
450    /// # }
451    /// # example().unwrap();
452    /// ```
453    #[must_use]
454    pub fn get(&self, id: Symbol) -> Option<&str> {
455        let bytes = self.vec.get(usize::from(id))?;
456        Some(bytes.as_slice())
457    }
458
459    /// Returns an iterator over all [`Symbol`]s and strings in the
460    /// [`SymbolTable`].
461    ///
462    /// # Examples
463    ///
464    /// ```
465    /// # use std::collections::HashMap;
466    /// # use intaglio::{Symbol, SymbolTable};
467    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
468    /// let mut table = SymbolTable::new();
469    /// table.intern("abc")?;
470    /// table.intern("xyz")?;
471    /// table.intern("123")?;
472    /// table.intern("789")?;
473    ///
474    /// let iter = table.iter();
475    /// let mut map = HashMap::new();
476    /// map.insert(Symbol::new(0), "abc");
477    /// map.insert(Symbol::new(1), "xyz");
478    /// map.insert(Symbol::new(2), "123");
479    /// map.insert(Symbol::new(3), "789");
480    /// assert_eq!(map, iter.collect::<HashMap<_, _>>());
481    /// # Ok(())
482    /// # }
483    /// # example().unwrap();
484    /// ```
485    ///
486    /// ```
487    /// # use intaglio::SymbolTable;
488    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
489    /// let mut table = SymbolTable::new();
490    /// table.intern("abc")?;
491    /// table.intern("xyz")?;
492    /// table.intern("123")?;
493    /// table.intern("789")?;
494    ///
495    /// let iter = table.iter();
496    /// assert_eq!(table.len(), iter.count());
497    /// # Ok(())
498    /// # }
499    /// # example().unwrap();
500    /// ```
501    pub fn iter(&self) -> Iter<'_> {
502        Iter(self.all_symbols().zip(self.strings()))
503    }
504
505    /// Returns an iterator over all [`Symbol`]s in the [`SymbolTable`].
506    ///
507    /// # Examples
508    ///
509    /// ```
510    /// # use intaglio::{Symbol, SymbolTable};
511    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
512    /// let mut table = SymbolTable::new();
513    /// table.intern("abc")?;
514    /// table.intern("xyz")?;
515    /// table.intern("123")?;
516    /// table.intern("789")?;
517    ///
518    /// let mut all_symbols = table.all_symbols();
519    /// assert_eq!(Some(Symbol::new(0)), all_symbols.next());
520    /// assert_eq!(Some(Symbol::new(1)), all_symbols.nth_back(2));
521    /// assert_eq!(None, all_symbols.next());
522    /// # Ok(())
523    /// # }
524    /// # example().unwrap();
525    /// ```
526    ///
527    /// ```
528    /// # use intaglio::SymbolTable;
529    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
530    /// let mut table = SymbolTable::new();
531    /// table.intern("abc")?;
532    /// table.intern("xyz")?;
533    /// table.intern("123")?;
534    /// table.intern("789")?;
535    ///
536    /// let all_symbols = table.all_symbols();
537    /// assert_eq!(table.len(), all_symbols.count());
538    /// # Ok(())
539    /// # }
540    /// # example().unwrap();
541    /// ```
542    pub fn all_symbols(&self) -> AllSymbols<'_> {
543        AllSymbols {
544            range: 0..self.len(),
545            phantom: PhantomData,
546        }
547    }
548
549    /// Returns an iterator over all strings in the [`SymbolTable`].
550    ///
551    /// # Examples
552    ///
553    /// ```
554    /// # use intaglio::SymbolTable;
555    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
556    /// let mut table = SymbolTable::new();
557    /// table.intern("abc")?;
558    /// table.intern("xyz")?;
559    /// table.intern("123")?;
560    /// table.intern("789")?;
561    ///
562    /// let mut strings = table.strings();
563    /// assert_eq!(Some("abc"), strings.next());
564    /// assert_eq!(Some("xyz"), strings.nth_back(2));
565    /// assert_eq!(None, strings.next());
566    /// # Ok(())
567    /// # }
568    /// # example().unwrap();
569    /// ```
570    ///
571    /// ```
572    /// # use intaglio::SymbolTable;
573    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
574    /// let mut table = SymbolTable::new();
575    /// table.intern("abc")?;
576    /// table.intern("xyz")?;
577    /// table.intern("123")?;
578    /// table.intern("789")?;
579    ///
580    /// let strings = table.strings();
581    /// assert_eq!(table.len(), strings.count());
582    /// # Ok(())
583    /// # }
584    /// # example().unwrap();
585    /// ```
586    pub fn strings(&self) -> Strings<'_> {
587        Strings(self.vec.iter())
588    }
589}
590
591impl<S> SymbolTable<S>
592where
593    S: BuildHasher,
594{
595    /// Intern a string for the lifetime of the symbol table.
596    ///
597    /// The returned `Symbol` allows retrieving of the underlying string.
598    /// Equal strings will be inserted into the symbol table exactly once.
599    ///
600    /// This function only allocates if the underlying symbol table has no
601    /// remaining capacity.
602    ///
603    /// # Errors
604    ///
605    /// If the symbol table would grow larger than `u32::MAX` interned
606    /// strings, the [`Symbol`] counter would overflow and a
607    /// [`SymbolOverflowError`] is returned.
608    ///
609    /// # Examples
610    ///
611    /// ```
612    /// # use intaglio::SymbolTable;
613    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
614    /// let mut table = SymbolTable::new();
615    /// let sym = table.intern("abc".to_string())?;
616    /// table.intern("xyz".to_string())?;
617    /// table.intern("123")?;
618    /// table.intern("789")?;
619    ///
620    /// assert_eq!(4, table.len());
621    /// assert_eq!(Some("abc"), table.get(sym));
622    /// # Ok(())
623    /// # }
624    /// # example().unwrap();
625    /// ```
626    pub fn intern<T>(&mut self, contents: T) -> Result<Symbol, SymbolOverflowError>
627    where
628        T: Into<Cow<'static, str>>,
629    {
630        let contents = contents.into();
631        if let Some(&id) = self.map.get(&*contents) {
632            return Ok(id);
633        }
634
635        // The `Interned::Owned` variant is derived from a `Box<T>`. When such
636        // a structure is moved or assigned, as it is below in the call to
637        // `self.vec.push`, the allocation is "retagged" in Miri/stacked borrows.
638        //
639        // Retagging an allocation pops all of the borrows derived from it off
640        // of the stack. This means we need to move the `Interned` into the
641        // `Vec` before calling `Interned::as_static_slice` to ensure the
642        // reference does not get invalidated by retagging.
643        //
644        // However, that alone may be insufficient as the `Interened` may be
645        // moved when the symbol table grows.
646        //
647        // The `SymbolTable` API prevents shared references to the `Interned`
648        // being invalidated by a retag by tying resolved symbol contents,
649        // `&'a T`, to `&'a SymbolTable`, which means the `SymbolTable` cannot
650        // grow, shrink, or otherwise reallocate/move contents while a reference
651        // to the `Interned`'s inner `T` is alive.
652        //
653        // To protect against future updates to stacked borrows or the unsafe
654        // code operational semantics, we can address this additional invariant
655        // with updated `Interned` internals which store the `Box<T>` in a raw
656        // pointer form, which allows moves to be treated as untyped copies.
657        //
658        // See:
659        //
660        // - <https://github.com/artichoke/intaglio/issues/235>
661        // - <https://github.com/artichoke/intaglio/pull/236>
662        let name = Interned::from(contents);
663        let id = Symbol::try_from(self.map.len())?;
664
665        // Move the `Interned` into the `Vec`, causing it to be retagged under
666        // stacked borrows, before taking any references to its inner `T`.
667        self.vec.push(name);
668        // Ensure we grow the map before we take any shared references to the
669        // inner `T`.
670        self.map.reserve(1);
671
672        // SAFETY: `self.vec` is non-empty because the preceding line of code
673        // pushed an entry into it.
674        let name = unsafe { self.vec.last().unwrap_unchecked() };
675
676        // SAFETY: This expression creates a reference with a `'static` lifetime
677        // from an owned and interned buffer, which is permissible because:
678        //
679        // - `Interned` is an internal implementation detail of `SymbolTable`.
680        // - `SymbolTable` never gives out `'static` references to underlying
681        //   contents.
682        // - All slice references given out by the `SymbolTable` have the same
683        //   lifetime as the `SymbolTable`.
684        // - The `map` field of `SymbolTable`, which contains the `'static`
685        //   references, is dropped before the owned buffers stored in this
686        //   `Interned`.
687        // - The shared reference may be derived from a `PinBox` which prevents
688        //   moves from retagging the underlying boxed `T` under stacked borrows.
689        // - The symbol table cannot grow, shrink, or otherwise move its contents
690        //   while this reference is alive.
691        let slice = unsafe { name.as_static_slice() };
692
693        self.map.insert(slice, id);
694
695        debug_assert_eq!(self.get(id), Some(slice));
696        debug_assert_eq!(self.intern(slice), Ok(id));
697
698        Ok(id)
699    }
700
701    /// Returns the `Symbol` identifier for `contents` if it has been interned
702    /// before, `None` otherwise.
703    ///
704    /// This method does not modify the symbol table.
705    ///
706    /// # Examples
707    ///
708    /// ```
709    /// # use intaglio::{SymbolTable, Symbol};
710    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
711    /// let mut table = SymbolTable::new();
712    /// assert!(!table.is_interned("abc"));
713    /// assert_eq!(None, table.check_interned("abc"));
714    ///
715    /// table.intern("abc".to_string())?;
716    /// assert!(table.is_interned("abc"));
717    /// assert_eq!(Some(Symbol::new(0)), table.check_interned("abc"));
718    /// # Ok(())
719    /// # }
720    /// # example().unwrap();
721    /// ```
722    #[must_use]
723    pub fn check_interned(&self, contents: &str) -> Option<Symbol> {
724        self.map.get(contents).copied()
725    }
726
727    /// Returns `true` if the given string has been interned before.
728    ///
729    /// This method does not modify the symbol table.
730    ///
731    /// # Examples
732    ///
733    /// ```
734    /// # use intaglio::{SymbolTable, Symbol};
735    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
736    /// let mut table = SymbolTable::new();
737    /// assert!(!table.is_interned("abc"));
738    /// assert_eq!(None, table.check_interned("abc"));
739    ///
740    /// table.intern("abc".to_string())?;
741    /// assert!(table.is_interned("abc"));
742    /// assert_eq!(Some(Symbol::new(0)), table.check_interned("abc"));
743    /// # Ok(())
744    /// # }
745    /// # example().unwrap();
746    /// ```
747    #[must_use]
748    pub fn is_interned(&self, contents: &str) -> bool {
749        self.map.contains_key(contents)
750    }
751
752    /// Reserves capacity for at least additional more elements to be inserted
753    /// in the given `SymbolTable`. The collection may reserve more space to
754    /// avoid frequent reallocations. After calling reserve, capacity will be
755    /// greater than or equal to `self.len() + additional`. Does nothing if
756    /// capacity is already sufficient.
757    ///
758    /// # Panics
759    ///
760    /// Panics if the new capacity overflows `usize`.
761    ///
762    /// # Examples
763    ///
764    /// ```
765    /// # use intaglio::SymbolTable;
766    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
767    /// let mut table = SymbolTable::with_capacity(1);
768    /// table.intern("abc")?;
769    /// table.reserve(10);
770    /// assert!(table.capacity() >= 11);
771    /// # Ok(())
772    /// # }
773    /// # example().unwrap();
774    /// ```
775    pub fn reserve(&mut self, additional: usize) {
776        self.map.reserve(additional);
777        self.vec.reserve(additional);
778    }
779
780    /// Shrinks the capacity of the symbol table as much as possible.
781    ///
782    /// It will drop down as close as possible to the length but the allocator
783    /// may still inform the symbol table that there is space for a few more
784    /// elements.
785    ///
786    /// # Examples
787    ///
788    /// ```
789    /// # use intaglio::SymbolTable;
790    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
791    /// let mut table = SymbolTable::with_capacity(10);
792    /// table.intern("abc")?;
793    /// table.intern("xyz")?;
794    /// table.intern("123")?;
795    /// table.shrink_to_fit();
796    /// assert!(table.capacity() >= 3);
797    /// # Ok(())
798    /// # }
799    /// # example().unwrap();
800    /// ```
801    pub fn shrink_to_fit(&mut self) {
802        self.map.shrink_to_fit();
803        self.vec.shrink_to_fit();
804    }
805
806    /// Shrinks the capacity of the symbol table with a lower bound.
807    ///
808    /// The capacity will remain at least as large as both the length and the
809    /// supplied value.
810    ///
811    /// If the current capacity is less than the lower limit, this is a no-op.
812    ///
813    /// # Examples
814    ///
815    /// ```
816    /// # use intaglio::SymbolTable;
817    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
818    /// let mut table = SymbolTable::with_capacity(10);
819    /// table.intern("abc")?;
820    /// table.intern("xyz")?;
821    /// table.intern("123")?;
822    /// table.shrink_to(5);
823    /// assert!(table.capacity() >= 5);
824    /// # Ok(())
825    /// # }
826    /// # example().unwrap();
827    /// ```
828    pub fn shrink_to(&mut self, min_capacity: usize) {
829        self.map.shrink_to(min_capacity);
830        self.vec.shrink_to(min_capacity);
831    }
832}
833
834#[cfg(test)]
835#[allow(clippy::needless_pass_by_value)]
836mod tests {
837    use quickcheck::quickcheck;
838
839    use super::SymbolTable;
840
841    #[test]
842    fn alloc_drop_new() {
843        let table = SymbolTable::new();
844        drop(table);
845    }
846
847    #[test]
848    fn alloc_drop_with_capacity() {
849        let table = SymbolTable::with_capacity(1 << 14);
850        drop(table);
851    }
852
853    #[test]
854    fn drop_with_true_static_data() {
855        let mut table = SymbolTable::new();
856        table.intern("1").unwrap();
857        table.intern("2").unwrap();
858        table.intern("3").unwrap();
859        table.intern("4").unwrap();
860        table.intern("5").unwrap();
861        drop(table);
862    }
863
864    #[test]
865    fn drop_with_owned_data() {
866        let mut table = SymbolTable::new();
867        table.intern("1".to_string()).unwrap();
868        table.intern("2".to_string()).unwrap();
869        table.intern("3".to_string()).unwrap();
870        table.intern("4".to_string()).unwrap();
871        table.intern("5".to_string()).unwrap();
872        drop(table);
873    }
874
875    #[test]
876    fn set_owned_value_and_get_with_owned_and_borrowed() {
877        let mut table = SymbolTable::new();
878        // intern an owned value
879        let sym = table.intern("abc".to_string()).unwrap();
880        // retrieve string
881        assert_eq!("abc", table.get(sym).unwrap());
882        // intern owned value again
883        assert_eq!(sym, table.intern("abc".to_string()).unwrap());
884        // intern borrowed value
885        assert_eq!(sym, table.intern("abc").unwrap());
886    }
887
888    #[test]
889    fn set_borrowed_value_and_get_with_owned_and_borrowed() {
890        let mut table = SymbolTable::new();
891        // intern a borrowed value
892        let sym = table.intern("abc").unwrap();
893        // retrieve string
894        assert_eq!("abc", table.get(sym).unwrap());
895        // intern owned value
896        assert_eq!(sym, table.intern("abc".to_string()).unwrap());
897        // intern borrowed value again
898        assert_eq!(sym, table.intern("abc").unwrap());
899    }
900
901    quickcheck! {
902        fn intern_twice_symbol_equality(string: String) -> bool {
903            let mut table = SymbolTable::new();
904            let sym = table.intern(string.clone()).unwrap();
905            let sym_again = table.intern(string).unwrap();
906            sym == sym_again
907        }
908
909        fn intern_get_roundtrip(string: String) -> bool {
910            let mut table = SymbolTable::new();
911            let sym = table.intern(string.clone()).unwrap();
912            let retrieved_str = table.get(sym).unwrap();
913            string == retrieved_str
914        }
915
916        fn table_contains_sym(string: String) -> bool {
917            let mut table = SymbolTable::new();
918            let sym = table.intern(string).unwrap();
919            table.contains(sym)
920        }
921
922        fn table_does_not_contain_missing_symbol_ids(sym: u32) -> bool {
923            let table = SymbolTable::new();
924            !table.contains(sym.into())
925        }
926
927        fn empty_table_does_not_report_any_interned_strings(string: String) -> bool {
928            let table = SymbolTable::new();
929            !table.is_interned(string.as_str())
930        }
931
932        fn table_reports_interned_strings_as_interned(string: String) -> bool {
933            let mut table = SymbolTable::new();
934            table.intern(string.clone()).unwrap();
935            table.is_interned(string.as_str())
936        }
937    }
938}