intaglio/str.rs
1//! Intaglio interner for UTF-8 [`String`]s.
2
3use core::hash::BuildHasher;
4use core::iter::{FromIterator, FusedIterator, Zip};
5use core::marker::PhantomData;
6use core::mem::ManuallyDrop;
7use core::ops::Range;
8use core::slice;
9use std::borrow::Cow;
10use std::collections::hash_map::{HashMap, RandomState};
11
12use crate::internal::Interned;
13use crate::{Symbol, SymbolOverflowError, DEFAULT_SYMBOL_TABLE_CAPACITY};
14
15/// An iterator over all [`Symbol`]s in a [`SymbolTable`].
16///
17/// See the [`all_symbols`](SymbolTable::all_symbols) method in [`SymbolTable`].
18///
19/// # Usage
20///
21/// ```
22/// # use intaglio::SymbolTable;
23/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
24/// let mut table = SymbolTable::new();
25/// let sym = table.intern("abc")?;
26/// let all_symbols = table.all_symbols();
27/// assert_eq!(vec![sym], all_symbols.collect::<Vec<_>>());
28/// # Ok(())
29/// # }
30/// # example().unwrap();
31/// ```
32#[derive(Debug, Clone, Hash, PartialEq, Eq)]
33pub struct AllSymbols<'a> {
34 range: Range<usize>,
35 // Hold a shared reference to the underlying `SymbolTable` to ensure the
36 // table is not modified while we are iterating which would make the results
37 // not match the real state.
38 phantom: PhantomData<&'a SymbolTable>,
39}
40
41impl Iterator for AllSymbols<'_> {
42 type Item = Symbol;
43
44 fn next(&mut self) -> Option<Self::Item> {
45 let next = self.range.next()?;
46 debug_assert!(u32::try_from(next).is_ok());
47 Some((next as u32).into())
48 }
49
50 fn size_hint(&self) -> (usize, Option<usize>) {
51 self.range.size_hint()
52 }
53
54 fn count(self) -> usize {
55 self.range.count()
56 }
57
58 fn last(self) -> Option<Self::Item> {
59 let last = self.range.last()?;
60 debug_assert!(u32::try_from(last).is_ok());
61 Some((last as u32).into())
62 }
63
64 fn nth(&mut self, n: usize) -> Option<Self::Item> {
65 let nth = self.range.nth(n)?;
66 debug_assert!(u32::try_from(nth).is_ok());
67 Some((nth as u32).into())
68 }
69
70 fn collect<B: FromIterator<Self::Item>>(self) -> B {
71 self.range.map(|sym| Symbol::from(sym as u32)).collect()
72 }
73}
74
75impl DoubleEndedIterator for AllSymbols<'_> {
76 fn next_back(&mut self) -> Option<Self::Item> {
77 let next = self.range.next_back()?;
78 debug_assert!(u32::try_from(next).is_ok());
79 Some((next as u32).into())
80 }
81
82 fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
83 let nth = self.range.nth_back(n)?;
84 debug_assert!(u32::try_from(nth).is_ok());
85 Some((nth as u32).into())
86 }
87}
88
89impl FusedIterator for AllSymbols<'_> {}
90
91/// An iterator over all interned strings in a [`SymbolTable`].
92///
93/// See the [`strings`](SymbolTable::strings) method in [`SymbolTable`].
94///
95/// # Usage
96///
97/// ```
98/// # use intaglio::SymbolTable;
99/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
100/// let mut table = SymbolTable::new();
101/// let sym = table.intern("abc")?;
102/// let strings = table.strings();
103/// assert_eq!(vec!["abc"], strings.collect::<Vec<_>>());
104/// # Ok(())
105/// # }
106/// # example().unwrap();
107/// ```
108#[derive(Debug, Clone)]
109pub struct Strings<'a>(slice::Iter<'a, Interned<str>>);
110
111impl<'a> Iterator for Strings<'a> {
112 type Item = &'a str;
113
114 fn next(&mut self) -> Option<Self::Item> {
115 self.0.next().map(Interned::as_slice)
116 }
117
118 fn size_hint(&self) -> (usize, Option<usize>) {
119 self.0.size_hint()
120 }
121
122 fn count(self) -> usize {
123 self.0.count()
124 }
125
126 fn last(self) -> Option<Self::Item> {
127 self.0.last().map(Interned::as_slice)
128 }
129
130 fn nth(&mut self, n: usize) -> Option<Self::Item> {
131 self.0.nth(n).map(Interned::as_slice)
132 }
133
134 fn collect<B: FromIterator<Self::Item>>(self) -> B {
135 self.0.map(Interned::as_slice).collect()
136 }
137}
138
139impl DoubleEndedIterator for Strings<'_> {
140 fn next_back(&mut self) -> Option<Self::Item> {
141 self.0.next_back().map(Interned::as_slice)
142 }
143
144 fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
145 self.0.nth_back(n).map(Interned::as_slice)
146 }
147
148 fn rfold<B, F>(self, accum: B, f: F) -> B
149 where
150 F: FnMut(B, Self::Item) -> B,
151 {
152 self.0.map(Interned::as_slice).rfold(accum, f)
153 }
154}
155
156impl ExactSizeIterator for Strings<'_> {
157 fn len(&self) -> usize {
158 self.0.len()
159 }
160}
161
162impl FusedIterator for Strings<'_> {}
163
164/// An iterator over all symbols and interned strings in a [`SymbolTable`].
165///
166/// See the [`iter`](SymbolTable::iter) method in [`SymbolTable`].
167///
168/// # Usage
169///
170/// ```
171/// # use std::collections::HashMap;
172/// # use intaglio::{Symbol, SymbolTable};
173/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
174/// let mut table = SymbolTable::new();
175/// let sym = table.intern("abc")?;
176/// let iter = table.iter();
177/// let mut map = HashMap::new();
178/// map.insert(Symbol::new(0), "abc");
179/// assert_eq!(map, iter.collect::<HashMap<_, _>>());
180/// # Ok(())
181/// # }
182/// # example().unwrap();
183/// ```
184#[derive(Debug, Clone)]
185pub struct Iter<'a>(Zip<AllSymbols<'a>, Strings<'a>>);
186
187impl<'a> Iterator for Iter<'a> {
188 type Item = (Symbol, &'a str);
189
190 fn next(&mut self) -> Option<Self::Item> {
191 self.0.next()
192 }
193
194 fn size_hint(&self) -> (usize, Option<usize>) {
195 self.0.size_hint()
196 }
197
198 fn count(self) -> usize {
199 self.0.count()
200 }
201
202 fn last(self) -> Option<Self::Item> {
203 self.0.last()
204 }
205
206 fn nth(&mut self, n: usize) -> Option<Self::Item> {
207 self.0.nth(n)
208 }
209
210 fn collect<B: FromIterator<Self::Item>>(self) -> B {
211 self.0.collect()
212 }
213}
214
215impl FusedIterator for Iter<'_> {}
216
217impl<'a, S> IntoIterator for &'a SymbolTable<S> {
218 type Item = (Symbol, &'a str);
219 type IntoIter = Iter<'a>;
220
221 fn into_iter(self) -> Self::IntoIter {
222 self.iter()
223 }
224}
225
226/// UTF-8 string interner.
227///
228/// This symbol table is implemented by storing UTF-8 strings with a fast path
229/// for `&str` that are already `'static`.
230///
231/// See crate documentation for more.
232///
233/// # Usage
234///
235/// ```
236/// # use intaglio::SymbolTable;
237/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
238/// let mut table = SymbolTable::new();
239/// let sym = table.intern("abc")?;
240/// assert_eq!(sym, table.intern("abc".to_string())?);
241/// assert!(table.contains(sym));
242/// assert!(table.is_interned("abc"));
243/// # Ok(())
244/// # }
245/// # example().unwrap();
246/// ```
247#[derive(Default, Debug)]
248pub struct SymbolTable<S = RandomState> {
249 map: ManuallyDrop<HashMap<&'static str, Symbol, S>>,
250 vec: ManuallyDrop<Vec<Interned<str>>>,
251}
252
253impl<S> Drop for SymbolTable<S> {
254 fn drop(&mut self) {
255 // SAFETY: No mutable references to `SymbolTable` internal fields are
256 // given out, which means `ManuallyDrop::drop` can only be invoked in
257 // this `Drop::drop` impl. Interal fields are guaranteed to be
258 // initialized by `SymbolTable` constructors.
259 unsafe {
260 // `Interned` requires that the `'static` references it gives out
261 // are dropped before the owning buffer stored in the `Interned`.
262 ManuallyDrop::drop(&mut self.map);
263 ManuallyDrop::drop(&mut self.vec);
264 }
265 }
266}
267
268impl SymbolTable<RandomState> {
269 /// Constructs a new, empty `SymbolTable` with [default capacity].
270 ///
271 /// This function will always allocate. To construct a symbol table without
272 /// allocating, call [`SymbolTable::with_capacity(0)`].
273 ///
274 /// # Examples
275 ///
276 /// ```
277 /// # use intaglio::SymbolTable;
278 /// let table = SymbolTable::new();
279 /// assert_eq!(0, table.len());
280 /// assert!(table.capacity() >= 4096);
281 /// ```
282 ///
283 /// [default capacity]: DEFAULT_SYMBOL_TABLE_CAPACITY
284 /// [`SymbolTable::with_capacity(0)`]: Self::with_capacity
285 #[must_use]
286 pub fn new() -> Self {
287 Self::with_capacity(DEFAULT_SYMBOL_TABLE_CAPACITY)
288 }
289
290 /// Constructs a new, empty `SymbolTable` with the specified capacity.
291 ///
292 /// The symbol table will be able to hold at least `capacity` strings
293 /// without reallocating. If `capacity` is 0, the symbol table will not
294 /// allocate.
295 ///
296 /// # Examples
297 ///
298 /// ```
299 /// # use intaglio::SymbolTable;
300 /// let table = SymbolTable::with_capacity(10);
301 /// assert_eq!(0, table.len());
302 /// assert!(table.capacity() >= 10);
303 /// ```
304 #[must_use]
305 pub fn with_capacity(capacity: usize) -> Self {
306 let capacity = capacity.next_power_of_two();
307 Self {
308 map: ManuallyDrop::new(HashMap::with_capacity(capacity)),
309 vec: ManuallyDrop::new(Vec::with_capacity(capacity)),
310 }
311 }
312}
313
314impl<S> SymbolTable<S> {
315 /// Constructs a new, empty `SymbolTable` with
316 /// [default capacity](DEFAULT_SYMBOL_TABLE_CAPACITY) and the given hash
317 /// builder.
318 ///
319 /// # Examples
320 ///
321 /// ```
322 /// # use std::collections::hash_map::RandomState;
323 /// # use intaglio::SymbolTable;
324 /// let hash_builder = RandomState::new();
325 /// let table = SymbolTable::with_hasher(hash_builder);
326 /// assert_eq!(0, table.len());
327 /// assert!(table.capacity() >= 4096);
328 /// ```
329 pub fn with_hasher(hash_builder: S) -> Self {
330 Self::with_capacity_and_hasher(DEFAULT_SYMBOL_TABLE_CAPACITY, hash_builder)
331 }
332
333 /// Constructs a new, empty `SymbolTable` with the specified capacity and
334 /// the given hash builder.
335 ///
336 /// # Examples
337 ///
338 /// ```
339 /// # use std::collections::hash_map::RandomState;
340 /// # use intaglio::SymbolTable;
341 /// let hash_builder = RandomState::new();
342 /// let table = SymbolTable::with_capacity_and_hasher(10, hash_builder);
343 /// assert_eq!(0, table.len());
344 /// assert!(table.capacity() >= 10);
345 /// ```
346 pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self {
347 Self {
348 map: ManuallyDrop::new(HashMap::with_capacity_and_hasher(capacity, hash_builder)),
349 vec: ManuallyDrop::new(Vec::with_capacity(capacity)),
350 }
351 }
352
353 /// Returns the number of strings the table can hold without reallocating.
354 ///
355 /// # Examples
356 ///
357 /// ```
358 /// # use intaglio::SymbolTable;
359 /// let table = SymbolTable::with_capacity(10);
360 /// assert!(table.capacity() >= 10);
361 /// ```
362 pub fn capacity(&self) -> usize {
363 usize::min(self.vec.capacity(), self.map.capacity())
364 }
365
366 /// Returns the number of interned strings in the table.
367 ///
368 /// # Examples
369 ///
370 /// ```
371 /// # use intaglio::SymbolTable;
372 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
373 /// let mut table = SymbolTable::new();
374 /// assert_eq!(0, table.len());
375 ///
376 /// table.intern("abc")?;
377 /// // only uniquely interned strings grow the symbol table.
378 /// table.intern("abc")?;
379 /// table.intern("xyz")?;
380 /// assert_eq!(2, table.len());
381 /// # Ok(())
382 /// # }
383 /// # example().unwrap();
384 /// ```
385 pub fn len(&self) -> usize {
386 self.vec.len()
387 }
388
389 /// Returns `true` if the symbol table contains no interned strings.
390 ///
391 /// # Examples
392 ///
393 /// ```
394 /// # use intaglio::SymbolTable;
395 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
396 /// let mut table = SymbolTable::new();
397 /// assert!(table.is_empty());
398 ///
399 /// table.intern("abc")?;
400 /// assert!(!table.is_empty());
401 /// # Ok(())
402 /// # }
403 /// # example().unwrap();
404 /// ```
405 pub fn is_empty(&self) -> bool {
406 self.vec.is_empty()
407 }
408
409 /// Returns `true` if the symbol table contains the given symbol.
410 ///
411 /// # Examples
412 ///
413 /// ```
414 /// # use intaglio::{Symbol, SymbolTable};
415 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
416 /// let mut table = SymbolTable::new();
417 /// assert!(!table.contains(Symbol::new(0)));
418 ///
419 /// let sym = table.intern("abc")?;
420 /// assert!(table.contains(Symbol::new(0)));
421 /// assert!(table.contains(sym));
422 /// # Ok(())
423 /// # }
424 /// # example().unwrap();
425 /// ```
426 #[must_use]
427 pub fn contains(&self, id: Symbol) -> bool {
428 self.get(id).is_some()
429 }
430
431 /// Returns a reference to the string associated with the given symbol.
432 ///
433 /// If the given symbol does not exist in the underlying symbol table,
434 /// `None` is returned.
435 ///
436 /// The lifetime of the returned reference is bound to the symbol table.
437 ///
438 /// # Examples
439 ///
440 /// ```
441 /// # use intaglio::{Symbol, SymbolTable};
442 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
443 /// let mut table = SymbolTable::new();
444 /// assert!(table.get(Symbol::new(0)).is_none());
445 ///
446 /// let sym = table.intern("abc".to_string())?;
447 /// assert_eq!(Some("abc"), table.get(Symbol::new(0)));
448 /// assert_eq!(Some("abc"), table.get(sym));
449 /// # Ok(())
450 /// # }
451 /// # example().unwrap();
452 /// ```
453 #[must_use]
454 pub fn get(&self, id: Symbol) -> Option<&str> {
455 let bytes = self.vec.get(usize::from(id))?;
456 Some(bytes.as_slice())
457 }
458
459 /// Returns an iterator over all [`Symbol`]s and strings in the
460 /// [`SymbolTable`].
461 ///
462 /// # Examples
463 ///
464 /// ```
465 /// # use std::collections::HashMap;
466 /// # use intaglio::{Symbol, SymbolTable};
467 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
468 /// let mut table = SymbolTable::new();
469 /// table.intern("abc")?;
470 /// table.intern("xyz")?;
471 /// table.intern("123")?;
472 /// table.intern("789")?;
473 ///
474 /// let iter = table.iter();
475 /// let mut map = HashMap::new();
476 /// map.insert(Symbol::new(0), "abc");
477 /// map.insert(Symbol::new(1), "xyz");
478 /// map.insert(Symbol::new(2), "123");
479 /// map.insert(Symbol::new(3), "789");
480 /// assert_eq!(map, iter.collect::<HashMap<_, _>>());
481 /// # Ok(())
482 /// # }
483 /// # example().unwrap();
484 /// ```
485 ///
486 /// ```
487 /// # use intaglio::SymbolTable;
488 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
489 /// let mut table = SymbolTable::new();
490 /// table.intern("abc")?;
491 /// table.intern("xyz")?;
492 /// table.intern("123")?;
493 /// table.intern("789")?;
494 ///
495 /// let iter = table.iter();
496 /// assert_eq!(table.len(), iter.count());
497 /// # Ok(())
498 /// # }
499 /// # example().unwrap();
500 /// ```
501 pub fn iter(&self) -> Iter<'_> {
502 Iter(self.all_symbols().zip(self.strings()))
503 }
504
505 /// Returns an iterator over all [`Symbol`]s in the [`SymbolTable`].
506 ///
507 /// # Examples
508 ///
509 /// ```
510 /// # use intaglio::{Symbol, SymbolTable};
511 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
512 /// let mut table = SymbolTable::new();
513 /// table.intern("abc")?;
514 /// table.intern("xyz")?;
515 /// table.intern("123")?;
516 /// table.intern("789")?;
517 ///
518 /// let mut all_symbols = table.all_symbols();
519 /// assert_eq!(Some(Symbol::new(0)), all_symbols.next());
520 /// assert_eq!(Some(Symbol::new(1)), all_symbols.nth_back(2));
521 /// assert_eq!(None, all_symbols.next());
522 /// # Ok(())
523 /// # }
524 /// # example().unwrap();
525 /// ```
526 ///
527 /// ```
528 /// # use intaglio::SymbolTable;
529 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
530 /// let mut table = SymbolTable::new();
531 /// table.intern("abc")?;
532 /// table.intern("xyz")?;
533 /// table.intern("123")?;
534 /// table.intern("789")?;
535 ///
536 /// let all_symbols = table.all_symbols();
537 /// assert_eq!(table.len(), all_symbols.count());
538 /// # Ok(())
539 /// # }
540 /// # example().unwrap();
541 /// ```
542 pub fn all_symbols(&self) -> AllSymbols<'_> {
543 AllSymbols {
544 range: 0..self.len(),
545 phantom: PhantomData,
546 }
547 }
548
549 /// Returns an iterator over all strings in the [`SymbolTable`].
550 ///
551 /// # Examples
552 ///
553 /// ```
554 /// # use intaglio::SymbolTable;
555 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
556 /// let mut table = SymbolTable::new();
557 /// table.intern("abc")?;
558 /// table.intern("xyz")?;
559 /// table.intern("123")?;
560 /// table.intern("789")?;
561 ///
562 /// let mut strings = table.strings();
563 /// assert_eq!(Some("abc"), strings.next());
564 /// assert_eq!(Some("xyz"), strings.nth_back(2));
565 /// assert_eq!(None, strings.next());
566 /// # Ok(())
567 /// # }
568 /// # example().unwrap();
569 /// ```
570 ///
571 /// ```
572 /// # use intaglio::SymbolTable;
573 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
574 /// let mut table = SymbolTable::new();
575 /// table.intern("abc")?;
576 /// table.intern("xyz")?;
577 /// table.intern("123")?;
578 /// table.intern("789")?;
579 ///
580 /// let strings = table.strings();
581 /// assert_eq!(table.len(), strings.count());
582 /// # Ok(())
583 /// # }
584 /// # example().unwrap();
585 /// ```
586 pub fn strings(&self) -> Strings<'_> {
587 Strings(self.vec.iter())
588 }
589}
590
591impl<S> SymbolTable<S>
592where
593 S: BuildHasher,
594{
595 /// Intern a string for the lifetime of the symbol table.
596 ///
597 /// The returned `Symbol` allows retrieving of the underlying string.
598 /// Equal strings will be inserted into the symbol table exactly once.
599 ///
600 /// This function only allocates if the underlying symbol table has no
601 /// remaining capacity.
602 ///
603 /// # Errors
604 ///
605 /// If the symbol table would grow larger than `u32::MAX` interned
606 /// strings, the [`Symbol`] counter would overflow and a
607 /// [`SymbolOverflowError`] is returned.
608 ///
609 /// # Examples
610 ///
611 /// ```
612 /// # use intaglio::SymbolTable;
613 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
614 /// let mut table = SymbolTable::new();
615 /// let sym = table.intern("abc".to_string())?;
616 /// table.intern("xyz".to_string())?;
617 /// table.intern("123")?;
618 /// table.intern("789")?;
619 ///
620 /// assert_eq!(4, table.len());
621 /// assert_eq!(Some("abc"), table.get(sym));
622 /// # Ok(())
623 /// # }
624 /// # example().unwrap();
625 /// ```
626 pub fn intern<T>(&mut self, contents: T) -> Result<Symbol, SymbolOverflowError>
627 where
628 T: Into<Cow<'static, str>>,
629 {
630 let contents = contents.into();
631 if let Some(&id) = self.map.get(&*contents) {
632 return Ok(id);
633 }
634
635 // The `Interned::Owned` variant is derived from a `Box<T>`. When such
636 // a structure is moved or assigned, as it is below in the call to
637 // `self.vec.push`, the allocation is "retagged" in Miri/stacked borrows.
638 //
639 // Retagging an allocation pops all of the borrows derived from it off
640 // of the stack. This means we need to move the `Interned` into the
641 // `Vec` before calling `Interned::as_static_slice` to ensure the
642 // reference does not get invalidated by retagging.
643 //
644 // However, that alone may be insufficient as the `Interened` may be
645 // moved when the symbol table grows.
646 //
647 // The `SymbolTable` API prevents shared references to the `Interned`
648 // being invalidated by a retag by tying resolved symbol contents,
649 // `&'a T`, to `&'a SymbolTable`, which means the `SymbolTable` cannot
650 // grow, shrink, or otherwise reallocate/move contents while a reference
651 // to the `Interned`'s inner `T` is alive.
652 //
653 // To protect against future updates to stacked borrows or the unsafe
654 // code operational semantics, we can address this additional invariant
655 // with updated `Interned` internals which store the `Box<T>` in a raw
656 // pointer form, which allows moves to be treated as untyped copies.
657 //
658 // See:
659 //
660 // - <https://github.com/artichoke/intaglio/issues/235>
661 // - <https://github.com/artichoke/intaglio/pull/236>
662 let name = Interned::from(contents);
663 let id = Symbol::try_from(self.map.len())?;
664
665 // Move the `Interned` into the `Vec`, causing it to be retagged under
666 // stacked borrows, before taking any references to its inner `T`.
667 self.vec.push(name);
668 // Ensure we grow the map before we take any shared references to the
669 // inner `T`.
670 self.map.reserve(1);
671
672 // SAFETY: `self.vec` is non-empty because the preceding line of code
673 // pushed an entry into it.
674 let name = unsafe { self.vec.last().unwrap_unchecked() };
675
676 // SAFETY: This expression creates a reference with a `'static` lifetime
677 // from an owned and interned buffer, which is permissible because:
678 //
679 // - `Interned` is an internal implementation detail of `SymbolTable`.
680 // - `SymbolTable` never gives out `'static` references to underlying
681 // contents.
682 // - All slice references given out by the `SymbolTable` have the same
683 // lifetime as the `SymbolTable`.
684 // - The `map` field of `SymbolTable`, which contains the `'static`
685 // references, is dropped before the owned buffers stored in this
686 // `Interned`.
687 // - The shared reference may be derived from a `PinBox` which prevents
688 // moves from retagging the underlying boxed `T` under stacked borrows.
689 // - The symbol table cannot grow, shrink, or otherwise move its contents
690 // while this reference is alive.
691 let slice = unsafe { name.as_static_slice() };
692
693 self.map.insert(slice, id);
694
695 debug_assert_eq!(self.get(id), Some(slice));
696 debug_assert_eq!(self.intern(slice), Ok(id));
697
698 Ok(id)
699 }
700
701 /// Returns the `Symbol` identifier for `contents` if it has been interned
702 /// before, `None` otherwise.
703 ///
704 /// This method does not modify the symbol table.
705 ///
706 /// # Examples
707 ///
708 /// ```
709 /// # use intaglio::{SymbolTable, Symbol};
710 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
711 /// let mut table = SymbolTable::new();
712 /// assert!(!table.is_interned("abc"));
713 /// assert_eq!(None, table.check_interned("abc"));
714 ///
715 /// table.intern("abc".to_string())?;
716 /// assert!(table.is_interned("abc"));
717 /// assert_eq!(Some(Symbol::new(0)), table.check_interned("abc"));
718 /// # Ok(())
719 /// # }
720 /// # example().unwrap();
721 /// ```
722 #[must_use]
723 pub fn check_interned(&self, contents: &str) -> Option<Symbol> {
724 self.map.get(contents).copied()
725 }
726
727 /// Returns `true` if the given string has been interned before.
728 ///
729 /// This method does not modify the symbol table.
730 ///
731 /// # Examples
732 ///
733 /// ```
734 /// # use intaglio::{SymbolTable, Symbol};
735 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
736 /// let mut table = SymbolTable::new();
737 /// assert!(!table.is_interned("abc"));
738 /// assert_eq!(None, table.check_interned("abc"));
739 ///
740 /// table.intern("abc".to_string())?;
741 /// assert!(table.is_interned("abc"));
742 /// assert_eq!(Some(Symbol::new(0)), table.check_interned("abc"));
743 /// # Ok(())
744 /// # }
745 /// # example().unwrap();
746 /// ```
747 #[must_use]
748 pub fn is_interned(&self, contents: &str) -> bool {
749 self.map.contains_key(contents)
750 }
751
752 /// Reserves capacity for at least additional more elements to be inserted
753 /// in the given `SymbolTable`. The collection may reserve more space to
754 /// avoid frequent reallocations. After calling reserve, capacity will be
755 /// greater than or equal to `self.len() + additional`. Does nothing if
756 /// capacity is already sufficient.
757 ///
758 /// # Panics
759 ///
760 /// Panics if the new capacity overflows `usize`.
761 ///
762 /// # Examples
763 ///
764 /// ```
765 /// # use intaglio::SymbolTable;
766 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
767 /// let mut table = SymbolTable::with_capacity(1);
768 /// table.intern("abc")?;
769 /// table.reserve(10);
770 /// assert!(table.capacity() >= 11);
771 /// # Ok(())
772 /// # }
773 /// # example().unwrap();
774 /// ```
775 pub fn reserve(&mut self, additional: usize) {
776 self.map.reserve(additional);
777 self.vec.reserve(additional);
778 }
779
780 /// Shrinks the capacity of the symbol table as much as possible.
781 ///
782 /// It will drop down as close as possible to the length but the allocator
783 /// may still inform the symbol table that there is space for a few more
784 /// elements.
785 ///
786 /// # Examples
787 ///
788 /// ```
789 /// # use intaglio::SymbolTable;
790 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
791 /// let mut table = SymbolTable::with_capacity(10);
792 /// table.intern("abc")?;
793 /// table.intern("xyz")?;
794 /// table.intern("123")?;
795 /// table.shrink_to_fit();
796 /// assert!(table.capacity() >= 3);
797 /// # Ok(())
798 /// # }
799 /// # example().unwrap();
800 /// ```
801 pub fn shrink_to_fit(&mut self) {
802 self.map.shrink_to_fit();
803 self.vec.shrink_to_fit();
804 }
805
806 /// Shrinks the capacity of the symbol table with a lower bound.
807 ///
808 /// The capacity will remain at least as large as both the length and the
809 /// supplied value.
810 ///
811 /// If the current capacity is less than the lower limit, this is a no-op.
812 ///
813 /// # Examples
814 ///
815 /// ```
816 /// # use intaglio::SymbolTable;
817 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
818 /// let mut table = SymbolTable::with_capacity(10);
819 /// table.intern("abc")?;
820 /// table.intern("xyz")?;
821 /// table.intern("123")?;
822 /// table.shrink_to(5);
823 /// assert!(table.capacity() >= 5);
824 /// # Ok(())
825 /// # }
826 /// # example().unwrap();
827 /// ```
828 pub fn shrink_to(&mut self, min_capacity: usize) {
829 self.map.shrink_to(min_capacity);
830 self.vec.shrink_to(min_capacity);
831 }
832}
833
834#[cfg(test)]
835#[allow(clippy::needless_pass_by_value)]
836mod tests {
837 use quickcheck::quickcheck;
838
839 use super::SymbolTable;
840
841 #[test]
842 fn alloc_drop_new() {
843 let table = SymbolTable::new();
844 drop(table);
845 }
846
847 #[test]
848 fn alloc_drop_with_capacity() {
849 let table = SymbolTable::with_capacity(1 << 14);
850 drop(table);
851 }
852
853 #[test]
854 fn drop_with_true_static_data() {
855 let mut table = SymbolTable::new();
856 table.intern("1").unwrap();
857 table.intern("2").unwrap();
858 table.intern("3").unwrap();
859 table.intern("4").unwrap();
860 table.intern("5").unwrap();
861 drop(table);
862 }
863
864 #[test]
865 fn drop_with_owned_data() {
866 let mut table = SymbolTable::new();
867 table.intern("1".to_string()).unwrap();
868 table.intern("2".to_string()).unwrap();
869 table.intern("3".to_string()).unwrap();
870 table.intern("4".to_string()).unwrap();
871 table.intern("5".to_string()).unwrap();
872 drop(table);
873 }
874
875 #[test]
876 fn set_owned_value_and_get_with_owned_and_borrowed() {
877 let mut table = SymbolTable::new();
878 // intern an owned value
879 let sym = table.intern("abc".to_string()).unwrap();
880 // retrieve string
881 assert_eq!("abc", table.get(sym).unwrap());
882 // intern owned value again
883 assert_eq!(sym, table.intern("abc".to_string()).unwrap());
884 // intern borrowed value
885 assert_eq!(sym, table.intern("abc").unwrap());
886 }
887
888 #[test]
889 fn set_borrowed_value_and_get_with_owned_and_borrowed() {
890 let mut table = SymbolTable::new();
891 // intern a borrowed value
892 let sym = table.intern("abc").unwrap();
893 // retrieve string
894 assert_eq!("abc", table.get(sym).unwrap());
895 // intern owned value
896 assert_eq!(sym, table.intern("abc".to_string()).unwrap());
897 // intern borrowed value again
898 assert_eq!(sym, table.intern("abc").unwrap());
899 }
900
901 quickcheck! {
902 fn intern_twice_symbol_equality(string: String) -> bool {
903 let mut table = SymbolTable::new();
904 let sym = table.intern(string.clone()).unwrap();
905 let sym_again = table.intern(string).unwrap();
906 sym == sym_again
907 }
908
909 fn intern_get_roundtrip(string: String) -> bool {
910 let mut table = SymbolTable::new();
911 let sym = table.intern(string.clone()).unwrap();
912 let retrieved_str = table.get(sym).unwrap();
913 string == retrieved_str
914 }
915
916 fn table_contains_sym(string: String) -> bool {
917 let mut table = SymbolTable::new();
918 let sym = table.intern(string).unwrap();
919 table.contains(sym)
920 }
921
922 fn table_does_not_contain_missing_symbol_ids(sym: u32) -> bool {
923 let table = SymbolTable::new();
924 !table.contains(sym.into())
925 }
926
927 fn empty_table_does_not_report_any_interned_strings(string: String) -> bool {
928 let table = SymbolTable::new();
929 !table.is_interned(string.as_str())
930 }
931
932 fn table_reports_interned_strings_as_interned(string: String) -> bool {
933 let mut table = SymbolTable::new();
934 table.intern(string.clone()).unwrap();
935 table.is_interned(string.as_str())
936 }
937 }
938}