base64/
alphabet.rs

1//! Provides [Alphabet] and constants for alphabets commonly used in the wild.
2
3use crate::PAD_BYTE;
4use core::{convert, fmt};
5#[cfg(any(feature = "std", test))]
6use std::error;
7
/// Number of symbols in an alphabet: base64 needs one symbol per 6-bit value, so 2^6 = 64.
const ALPHABET_SIZE: usize = 1 << 6;
9
10/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
11///
12/// Common alphabets are provided as constants, and custom alphabets
13/// can be made via `from_str` or the `TryFrom<str>` implementation.
14///
15/// # Examples
16///
17/// Building and using a custom Alphabet:
18///
19/// ```
20/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
21///
22/// let engine = base64::engine::GeneralPurpose::new(
23///     &custom,
24///     base64::engine::general_purpose::PAD);
25/// ```
26///
27/// Building a const:
28///
29/// ```
30/// use base64::alphabet::Alphabet;
31///
32/// static CUSTOM: Alphabet = {
33///     // Result::unwrap() isn't const yet, but panic!() is OK
34///     match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
35///         Ok(x) => x,
36///         Err(_) => panic!("creation of alphabet failed"),
37///     }
38/// };
39/// ```
40///
41/// Building lazily:
42///
43/// ```
44/// use base64::{
45///     alphabet::Alphabet,
46///     engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
47/// };
48/// use once_cell::sync::Lazy;
49///
50/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
51///     Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
52/// );
53/// ```
54#[derive(Clone, Debug, Eq, PartialEq)]
55pub struct Alphabet {
56    pub(crate) symbols: [u8; ALPHABET_SIZE],
57}
58
59impl Alphabet {
60    /// Performs no checks so that it can be const.
61    /// Used only for known-valid strings.
62    const fn from_str_unchecked(alphabet: &str) -> Self {
63        let mut symbols = [0_u8; ALPHABET_SIZE];
64        let source_bytes = alphabet.as_bytes();
65
66        // a way to copy that's allowed in const fn
67        let mut index = 0;
68        while index < ALPHABET_SIZE {
69            symbols[index] = source_bytes[index];
70            index += 1;
71        }
72
73        Self { symbols }
74    }
75
76    /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
77    ///
78    /// The `=` byte is not allowed as it is used for padding.
79    pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
80        let bytes = alphabet.as_bytes();
81        if bytes.len() != ALPHABET_SIZE {
82            return Err(ParseAlphabetError::InvalidLength);
83        }
84
85        {
86            let mut index = 0;
87            while index < ALPHABET_SIZE {
88                let byte = bytes[index];
89
90                // must be ascii printable. 127 (DEL) is commonly considered printable
91                // for some reason but clearly unsuitable for base64.
92                if !(byte >= 32_u8 && byte <= 126_u8) {
93                    return Err(ParseAlphabetError::UnprintableByte(byte));
94                }
95                // = is assumed to be padding, so cannot be used as a symbol
96                if byte == PAD_BYTE {
97                    return Err(ParseAlphabetError::ReservedByte(byte));
98                }
99
100                // Check for duplicates while staying within what const allows.
101                // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
102                // microsecond range.
103
104                let mut probe_index = 0;
105                while probe_index < ALPHABET_SIZE {
106                    if probe_index == index {
107                        probe_index += 1;
108                        continue;
109                    }
110
111                    let probe_byte = bytes[probe_index];
112
113                    if byte == probe_byte {
114                        return Err(ParseAlphabetError::DuplicatedByte(byte));
115                    }
116
117                    probe_index += 1;
118                }
119
120                index += 1;
121            }
122        }
123
124        Ok(Self::from_str_unchecked(alphabet))
125    }
126
127    /// Create a `&str` from the symbols in the `Alphabet`
128    pub fn as_str(&self) -> &str {
129        core::str::from_utf8(&self.symbols).unwrap()
130    }
131}
132
133impl convert::TryFrom<&str> for Alphabet {
134    type Error = ParseAlphabetError;
135
136    fn try_from(value: &str) -> Result<Self, Self::Error> {
137        Self::new(value)
138    }
139}
140
/// Possible errors when constructing an [Alphabet] from a `str`.
///
/// Variants that carry a `u8` hold the offending byte.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseAlphabetError {
    /// Alphabets must be exactly 64 bytes long
    InvalidLength,
    /// All bytes must be unique; holds a byte that occurs more than once
    DuplicatedByte(u8),
    /// All bytes must be printable (in the range `[32, 126]`).
    UnprintableByte(u8),
    /// `=` cannot be used
    ReservedByte(u8),
}
153
154impl fmt::Display for ParseAlphabetError {
155    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156        match self {
157            Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
158            Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
159            Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
160            Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
161        }
162    }
163}
164
// `error` is `std::error`, which is imported under this same condition, so the impl is
// compiled only for test builds or when the `std` feature is enabled.
#[cfg(any(test, feature = "std"))]
impl error::Error for ParseAlphabetError {}
167
168/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
169///
170/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
171pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
172    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
173);
174
175/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
176///
177/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
178pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
179    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
180);
181
182/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
183///
184/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
185pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
186    "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
187);
188
189/// The bcrypt alphabet.
190pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
191    "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
192);
193
194/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
195///
196/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
197pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
198    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
199);
200
201/// The alphabet used in BinHex 4.0 files.
202///
203/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
204pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
205    "!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr",
206);
207
/// Unit tests for alphabet validation, the provided constants, and error formatting.
#[cfg(test)]
mod tests {
    use super::*;
    use core::convert::TryFrom as _;

    #[test]
    fn detects_duplicate_start() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_end() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'/'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_middle() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_before_later_unprintable() {
        // The first byte's duplicate (index 2) is reported even though index 1 is unprintable,
        // because each byte is fully checked before moving on to the next one.
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("A\x0cADEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_length() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new(
                "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
            )
            .unwrap_err()
        );
    }

    #[test]
    fn detects_length_too_short() {
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new("").unwrap_err()
        );
        // one byte short of a full alphabet
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_padding() {
        assert_eq!(
            ParseAlphabetError::ReservedByte(b'='),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_unprintable() {
        // form feed
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc),
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_del() {
        // 127 (DEL) sits just above the allowed range
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0x7f),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+\x7f")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_non_ascii() {
        // 32 two-byte characters: the right byte length, but no byte is ASCII
        let alphabet = "\u{e9}".repeat(32);
        assert_eq!(64, alphabet.len());
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc3),
            Alphabet::new(&alphabet).unwrap_err()
        );
    }

    #[test]
    fn accepts_printable_range_bounds() {
        // space (32) and `~` (126) are the lowest and highest allowed bytes
        let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 ~";
        assert_eq!(alphabet, Alphabet::new(alphabet).unwrap().as_str());
    }

    #[test]
    fn same_as_unchecked() {
        assert_eq!(
            STANDARD,
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap()
        );
    }

    #[test]
    fn provided_alphabets_pass_validation() {
        // The constants are built without checks, so verify they satisfy every rule `new` enforces.
        for alphabet in [STANDARD, URL_SAFE, CRYPT, BCRYPT, IMAP_MUTF7, BIN_HEX].iter() {
            assert_eq!(Ok(alphabet.clone()), Alphabet::new(alphabet.as_str()));
        }
    }

    #[test]
    fn str_same_as_input() {
        let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let a = Alphabet::try_from(alphabet).unwrap();
        assert_eq!(alphabet, a.as_str())
    }

    #[test]
    fn error_display() {
        assert_eq!(
            "Invalid length - must be 64 bytes",
            ParseAlphabetError::InvalidLength.to_string()
        );
        assert_eq!(
            "Duplicated byte: 0x41",
            ParseAlphabetError::DuplicatedByte(b'A').to_string()
        );
        assert_eq!(
            "Unprintable byte: 0x0c",
            ParseAlphabetError::UnprintableByte(0xc).to_string()
        );
        assert_eq!(
            "Reserved byte: 0x3d",
            ParseAlphabetError::ReservedByte(b'=').to_string()
        );
    }
}