simdutf8/implementation/
algorithm.rs

/// Macros require the following newtypes in scope:
/// `SimdU8Value` - implementation of SIMD primitives
/// `SimdInput` - which holds 64 bytes of SIMD input
/// `TempSimdChunk` - correctly aligned temporary chunk buffer, either `TempSimdChunkA16` or `TempSimdChunkA32`
5
macro_rules! algorithm_simd {
    ($feat:expr) => {
        use crate::{basic, compat};

        impl Utf8CheckAlgorithm<SimdU8Value> {
            /// Creates a fresh checker state: `prev`, `incomplete` and `error` all start out as
            /// `SimdU8Value::splat0()`.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn default() -> Self {
                Self {
                    prev: SimdU8Value::splat0(),
                    incomplete: SimdU8Value::splat0(),
                    error: SimdU8Value::splat0(),
                }
            }

            /// Folds the pending `incomplete` marker into `error`.
            ///
            /// Called whenever the following data cannot complete a multi-byte sequence
            /// (an all-ASCII block, or the end of the input).
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn check_incomplete_pending(&mut self) {
                self.error = self.error.or(self.incomplete);
            }

            /// Returns a vector that is non-zero where `input` ends in the middle of a
            /// multi-byte sequence.
            ///
            /// The subtrahend is `0xff` everywhere except in its last three positions, so after
            /// the saturating subtraction only those positions can be non-zero: the third-last
            /// byte if it is `>= 0b1111_0000`, the second-last if `>= 0b1110_0000` and the last
            /// if `>= 0b1100_0000`.
            // NOTE(review): `from_32_cut_off_leading` presumably drops the leading values for
            // vectors narrower than 32 lanes - confirm against its definition.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn is_incomplete(input: SimdU8Value) -> SimdU8Value {
                input.saturating_sub(SimdU8Value::from_32_cut_off_leading(
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0xff,
                    0b1111_0000 - 1,
                    0b1110_0000 - 1,
                    0b1100_0000 - 1,
                ))
            }

            /// Classifies every (previous byte, current byte) pair with three 16-entry nibble
            /// lookups and ANDs the results.
            ///
            /// `prev1` is `input` shifted by one byte (see `check_bytes`). A bit survives the
            /// final AND only if the high nibble of the previous byte, the low nibble of the
            /// previous byte and the high nibble of the current byte all flag the same error
            /// class.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            #[allow(clippy::too_many_lines)]
            unsafe fn check_special_cases(input: SimdU8Value, prev1: SimdU8Value) -> SimdU8Value {
                // Error-class bits. `TOO_LARGE_1000` and `OVERLONG_4` intentionally share bit 6.
                const TOO_SHORT: u8 = 1 << 0;
                const TOO_LONG: u8 = 1 << 1;
                const OVERLONG_3: u8 = 1 << 2;
                const SURROGATE: u8 = 1 << 4;
                const OVERLONG_2: u8 = 1 << 5;
                const TWO_CONTS: u8 = 1 << 7;
                const TOO_LARGE: u8 = 1 << 3;
                const TOO_LARGE_1000: u8 = 1 << 6;
                const OVERLONG_4: u8 = 1 << 6;
                // Classes that do not depend on the low nibble of the first byte; they are set
                // in every `byte_1_low` entry so that lookup never masks them out.
                const CARRY: u8 = TOO_SHORT | TOO_LONG | TWO_CONTS;

                // Indexed by the high nibble of the previous byte.
                let byte_1_high = prev1.shr4().lookup_16(
                    TOO_LONG,
                    TOO_LONG,
                    TOO_LONG,
                    TOO_LONG,
                    TOO_LONG,
                    TOO_LONG,
                    TOO_LONG,
                    TOO_LONG,
                    TWO_CONTS,
                    TWO_CONTS,
                    TWO_CONTS,
                    TWO_CONTS,
                    TOO_SHORT | OVERLONG_2,
                    TOO_SHORT,
                    TOO_SHORT | OVERLONG_3 | SURROGATE,
                    TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4,
                );

                // Indexed by the low nibble of the previous byte.
                let byte_1_low = prev1.and(SimdU8Value::splat(0x0F)).lookup_16(
                    CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
                    CARRY | OVERLONG_2,
                    CARRY,
                    CARRY,
                    CARRY | TOO_LARGE,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                    CARRY | TOO_LARGE | TOO_LARGE_1000,
                );

                // Indexed by the high nibble of the current byte.
                let byte_2_high = input.shr4().lookup_16(
                    TOO_SHORT,
                    TOO_SHORT,
                    TOO_SHORT,
                    TOO_SHORT,
                    TOO_SHORT,
                    TOO_SHORT,
                    TOO_SHORT,
                    TOO_SHORT,
                    TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
                    TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
                    TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
                    TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
                    TOO_SHORT,
                    TOO_SHORT,
                    TOO_SHORT,
                    TOO_SHORT,
                );

                byte_1_high.and(byte_1_low).and(byte_2_high)
            }

            /// Combines the special-case classification with the check for the 2nd/3rd
            /// continuation bytes of 3- and 4-byte sequences.
            ///
            /// The `0x80` bit of `must_be_2_3_continuation(prev2, prev3)` is XORed with
            /// `special_cases`, whose bit 7 is the `TWO_CONTS` class set in
            /// `check_special_cases`: an expected continuation cancels a reported one, any
            /// mismatch stays set as an error.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn check_multibyte_lengths(
                input: SimdU8Value,
                prev: SimdU8Value,
                special_cases: SimdU8Value,
            ) -> SimdU8Value {
                let prev2 = input.prev2(prev);
                let prev3 = input.prev3(prev);
                let must23 = Self::must_be_2_3_continuation(prev2, prev3);
                let must23_80 = must23.and(SimdU8Value::splat(0x80));
                must23_80.xor(special_cases)
            }

            /// Returns `true` if any bit of the accumulated `error` vector is set.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn has_error(&self) -> bool {
                self.error.any_bit_set()
            }

            /// Validates one vector of input against the previous one, ORs any findings into
            /// `error` and remembers `input` as the new `prev`.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn check_bytes(&mut self, input: SimdU8Value) {
                let prev1 = input.prev1(self.prev);
                let sc = Self::check_special_cases(input, prev1);
                self.error = self
                    .error
                    .or(Self::check_multibyte_lengths(input, self.prev, sc));
                self.prev = input;
            }

            /// Validates one `SimdInput` block, taking a shortcut for all-ASCII blocks.
            ///
            /// For an ASCII block only a sequence left open by the preceding block can be
            /// wrong, so just the pending `incomplete` marker is folded into `error`.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn check_utf8(&mut self, input: SimdInput) {
                if input.is_ascii() {
                    self.check_incomplete_pending();
                } else {
                    self.check_block(input);
                }
            }

            /// Runs `check_bytes` over every vector of `input` and records in `incomplete`
            /// whether the last vector ends inside a multi-byte sequence.
            ///
            /// # Panics
            /// Panics if `SimdInput` holds a number of vectors other than 2 or 4.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn check_block(&mut self, input: SimdInput) {
                // WORKAROUND
                // necessary because the for loop is not unrolled on ARM64
                if input.vals.len() == 2 {
                    self.check_bytes(*input.vals.get_unchecked(0));
                    self.check_bytes(*input.vals.get_unchecked(1));
                    self.incomplete = Self::is_incomplete(*input.vals.get_unchecked(1));
                } else if input.vals.len() == 4 {
                    self.check_bytes(*input.vals.get_unchecked(0));
                    self.check_bytes(*input.vals.get_unchecked(1));
                    self.check_bytes(*input.vals.get_unchecked(2));
                    self.check_bytes(*input.vals.get_unchecked(3));
                    self.incomplete = Self::is_incomplete(*input.vals.get_unchecked(3));
                } else {
                    panic!("Unsupported number of chunks");
                }
            }
        }

        /// Validation implementation for CPUs supporting the SIMD extension (see module).
        ///
        /// # Errors
        /// Returns the zero-sized [`basic::Utf8Error`] on failure.
        ///
        /// # Safety
        /// This function is inherently unsafe because it is compiled with SIMD extensions
        /// enabled. Make sure that the CPU supports it before calling.
        ///
        #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
        #[inline]
        pub unsafe fn validate_utf8_basic(
            input: &[u8],
        ) -> core::result::Result<(), basic::Utf8Error> {
            use crate::implementation::helpers::SIMD_CHUNK_SIZE;
            let len = input.len();
            let mut algorithm = Utf8CheckAlgorithm::<SimdU8Value>::default();
            let mut idx: usize = 0;
            // End of the last complete chunk; everything in `idx..iter_lim` can be loaded whole.
            let iter_lim = len - (len % SIMD_CHUNK_SIZE);

            // Fast path: skip leading all-ASCII chunks without touching the checker state.
            // `idx < iter_lim` and both are multiples of the chunk size, so a whole chunk is
            // available at `idx`.
            while idx < iter_lim {
                let simd_input = SimdInput::new(input.get_unchecked(idx..));
                idx += SIMD_CHUNK_SIZE;
                if !simd_input.is_ascii() {
                    algorithm.check_block(simd_input);
                    break;
                }
            }

            // General path for the remaining complete chunks.
            while idx < iter_lim {
                if PREFETCH {
                    // The prefetch address may lie past the end of `input`. `wrapping_add`
                    // (unlike `add`) is allowed to produce such a pointer; it is only used as
                    // a hint and never dereferenced here.
                    simd_prefetch(input.as_ptr().wrapping_add(idx + SIMD_CHUNK_SIZE * 2));
                }
                let simd_input = SimdInput::new(input.get_unchecked(idx..));
                algorithm.check_utf8(simd_input);
                idx += SIMD_CHUNK_SIZE;
            }

            // Trailing partial chunk: copy it into a temporary chunk buffer so a full chunk
            // can be loaded.
            // NOTE(review): relies on `TempSimdChunk::new()` padding with ASCII bytes
            // (presumably zeros) - confirm against its definition.
            if idx < len {
                let mut tmpbuf = TempSimdChunk::new();
                crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64(
                    input.as_ptr().add(idx),
                    tmpbuf.0.as_mut_ptr(),
                    len - idx,
                );
                let simd_input = SimdInput::new(&tmpbuf.0);
                algorithm.check_utf8(simd_input);
            }
            algorithm.check_incomplete_pending();
            if algorithm.has_error() {
                Err(basic::Utf8Error {})
            } else {
                Ok(())
            }
        }

        /// Validation implementation for CPUs supporting the SIMD extension (see module).
        ///
        /// # Errors
        /// Returns [`compat::Utf8Error`] with detailed error information on failure.
        ///
        /// # Safety
        /// This function is inherently unsafe because it is compiled with SIMD extensions
        /// enabled. Make sure that the CPU supports it before calling.
        ///
        #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
        #[inline]
        pub unsafe fn validate_utf8_compat(
            input: &[u8],
        ) -> core::result::Result<(), compat::Utf8Error> {
            validate_utf8_compat_simd0(input)
                .map_err(|idx| crate::implementation::helpers::get_compat_error(input, idx))
        }

        /// Chunk-wise validation that reports where a failure was detected.
        ///
        /// On failure returns `Err(idx)`, where `idx` is the offset of the chunk that was
        /// being processed when the error showed up (or the offset of the trailing partial
        /// chunk). `validate_utf8_compat` hands that offset to `get_compat_error` to build the
        /// detailed error.
        ///
        /// # Safety
        /// Compiled with SIMD extensions enabled; the CPU must support them.
        #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
        #[inline]
        unsafe fn validate_utf8_compat_simd0(input: &[u8]) -> core::result::Result<(), usize> {
            use crate::implementation::helpers::SIMD_CHUNK_SIZE;
            let len = input.len();
            let mut algorithm = Utf8CheckAlgorithm::<SimdU8Value>::default();
            let mut idx: usize = 0;
            let mut only_ascii = true;
            // End of the last complete chunk; everything in `idx..iter_lim` can be loaded whole.
            let iter_lim = len - (len % SIMD_CHUNK_SIZE);

            // Two alternating inner loops: one tuned for runs of ASCII chunks, one for
            // non-ASCII data. `only_ascii` selects which one to (re-)enter.
            'outer: loop {
                if only_ascii {
                    while idx < iter_lim {
                        let simd_input = SimdInput::new(input.get_unchecked(idx..));
                        if !simd_input.is_ascii() {
                            algorithm.check_block(simd_input);
                            if algorithm.has_error() {
                                return Err(idx);
                            } else {
                                only_ascii = false;
                                idx += SIMD_CHUNK_SIZE;
                                continue 'outer;
                            }
                        }
                        idx += SIMD_CHUNK_SIZE;
                    }
                } else {
                    while idx < iter_lim {
                        if PREFETCH {
                            // The prefetch address may lie past the end of `input`.
                            // `wrapping_add` (unlike `add`) is allowed to produce such a
                            // pointer; it is only used as a hint and never dereferenced here.
                            simd_prefetch(input.as_ptr().wrapping_add(idx + SIMD_CHUNK_SIZE * 2));
                        }
                        let simd_input = SimdInput::new(input.get_unchecked(idx..));
                        if simd_input.is_ascii() {
                            algorithm.check_incomplete_pending();
                            if algorithm.has_error() {
                                return Err(idx);
                            } else {
                                // we are in pure ASCII territory again
                                only_ascii = true;
                                idx += SIMD_CHUNK_SIZE;
                                continue 'outer;
                            }
                        } else {
                            algorithm.check_block(simd_input);
                            if algorithm.has_error() {
                                return Err(idx);
                            }
                        }
                        idx += SIMD_CHUNK_SIZE;
                    }
                }
                break;
            }
            // Trailing partial chunk: copy it into a temporary chunk buffer so a full chunk
            // can be loaded.
            // NOTE(review): relies on `TempSimdChunk::new()` padding with ASCII bytes
            // (presumably zeros) - confirm against its definition.
            if idx < len {
                let mut tmpbuf = TempSimdChunk::new();
                crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64(
                    input.as_ptr().add(idx),
                    tmpbuf.0.as_mut_ptr(),
                    len - idx,
                );
                let simd_input = SimdInput::new(&tmpbuf.0);

                algorithm.check_utf8(simd_input);
            }
            algorithm.check_incomplete_pending();
            if algorithm.has_error() {
                Err(idx)
            } else {
                Ok(())
            }
        }

        /// Low-level implementation of the [`basic::imp::Utf8Validator`] trait.
        ///
        /// This implementation requires CPU SIMD features specified by the module it resides in.
        /// It is undefined behavior to call it if the required CPU features are not
        /// available.
        #[cfg(feature = "public_imp")]
        pub struct Utf8ValidatorImp {
            algorithm: Utf8CheckAlgorithm<SimdU8Value>,
            // Bytes of a not yet complete chunk carried over between `update` calls.
            incomplete_data: [u8; 64],
            // Number of valid bytes at the start of `incomplete_data`.
            incomplete_len: usize,
        }

        #[cfg(feature = "public_imp")]
        impl Utf8ValidatorImp {
            /// Validates the whole `incomplete_data` buffer as one chunk and marks it empty.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn update_from_incomplete_data(&mut self) {
                let simd_input = SimdInput::new(&self.incomplete_data);
                self.algorithm.check_utf8(simd_input);
                self.incomplete_len = 0;
            }
        }

        #[cfg(feature = "public_imp")]
        impl basic::imp::Utf8Validator for Utf8ValidatorImp {
            /// Creates a validator with fresh checker state and an empty carry-over buffer.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            #[must_use]
            unsafe fn new() -> Self {
                Self {
                    algorithm: Utf8CheckAlgorithm::<SimdU8Value>::default(),
                    incomplete_data: [0; 64],
                    incomplete_len: 0,
                }
            }

            /// Feeds `input` to the validator.
            ///
            /// Carried-over bytes are topped up to a full chunk first, then all complete
            /// chunks of `input` are validated and any remainder is stored for the next call.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn update(&mut self, mut input: &[u8]) {
                use crate::implementation::helpers::SIMD_CHUNK_SIZE;
                if input.is_empty() {
                    return;
                }
                if self.incomplete_len != 0 {
                    // `to_copy` never exceeds the free space left in `incomplete_data`.
                    let to_copy =
                        core::cmp::min(SIMD_CHUNK_SIZE - self.incomplete_len, input.len());
                    self.incomplete_data
                        .as_mut_ptr()
                        .add(self.incomplete_len)
                        .copy_from_nonoverlapping(input.as_ptr(), to_copy);
                    if self.incomplete_len + to_copy == SIMD_CHUNK_SIZE {
                        self.update_from_incomplete_data();
                        input = &input[to_copy..];
                    } else {
                        // Still not a full chunk: keep buffering.
                        self.incomplete_len += to_copy;
                        return;
                    }
                }
                let len = input.len();
                let mut idx: usize = 0;
                let iter_lim = len - (len % SIMD_CHUNK_SIZE);
                while idx < iter_lim {
                    let simd_input = SimdInput::new(input.get_unchecked(idx..));
                    self.algorithm.check_utf8(simd_input);
                    idx += SIMD_CHUNK_SIZE;
                }
                if idx < len {
                    // Fewer than `SIMD_CHUNK_SIZE` bytes remain; stash them for later.
                    let to_copy = len - idx;
                    self.incomplete_data
                        .as_mut_ptr()
                        .copy_from_nonoverlapping(input.as_ptr().add(idx), to_copy);
                    self.incomplete_len = to_copy;
                }
            }

            /// Validates any carried-over bytes (zero-padded to a full chunk) and reports the
            /// overall result.
            ///
            /// # Errors
            /// Returns [`basic::Utf8Error`] if any data passed in so far was invalid.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn finalize(mut self) -> core::result::Result<(), basic::Utf8Error> {
                if self.incomplete_len != 0 {
                    // Clear stale bytes behind the valid prefix before validating the buffer.
                    for i in &mut self.incomplete_data[self.incomplete_len..] {
                        *i = 0;
                    }
                    self.update_from_incomplete_data();
                }
                self.algorithm.check_incomplete_pending();
                if self.algorithm.has_error() {
                    Err(basic::Utf8Error {})
                } else {
                    Ok(())
                }
            }
        }

        /// Low-level implementation of the [`basic::imp::ChunkedUtf8Validator`] trait.
        ///
        /// This implementation requires CPU SIMD features specified by the module it resides in.
        /// It is undefined behavior to call it if the required CPU features are not
        /// available.
        #[cfg(feature = "public_imp")]
        pub struct ChunkedUtf8ValidatorImp {
            algorithm: Utf8CheckAlgorithm<SimdU8Value>,
        }

        #[cfg(feature = "public_imp")]
        impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp {
            /// Creates a validator with fresh checker state.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            #[must_use]
            unsafe fn new() -> Self {
                Self {
                    algorithm: Utf8CheckAlgorithm::<SimdU8Value>::default(),
                }
            }

            /// Validates `input`, which must consist of whole chunks.
            ///
            /// # Panics
            /// Panics if `input.len()` is not a multiple of `SIMD_CHUNK_SIZE`.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn update_from_chunks(&mut self, input: &[u8]) {
                use crate::implementation::helpers::SIMD_CHUNK_SIZE;

                assert!(
                    input.len() % SIMD_CHUNK_SIZE == 0,
                    "Input size must be a multiple of 64."
                );
                for chunk in input.chunks_exact(SIMD_CHUNK_SIZE) {
                    let input = SimdInput::new(chunk);
                    self.algorithm.check_utf8(input);
                }
            }

            /// Validates the optional `remaining_input` (of any length) and reports the
            /// overall result.
            ///
            /// Whole chunks go through `update_from_chunks`; a trailing partial chunk is
            /// copied into a temporary chunk buffer first.
            ///
            /// # Errors
            /// Returns [`basic::Utf8Error`] if any data passed in so far was invalid.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn finalize(
                mut self,
                remaining_input: core::option::Option<&[u8]>,
            ) -> core::result::Result<(), basic::Utf8Error> {
                use crate::implementation::helpers::SIMD_CHUNK_SIZE;

                if let Some(mut remaining_input) = remaining_input {
                    if !remaining_input.is_empty() {
                        let len = remaining_input.len();
                        let chunks_lim = len - (len % SIMD_CHUNK_SIZE);
                        if chunks_lim > 0 {
                            self.update_from_chunks(&remaining_input[..chunks_lim]);
                        }
                        let rem = len - chunks_lim;
                        if rem > 0 {
                            // `rem < SIMD_CHUNK_SIZE`, so the copy fits into the chunk buffer.
                            // NOTE(review): relies on `TempSimdChunk::new()` padding with
                            // ASCII bytes (presumably zeros) - confirm against its definition.
                            remaining_input = &remaining_input[chunks_lim..];
                            let mut tmpbuf = TempSimdChunk::new();
                            tmpbuf.0.as_mut_ptr().copy_from_nonoverlapping(
                                remaining_input.as_ptr(),
                                remaining_input.len(),
                            );
                            let simd_input = SimdInput::new(&tmpbuf.0);
                            self.algorithm.check_utf8(simd_input);
                        }
                    }
                }
                self.algorithm.check_incomplete_pending();
                if self.algorithm.has_error() {
                    Err(basic::Utf8Error {})
                } else {
                    Ok(())
                }
            }
        }
    };
}
516
macro_rules! simd_input_128_bit {
    ($feat:expr) => {
        /// One block of SIMD input, held as four `SimdU8Value` vectors.
        #[repr(C)]
        struct SimdInput {
            vals: [SimdU8Value; 4],
        }

        impl SimdInput {
            /// Loads four vectors from `ptr`, starting at byte offsets 0, 16, 32 and 48.
            ///
            /// # Safety
            /// The slice length is not checked: `ptr` must be long enough for all four loads.
            /// Must only be called when the CPU supports the enabled SIMD extension.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            #[allow(clippy::cast_ptr_alignment)]
            unsafe fn new(ptr: &[u8]) -> Self {
                let base = ptr.as_ptr();
                let first = SimdU8Value::load_from(base);
                let second = SimdU8Value::load_from(base.add(16));
                let third = SimdU8Value::load_from(base.add(32));
                let fourth = SimdU8Value::load_from(base.add(48));
                Self {
                    vals: [first, second, third, fourth],
                }
            }

            /// ORs all four vectors together and asks the combined vector whether it is ASCII.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn is_ascii(&self) -> bool {
                self.vals[0]
                    .or(self.vals[1])
                    .or(self.vals[2].or(self.vals[3]))
                    .is_ascii()
            }
        }
    };
}
550
macro_rules! simd_input_256_bit {
    ($feat:expr) => {
        /// One block of SIMD input, held as two `SimdU8Value` vectors.
        #[repr(C)]
        struct SimdInput {
            vals: [SimdU8Value; 2],
        }

        impl SimdInput {
            /// Loads two vectors from `ptr`, starting at byte offsets 0 and 32.
            ///
            /// # Safety
            /// The slice length is not checked: `ptr` must be long enough for both loads.
            /// Must only be called when the CPU supports the enabled SIMD extension.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            #[allow(clippy::cast_ptr_alignment)]
            unsafe fn new(ptr: &[u8]) -> Self {
                let base = ptr.as_ptr();
                let lower = SimdU8Value::load_from(base);
                let upper = SimdU8Value::load_from(base.add(32));
                Self {
                    vals: [lower, upper],
                }
            }

            /// ORs both vectors together and asks the combined vector whether it is ASCII.
            #[cfg_attr(not(target_arch="aarch64"), target_feature(enable = $feat))]
            #[inline]
            unsafe fn is_ascii(&self) -> bool {
                let combined = self.vals[0].or(self.vals[1]);
                combined.is_ascii()
            }
        }
    };
}