1#![allow(clippy::too_many_arguments)]
4
5#[cfg(target_arch = "x86")]
6use core::arch::x86::{
7 __m128i, _mm_alignr_epi8, _mm_and_si128, _mm_cmpgt_epi8, _mm_loadu_si128, _mm_movemask_epi8,
8 _mm_or_si128, _mm_prefetch, _mm_set1_epi8, _mm_setr_epi8, _mm_setzero_si128, _mm_shuffle_epi8,
9 _mm_srli_epi16, _mm_subs_epu8, _mm_testz_si128, _mm_xor_si128, _MM_HINT_T0,
10};
11#[cfg(target_arch = "x86_64")]
12use core::arch::x86_64::{
13 __m128i, _mm_alignr_epi8, _mm_and_si128, _mm_cmpgt_epi8, _mm_loadu_si128, _mm_movemask_epi8,
14 _mm_or_si128, _mm_prefetch, _mm_set1_epi8, _mm_setr_epi8, _mm_setzero_si128, _mm_shuffle_epi8,
15 _mm_srli_epi16, _mm_subs_epu8, _mm_testz_si128, _mm_xor_si128, _MM_HINT_T0,
16};
17
18use crate::implementation::helpers::Utf8CheckAlgorithm;
19
// 128-bit lane type for this implementation: the shared `SimdU8Value` wrapper
// specialized to the SSE `__m128i` vector register type.
type SimdU8Value = crate::implementation::helpers::SimdU8Value<__m128i>;
23
impl SimdU8Value {
    /// Builds a vector from the *last* 16 of 32 byte arguments.
    ///
    /// The first 16 parameters are intentionally ignored (hence the `_`
    /// prefixes); this shared-interface constructor exists so 128-bit and
    /// 256-bit implementations can use the same 32-argument call sites, with
    /// the 128-bit variant cutting off the leading half.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn from_32_cut_off_leading(
        _v0: u8,
        _v1: u8,
        _v2: u8,
        _v3: u8,
        _v4: u8,
        _v5: u8,
        _v6: u8,
        _v7: u8,
        _v8: u8,
        _v9: u8,
        _v10: u8,
        _v11: u8,
        _v12: u8,
        _v13: u8,
        _v14: u8,
        _v15: u8,
        v16: u8,
        v17: u8,
        v18: u8,
        v19: u8,
        v20: u8,
        v21: u8,
        v22: u8,
        v23: u8,
        v24: u8,
        v25: u8,
        v26: u8,
        v27: u8,
        v28: u8,
        v29: u8,
        v30: u8,
        v31: u8,
    ) -> Self {
        // The u8 -> i8 casts are bit-preserving; `_mm_setr_epi8` takes i8
        // but the lanes are reinterpreted as unsigned bytes throughout.
        #[allow(clippy::cast_possible_wrap)]
        Self::from(_mm_setr_epi8(
            v16 as i8, v17 as i8, v18 as i8, v19 as i8, v20 as i8, v21 as i8, v22 as i8, v23 as i8,
            v24 as i8, v25 as i8, v26 as i8, v27 as i8, v28 as i8, v29 as i8, v30 as i8, v31 as i8,
        ))
    }

    /// Builds a vector from 16 explicit byte values, in lane order
    /// (v0 = lowest lane). Named `repeat_16` for interface parity with wider
    /// implementations where the 16 bytes fill the register repeatedly.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn repeat_16(
        v0: u8,
        v1: u8,
        v2: u8,
        v3: u8,
        v4: u8,
        v5: u8,
        v6: u8,
        v7: u8,
        v8: u8,
        v9: u8,
        v10: u8,
        v11: u8,
        v12: u8,
        v13: u8,
        v14: u8,
        v15: u8,
    ) -> Self {
        // Bit-preserving u8 -> i8 casts, as in `from_32_cut_off_leading`.
        #[allow(clippy::cast_possible_wrap)]
        Self::from(_mm_setr_epi8(
            v0 as i8, v1 as i8, v2 as i8, v3 as i8, v4 as i8, v5 as i8, v6 as i8, v7 as i8,
            v8 as i8, v9 as i8, v10 as i8, v11 as i8, v12 as i8, v13 as i8, v14 as i8, v15 as i8,
        ))
    }

    /// Loads 16 bytes from `ptr` via an unaligned load (`_mm_loadu_si128`),
    /// so the caller need not guarantee 16-byte alignment.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn load_from(ptr: *const u8) -> Self {
        // The pointer cast is fine precisely because the load is unaligned.
        #[allow(clippy::cast_ptr_alignment)]
        Self::from(_mm_loadu_si128(ptr.cast::<__m128i>()))
    }

    /// Per-byte table lookup via `_mm_shuffle_epi8` (PSHUFB): each result
    /// byte is the table entry selected by the low 4 bits of the
    /// corresponding byte of `self`, or 0 when that byte's high bit is set.
    /// `v0..v15` form the 16-entry table.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn lookup_16(
        self,
        v0: u8,
        v1: u8,
        v2: u8,
        v3: u8,
        v4: u8,
        v5: u8,
        v6: u8,
        v7: u8,
        v8: u8,
        v9: u8,
        v10: u8,
        v11: u8,
        v12: u8,
        v13: u8,
        v14: u8,
        v15: u8,
    ) -> Self {
        Self::from(_mm_shuffle_epi8(
            Self::repeat_16(
                v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15,
            )
            .0,
            self.0,
        ))
    }

    /// Broadcasts `val` into all 16 byte lanes.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn splat(val: u8) -> Self {
        // Bit-preserving u8 -> i8 cast for the intrinsic's signature.
        #[allow(clippy::cast_possible_wrap)]
        Self::from(_mm_set1_epi8(val as i8))
    }

    /// Returns the all-zero vector.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn splat0() -> Self {
        Self::from(_mm_setzero_si128())
    }

    /// Bitwise OR of the two vectors.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn or(self, b: Self) -> Self {
        Self::from(_mm_or_si128(self.0, b.0))
    }

    /// Bitwise AND of the two vectors.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn and(self, b: Self) -> Self {
        Self::from(_mm_and_si128(self.0, b.0))
    }

    /// Bitwise XOR of the two vectors.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn xor(self, b: Self) -> Self {
        Self::from(_mm_xor_si128(self.0, b.0))
    }

    /// Per-byte unsigned saturating subtraction: lanes clamp at 0 rather
    /// than wrapping, so the result is non-zero exactly where self > b.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn saturating_sub(self, b: Self) -> Self {
        Self::from(_mm_subs_epu8(self.0, b.0))
    }

    /// Logical shift right by 4 of every byte lane. SSE has no per-byte
    /// shift, so shift the 16-bit lanes and mask off the bits that leaked
    /// in from the neighboring byte (0xFF >> 4 == 0x0F).
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn shr4(self) -> Self {
        Self::from(_mm_srli_epi16(self.0, 4)).and(Self::splat(0xFF >> 4))
    }

    /// Shifts the byte stream back by one: `_mm_alignr_epi8` concatenates
    /// `self:prev` as a 32-byte value and extracts 16 bytes starting at
    /// offset 15, so each lane holds the byte one position earlier.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn prev1(self, prev: Self) -> Self {
        Self::from(_mm_alignr_epi8(self.0, prev.0, 16 - 1))
    }

    /// Like `prev1`, but each lane holds the byte two positions earlier.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn prev2(self, prev: Self) -> Self {
        Self::from(_mm_alignr_epi8(self.0, prev.0, 16 - 2))
    }

    /// Like `prev1`, but each lane holds the byte three positions earlier.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn prev3(self, prev: Self) -> Self {
        Self::from(_mm_alignr_epi8(self.0, prev.0, 16 - 3))
    }

    /// Per-byte *signed* greater-than compare; lanes become 0xFF where
    /// self > other (as i8), 0x00 elsewhere.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn signed_gt(self, other: Self) -> Self {
        Self::from(_mm_cmpgt_epi8(self.0, other.0))
    }

    /// True when any bit of the vector is set. `_mm_testz_si128(a, a)`
    /// returns 1 iff the vector is all-zero, hence the `!= 1`.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn any_bit_set(self) -> bool {
        _mm_testz_si128(self.0, self.0) != 1
    }

    /// True when all 16 bytes are ASCII: `_mm_movemask_epi8` collects the
    /// most significant bit of every byte, and a zero mask means no byte
    /// has its high bit set.
    #[target_feature(enable = "sse4.2")]
    #[inline]
    unsafe fn is_ascii(self) -> bool {
        _mm_movemask_epi8(self.0) == 0
    }
}
215
// Zero-cost wrap of the raw SSE register into the newtype.
impl From<__m128i> for SimdU8Value {
    #[inline]
    fn from(val: __m128i) -> Self {
        Self(val)
    }
}
222
223impl Utf8CheckAlgorithm<SimdU8Value> {
224 #[target_feature(enable = "sse4.2")]
225 #[inline]
226 unsafe fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value {
227 let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0b1110_0000 - 1));
228 let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0b1111_0000 - 1));
229
230 is_third_byte
231 .or(is_fourth_byte)
232 .signed_gt(SimdU8Value::splat0())
233 }
234}
235
// Hints the CPU to fetch the cache line at `ptr` into all cache levels
// (T0 hint). A pure hint: it has no architectural effect on correctness.
#[target_feature(enable = "sse4.2")]
#[inline]
unsafe fn simd_prefetch(ptr: *const u8) {
    _mm_prefetch(ptr.cast::<i8>(), _MM_HINT_T0);
}
241
// Compile-time switch read by the shared algorithm macro below; prefetching
// is disabled for this implementation.
const PREFETCH: bool = false;
// 16-byte-aligned temporary chunk buffer matching this 128-bit register width.
use crate::implementation::helpers::TempSimdChunkA16 as TempSimdChunk;
// Instantiate the shared 128-bit SIMD input handling and the UTF-8
// validation algorithm, gated on the "sse4.2" target feature.
simd_input_128_bit!("sse4.2");
algorithm_simd!("sse4.2");