ppv_lite86/
soft.rs

//! Implement 256- and 512-bit vectors in terms of 128-bit vectors, for machines without native wide SIMD.
2
3use crate::types::*;
4use crate::{vec128_storage, vec256_storage, vec512_storage};
5use core::marker::PhantomData;
6use core::ops::*;
7use zerocopy::{AsBytes, FromBytes, FromZeroes};
8
9#[derive(Copy, Clone, Default, FromBytes, AsBytes, FromZeroes)]
10#[repr(transparent)]
11#[allow(non_camel_case_types)]
12pub struct x2<W, G>(pub [W; 2], PhantomData<G>);
13impl<W, G> x2<W, G> {
14    #[inline(always)]
15    pub fn new(xs: [W; 2]) -> Self {
16        x2(xs, PhantomData)
17    }
18}
// Implements the binary operator trait `$trait` (method `$fn`) for `x2` by
// applying the lane type's own operator to lane 0 and then to lane 1.
macro_rules! fwd_binop_x2 {
    ($trait:ident, $fn:ident) => {
        impl<W, G> $trait for x2<W, G>
        where
            W: $trait + Copy,
        {
            type Output = x2<W::Output, G>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                let [lhs0, lhs1] = self.0;
                let [rhs0, rhs1] = rhs.0;
                x2::new([lhs0.$fn(rhs0), lhs1.$fn(rhs1)])
            }
        }
    };
}
// Implements the compound-assignment trait `$trait` (method `$fn_assign`) for
// `x2` by updating lane 0 and then lane 1 in place.
macro_rules! fwd_binop_assign_x2 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W, G> $trait for x2<W, G>
        where
            W: $trait + Copy,
        {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                let [rhs0, rhs1] = rhs.0;
                let [lhs0, lhs1] = &mut self.0;
                lhs0.$fn_assign(rhs0);
                lhs1.$fn_assign(rhs1);
            }
        }
    };
}
// Expands to a method `$fn(self) -> Self` that calls the lane type's `$fn`
// on lane 0 and then lane 1. Intended for use inside an `impl ... for x2`.
macro_rules! fwd_unop_x2 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            let [lane0, lane1] = self.0;
            x2::new([lane0.$fn(), lane1.$fn()])
        }
    };
}
49impl<W, G> RotateEachWord32 for x2<W, G>
50where
51    W: Copy + RotateEachWord32,
52{
53    fwd_unop_x2!(rotate_each_word_right7);
54    fwd_unop_x2!(rotate_each_word_right8);
55    fwd_unop_x2!(rotate_each_word_right11);
56    fwd_unop_x2!(rotate_each_word_right12);
57    fwd_unop_x2!(rotate_each_word_right16);
58    fwd_unop_x2!(rotate_each_word_right20);
59    fwd_unop_x2!(rotate_each_word_right24);
60    fwd_unop_x2!(rotate_each_word_right25);
61}
62impl<W, G> RotateEachWord64 for x2<W, G>
63where
64    W: Copy + RotateEachWord64,
65{
66    fwd_unop_x2!(rotate_each_word_right32);
67}
68impl<W, G> RotateEachWord128 for x2<W, G> where W: RotateEachWord128 {}
69impl<W, G> BitOps0 for x2<W, G>
70where
71    W: BitOps0,
72    G: Copy,
73{
74}
75impl<W, G> BitOps32 for x2<W, G>
76where
77    W: BitOps32 + BitOps0,
78    G: Copy,
79{
80}
81impl<W, G> BitOps64 for x2<W, G>
82where
83    W: BitOps64 + BitOps0,
84    G: Copy,
85{
86}
87impl<W, G> BitOps128 for x2<W, G>
88where
89    W: BitOps128 + BitOps0,
90    G: Copy,
91{
92}
93fwd_binop_x2!(BitAnd, bitand);
94fwd_binop_x2!(BitOr, bitor);
95fwd_binop_x2!(BitXor, bitxor);
96fwd_binop_x2!(AndNot, andnot);
97fwd_binop_assign_x2!(BitAndAssign, bitand_assign);
98fwd_binop_assign_x2!(BitOrAssign, bitor_assign);
99fwd_binop_assign_x2!(BitXorAssign, bitxor_assign);
100impl<W, G> ArithOps for x2<W, G>
101where
102    W: ArithOps,
103    G: Copy,
104{
105}
106fwd_binop_x2!(Add, add);
107fwd_binop_assign_x2!(AddAssign, add_assign);
108impl<W: Not + Copy, G> Not for x2<W, G> {
109    type Output = x2<W::Output, G>;
110    #[inline(always)]
111    fn not(self) -> Self::Output {
112        x2::new([self.0[0].not(), self.0[1].not()])
113    }
114}
115impl<W, G> UnsafeFrom<[W; 2]> for x2<W, G> {
116    #[inline(always)]
117    unsafe fn unsafe_from(xs: [W; 2]) -> Self {
118        x2::new(xs)
119    }
120}
121impl<W: Copy, G> Vec2<W> for x2<W, G> {
122    #[inline(always)]
123    fn extract(self, i: u32) -> W {
124        self.0[i as usize]
125    }
126    #[inline(always)]
127    fn insert(mut self, w: W, i: u32) -> Self {
128        self.0[i as usize] = w;
129        self
130    }
131}
132impl<W: Copy + Store<vec128_storage>, G> Store<vec256_storage> for x2<W, G> {
133    #[inline(always)]
134    unsafe fn unpack(p: vec256_storage) -> Self {
135        let p = p.split128();
136        x2::new([W::unpack(p[0]), W::unpack(p[1])])
137    }
138}
139impl<W, G> From<x2<W, G>> for vec256_storage
140where
141    W: Copy,
142    vec128_storage: From<W>,
143{
144    #[inline(always)]
145    fn from(x: x2<W, G>) -> Self {
146        vec256_storage::new128([x.0[0].into(), x.0[1].into()])
147    }
148}
149impl<W, G> Swap64 for x2<W, G>
150where
151    W: Swap64 + Copy,
152{
153    fwd_unop_x2!(swap1);
154    fwd_unop_x2!(swap2);
155    fwd_unop_x2!(swap4);
156    fwd_unop_x2!(swap8);
157    fwd_unop_x2!(swap16);
158    fwd_unop_x2!(swap32);
159    fwd_unop_x2!(swap64);
160}
161impl<W: Copy, G> MultiLane<[W; 2]> for x2<W, G> {
162    #[inline(always)]
163    fn to_lanes(self) -> [W; 2] {
164        self.0
165    }
166    #[inline(always)]
167    fn from_lanes(lanes: [W; 2]) -> Self {
168        x2::new(lanes)
169    }
170}
171impl<W: BSwap + Copy, G> BSwap for x2<W, G> {
172    #[inline(always)]
173    fn bswap(self) -> Self {
174        x2::new([self.0[0].bswap(), self.0[1].bswap()])
175    }
176}
177impl<W: StoreBytes + BSwap + Copy, G> StoreBytes for x2<W, G> {
178    #[inline(always)]
179    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
180        let input = input.split_at(input.len() / 2);
181        x2::new([W::unsafe_read_le(input.0), W::unsafe_read_le(input.1)])
182    }
183    #[inline(always)]
184    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
185        let input = input.split_at(input.len() / 2);
186        x2::new([W::unsafe_read_be(input.0), W::unsafe_read_be(input.1)])
187    }
188    #[inline(always)]
189    fn write_le(self, out: &mut [u8]) {
190        let out = out.split_at_mut(out.len() / 2);
191        self.0[0].write_le(out.0);
192        self.0[1].write_le(out.1);
193    }
194    #[inline(always)]
195    fn write_be(self, out: &mut [u8]) {
196        let out = out.split_at_mut(out.len() / 2);
197        self.0[0].write_be(out.0);
198        self.0[1].write_be(out.1);
199    }
200}
201impl<W: Copy + LaneWords4, G: Copy> LaneWords4 for x2<W, G> {
202    #[inline(always)]
203    fn shuffle_lane_words2301(self) -> Self {
204        Self::new([
205            self.0[0].shuffle_lane_words2301(),
206            self.0[1].shuffle_lane_words2301(),
207        ])
208    }
209    #[inline(always)]
210    fn shuffle_lane_words1230(self) -> Self {
211        Self::new([
212            self.0[0].shuffle_lane_words1230(),
213            self.0[1].shuffle_lane_words1230(),
214        ])
215    }
216    #[inline(always)]
217    fn shuffle_lane_words3012(self) -> Self {
218        Self::new([
219            self.0[0].shuffle_lane_words3012(),
220            self.0[1].shuffle_lane_words3012(),
221        ])
222    }
223}
224
225#[derive(Copy, Clone, Default, FromBytes, AsBytes, FromZeroes)]
226#[repr(transparent)]
227#[allow(non_camel_case_types)]
228pub struct x4<W>(pub [W; 4]);
229impl<W> x4<W> {
230    #[inline(always)]
231    pub fn new(xs: [W; 4]) -> Self {
232        x4(xs)
233    }
234}
// Implements the binary operator trait `$trait` (method `$fn`) for `x4` by
// applying the lane type's own operator to lanes 0 through 3 in order.
macro_rules! fwd_binop_x4 {
    ($trait:ident, $fn:ident) => {
        impl<W> $trait for x4<W>
        where
            W: $trait + Copy,
        {
            type Output = x4<W::Output>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                let [a0, a1, a2, a3] = self.0;
                let [b0, b1, b2, b3] = rhs.0;
                x4([a0.$fn(b0), a1.$fn(b1), a2.$fn(b2), a3.$fn(b3)])
            }
        }
    };
}
// Implements the compound-assignment trait `$trait` (method `$fn_assign`) for
// `x4` by updating lanes 0 through 3 in place, in order.
macro_rules! fwd_binop_assign_x4 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W> $trait for x4<W>
        where
            W: $trait + Copy,
        {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                let [r0, r1, r2, r3] = rhs.0;
                let [l0, l1, l2, l3] = &mut self.0;
                l0.$fn_assign(r0);
                l1.$fn_assign(r1);
                l2.$fn_assign(r2);
                l3.$fn_assign(r3);
            }
        }
    };
}
// Expands to a method `$fn(self) -> Self` that calls the lane type's `$fn`
// on lanes 0 through 3 in order. Intended for use inside an `impl ... for x4`.
macro_rules! fwd_unop_x4 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            let [lane0, lane1, lane2, lane3] = self.0;
            x4([lane0.$fn(), lane1.$fn(), lane2.$fn(), lane3.$fn()])
        }
    };
}
277impl<W> RotateEachWord32 for x4<W>
278where
279    W: Copy + RotateEachWord32,
280{
281    fwd_unop_x4!(rotate_each_word_right7);
282    fwd_unop_x4!(rotate_each_word_right8);
283    fwd_unop_x4!(rotate_each_word_right11);
284    fwd_unop_x4!(rotate_each_word_right12);
285    fwd_unop_x4!(rotate_each_word_right16);
286    fwd_unop_x4!(rotate_each_word_right20);
287    fwd_unop_x4!(rotate_each_word_right24);
288    fwd_unop_x4!(rotate_each_word_right25);
289}
290impl<W> RotateEachWord64 for x4<W>
291where
292    W: Copy + RotateEachWord64,
293{
294    fwd_unop_x4!(rotate_each_word_right32);
295}
296impl<W> RotateEachWord128 for x4<W> where W: RotateEachWord128 {}
297impl<W> BitOps0 for x4<W> where W: BitOps0 {}
298impl<W> BitOps32 for x4<W> where W: BitOps32 + BitOps0 {}
299impl<W> BitOps64 for x4<W> where W: BitOps64 + BitOps0 {}
300impl<W> BitOps128 for x4<W> where W: BitOps128 + BitOps0 {}
301fwd_binop_x4!(BitAnd, bitand);
302fwd_binop_x4!(BitOr, bitor);
303fwd_binop_x4!(BitXor, bitxor);
304fwd_binop_x4!(AndNot, andnot);
305fwd_binop_assign_x4!(BitAndAssign, bitand_assign);
306fwd_binop_assign_x4!(BitOrAssign, bitor_assign);
307fwd_binop_assign_x4!(BitXorAssign, bitxor_assign);
308impl<W> ArithOps for x4<W> where W: ArithOps {}
309fwd_binop_x4!(Add, add);
310fwd_binop_assign_x4!(AddAssign, add_assign);
311impl<W: Not + Copy> Not for x4<W> {
312    type Output = x4<W::Output>;
313    #[inline(always)]
314    fn not(self) -> Self::Output {
315        x4([
316            self.0[0].not(),
317            self.0[1].not(),
318            self.0[2].not(),
319            self.0[3].not(),
320        ])
321    }
322}
323impl<W> UnsafeFrom<[W; 4]> for x4<W> {
324    #[inline(always)]
325    unsafe fn unsafe_from(xs: [W; 4]) -> Self {
326        x4(xs)
327    }
328}
329impl<W: Copy> Vec4<W> for x4<W> {
330    #[inline(always)]
331    fn extract(self, i: u32) -> W {
332        self.0[i as usize]
333    }
334    #[inline(always)]
335    fn insert(mut self, w: W, i: u32) -> Self {
336        self.0[i as usize] = w;
337        self
338    }
339}
340impl<W: Copy> Vec4Ext<W> for x4<W> {
341    #[inline(always)]
342    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
343    where
344        Self: Sized,
345    {
346        (
347            x4([a.0[0], b.0[0], c.0[0], d.0[0]]),
348            x4([a.0[1], b.0[1], c.0[1], d.0[1]]),
349            x4([a.0[2], b.0[2], c.0[2], d.0[2]]),
350            x4([a.0[3], b.0[3], c.0[3], d.0[3]]),
351        )
352    }
353}
354impl<W: Copy + Store<vec128_storage>> Store<vec512_storage> for x4<W> {
355    #[inline(always)]
356    unsafe fn unpack(p: vec512_storage) -> Self {
357        let p = p.split128();
358        x4([
359            W::unpack(p[0]),
360            W::unpack(p[1]),
361            W::unpack(p[2]),
362            W::unpack(p[3]),
363        ])
364    }
365}
366impl<W> From<x4<W>> for vec512_storage
367where
368    W: Copy,
369    vec128_storage: From<W>,
370{
371    #[inline(always)]
372    fn from(x: x4<W>) -> Self {
373        vec512_storage::new128([x.0[0].into(), x.0[1].into(), x.0[2].into(), x.0[3].into()])
374    }
375}
376impl<W> Swap64 for x4<W>
377where
378    W: Swap64 + Copy,
379{
380    fwd_unop_x4!(swap1);
381    fwd_unop_x4!(swap2);
382    fwd_unop_x4!(swap4);
383    fwd_unop_x4!(swap8);
384    fwd_unop_x4!(swap16);
385    fwd_unop_x4!(swap32);
386    fwd_unop_x4!(swap64);
387}
388impl<W: Copy> MultiLane<[W; 4]> for x4<W> {
389    #[inline(always)]
390    fn to_lanes(self) -> [W; 4] {
391        self.0
392    }
393    #[inline(always)]
394    fn from_lanes(lanes: [W; 4]) -> Self {
395        x4(lanes)
396    }
397}
398impl<W: BSwap + Copy> BSwap for x4<W> {
399    #[inline(always)]
400    fn bswap(self) -> Self {
401        x4([
402            self.0[0].bswap(),
403            self.0[1].bswap(),
404            self.0[2].bswap(),
405            self.0[3].bswap(),
406        ])
407    }
408}
409impl<W: StoreBytes + BSwap + Copy> StoreBytes for x4<W> {
410    #[inline(always)]
411    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
412        let n = input.len() / 4;
413        x4([
414            W::unsafe_read_le(&input[..n]),
415            W::unsafe_read_le(&input[n..n * 2]),
416            W::unsafe_read_le(&input[n * 2..n * 3]),
417            W::unsafe_read_le(&input[n * 3..]),
418        ])
419    }
420    #[inline(always)]
421    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
422        let n = input.len() / 4;
423        x4([
424            W::unsafe_read_be(&input[..n]),
425            W::unsafe_read_be(&input[n..n * 2]),
426            W::unsafe_read_be(&input[n * 2..n * 3]),
427            W::unsafe_read_be(&input[n * 3..]),
428        ])
429    }
430    #[inline(always)]
431    fn write_le(self, out: &mut [u8]) {
432        let n = out.len() / 4;
433        self.0[0].write_le(&mut out[..n]);
434        self.0[1].write_le(&mut out[n..n * 2]);
435        self.0[2].write_le(&mut out[n * 2..n * 3]);
436        self.0[3].write_le(&mut out[n * 3..]);
437    }
438    #[inline(always)]
439    fn write_be(self, out: &mut [u8]) {
440        let n = out.len() / 4;
441        self.0[0].write_be(&mut out[..n]);
442        self.0[1].write_be(&mut out[n..n * 2]);
443        self.0[2].write_be(&mut out[n * 2..n * 3]);
444        self.0[3].write_be(&mut out[n * 3..]);
445    }
446}
447impl<W: Copy + LaneWords4> LaneWords4 for x4<W> {
448    #[inline(always)]
449    fn shuffle_lane_words2301(self) -> Self {
450        x4([
451            self.0[0].shuffle_lane_words2301(),
452            self.0[1].shuffle_lane_words2301(),
453            self.0[2].shuffle_lane_words2301(),
454            self.0[3].shuffle_lane_words2301(),
455        ])
456    }
457    #[inline(always)]
458    fn shuffle_lane_words1230(self) -> Self {
459        x4([
460            self.0[0].shuffle_lane_words1230(),
461            self.0[1].shuffle_lane_words1230(),
462            self.0[2].shuffle_lane_words1230(),
463            self.0[3].shuffle_lane_words1230(),
464        ])
465    }
466    #[inline(always)]
467    fn shuffle_lane_words3012(self) -> Self {
468        x4([
469            self.0[0].shuffle_lane_words3012(),
470            self.0[1].shuffle_lane_words3012(),
471            self.0[2].shuffle_lane_words3012(),
472            self.0[3].shuffle_lane_words3012(),
473        ])
474    }
475}