bytecount/
lib.rs

//! Count occurrences of a given byte, or the number of UTF-8 code points, in a
//! byte slice, fast.
3//!
//! This crate has the [`count`](fn.count.html) function to count byte
//! occurrences (for example newlines) in a larger `&[u8]` slice.
6//!
7//! For example:
8//!
9//! ```rust
10//! assert_eq!(5, bytecount::count(b"Hello, this is the bytecount crate!", b' '));
11//! ```
12//!
//! There is also a [`num_chars`](fn.num_chars.html) function to count
//! the number of UTF-8 characters in a slice. It works the same as
//! `str::chars().count()` for byte slices of correct UTF-8 character
//! sequences. The result will likely be off for invalid sequences,
//! although the result is guaranteed to be between `0` and
//! `[_]::len()`, inclusive.
19//!
20//! Example:
21//!
22//! ```rust
23//! let sequence = "Wenn ich ein Vöglein wär, flög ich zu Dir!";
24//! assert_eq!(sequence.chars().count(),
25//!            bytecount::num_chars(sequence.as_bytes()));
26//! ```
27//!
//! For completeness and easy comparison, the "naive" versions of both
//! `count` and `num_chars` are provided. Those are also faster if used on
//! predominantly small strings. The
//! [`naive_count_32`](fn.naive_count_32.html) function can be faster
//! still on small strings.
33
34#![cfg_attr(feature = "generic-simd", feature(portable_simd))]
35#![deny(missing_docs)]
36#![cfg_attr(not(feature = "runtime-dispatch-simd"), no_std)]
37
38#[cfg(not(feature = "runtime-dispatch-simd"))]
39use core::mem;
40#[cfg(feature = "runtime-dispatch-simd")]
41use std::mem;
42
43mod naive;
44pub use naive::*;
45mod integer_simd;
46
47#[cfg(any(
48    all(
49        feature = "runtime-dispatch-simd",
50        any(target_arch = "x86", target_arch = "x86_64")
51    ),
52    all(target_arch = "aarch64", target_endian = "little"),
53    target_arch = "wasm32",
54    feature = "generic-simd"
55))]
56mod simd;
57
58/// Count occurrences of a byte in a slice of bytes, fast
59///
60/// # Examples
61///
62/// ```
63/// let s = b"This is a Text with spaces";
64/// let number_of_spaces = bytecount::count(s, b' ');
65/// assert_eq!(number_of_spaces, 5);
66/// ```
67pub fn count(haystack: &[u8], needle: u8) -> usize {
68    if haystack.len() >= 32 {
69        #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
70        {
71            if is_x86_feature_detected!("avx2") {
72                unsafe {
73                    return simd::x86_avx2::chunk_count(haystack, needle);
74                }
75            }
76        }
77
78        #[cfg(feature = "generic-simd")]
79        return simd::generic::chunk_count(haystack, needle);
80    }
81
82    if haystack.len() >= 16 {
83        #[cfg(all(
84            feature = "runtime-dispatch-simd",
85            any(target_arch = "x86", target_arch = "x86_64"),
86            not(feature = "generic-simd")
87        ))]
88        {
89            if is_x86_feature_detected!("sse2") {
90                unsafe {
91                    return simd::x86_sse2::chunk_count(haystack, needle);
92                }
93            }
94        }
95        #[cfg(all(
96            target_arch = "aarch64",
97            target_endian = "little",
98            not(feature = "generic-simd")
99        ))]
100        {
101            unsafe {
102                return simd::aarch64::chunk_count(haystack, needle);
103            }
104        }
105
106        #[cfg(target_arch = "wasm32")]
107        {
108            unsafe {
109                return simd::wasm::chunk_count(haystack, needle);
110            }
111        }
112    }
113
114    if haystack.len() >= mem::size_of::<usize>() {
115        return integer_simd::chunk_count(haystack, needle);
116    }
117
118    naive_count(haystack, needle)
119}
120
121/// Count the number of UTF-8 encoded Unicode codepoints in a slice of bytes, fast
122///
123/// This function is safe to use on any byte array, valid UTF-8 or not,
124/// but the output is only meaningful for well-formed UTF-8.
125///
126/// # Example
127///
128/// ```
129/// let swordfish = "メカジキ";
130/// let char_count = bytecount::num_chars(swordfish.as_bytes());
131/// assert_eq!(char_count, 4);
132/// ```
133pub fn num_chars(utf8_chars: &[u8]) -> usize {
134    if utf8_chars.len() >= 32 {
135        #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
136        {
137            if is_x86_feature_detected!("avx2") {
138                unsafe {
139                    return simd::x86_avx2::chunk_num_chars(utf8_chars);
140                }
141            }
142        }
143
144        #[cfg(feature = "generic-simd")]
145        return simd::generic::chunk_num_chars(utf8_chars);
146    }
147
148    if utf8_chars.len() >= 16 {
149        #[cfg(all(
150            feature = "runtime-dispatch-simd",
151            any(target_arch = "x86", target_arch = "x86_64"),
152            not(feature = "generic-simd")
153        ))]
154        {
155            if is_x86_feature_detected!("sse2") {
156                unsafe {
157                    return simd::x86_sse2::chunk_num_chars(utf8_chars);
158                }
159            }
160        }
161        #[cfg(all(
162            target_arch = "aarch64",
163            target_endian = "little",
164            not(feature = "generic-simd")
165        ))]
166        {
167            unsafe {
168                return simd::aarch64::chunk_num_chars(utf8_chars);
169            }
170        }
171
172        #[cfg(target_arch = "wasm32")]
173        {
174            unsafe {
175                return simd::wasm::chunk_num_chars(utf8_chars);
176            }
177        }
178    }
179
180    if utf8_chars.len() >= mem::size_of::<usize>() {
181        return integer_simd::chunk_num_chars(utf8_chars);
182    }
183
184    naive_num_chars(utf8_chars)
185}