bytecount/lib.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
//! Count occurrences of a given byte, or the number of UTF-8 code points, in a
//! byte slice, fast.
//!
//! This crate has the [`count`](fn.count.html) function to count byte
//! occurrences (for example newlines) in a larger `&[u8]` slice.
//!
//! For example:
//!
//! ```rust
//! assert_eq!(5, bytecount::count(b"Hello, this is the bytecount crate!", b' '));
//! ```
//!
//! There is also a [`num_chars`](fn.num_chars.html) function to count
//! the number of UTF-8 characters in a slice. It works the same as
//! `str::chars().count()` for byte slices of correct UTF-8 character
//! sequences. The result will likely be off for invalid sequences,
//! although the result is guaranteed to be between `0` and
//! `[_]::len()`, inclusive.
//!
//! Example:
//!
//! ```rust
//! let sequence = "Wenn ich ein Vöglein wär, flög ich zu Dir!";
//! assert_eq!(sequence.chars().count(),
//! bytecount::num_chars(sequence.as_bytes()));
//! ```
//!
//! For completeness and easy comparison, the "naive" versions of both
//! `count` and `num_chars` are provided. Those are also faster if used on
//! predominantly small strings. The
//! [`naive_count_32`](fn.naive_count_32.html) function can be faster
//! still on small strings.
// Opt in to the unstable `portable_simd` language feature, but only when the
// `generic-simd` Cargo feature is enabled.
#![cfg_attr(feature = "generic-simd", feature(portable_simd))]
// Every public item in this crate must be documented.
#![deny(missing_docs)]
// Build as `no_std` unless `runtime-dispatch-simd` is enabled; the runtime
// dispatch code below uses `is_x86_feature_detected!`, which lives in `std`.
#![cfg_attr(not(feature = "runtime-dispatch-simd"), no_std)]
// `mem` comes from `core` or `std`, mirroring the `no_std` switch above, so
// the rest of the file can simply write `mem::size_of`.
#[cfg(not(feature = "runtime-dispatch-simd"))]
use core::mem;
#[cfg(feature = "runtime-dispatch-simd")]
use std::mem;
// Naive implementations; everything public in `naive` is re-exported at the
// crate root.
mod naive;
pub use naive::*;
// Always compiled; used by `count` / `num_chars` for inputs of at least
// `size_of::<usize>()` bytes when no SIMD path returned first.
mod integer_simd;
// The `simd` module is compiled when at least one of these holds:
//   * `runtime-dispatch-simd` is enabled on x86 / x86_64,
//   * the target is aarch64,
//   * the target is wasm32,
//   * `generic-simd` is enabled.
#[cfg(any(
all(
feature = "runtime-dispatch-simd",
any(target_arch = "x86", target_arch = "x86_64")
),
target_arch = "aarch64",
target_arch = "wasm32",
feature = "generic-simd"
))]
mod simd;
/// Count occurrences of a byte in a slice of bytes, fast.
///
/// The implementation is picked from the length of `haystack` and from what
/// was compiled in, trying the candidates in this order:
///
/// * 32 bytes or more: the AVX2 routine (x86_64 with `runtime-dispatch-simd`,
///   only if AVX2 is detected at runtime), then the `generic-simd` routine.
/// * 16 bytes or more: the SSE2 routine (x86 / x86_64 with
///   `runtime-dispatch-simd` and without `generic-simd`, only if SSE2 is
///   detected at runtime), the aarch64 routine (without `generic-simd`), or
///   the wasm32 routine.
/// * `size_of::<usize>()` bytes or more: the `integer_simd` routine.
/// * anything shorter: [`naive_count`].
///
/// # Examples
///
/// ```
/// let s = b"This is a Text with spaces";
/// let number_of_spaces = bytecount::count(s, b' ');
/// assert_eq!(number_of_spaces, 5);
/// ```
pub fn count(haystack: &[u8], needle: u8) -> usize {
    if haystack.len() >= 32 {
        #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
        {
            if is_x86_feature_detected!("avx2") {
                // SAFETY: AVX2 support was verified at runtime just above.
                // NOTE(review): presumably that, together with the length
                // check, is the callee's whole contract — confirm against
                // `simd::x86_avx2`.
                unsafe {
                    return simd::x86_avx2::chunk_count(haystack, needle);
                }
            }
        }

        #[cfg(feature = "generic-simd")]
        return simd::generic::chunk_count(haystack, needle);
    }

    if haystack.len() >= 16 {
        #[cfg(all(
            feature = "runtime-dispatch-simd",
            any(target_arch = "x86", target_arch = "x86_64"),
            not(feature = "generic-simd")
        ))]
        {
            if is_x86_feature_detected!("sse2") {
                // SAFETY: SSE2 support was verified at runtime just above.
                // NOTE(review): confirm the callee has no further
                // preconditions beyond the length check.
                unsafe {
                    return simd::x86_sse2::chunk_count(haystack, needle);
                }
            }
        }

        // The feature is spelled `generic-simd` (with a hyphen) everywhere
        // else in this file; the misspelling `generic_simd` never matches any
        // feature, so the exclusion silently never applied.
        #[cfg(all(target_arch = "aarch64", not(feature = "generic-simd")))]
        {
            // NOTE(review): no runtime feature check is performed here;
            // presumably the callee only relies on what every aarch64 target
            // provides — confirm against `simd::aarch64`.
            unsafe {
                return simd::aarch64::chunk_count(haystack, needle);
            }
        }

        #[cfg(target_arch = "wasm32")]
        {
            // NOTE(review): no runtime feature check is performed here;
            // presumably the required SIMD support is a build-time
            // requirement of `simd::wasm` — confirm.
            unsafe {
                return simd::wasm::chunk_count(haystack, needle);
            }
        }
    }

    // No SIMD routine returned: use the word-at-a-time routine if the input
    // holds at least one machine word, otherwise count byte by byte.
    if haystack.len() >= mem::size_of::<usize>() {
        return integer_simd::chunk_count(haystack, needle);
    }

    naive_count(haystack, needle)
}
/// Count the number of UTF-8 encoded Unicode codepoints in a slice of bytes, fast.
///
/// This function is safe to use on any byte array, valid UTF-8 or not,
/// but the output is only meaningful for well-formed UTF-8.
///
/// The implementation is picked from the length of `utf8_chars` and from what
/// was compiled in, trying the candidates in this order:
///
/// * 32 bytes or more: the AVX2 routine (x86_64 with `runtime-dispatch-simd`,
///   only if AVX2 is detected at runtime), then the `generic-simd` routine.
/// * 16 bytes or more: the SSE2 routine (x86 / x86_64 with
///   `runtime-dispatch-simd` and without `generic-simd`, only if SSE2 is
///   detected at runtime), the aarch64 routine (without `generic-simd`), or
///   the wasm32 routine.
/// * `size_of::<usize>()` bytes or more: the `integer_simd` routine.
/// * anything shorter: [`naive_num_chars`].
///
/// # Example
///
/// ```
/// let swordfish = "メカジキ";
/// let char_count = bytecount::num_chars(swordfish.as_bytes());
/// assert_eq!(char_count, 4);
/// ```
pub fn num_chars(utf8_chars: &[u8]) -> usize {
    if utf8_chars.len() >= 32 {
        #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
        {
            if is_x86_feature_detected!("avx2") {
                // SAFETY: AVX2 support was verified at runtime just above.
                // NOTE(review): presumably that, together with the length
                // check, is the callee's whole contract — confirm against
                // `simd::x86_avx2`.
                unsafe {
                    return simd::x86_avx2::chunk_num_chars(utf8_chars);
                }
            }
        }

        #[cfg(feature = "generic-simd")]
        return simd::generic::chunk_num_chars(utf8_chars);
    }

    if utf8_chars.len() >= 16 {
        #[cfg(all(
            feature = "runtime-dispatch-simd",
            any(target_arch = "x86", target_arch = "x86_64"),
            not(feature = "generic-simd")
        ))]
        {
            if is_x86_feature_detected!("sse2") {
                // SAFETY: SSE2 support was verified at runtime just above.
                // NOTE(review): confirm the callee has no further
                // preconditions beyond the length check.
                unsafe {
                    return simd::x86_sse2::chunk_num_chars(utf8_chars);
                }
            }
        }

        // The feature is spelled `generic-simd` (with a hyphen) everywhere
        // else in this file; the misspelling `generic_simd` never matches any
        // feature, so the exclusion silently never applied.
        #[cfg(all(target_arch = "aarch64", not(feature = "generic-simd")))]
        {
            // NOTE(review): no runtime feature check is performed here;
            // presumably the callee only relies on what every aarch64 target
            // provides — confirm against `simd::aarch64`.
            unsafe {
                return simd::aarch64::chunk_num_chars(utf8_chars);
            }
        }

        #[cfg(target_arch = "wasm32")]
        {
            // NOTE(review): no runtime feature check is performed here;
            // presumably the required SIMD support is a build-time
            // requirement of `simd::wasm` — confirm.
            unsafe {
                return simd::wasm::chunk_num_chars(utf8_chars);
            }
        }
    }

    // No SIMD routine returned: use the word-at-a-time routine if the input
    // holds at least one machine word, otherwise count byte by byte.
    if utf8_chars.len() >= mem::size_of::<usize>() {
        return integer_simd::chunk_num_chars(utf8_chars);
    }

    naive_num_chars(utf8_chars)
}