1#[inline(always)]
4pub(super) const fn bitset_search<
5 const N: usize,
6 const CHUNK_SIZE: usize,
7 const N1: usize,
8 const CANONICAL: usize,
9 const CANONICALIZED: usize,
10>(
11 needle: u32,
12 chunk_idx_map: &[u8; N],
13 bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
14 bitset_canonical: &[u64; CANONICAL],
15 bitset_canonicalized: &[(u8, u8); CANONICALIZED],
16) -> bool {
17 let bucket_idx = (needle / 64) as usize;
18 let chunk_map_idx = bucket_idx / CHUNK_SIZE;
19 let chunk_piece = bucket_idx % CHUNK_SIZE;
20 let chunk_idx = if chunk_map_idx < chunk_idx_map.len() {
22 chunk_idx_map[chunk_map_idx]
23 } else {
24 return false;
25 };
26 let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize;
27 let word = if idx < bitset_canonical.len() {
29 bitset_canonical[idx]
30 } else {
31 let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()];
32 let mut word = bitset_canonical[real_idx as usize];
33 let should_invert = mapping & (1 << 6) != 0;
34 if should_invert {
35 word = !word;
36 }
37 let quantity = mapping & ((1 << 6) - 1);
39 if mapping & (1 << 7) != 0 {
40 word >>= quantity as u64;
42 } else {
43 word = word.rotate_left(quantity as u32);
44 }
45 word
46 };
47 (word & (1 << (needle % 64) as u64)) != 0
48}
49
50#[repr(transparent)]
51pub(super) struct ShortOffsetRunHeader(pub(super) u32);
52
53impl ShortOffsetRunHeader {
54 pub(super) const fn new(start_index: usize, prefix_sum: u32) -> Self {
55 assert!(start_index < (1 << 11));
56 assert!(prefix_sum < (1 << 21));
57
58 Self((start_index as u32) << 21 | prefix_sum)
59 }
60
61 #[inline]
62 pub(super) const fn start_index(&self) -> usize {
63 (self.0 >> 21) as usize
64 }
65
66 #[inline]
67 pub(super) const fn prefix_sum(&self) -> u32 {
68 self.0 & ((1 << 21) - 1)
69 }
70}
71
72#[inline(always)]
77pub(super) unsafe fn skip_search<const SOR: usize, const OFFSETS: usize>(
78 needle: char,
79 short_offset_runs: &[ShortOffsetRunHeader; SOR],
80 offsets: &[u8; OFFSETS],
81) -> bool {
82 let needle = needle as u32;
83
84 let last_idx =
85 match short_offset_runs.binary_search_by_key(&(needle << 11), |header| header.0 << 11) {
86 Ok(idx) => idx + 1,
87 Err(idx) => idx,
88 };
89 unsafe { crate::intrinsics::assume(last_idx < SOR) };
101
102 let mut offset_idx = short_offset_runs[last_idx].start_index();
103 let length = if let Some(next) = short_offset_runs.get(last_idx + 1) {
104 (*next).start_index() - offset_idx
105 } else {
106 offsets.len() - offset_idx
107 };
108
109 let prev =
110 last_idx.checked_sub(1).map(|prev| short_offset_runs[prev].prefix_sum()).unwrap_or(0);
111
112 let total = needle - prev;
113 let mut prefix_sum = 0;
114 for _ in 0..(length - 1) {
115 unsafe { crate::intrinsics::assume(offset_idx < OFFSETS) };
122 let offset = offsets[offset_idx];
123 prefix_sum += offset as u32;
124 if prefix_sum > total {
125 break;
126 }
127 offset_idx += 1;
128 }
129 offset_idx % 2 == 1
130}
131
132#[inline(always)]
138pub(super) unsafe fn case_conversion(
139 c: char,
140 ascii_fn: fn(char) -> char,
141 table: &[(char, u32)],
142 multi: &[[char; 3]],
143) -> [char; 3] {
144 const INDEX_MASK: u32 = 1 << 22;
145
146 if c.is_ascii() {
147 return [ascii_fn(c), '\0', '\0'];
148 }
149
150 let Ok(i) = table.binary_search_by(|&(key, _)| key.cmp(&c)) else {
151 return [c, '\0', '\0'];
152 };
153
154 let u = table[i].1;
155 match char::from_u32(u) {
156 Option::Some(c) => [c, '\0', '\0'],
157 Option::None => {
158 unsafe { *multi.get_unchecked((u & (INDEX_MASK - 1)) as usize) }
160 }
161 }
162}