SDSL 3.0.1
Succinct Data Structure Library
bit_vector_il.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef SDSL_BIT_VECTOR_IL
10#define SDSL_BIT_VECTOR_IL
11
12#include <queue>
13
14#include <sdsl/int_vector.hpp>
15#include <sdsl/iterators.hpp>
16#include <sdsl/util.hpp>
17
19namespace sdsl
20{
21
22template <uint8_t t_b = 1, uint32_t t_bs = 512> // forward declaration needed for friend declaration
23class rank_support_il; // in bit_vector_il
24
25template <uint8_t t_b = 1, uint32_t t_bs = 512> // forward declaration needed for friend declaration
26class select_support_il; // in bit_vector_il
27
28template <class T>
29constexpr bool power_of_two(T x)
30{
31 return std::is_integral<T>::value and x > 1 and !(x & (x - 1));
32}
33
35
43template <uint32_t t_bs = 512>
45{
46 static_assert(t_bs >= 64, "bit_vector_il: blocksize must be be at least 64 bits.");
47 static_assert(power_of_two(t_bs), "bit_vector_il: blocksize must be a power of two.");
48
49 public:
56
57 friend class rank_support_il<1, t_bs>;
58 friend class rank_support_il<0, t_bs>;
59 friend class select_support_il<1, t_bs>;
60 friend class select_support_il<0, t_bs>;
61
66
67 private:
68 size_type m_size = 0;
69 size_type m_block_num = 0;
70 size_type m_superblocks = 0;
71 size_type m_block_shift = 0;
72 int_vector<64> m_data;
73 int_vector<64> m_rank_samples;
74
75 // precondition: m_rank_samples.size() <= m_superblocks
76 void init_rank_samples()
77 {
78 uint32_t blockSize_U64 = bits::hi(t_bs >> 6);
79 size_type idx = 0;
80 std::queue<size_type> lbs, rbs;
81 lbs.push(0);
82 rbs.push(m_superblocks);
83 while (!lbs.empty())
84 {
85 size_type lb = lbs.front();
86 lbs.pop();
87 size_type rb = rbs.front();
88 rbs.pop();
89 if (/*lb < rb and*/ idx < m_rank_samples.size())
90 {
91 size_type mid = lb + (rb - lb) / 2; // select mid \in [lb..rb)
92 size_type pos = (mid << blockSize_U64) + mid;
93 m_rank_samples[idx++] = m_data[pos];
94 lbs.push(lb);
95 rbs.push(mid);
96 lbs.push(mid + 1);
97 rbs.push(rb);
98 }
99 }
100 }
101
102 public:
104 bit_vector_il(const bit_vector_il &) = default;
108
110 {
111 m_size = bv.size();
112 /* calculate the number of superblocks */
113 // each block of size > 0 gets suberblock in which we store the cumulative sum up to this block
114 m_superblocks = (m_size + t_bs) / t_bs;
115 m_block_shift = bits::hi(t_bs);
116 /* allocate new data */
117 size_type blocks = (m_size + 64) / 64;
118 size_type mem = blocks + m_superblocks + 1;
119 // ^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^ ^
120 // bit vector data | cum. sum data | sum after last block
121 m_data = int_vector<64>(mem);
122 m_block_num = mem;
123
124 /* assign data and calculate super block values */
125 const uint64_t * bvp = bv.data();
126
127 size_type j = 0; // 64-bit word counter in the m_data
128 size_type cum_sum = 0;
129 size_type sample_rate = t_bs / 64;
130 for (size_type i = 0, sample_cnt = sample_rate; i < blocks; ++i, ++sample_cnt)
131 {
132 if (sample_cnt == sample_rate)
133 {
134 m_data[j] = cum_sum;
135 sample_cnt = 0;
136 j++;
137 }
138 m_data[j] = bvp[i];
139 cum_sum += bits::cnt(m_data[j]);
140 j++;
141 }
142 m_data[j] = cum_sum; /* last superblock so we can always
143 get num_ones fast */
144 if (m_block_num > 1024 * 64)
145 {
146 // we store at most m_superblocks+1 rank_samples:
147 // we do a cache efficient binary search for the select on X=1024
148 // or X=the smallest power of two smaller than m_superblock
149 m_rank_samples.resize(std::min(1024ULL, 1ULL << bits::hi(m_superblocks)));
150 }
151 init_rank_samples();
152 }
153
155
161 {
162 assert(i < m_size);
163 size_type bs = i >> m_block_shift;
164 size_type block = bs + (i >> 6) + 1;
165 return ((m_data[block] >> (i & 63)) & 1ULL);
166 }
167
169
176 uint64_t get_int(size_type idx, uint8_t len = 64) const
177 {
178 assert(idx + len - 1 < m_size);
179 size_type bs = idx >> m_block_shift;
180 size_type b_block = bs + (idx >> 6) + 1;
181 bs = (idx + len - 1) >> m_block_shift;
182 size_type e_block = bs + ((idx + len - 1) >> 6) + 1;
183 if (b_block == e_block)
184 { // spans on block
185 return (m_data[b_block] >> (idx & 63)) & bits::lo_set[len];
186 }
187 else
188 { // spans two blocks
189 uint8_t b_len = 64 - (idx & 63);
190 return (m_data[b_block] >> (idx & 63)) | (m_data[e_block] & bits::lo_set[len - b_len]) << b_len;
191 }
192 }
193
195 size_type size() const { return m_size; }
196
198 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
199 {
200 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
201 size_type written_bytes = 0;
202 written_bytes += write_member(m_size, out, child, "size");
203 written_bytes += write_member(m_block_num, out, child, "block_num");
204 written_bytes += write_member(m_superblocks, out, child, "superblocks");
205 written_bytes += write_member(m_block_shift, out, child, "block_shift");
206 written_bytes += m_data.serialize(out, child, "data");
207 written_bytes += m_rank_samples.serialize(out, child, "rank_samples");
208 structure_tree::add_size(child, written_bytes);
209 return written_bytes;
210 }
211
213 void load(std::istream & in)
214 {
215 read_member(m_size, in);
216 read_member(m_block_num, in);
217 read_member(m_superblocks, in);
218 read_member(m_block_shift, in);
219 m_data.load(in);
220 m_rank_samples.load(in);
221 }
222
223 template <typename archive_t>
224 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
225 {
226 ar(CEREAL_NVP(m_size));
227 ar(CEREAL_NVP(m_block_num));
228 ar(CEREAL_NVP(m_superblocks));
229 ar(CEREAL_NVP(m_block_shift));
230 ar(CEREAL_NVP(m_data));
231 ar(CEREAL_NVP(m_rank_samples));
232 }
233
234 template <typename archive_t>
235 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
236 {
237 ar(CEREAL_NVP(m_size));
238 ar(CEREAL_NVP(m_block_num));
239 ar(CEREAL_NVP(m_superblocks));
240 ar(CEREAL_NVP(m_block_shift));
241 ar(CEREAL_NVP(m_data));
242 ar(CEREAL_NVP(m_rank_samples));
243 }
244
245 iterator begin() const { return iterator(this, 0); }
246
247 iterator end() const { return iterator(this, size()); }
248
249 bool operator==(const bit_vector_il & v) const { return m_size == v.m_size && m_data == v.m_data; }
250
251 bool operator!=(const bit_vector_il & v) const { return !(*this == v); }
252};
253
254template <uint8_t t_b, uint32_t t_bs>
256{
257 static_assert(t_b == 1 or t_b == 0, "rank_support_il only supports bitpatterns 0 or 1.");
258
259 public:
262 enum
263 {
264 bit_pat = t_b
265 };
266 enum
267 {
268 bit_pat_len = (uint8_t)1
269 };
270
271 private:
272 const bit_vector_type * m_v;
273 size_type m_block_shift;
274 size_type m_block_mask;
275 size_type m_block_size_U64;
276
277 inline size_type rank1(size_type i) const
278 {
279 size_type SBlockNum = i >> m_block_shift;
280 size_type SBlockPos = (SBlockNum << m_block_size_U64) + SBlockNum;
281 uint64_t resp = m_v->m_data[SBlockPos];
282 const uint64_t * B = (m_v->m_data.data() + (SBlockPos + 1));
283 uint64_t rem = i & 63;
284 uint64_t bits = (i & m_block_mask) - rem;
285 while (bits)
286 {
287 resp += bits::cnt(*B++);
288 bits -= 64;
289 }
290 resp += bits::cnt(*B & bits::lo_set[rem]);
291 return resp;
292 }
293
294 inline size_type rank0(size_type i) const
295 {
296 size_type SBlockNum = i >> m_block_shift;
297 size_type SBlockPos = (SBlockNum << m_block_size_U64) + SBlockNum;
298 uint64_t resp = (SBlockNum << m_block_shift) - m_v->m_data[SBlockPos];
299 const uint64_t * B = (m_v->m_data.data() + (SBlockPos + 1));
300 uint64_t rem = i & 63;
301 uint64_t bits = (i & m_block_mask) - rem;
302 while (bits)
303 {
304 resp += bits::cnt(~(*B));
305 B++;
306 bits -= 64;
307 }
308 resp += bits::cnt((~(*B)) & bits::lo_set[rem]);
309 return resp;
310 }
311
312 public:
313 rank_support_il(const bit_vector_type * v = nullptr)
314 {
315 set_vector(v);
316 m_block_shift = bits::hi(t_bs);
317 m_block_mask = t_bs - 1;
318 m_block_size_U64 = bits::hi(t_bs >> 6);
319 }
320
323 {
324 if (t_b) return rank1(i);
325 return rank0(i);
326 }
327
328 size_type operator()(size_type i) const { return rank(i); }
329
330 size_type size() const { return m_v->size(); }
331
332 void set_vector(const bit_vector_type * v = nullptr) { m_v = v; }
333
335 {
336 if (this != &rs) { set_vector(rs.m_v); }
337 return *this;
338 }
339
340 void load(std::istream &, const bit_vector_type * v = nullptr) { set_vector(v); }
341
342 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
343 {
344 return serialize_empty_object(out, v, name, this);
345 }
346
347 template <typename archive_t>
348 void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
349 {}
350
351 template <typename archive_t>
353 {}
354
355 bool operator==(const rank_support_il & other) const noexcept { return (*m_v == *other.m_v); }
356
357 bool operator!=(const rank_support_il & other) const noexcept { return !(*this == other); }
358};
359
360template <uint8_t t_b, uint32_t t_bs>
362{
363 static_assert(t_b == 1 or t_b == 0, "select_support_il only supports bitpatterns 0 or 1.");
364
365 public:
368 enum
369 {
370 bit_pat = t_b
371 };
372 enum
373 {
374 bit_pat_len = (uint8_t)1
375 };
376
377 private:
378 const bit_vector_type * m_v;
379 size_type m_superblocks;
380 size_type m_block_shift;
381 size_type m_block_size_U64;
382
384 size_type select1(size_type i) const
385 {
386 size_type lb = 0, rb = m_v->m_superblocks; // search interval [lb..rb)
387 size_type res = 0;
388 size_type idx = 0; // index in m_rank_samples
389 /* binary search over super blocks */
390 // invariant: lb==0 or m_data[ pos(lb-1) ] < i
391 // m_data[ pos(rb) ] >= i, initial since i < rank(size())
392 while (lb < rb)
393 {
394 size_type mid = (lb + rb) / 2; // select mid \in [lb..rb)
395#ifndef NOSELCACHE
396 if (idx < m_v->m_rank_samples.size())
397 {
398 if (m_v->m_rank_samples[idx] >= i)
399 {
400 idx = (idx << 1) + 1;
401 rb = mid;
402 }
403 else
404 {
405 idx = (idx << 1) + 2;
406 lb = mid + 1;
407 }
408 }
409 else
410 {
411#endif
412 size_type pos = (mid << m_block_size_U64) + mid;
413 // ^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^
414 // data blocks to jump superblock position
415 if (m_v->m_data[pos] >= i) { rb = mid; }
416 else
417 {
418 lb = mid + 1;
419 }
420#ifndef NOSELCACHE
421 }
422#endif
423 }
424 res = (rb - 1) << m_block_shift;
425 /* iterate in 64 bit steps */
426 const uint64_t * w = m_v->m_data.data() + ((rb - 1) << m_block_size_U64) + (rb - 1);
427 i -= *w; // subtract the cumulative sum before the superblock
428 ++w; /* step into the data */
429 size_type ones = bits::cnt(*w);
430 while (ones < i)
431 {
432 i -= ones;
433 ++w;
434 ones = bits::cnt(*w);
435 res += 64;
436 }
437 /* handle last word */
438 res += bits::sel(*w, i);
439 return res;
440 }
441
443 size_type select0(size_type i) const
444 {
445 size_type lb = 0, rb = m_v->m_superblocks; // search interval [lb..rb)
446 size_type res = 0;
447 size_type idx = 0; // index in m_rank_samples
448 /* binary search over super blocks */
449 // invariant: lb==0 or m_data[ pos(lb-1) ] < i
450 // m_data[ pos(rb) ] >= i, initial since i < rank(size())
451 while (lb < rb)
452 {
453 size_type mid = (lb + rb) / 2; // select mid \in [lb..rb)
454#ifndef NOSELCACHE
455 if (idx < m_v->m_rank_samples.size())
456 {
457 if (((mid << m_block_shift) - m_v->m_rank_samples[idx]) >= i)
458 {
459 idx = (idx << 1) + 1;
460 rb = mid;
461 }
462 else
463 {
464 idx = (idx << 1) + 2;
465 lb = mid + 1;
466 }
467 }
468 else
469 {
470#endif
471 size_type pos = (mid << m_block_size_U64) + mid;
472 // ^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^
473 // data blocks to jump superblock position
474 if (((mid << m_block_shift) - m_v->m_data[pos]) >= i) { rb = mid; }
475 else
476 {
477 lb = mid + 1;
478 }
479#ifndef NOSELCACHE
480 }
481#endif
482 }
483 res = (rb - 1) << m_block_shift;
484
485 /* iterate in 64 bit steps */
486 const uint64_t * w = m_v->m_data.data() + ((rb - 1) << m_block_size_U64) + (rb - 1);
487 i = i - (res - *w); // substract the cumulative sum before the superblock
488 ++w; /* step into the data */
489 size_type zeros = bits::cnt(~*w);
490 while (zeros < i)
491 {
492 i -= zeros;
493 ++w;
494 zeros = bits::cnt(~*w);
495 res += 64;
496 }
497 /* handle last word */
498 res += bits::sel(~*w, i);
499 return res;
500 }
501
502 public:
504 {
505 set_vector(v);
506 m_block_shift = bits::hi(t_bs);
507 m_block_size_U64 = bits::hi(t_bs >> 6);
508 }
509
512 {
513 if (t_b) return select1(i);
514 return select0(i);
515 }
516
517 size_type operator()(size_type i) const { return select(i); }
518
519 size_type size() const { return m_v->size(); }
520
521 void set_vector(const bit_vector_type * v = nullptr) { m_v = v; }
522
524 {
525 if (this != &rs) { set_vector(rs.m_v); }
526 return *this;
527 }
528
529 void load(std::istream &, const bit_vector_type * v = nullptr) { set_vector(v); }
530
531 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
532 {
533 return serialize_empty_object(out, v, name, this);
534 }
535
536 template <typename archive_t>
537 void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
538 {}
539
540 template <typename archive_t>
542 {}
543
544 bool operator==(const select_support_il & other) const noexcept { return (*m_v == *other.m_v); }
545
546 bool operator!=(const select_support_il & other) const noexcept { return !(*this == other); }
547};
548
549} // end namespace sdsl
550#endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
A bit vector which interleaves the original bit_vector with rank information.
bit_vector_il(bit_vector_il &&)=default
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void load(std::istream &in)
Loads the data structure from the given istream.
bit_vector_il(const bit_vector_il &)=default
bit_vector::difference_type difference_type
bit_vector_il & operator=(const bit_vector_il &)=default
value_type operator[](size_type i) const
Accessing the i-th element of the original bit_vector.
uint64_t get_int(size_type idx, uint8_t len=64) const
Get the integer value of the binary string of length len starting at position idx.
bit_vector_il & operator=(bit_vector_il &&)=default
select_support_il< 1, t_bs > select_1_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
bool operator==(const bit_vector_il &v) const
bool operator!=(const bit_vector_il &v) const
size_type size() const
Returns the size of the original bit vector.
select_support_il< 0, t_bs > select_0_type
rank_support_il< 1, t_bs > rank_1_type
random_access_const_iterator< bit_vector_il > iterator
iterator begin() const
bit_vector_il(const bit_vector &bv)
bit_vector::size_type size_type
iterator end() const
rank_support_il< 0, t_bs > rank_0_type
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
int_vector_size_type size_type
Definition: int_vector.hpp:229
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:566
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:521
Generic iterator for a random access container.
Definition: iterators.hpp:24
void set_vector(const bit_vector_type *v=nullptr)
bool operator==(const rank_support_il &other) const noexcept
rank_support_il(const bit_vector_type *v=nullptr)
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
void load(std::istream &, const bit_vector_type *v=nullptr)
size_type rank(size_type i) const
Returns the position of the i-th occurrence in the bit vector.
rank_support_il & operator=(const rank_support_il &rs)
size_type operator()(size_type i) const
size_type size() const
bit_vector_il< t_bs > bit_vector_type
bool operator!=(const rank_support_il &other) const noexcept
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
bit_vector::size_type size_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
size_type select(size_type i) const
Returns the position of the i-th occurrence in the bit vector.
bool operator!=(const select_support_il &other) const noexcept
size_type operator()(size_type i) const
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
bool operator==(const select_support_il &other) const noexcept
size_type size() const
select_support_il(const bit_vector_type *v=nullptr)
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
bit_vector_il< t_bs > bit_vector_type
void load(std::istream &, const bit_vector_type *v=nullptr)
bit_vector::size_type size_type
void set_vector(const bit_vector_type *v=nullptr)
select_support_il & operator=(const select_support_il &rs)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
Namespace for the succinct data structure library.
bits_impl<> bits
Definition: bits.hpp:957
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
constexpr bool power_of_two(T x)
size_t serialize_empty_object(std::ostream &, structure_tree_node *v=nullptr, std::string name="", const T *t=nullptr)
Definition: io.hpp:325
A helper class for bitwise tricks on 64 bit words.
Definition: bits.hpp:46
static constexpr uint32_t sel(uint64_t x, uint32_t i)
Calculate the position of the i-th rightmost 1 bit in the 64bit integer x.
Definition: bits.hpp:584
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:651
static constexpr uint64_t cnt(uint64_t x)
Counts the number of set bits in x.
Definition: bits.hpp:484
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:187
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.