SDSL 3.0.1
Succinct Data Structure Library
hyb_vector.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
4/*
5 * Copyright (c) 2014 Juha Karkkainen, Dominik Kempa and Simon J. Puglisi
6 *
7 * Permission is hereby granted, free of charge, to any person
8 * obtaining a copy of this software and associated documentation
9 * files (the "Software"), to deal in the Software without
10 * restriction, including without limitation the rights to use,
11 * copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following
14 * conditions:
15 *
16 * The above copyright notice and this permission notice shall be
17 * included in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
21 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
26 * OTHER DEALINGS IN THE SOFTWARE.
27 *
28 * Simon Gog made the following changes:
29 * - replace std::vectors by int_vectors
30 * - add support for rank0
31 * - added naive implementation of method get_int
32 * - TODO: added a naive implementation of select
33 */
34#ifndef INCLUDED_SDSL_HYB_VECTOR
35#define INCLUDED_SDSL_HYB_VECTOR
36
37#include <algorithm>
38#include <cstdlib>
39#include <iostream>
40#include <vector>
41
42#include <sdsl/int_vector.hpp>
43#include <sdsl/io.hpp>
44#include <sdsl/iterators.hpp>
45#include <sdsl/util.hpp>
46
47namespace sdsl
48{
49
50// Needed for friend declarations.
51template <uint8_t t_b = 1, uint32_t k_sb_rate = 16>
52class rank_support_hyb;
53template <uint8_t t_b = 1, uint32_t k_sb_rate = 16>
54class select_support_hyb;
55
57
65template <uint32_t k_sblock_rate = 16>
67{
68 public:
77
78 friend class rank_support_hyb<1, k_sblock_rate>;
79 friend class rank_support_hyb<0, k_sblock_rate>;
80 friend class select_support_hyb<1, k_sblock_rate>;
81 friend class select_support_hyb<0, k_sblock_rate>;
82
83 private:
84 static const uint32_t k_block_size;
85 static const uint32_t k_block_bytes;
86 static const uint32_t k_sblock_header_size;
87 static const uint32_t k_sblock_size;
88 static const uint32_t k_hblock_rate;
89
90 size_type m_size = 0; // original bitvector size
91 int_vector<8> m_trunk; // body of encoded blocks
92 int_vector<8> m_sblock_header; // sblock headers
93 int_vector<64> m_hblock_header; // hblock headers
94
95 public:
97 hyb_vector() = default;
98 hyb_vector(const hyb_vector & hybrid) = default;
99 hyb_vector(hyb_vector && hybrid) = default;
100 hyb_vector & operator=(const hyb_vector & hybrid) = default;
101 hyb_vector & operator=(hyb_vector && hybrid) = default;
102
105 {
106 m_size = bv.size();
107
108 // Compute the number of blocks.
109 size_type n_blocks = (m_size + k_block_size - 1) / k_block_size;
110 size_type n_sblocks = (n_blocks + k_sblock_rate - 1) / k_sblock_rate;
111 size_type n_hblocks = (n_blocks + k_hblock_rate - 1) / k_hblock_rate;
112
113 size_type trunk_size = 0;
114
115 // runs_lookup[i] = number of runs - 1 in the binary encoding of i.
116 int_vector<8> runs_lookup(65536, 0);
117 runs_lookup[0] = 0;
118 for (uint32_t i = 1; i < 65536; ++i)
119 {
120 runs_lookup[i] = runs_lookup[i >> 1];
121 if (i >= 32768) --runs_lookup[i];
122 if ((i & 1) != ((i >> 1) & 1)) ++runs_lookup[i];
123 }
124
125 // Compute optimal encoding for each block.
126 const uint64_t * bv_ptr = bv.data();
127 for (size_type block_id = 0; block_id < n_blocks; ++block_id)
128 {
129 size_type block_beg = block_id * k_block_size;
130 size_type block_end = block_beg + k_block_size;
131
132 uint32_t ones = 0;
133 uint32_t runs = 0;
134
135 if (block_end <= m_size)
136 {
137 // Count the number of ones, fast.
138 const uint64_t * ptr64 = bv_ptr;
139 for (uint8_t i = 0; i < 4; ++i) ones += bits::cnt(*ptr64++);
140
141 // Count the number of runs, fast.
142 ptr64 = bv_ptr;
143 for (uint8_t i = 0; i < 4; ++i)
144 {
145 // Count changes of bits inside 16-bit words of *ptr64.
146 for (uint8_t j = 0; j < 4; ++j) runs += runs_lookup[((*ptr64) >> (16 * j)) & 0xffff];
147
148 // Count changes of bits between 16-bit words of *ptr64.
149 for (uint8_t j = 0; j < 3; ++j)
150 runs += ((((*ptr64) >> (16 * j + 15)) & 1) ^ (((*ptr64) >> (16 * j + 16)) & 1));
151 ++ptr64;
152 }
153
154 // Count changes of bits between 64-bit words.
155 ptr64 = bv_ptr;
156 for (uint8_t i = 0; i < 3; ++i)
157 {
158 runs += ((((*ptr64) >> 63) & 1) ^ ((*(ptr64 + 1)) & 1));
159 ++ptr64;
160 }
161 ++runs;
162 }
163 else
164 {
165 // Count number of ones and runs, slow.
166 uint8_t prevbit = 2;
167 for (size_type i = block_beg; i < block_end; ++i)
168 {
169 uint8_t bit = (i < m_size ? bv[i] : 0);
170 if (bit == 1) ++ones;
171 if (bit != prevbit) ++runs;
172 prevbit = bit;
173 }
174 }
175
176 // Choose best encoding.
177 uint32_t minority_enc_size = std::min(ones, k_block_size - ones);
178 uint32_t runs_enc_size = (uint32_t)std::max(0, (int32_t)runs - 2);
179 uint32_t best_enc_size = std::min(minority_enc_size, runs_enc_size);
180 best_enc_size = std::min(best_enc_size, k_block_bytes);
181
182 // Update the answer.
183 trunk_size += best_enc_size;
184 bv_ptr += k_block_size / 64;
185 }
186
187 // Allocate the memory.
188 m_sblock_header = int_vector<8>(n_sblocks * k_sblock_header_size, 0);
189 m_hblock_header = int_vector<64>(n_hblocks * 2, 0);
190 m_trunk = int_vector<8>(trunk_size, 0);
191
192 // The actual encoding follows.
193 size_type tot_rank = 0; // stores current rank value
194 size_type sblock_ones = 0; // number of 1s inside superblock
195 size_type trunk_ptr = 0;
196
197 // Process blocks left to right.
198 bv_ptr = bv.data();
199 for (size_type block_id = 0; block_id < n_blocks; ++block_id)
200 {
201 size_type block_beg = block_id * k_block_size;
202 size_type block_end = block_beg + k_block_size;
203 size_type sblock_id = block_id / k_sblock_rate;
204 size_type hblock_id = block_id / k_hblock_rate;
205
206 // Update hblock header.
207 if (!(block_id % k_hblock_rate))
208 {
209 m_hblock_header[2 * hblock_id] = trunk_ptr;
210 m_hblock_header[2 * hblock_id + 1] = tot_rank;
211 }
212
213 // Update sblock header.
214 if (!(block_id % k_sblock_rate))
215 {
216 uint32_t * ptr = (uint32_t *)(((uint8_t *)m_sblock_header.data()) + k_sblock_header_size * sblock_id);
217 *ptr++ = trunk_ptr - m_hblock_header[2 * hblock_id];
218 *ptr = tot_rank - m_hblock_header[2 * hblock_id + 1];
219
220 // If the sblock is uniform, flip the bit.
221 if (sblock_id && (!sblock_ones || sblock_ones == k_sblock_size))
222 {
223 ptr = (uint32_t *)(((uint8_t *)m_sblock_header.data()) + k_sblock_header_size * (sblock_id - 1));
224 *ptr |= 0x80000000;
225 }
226
227 // Reset the number of ones in sblock.
228 sblock_ones = 0;
229 }
230
231 uint32_t ones = 0;
232 uint32_t runs = 0;
233
234 // Compute the number of 1-bits and runs inside current block.
235 if (block_end <= m_size)
236 {
237 // Count the number of ones, fast.
238 const uint64_t * ptr64 = bv_ptr;
239 for (uint8_t i = 0; i < 4; ++i) ones += bits::cnt(*ptr64++);
240
241 // Count the number of runs, fast.
242 ptr64 = bv_ptr;
243 for (uint8_t i = 0; i < 4; ++i)
244 {
245 for (uint8_t j = 0; j < 4; ++j) runs += runs_lookup[((*ptr64) >> (16 * j)) & 0xffff];
246 for (uint8_t j = 0; j < 3; ++j)
247 runs += ((((*ptr64) >> (16 * j + 15)) & 1) ^ (((*ptr64) >> (16 * j + 16)) & 1));
248 ++ptr64;
249 }
250 ptr64 = bv_ptr;
251 for (uint8_t i = 0; i < 3; ++i)
252 {
253 runs += ((((*ptr64) >> 63) & 1) ^ ((*(ptr64 + 1)) & 1));
254 ++ptr64;
255 }
256 ++runs;
257 }
258 else
259 {
260 // Count number of ones and runs, slow.
261 uint8_t prevbit = 2;
262 for (size_type i = block_beg; i < block_end; ++i)
263 {
264 uint8_t bit = (i < m_size ? bv[i] : 0);
265 if (bit == 1) ++ones;
266 if (bit != prevbit) ++runs;
267 prevbit = bit;
268 }
269 }
270 uint32_t zeros = k_block_size - ones;
271
272 // Store block popcount.
273 uint16_t * header_ptr16 = (uint16_t *)(((uint8_t *)m_sblock_header.data()) +
274 sblock_id * k_sblock_header_size + 8 +
275 (block_id % k_sblock_rate) * 2);
276
277 (*header_ptr16) = ones;
278 if (ones == k_block_size) (*header_ptr16) |= 0x200;
279
280 if (0 < ones && ones < k_block_size)
281 { // non uniform block
282 uint32_t minority_enc_size = std::min(ones, zeros);
283 uint32_t runs_enc_size = (uint32_t)std::max(0, (int32_t)runs - 2);
284 uint32_t best_enc_size = std::min(minority_enc_size, runs_enc_size);
285
286 if (k_block_bytes <= best_enc_size)
287 {
288 // Use plain encoding.
289 (*header_ptr16) |= (k_block_bytes << 10);
290
291 // Copy original 256 bits from bv into trunk.
292 if (block_end <= m_size)
293 {
294 for (uint8_t i = 0; i < 4; ++i)
295 {
296 *((uint64_t *)(((uint8_t *)m_trunk.data()) + trunk_ptr)) = *(bv_ptr + i);
297 trunk_ptr += 8;
298 }
299 }
300 else
301 {
302 for (size_type i = block_beg; i < block_end; i += 64)
303 {
304 uint64_t w = 0;
305 for (size_type j = i; j < std::min(i + 64, block_end); ++j)
306 {
307 uint8_t bit = (j < m_size ? bv[j] : 0);
308 if (bit) w |= ((uint64_t)1 << (j - i));
309 }
310 *((uint64_t *)(((uint8_t *)m_trunk.data()) + trunk_ptr)) = w;
311 trunk_ptr += 8;
312 }
313 }
314 }
315 else
316 {
317 if (runs_enc_size < minority_enc_size)
318 {
319
320 // Use runs encoding.
321 (*header_ptr16) |= (runs_enc_size << 10);
322 (*header_ptr16) |= (bv[block_beg] << 9);
323
324 if (block_end <= m_size)
325 {
326 // Find run ends, fast.
327 uint32_t runid = 0;
328 const uint64_t * ptr64 = bv_ptr;
329
330 uint64_t w = 0;
331 for (uint8_t i = 0; runid < runs_enc_size && i < 4; ++i)
332 {
333 // Check if run end aligns with the end of the 64-bit word.
334 if (i > 0 && (w & 1) != ((*ptr64) & 1)) m_trunk[trunk_ptr + runid++] = 64 * i - 1;
335
336 w = (*ptr64++);
337 for (uint8_t j = 0; runid < runs_enc_size && j < 63; ++j)
338 {
339 if ((w & 1) != ((w >> 1) & 1)) m_trunk[trunk_ptr + runid++] = j + i * 64;
340 w >>= 1;
341 }
342 }
343 trunk_ptr += runid;
344 }
345 else
346 {
347 // Find run ends, slow.
348 uint8_t prevbit = 2;
349 uint32_t runid = 0;
350
351 for (size_type i = block_beg; runid < runs_enc_size; ++i)
352 {
353 uint8_t bit = (i < m_size ? bv[i] : 0);
354 if (bit != prevbit && i != block_beg)
355 m_trunk[trunk_ptr + runid++] = (i - block_beg - 1);
356 prevbit = bit;
357 }
358 trunk_ptr += runid;
359 }
360 }
361 else
362 {
363 // Use minority encoding.
364 // Update sblock header.
365 (*header_ptr16) |= (minority_enc_size << 10);
366 if (ones < zeros) (*header_ptr16) |= 0x200;
367 uint8_t keybit = (ones < zeros);
368
369 // Find positions of 1-bits, fast.
370 if (block_end <= m_size)
371 {
372 const uint64_t * ptr64 = bv_ptr;
373 for (uint8_t i = 0; i < 4; ++i)
374 {
375 uint64_t w = (*ptr64++);
376 for (uint8_t j = 0; j < 64; ++j)
377 {
378 if ((w & 1) == keybit) m_trunk[trunk_ptr++] = j + 64 * i;
379 w >>= 1;
380 }
381 }
382 }
383 else
384 {
385 for (size_type i = block_beg; i < block_end; ++i)
386 {
387 uint8_t bit = (i < m_size ? bv[i] : 0);
388
389 if (bit == keybit) m_trunk[trunk_ptr++] = i - block_beg;
390 }
391 }
392 }
393 }
394 }
395
396 // Update global rank.
397 tot_rank += ones;
398 sblock_ones += ones;
399 bv_ptr += k_block_size / 64;
400 }
401 }
402
403 private:
405 value_type access0(size_type i) const
406 {
407 assert(i > 0);
408 assert(i <= m_size);
409
410 size_type block_id = (i - 1) / k_block_size;
411 size_type sblock_id = block_id / k_sblock_rate;
412 size_type hblock_id = block_id / k_hblock_rate;
413
414 size_type trunk_base = m_hblock_header[2 * hblock_id];
415
416 uint32_t local_i = i - block_id * k_block_size;
417
418 // Read superblock header.
419 const uint8_t * header_ptr8 = ((const uint8_t *)m_sblock_header.data()) + (sblock_id * k_sblock_header_size);
420 uint32_t * header_ptr32 = (uint32_t *)header_ptr8;
421 size_type trunk_ptr = trunk_base + ((*header_ptr32) & 0x3fffffff);
422 header_ptr8 += 8;
423
424 uint16_t * header_ptr16 = (uint16_t *)header_ptr8;
425
426 // Uniform superblock optimization.
427 if ((*header_ptr32) & 0x80000000) return (value_type)((*(header_ptr8 + 1)) & 0x01);
428
429 // Fast forward through preceding blocks in the superblock.
430 for (size_type j = sblock_id * k_sblock_rate; j != block_id; ++j)
431 {
432 trunk_ptr += ((*header_ptr16) >> 10); // Update trunk pointer.
433 ++header_ptr16;
434 }
435
436 const uint8_t * trunk_p = ((const uint8_t *)m_trunk.data()) + trunk_ptr;
437
438 uint32_t encoding_size = ((*header_ptr16) >> 10);
439 uint32_t ones = ((*header_ptr16) & 0x1ff);
440 uint32_t zeros = k_block_size - ones;
441
442 // Number of runs <= 2.
443 uint32_t special_bit = (((*header_ptr16) & 0x200) >> 9);
444 if (!encoding_size)
445 {
446 uint32_t first_run_length = special_bit * ones + (1 - special_bit) * zeros;
447 uint8_t inside_second_run = (first_run_length < local_i);
448 return (inside_second_run ^ special_bit);
449 }
450
451 // Number of runs > 2.
452 if (encoding_size < k_block_bytes)
453 {
454 if (std::min(ones, zeros) == encoding_size)
455 {
456 // Minority encoding.
457 uint32_t tot = 0;
458 while (tot < encoding_size && *trunk_p < local_i)
459 {
460 ++trunk_p;
461 ++tot;
462 }
463 uint8_t last_was_majority = ((!tot) || (*(trunk_p - 1) != local_i - 1));
464 return (last_was_majority ^ special_bit);
465 }
466
467 // Runs encoding.
468 if (special_bit)
469 {
470 uint32_t j = 0;
471 uint32_t acc = 0;
472
473 int32_t last = -1;
474 while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
475 {
476 acc += *trunk_p - last;
477 ++trunk_p;
478 last = *trunk_p;
479 ++trunk_p;
480 j += 2;
481 }
482
483 uint8_t access_i = 0;
484 if (j + 1 >= encoding_size)
485 {
486 if (j < encoding_size)
487 { // j == encoding_size - 1
488 if (local_i <= (uint32_t)(*trunk_p) + 1)
489 access_i = (((int32_t)local_i - last - 1) > 0);
490 else
491 {
492 acc += (int32_t)(*trunk_p) - last;
493 if (ones - acc <= k_block_size - local_i)
494 access_i = 0;
495 else
496 access_i = 1;
497 }
498 }
499 else
500 { // j == encoding_size
501 if ((int32_t)(ones - acc) < (int32_t)local_i - last - 1)
502 access_i = 0;
503 else
504 access_i = (((int32_t)local_i - last - 1) > 0);
505 }
506 }
507 else
508 {
509 if ((*trunk_p) < local_i - 1)
510 access_i = 0;
511 else
512 access_i = (((int32_t)local_i - last - 1) > 0);
513 }
514
515 return access_i;
516 }
517 else
518 {
519 uint32_t j = 0;
520 uint32_t acc = 0;
521 int32_t last = -1;
522
523 while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
524 {
525 acc += *trunk_p - last;
526 ++trunk_p;
527 last = *trunk_p;
528 ++trunk_p;
529 j += 2;
530 }
531
532 uint8_t access_i = 0;
533 if (j + 1 >= encoding_size)
534 {
535 if (j < encoding_size)
536 {
537 if (local_i <= (uint32_t)(*trunk_p) + 1)
538 access_i = (((int32_t)local_i - last - 1) == 0);
539 else
540 {
541 acc += (*trunk_p) - last;
542 if (zeros - acc <= k_block_size - local_i)
543 access_i = 1;
544 else
545 access_i = 0;
546 }
547 }
548 else
549 {
550 if ((int32_t)(zeros - acc) < (int32_t)local_i - last - 1)
551 access_i = 1;
552 else
553 access_i = ((local_i - last - 1) == 0);
554 }
555 }
556 else
557 {
558 if ((*trunk_p) < local_i - 1)
559 access_i = 1;
560 else
561 access_i = (((int32_t)local_i - last - 1) == 0);
562 }
563
564 return access_i;
565 }
566 }
567 else
568 {
569 // plain encoding.
570 uint64_t * trunk_ptr64 = (uint64_t *)(((uint8_t *)m_trunk.data()) + trunk_ptr);
571 uint32_t bit;
572 for (bit = 0; bit + 64 <= local_i; bit += 64) trunk_ptr64++;
573
574 uint8_t access_i = 0;
575 if (bit != local_i)
576 access_i = (((*trunk_ptr64) >> (local_i - bit - 1)) & 1);
577 else
578 access_i = (((*(trunk_ptr64 - 1)) >> 63) & 1);
579 return access_i;
580 }
581 }
582
583 public:
585
592 uint64_t get_int(size_type idx, const uint8_t len = 64) const
593 {
594 uint64_t res = 0;
595 for (size_t i = 0; i < len; ++i)
596 {
597 res <<= 1;
598 res |= (*this)[idx + len - 1 - i];
599 }
600 return res;
601 }
602
604 value_type operator[](size_type i) const { return access0(i + 1); }
605
607 size_type size() const { return m_size; }
608
610 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
611 {
612 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
613 size_type written_bytes = 0;
614 written_bytes += write_member(m_size, out, child, "size");
615 written_bytes += m_trunk.serialize(out, child, "trunk");
616 written_bytes += m_sblock_header.serialize(out, child, "sblock_header");
617 written_bytes += m_hblock_header.serialize(out, child, "hblock_header");
618 structure_tree::add_size(child, written_bytes);
619 return written_bytes;
620 }
621
623 void load(std::istream & in)
624 {
625 read_member(m_size, in);
626 m_trunk.load(in);
627 m_sblock_header.load(in);
628 m_hblock_header.load(in);
629 }
630
631 template <typename archive_t>
632 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
633 {
634 ar(CEREAL_NVP(m_size));
635 ar(CEREAL_NVP(m_trunk));
636 ar(CEREAL_NVP(m_sblock_header));
637 ar(CEREAL_NVP(m_hblock_header));
638 }
639
640 template <typename archive_t>
641 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
642 {
643 ar(CEREAL_NVP(m_size));
644 ar(CEREAL_NVP(m_trunk));
645 ar(CEREAL_NVP(m_sblock_header));
646 ar(CEREAL_NVP(m_hblock_header));
647 }
648
649 iterator begin() const { return iterator(this, 0); }
650
651 iterator end() const { return iterator(this, size()); }
652
653 bool operator==(const hyb_vector & v) const
654 {
655 return m_size == v.m_size && m_trunk == v.m_trunk && m_sblock_header == v.m_sblock_header &&
656 m_hblock_header == v.m_hblock_header;
657 }
658
659 bool operator!=(const hyb_vector & v) const { return !(*this == v); }
660};
661
662template <uint32_t k_sblock_rate>
663const uint32_t hyb_vector<k_sblock_rate>::k_block_size = 256;
664template <uint32_t k_sblock_rate>
665const uint32_t hyb_vector<k_sblock_rate>::k_block_bytes = 32;
666template <uint32_t k_sblock_rate>
667const uint32_t hyb_vector<k_sblock_rate>::k_sblock_header_size = 8 + 2 * k_sblock_rate;
668template <uint32_t k_sblock_rate>
669const uint32_t hyb_vector<k_sblock_rate>::k_sblock_size = 256 * k_sblock_rate;
670template <uint32_t k_sblock_rate>
671const uint32_t hyb_vector<k_sblock_rate>::k_hblock_rate = (1U << 31) / 256;
672
673template <uint8_t t_bp>
675{
677 static size_type adapt(size_type res, size_type) { return res; }
678};
679
680template <>
681struct rank_result<0>
682{
684 static size_type adapt(size_type res, size_type i) { return i - res; }
685};
686
688
692// TODO:
693template <uint8_t t_b, uint32_t k_sblock_rate>
695{
696 public:
699 enum
700 {
701 bit_pat = t_b
702 };
703 enum
704 {
705 bit_pat_len = (uint8_t)1
706 };
707
708 private:
709 const bit_vector_type * m_v;
710
711 public:
713 explicit rank_support_hyb(const bit_vector_type * v = nullptr) { set_vector(v); }
714
716 const size_type rank(size_type i) const
717 {
718 assert(m_v != nullptr);
719 assert(i <= m_v->size());
720
721 // Handle easy case.
722 if (i <= 0) return 0;
723
724 size_type block_id = (i - 1) / bit_vector_type::k_block_size;
725 size_type sblock_id = block_id / k_sblock_rate;
726 size_type hblock_id = block_id / bit_vector_type::k_hblock_rate;
727
728 size_type trunk_base = m_v->m_hblock_header[2 * hblock_id];
729 size_type hblock_rank = m_v->m_hblock_header[2 * hblock_id + 1];
730
731 uint32_t local_i = i - block_id * bit_vector_type::k_block_size;
732
733 // Read superblock header.
734 const uint8_t * header_ptr8 = ((const uint8_t *)(m_v->m_sblock_header.data())) +
735 (sblock_id * bit_vector_type::k_sblock_header_size);
736 uint32_t * header_ptr32 = (uint32_t *)header_ptr8;
737 size_type trunk_ptr = trunk_base + ((*header_ptr32) & 0x3fffffff);
738 size_type sblock_rank = *(header_ptr32 + 1);
739 header_ptr8 += 8;
740
741 uint16_t * header_ptr16 = (uint16_t *)header_ptr8;
742
743 // Uniform superblock optimization.
744 if ((*header_ptr32) & 0x80000000)
745 {
746 return rank_result<t_b>::adapt(hblock_rank + sblock_rank +
747 ((*(header_ptr8 + 1)) &
748 0x01) * (i -
749 sblock_id * bit_vector_type::k_sblock_size),
750 i);
751 }
752
753 // Fast forward through preceding blocks in the superblock.
754 size_type block_rank = 0;
755 for (size_type j = sblock_id * k_sblock_rate; j != block_id; ++j)
756 {
757 trunk_ptr += ((*header_ptr16) >> 10); // Update trunk pointer.
758 block_rank += ((*header_ptr16) & 0x1ff); // Add 1s in the block.
759 ++header_ptr16;
760 }
761
762 const uint8_t * trunk_p = ((uint8_t *)m_v->m_trunk.data()) + trunk_ptr;
763
764 uint32_t encoding_size = ((*header_ptr16) >> 10);
765 uint32_t ones = ((*header_ptr16) & 0x1ff);
766 uint32_t zeros = bit_vector_type::k_block_size - ones;
767
768 // Number of runs <= 2.
769 uint32_t special_bit = (((*header_ptr16) & 0x200) >> 9);
770 if (!encoding_size)
771 {
772 uint32_t first_run_length = special_bit * ones + (1 - special_bit) * zeros;
773 uint32_t local_rank = std::min(local_i, first_run_length);
774
775 return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank +
776 (special_bit * local_rank +
777 (1 -
778 special_bit) * (local_i -
779 local_rank)),
780 i);
781 }
782
783 // Number of runs > 2.
784 if (encoding_size < bit_vector_type::k_block_bytes)
785 {
786 if (std::min(ones, zeros) == encoding_size)
787 {
788 // Minority encoding.
789 uint32_t tot = 0;
790 while (tot < encoding_size && (*trunk_p++) < local_i) ++tot;
791
792 return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank + special_bit * tot +
793 (1 -
794 special_bit) * (local_i -
795 tot),
796 i);
797 }
798
799 // Runs encoding.
800 if (special_bit)
801 {
802 uint32_t j = 0;
803 uint32_t acc = 0;
804
805 int32_t last = -1;
806 while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
807 {
808 acc += *trunk_p - last;
809 ++trunk_p;
810 last = *trunk_p;
811 ++trunk_p;
812 j += 2;
813 }
814
815 if (j + 1 >= encoding_size)
816 {
817 if (j < encoding_size)
818 {
819 if (*trunk_p >= local_i)
820 acc += local_i - last - 1;
821 else
822 {
823 acc += (*trunk_p) - last;
824 acc += (ones - acc) - std::min(ones - acc, bit_vector_type::k_block_size - local_i);
825 }
826 }
827 else
828 acc += std::min(ones - acc, local_i - last - 1);
829 }
830 else
831 acc += std::min((int32_t)(*trunk_p), (int32_t)local_i - 1) - last;
832
833 return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank + acc, i);
834 }
835 else
836 {
837 uint32_t j = 0;
838 uint32_t acc = 0;
839 int32_t last = -1;
840
841 while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
842 {
843 acc += *trunk_p - last;
844 ++trunk_p;
845 last = *trunk_p;
846 ++trunk_p;
847 j += 2;
848 }
849
850 if (j + 1 >= encoding_size)
851 {
852 if (j < encoding_size)
853 {
854 if (*trunk_p >= local_i)
855 acc += local_i - last - 1;
856 else
857 {
858 acc += (*trunk_p) - last;
859 acc += (zeros - acc) - std::min(zeros - acc, bit_vector_type::k_block_size - local_i);
860 }
861 }
862 else
863 acc += std::min(zeros - acc, local_i - last - 1);
864 }
865 else
866 acc += std::min((int32_t)(*trunk_p), (int32_t)local_i - 1) - last;
867
868 return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank + (local_i - acc), i);
869 }
870 }
871 else
872 {
873 // plain encoding.
874 uint64_t * trunk_ptr64 = (uint64_t *)(((uint8_t *)m_v->m_trunk.data()) + trunk_ptr);
875 uint32_t bit;
876 for (bit = 0; bit + 64 <= local_i; bit += 64) block_rank += bits::cnt(*trunk_ptr64++);
877 if (bit != local_i) block_rank += bits::cnt((*trunk_ptr64) & (((uint64_t)1 << (local_i - bit)) - 1));
878
879 return rank_result<t_b>::adapt(hblock_rank + sblock_rank + block_rank, i);
880 }
881 }
882
884 const size_type operator()(size_type i) const { return rank(i); }
885
887 const size_type size() const { return m_v->size(); }
888
890 void set_vector(const bit_vector_type * v = nullptr) { m_v = v; }
891
894 {
895 if (this != &rs) { set_vector(rs.m_v); }
896 return *this;
897 }
898
900 void load(std::istream &, const bit_vector_type * v = nullptr) { set_vector(v); }
901
903 size_type serialize(std::ostream &, structure_tree_node * v = nullptr, std::string name = "") const
904 {
905 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
906 structure_tree::add_size(child, 0);
907 return 0;
908 }
909
910 template <typename archive_t>
911 void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
912 {}
913
914 template <typename archive_t>
916 {}
917
918 bool operator==(const rank_support_hyb & other) const noexcept { return *m_v == *other.m_v; }
919
920 bool operator!=(const rank_support_hyb & other) const noexcept { return !(*this == other); }
921};
922
924
929template <uint8_t t_b, uint32_t k_sblock_rate>
931{
932 public:
935 enum
936 {
937 bit_pat = t_b
938 };
939 enum
940 {
941 bit_pat_len = (uint8_t)1
942 };
943
944 private:
945 const bit_vector_type * m_v;
946
947 public:
949 explicit select_support_hyb(const bit_vector_type * v = nullptr) { set_vector(v); }
950
953 {
954 fprintf(stderr, "\nhyb_vector: select queries are not currently supported\n");
955 std::exit(EXIT_FAILURE);
956 }
957
959 const size_type operator()(size_type i) const { return select(i); }
960
962 const size_type size() const { return m_v->size(); }
963
965 void set_vector(const bit_vector_type * v = nullptr) { m_v = v; }
966
969 {
970 if (this != &rs) { set_vector(rs.m_v); }
971 return *this;
972 }
973
975 void load(std::istream &, const bit_vector_type * v = nullptr) { set_vector(v); }
976
978 size_type serialize(std::ostream &, structure_tree_node * v = nullptr, std::string name = "") const
979 {
980 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
981 structure_tree::add_size(child, 0);
982 return 0;
983 }
984
985 template <typename archive_t>
986 void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
987 {}
988
989 template <typename archive_t>
991 {}
992
993 bool operator==(const select_support_hyb & other) const noexcept { return *m_v == *other.m_v; }
994
995 bool operator!=(const select_support_hyb & other) const noexcept { return !(*this == other); }
996};
997
998} // end namespace sdsl
999
1000#endif // INCLUDED_SDSL_HYB_VECTOR
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
A hybrid-encoded compressed bitvector representation.
Definition: hyb_vector.hpp:67
iterator end() const
Definition: hyb_vector.hpp:651
hyb_vector()=default
Default constructor.
select_support_hyb< 0, k_sblock_rate > select_0_type
Definition: hyb_vector.hpp:76
hyb_vector & operator=(hyb_vector &&hybrid)=default
uint64_t get_int(size_type idx, const uint8_t len=64) const
Get the integer value of the binary string of length len starting at position idx.
Definition: hyb_vector.hpp:592
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: hyb_vector.hpp:623
hyb_vector & operator=(const hyb_vector &hybrid)=default
hyb_vector(hyb_vector &&hybrid)=default
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: hyb_vector.hpp:641
iterator begin() const
Definition: hyb_vector.hpp:649
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: hyb_vector.hpp:610
hyb_vector(const hyb_vector &hybrid)=default
rank_support_hyb< 0, k_sblock_rate > rank_0_type
Definition: hyb_vector.hpp:74
bool operator==(const hyb_vector &v) const
Definition: hyb_vector.hpp:653
select_support_hyb< 1, k_sblock_rate > select_1_type
Definition: hyb_vector.hpp:75
random_access_const_iterator< hyb_vector > iterator
Definition: hyb_vector.hpp:72
size_type size() const
Returns the size of the original bitvector.
Definition: hyb_vector.hpp:607
bit_vector::difference_type difference_type
Definition: hyb_vector.hpp:71
bit_vector::size_type size_type
Definition: hyb_vector.hpp:69
bool operator!=(const hyb_vector &v) const
Definition: hyb_vector.hpp:659
hyb_vector(const bit_vector &bv)
Constructor.
Definition: hyb_vector.hpp:104
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: hyb_vector.hpp:632
value_type operator[](size_type i) const
Accessing the i-th element of the original bitvector.
Definition: hyb_vector.hpp:604
rank_support_hyb< 1, k_sblock_rate > rank_1_type
Definition: hyb_vector.hpp:73
bit_vector::value_type value_type
Definition: hyb_vector.hpp:70
int_vector_size_type size_type
Definition: int_vector.hpp:229
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
int_vector_trait< t_width >::value_type value_type
Definition: int_vector.hpp:221
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:566
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
Generic iterator for a random access container.
Definition: iterators.hpp:24
Rank_support for the hyb_vector class.
Definition: hyb_vector.hpp:695
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
Definition: hyb_vector.hpp:911
const size_type size() const
Return the size of the original vector.
Definition: hyb_vector.hpp:887
bool operator==(const rank_support_hyb &other) const noexcept
Definition: hyb_vector.hpp:918
rank_support_hyb(const bit_vector_type *v=nullptr)
Standard constructor.
Definition: hyb_vector.hpp:713
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
Definition: hyb_vector.hpp:915
size_type serialize(std::ostream &, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into a stream.
Definition: hyb_vector.hpp:903
bit_vector_type::size_type size_type
Definition: hyb_vector.hpp:698
const size_type rank(size_type i) const
Answers rank queries.
Definition: hyb_vector.hpp:716
bool operator!=(const rank_support_hyb &other) const noexcept
Definition: hyb_vector.hpp:920
void set_vector(const bit_vector_type *v=nullptr)
Set the supported vector.
Definition: hyb_vector.hpp:890
rank_support_hyb & operator=(const rank_support_hyb &rs)
Assignment operator.
Definition: hyb_vector.hpp:893
const size_type operator()(size_type i) const
Shorthand for rank(i)
Definition: hyb_vector.hpp:884
void load(std::istream &, const bit_vector_type *v=nullptr)
Load the data structure from a stream and set the supported vector.
Definition: hyb_vector.hpp:900
hyb_vector< k_sblock_rate > bit_vector_type
Definition: hyb_vector.hpp:697
Select support for the hyb_vector class.
Definition: hyb_vector.hpp:931
bool operator!=(const select_support_hyb &other) const noexcept
Definition: hyb_vector.hpp:995
bit_vector_type::size_type size_type
Definition: hyb_vector.hpp:934
bool operator==(const select_support_hyb &other) const noexcept
Definition: hyb_vector.hpp:993
void set_vector(const bit_vector_type *v=nullptr)
Set the supported vector.
Definition: hyb_vector.hpp:965
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
Definition: hyb_vector.hpp:990
size_type serialize(std::ostream &, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into a stream.
Definition: hyb_vector.hpp:978
select_support_hyb(const bit_vector_type *v=nullptr)
Standard constructor.
Definition: hyb_vector.hpp:949
size_type select(size_type) const
Answers select queries.
Definition: hyb_vector.hpp:952
select_support_hyb & operator=(const select_support_hyb &rs)
Assignment operator.
Definition: hyb_vector.hpp:968
const size_type operator()(size_type i) const
Shorthand for select(i)
Definition: hyb_vector.hpp:959
const size_type size() const
Return the size of the original vector.
Definition: hyb_vector.hpp:962
void load(std::istream &, const bit_vector_type *v=nullptr)
Load the data structure from a stream and set the supported vector.
Definition: hyb_vector.hpp:975
hyb_vector< k_sblock_rate > bit_vector_type
Definition: hyb_vector.hpp:933
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
Definition: hyb_vector.hpp:986
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
Namespace for the succinct data structure library.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
static constexpr uint64_t cnt(uint64_t x)
Counts the number of set bits in x.
Definition: bits.hpp:484
bit_vector::size_type size_type
Definition: hyb_vector.hpp:683
static size_type adapt(size_type res, size_type i)
Definition: hyb_vector.hpp:684
static size_type adapt(size_type res, size_type)
Definition: hyb_vector.hpp:677
bit_vector::size_type size_type
Definition: hyb_vector.hpp:676
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.