34#ifndef INCLUDED_SDSL_HYB_VECTOR
35#define INCLUDED_SDSL_HYB_VECTOR
51template <u
int8_t t_b = 1, u
int32_t k_sb_rate = 16>
52class rank_support_hyb;
53template <u
int8_t t_b = 1, u
int32_t k_sb_rate = 16>
54class select_support_hyb;
65template <u
int32_t k_sblock_rate = 16>
84 static const uint32_t k_block_size;
85 static const uint32_t k_block_bytes;
86 static const uint32_t k_sblock_header_size;
87 static const uint32_t k_sblock_size;
88 static const uint32_t k_hblock_rate;
109 size_type n_blocks = (m_size + k_block_size - 1) / k_block_size;
110 size_type n_sblocks = (n_blocks + k_sblock_rate - 1) / k_sblock_rate;
111 size_type n_hblocks = (n_blocks + k_hblock_rate - 1) / k_hblock_rate;
118 for (uint32_t i = 1; i < 65536; ++i)
120 runs_lookup[i] = runs_lookup[i >> 1];
121 if (i >= 32768) --runs_lookup[i];
122 if ((i & 1) != ((i >> 1) & 1)) ++runs_lookup[i];
126 const uint64_t * bv_ptr = bv.
data();
127 for (
size_type block_id = 0; block_id < n_blocks; ++block_id)
129 size_type block_beg = block_id * k_block_size;
130 size_type block_end = block_beg + k_block_size;
135 if (block_end <= m_size)
138 const uint64_t * ptr64 = bv_ptr;
139 for (uint8_t i = 0; i < 4; ++i) ones +=
bits::cnt(*ptr64++);
143 for (uint8_t i = 0; i < 4; ++i)
146 for (uint8_t j = 0; j < 4; ++j) runs += runs_lookup[((*ptr64) >> (16 * j)) & 0xffff];
149 for (uint8_t j = 0; j < 3; ++j)
150 runs += ((((*ptr64) >> (16 * j + 15)) & 1) ^ (((*ptr64) >> (16 * j + 16)) & 1));
156 for (uint8_t i = 0; i < 3; ++i)
158 runs += ((((*ptr64) >> 63) & 1) ^ ((*(ptr64 + 1)) & 1));
167 for (
size_type i = block_beg; i < block_end; ++i)
169 uint8_t bit = (i < m_size ? bv[i] : 0);
170 if (bit == 1) ++ones;
171 if (bit != prevbit) ++runs;
177 uint32_t minority_enc_size = std::min(ones, k_block_size - ones);
178 uint32_t runs_enc_size = (uint32_t)std::max(0, (int32_t)runs - 2);
179 uint32_t best_enc_size = std::min(minority_enc_size, runs_enc_size);
180 best_enc_size = std::min(best_enc_size, k_block_bytes);
183 trunk_size += best_enc_size;
184 bv_ptr += k_block_size / 64;
188 m_sblock_header =
int_vector<8>(n_sblocks * k_sblock_header_size, 0);
199 for (
size_type block_id = 0; block_id < n_blocks; ++block_id)
201 size_type block_beg = block_id * k_block_size;
202 size_type block_end = block_beg + k_block_size;
203 size_type sblock_id = block_id / k_sblock_rate;
204 size_type hblock_id = block_id / k_hblock_rate;
207 if (!(block_id % k_hblock_rate))
209 m_hblock_header[2 * hblock_id] = trunk_ptr;
210 m_hblock_header[2 * hblock_id + 1] = tot_rank;
214 if (!(block_id % k_sblock_rate))
216 uint32_t * ptr = (uint32_t *)(((uint8_t *)m_sblock_header.
data()) + k_sblock_header_size * sblock_id);
217 *ptr++ = trunk_ptr - m_hblock_header[2 * hblock_id];
218 *ptr = tot_rank - m_hblock_header[2 * hblock_id + 1];
221 if (sblock_id && (!sblock_ones || sblock_ones == k_sblock_size))
223 ptr = (uint32_t *)(((uint8_t *)m_sblock_header.
data()) + k_sblock_header_size * (sblock_id - 1));
235 if (block_end <= m_size)
238 const uint64_t * ptr64 = bv_ptr;
239 for (uint8_t i = 0; i < 4; ++i) ones +=
bits::cnt(*ptr64++);
243 for (uint8_t i = 0; i < 4; ++i)
245 for (uint8_t j = 0; j < 4; ++j) runs += runs_lookup[((*ptr64) >> (16 * j)) & 0xffff];
246 for (uint8_t j = 0; j < 3; ++j)
247 runs += ((((*ptr64) >> (16 * j + 15)) & 1) ^ (((*ptr64) >> (16 * j + 16)) & 1));
251 for (uint8_t i = 0; i < 3; ++i)
253 runs += ((((*ptr64) >> 63) & 1) ^ ((*(ptr64 + 1)) & 1));
262 for (
size_type i = block_beg; i < block_end; ++i)
264 uint8_t bit = (i < m_size ? bv[i] : 0);
265 if (bit == 1) ++ones;
266 if (bit != prevbit) ++runs;
270 uint32_t zeros = k_block_size - ones;
273 uint16_t * header_ptr16 = (uint16_t *)(((uint8_t *)m_sblock_header.
data()) +
274 sblock_id * k_sblock_header_size + 8 +
275 (block_id % k_sblock_rate) * 2);
277 (*header_ptr16) = ones;
278 if (ones == k_block_size) (*header_ptr16) |= 0x200;
280 if (0 < ones && ones < k_block_size)
282 uint32_t minority_enc_size = std::min(ones, zeros);
283 uint32_t runs_enc_size = (uint32_t)std::max(0, (int32_t)runs - 2);
284 uint32_t best_enc_size = std::min(minority_enc_size, runs_enc_size);
286 if (k_block_bytes <= best_enc_size)
289 (*header_ptr16) |= (k_block_bytes << 10);
292 if (block_end <= m_size)
294 for (uint8_t i = 0; i < 4; ++i)
296 *((uint64_t *)(((uint8_t *)m_trunk.
data()) + trunk_ptr)) = *(bv_ptr + i);
302 for (
size_type i = block_beg; i < block_end; i += 64)
305 for (
size_type j = i; j < std::min(i + 64, block_end); ++j)
307 uint8_t bit = (j < m_size ? bv[j] : 0);
308 if (bit) w |= ((uint64_t)1 << (j - i));
310 *((uint64_t *)(((uint8_t *)m_trunk.
data()) + trunk_ptr)) = w;
317 if (runs_enc_size < minority_enc_size)
321 (*header_ptr16) |= (runs_enc_size << 10);
322 (*header_ptr16) |= (bv[block_beg] << 9);
324 if (block_end <= m_size)
328 const uint64_t * ptr64 = bv_ptr;
331 for (uint8_t i = 0; runid < runs_enc_size && i < 4; ++i)
334 if (i > 0 && (w & 1) != ((*ptr64) & 1)) m_trunk[trunk_ptr + runid++] = 64 * i - 1;
337 for (uint8_t j = 0; runid < runs_enc_size && j < 63; ++j)
339 if ((w & 1) != ((w >> 1) & 1)) m_trunk[trunk_ptr + runid++] = j + i * 64;
351 for (
size_type i = block_beg; runid < runs_enc_size; ++i)
353 uint8_t bit = (i < m_size ? bv[i] : 0);
354 if (bit != prevbit && i != block_beg)
355 m_trunk[trunk_ptr + runid++] = (i - block_beg - 1);
365 (*header_ptr16) |= (minority_enc_size << 10);
366 if (ones < zeros) (*header_ptr16) |= 0x200;
367 uint8_t keybit = (ones < zeros);
370 if (block_end <= m_size)
372 const uint64_t * ptr64 = bv_ptr;
373 for (uint8_t i = 0; i < 4; ++i)
375 uint64_t w = (*ptr64++);
376 for (uint8_t j = 0; j < 64; ++j)
378 if ((w & 1) == keybit) m_trunk[trunk_ptr++] = j + 64 * i;
385 for (
size_type i = block_beg; i < block_end; ++i)
387 uint8_t bit = (i < m_size ? bv[i] : 0);
389 if (bit == keybit) m_trunk[trunk_ptr++] = i - block_beg;
399 bv_ptr += k_block_size / 64;
410 size_type block_id = (i - 1) / k_block_size;
411 size_type sblock_id = block_id / k_sblock_rate;
412 size_type hblock_id = block_id / k_hblock_rate;
414 size_type trunk_base = m_hblock_header[2 * hblock_id];
416 uint32_t local_i = i - block_id * k_block_size;
419 const uint8_t * header_ptr8 = ((
const uint8_t *)m_sblock_header.
data()) + (sblock_id * k_sblock_header_size);
420 uint32_t * header_ptr32 = (uint32_t *)header_ptr8;
421 size_type trunk_ptr = trunk_base + ((*header_ptr32) & 0x3fffffff);
424 uint16_t * header_ptr16 = (uint16_t *)header_ptr8;
427 if ((*header_ptr32) & 0x80000000)
return (
value_type)((*(header_ptr8 + 1)) & 0x01);
430 for (
size_type j = sblock_id * k_sblock_rate; j != block_id; ++j)
432 trunk_ptr += ((*header_ptr16) >> 10);
436 const uint8_t * trunk_p = ((
const uint8_t *)m_trunk.
data()) + trunk_ptr;
438 uint32_t encoding_size = ((*header_ptr16) >> 10);
439 uint32_t ones = ((*header_ptr16) & 0x1ff);
440 uint32_t zeros = k_block_size - ones;
443 uint32_t special_bit = (((*header_ptr16) & 0x200) >> 9);
446 uint32_t first_run_length = special_bit * ones + (1 - special_bit) * zeros;
447 uint8_t inside_second_run = (first_run_length < local_i);
448 return (inside_second_run ^ special_bit);
452 if (encoding_size < k_block_bytes)
454 if (std::min(ones, zeros) == encoding_size)
458 while (tot < encoding_size && *trunk_p < local_i)
463 uint8_t last_was_majority = ((!tot) || (*(trunk_p - 1) != local_i - 1));
464 return (last_was_majority ^ special_bit);
474 while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
476 acc += *trunk_p - last;
483 uint8_t access_i = 0;
484 if (j + 1 >= encoding_size)
486 if (j < encoding_size)
488 if (local_i <= (uint32_t)(*trunk_p) + 1)
489 access_i = (((int32_t)local_i - last - 1) > 0);
492 acc += (int32_t)(*trunk_p) - last;
493 if (ones - acc <= k_block_size - local_i)
501 if ((int32_t)(ones - acc) < (int32_t)local_i - last - 1)
504 access_i = (((int32_t)local_i - last - 1) > 0);
509 if ((*trunk_p) < local_i - 1)
512 access_i = (((int32_t)local_i - last - 1) > 0);
523 while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
525 acc += *trunk_p - last;
532 uint8_t access_i = 0;
533 if (j + 1 >= encoding_size)
535 if (j < encoding_size)
537 if (local_i <= (uint32_t)(*trunk_p) + 1)
538 access_i = (((int32_t)local_i - last - 1) == 0);
541 acc += (*trunk_p) - last;
542 if (zeros - acc <= k_block_size - local_i)
550 if ((int32_t)(zeros - acc) < (int32_t)local_i - last - 1)
553 access_i = ((local_i - last - 1) == 0);
558 if ((*trunk_p) < local_i - 1)
561 access_i = (((int32_t)local_i - last - 1) == 0);
570 uint64_t * trunk_ptr64 = (uint64_t *)(((uint8_t *)m_trunk.
data()) + trunk_ptr);
572 for (bit = 0; bit + 64 <= local_i; bit += 64) trunk_ptr64++;
574 uint8_t access_i = 0;
576 access_i = (((*trunk_ptr64) >> (local_i - bit - 1)) & 1);
578 access_i = (((*(trunk_ptr64 - 1)) >> 63) & 1);
595 for (
size_t i = 0; i < len; ++i)
598 res |= (*this)[idx + len - 1 - i];
614 written_bytes +=
write_member(m_size, out, child,
"size");
615 written_bytes += m_trunk.
serialize(out, child,
"trunk");
616 written_bytes += m_sblock_header.
serialize(out, child,
"sblock_header");
617 written_bytes += m_hblock_header.
serialize(out, child,
"hblock_header");
619 return written_bytes;
627 m_sblock_header.
load(in);
628 m_hblock_header.
load(in);
631 template <
typename archive_t>
640 template <
typename archive_t>
655 return m_size == v.m_size && m_trunk == v.m_trunk && m_sblock_header == v.m_sblock_header &&
656 m_hblock_header == v.m_hblock_header;
662template <u
int32_t k_sblock_rate>
663const uint32_t hyb_vector<k_sblock_rate>::k_block_size = 256;
664template <u
int32_t k_sblock_rate>
665const uint32_t hyb_vector<k_sblock_rate>::k_block_bytes = 32;
666template <u
int32_t k_sblock_rate>
667const uint32_t hyb_vector<k_sblock_rate>::k_sblock_header_size = 8 + 2 * k_sblock_rate;
668template <u
int32_t k_sblock_rate>
669const uint32_t hyb_vector<k_sblock_rate>::k_sblock_size = 256 * k_sblock_rate;
670template <u
int32_t k_sblock_rate>
671const uint32_t hyb_vector<k_sblock_rate>::k_hblock_rate = (1U << 31) / 256;
673template <u
int8_t t_bp>
693template <u
int8_t t_b, u
int32_t k_sblock_rate>
718 assert(m_v !=
nullptr);
719 assert(i <= m_v->
size());
722 if (i <= 0)
return 0;
724 size_type block_id = (i - 1) / bit_vector_type::k_block_size;
725 size_type sblock_id = block_id / k_sblock_rate;
726 size_type hblock_id = block_id / bit_vector_type::k_hblock_rate;
728 size_type trunk_base = m_v->m_hblock_header[2 * hblock_id];
729 size_type hblock_rank = m_v->m_hblock_header[2 * hblock_id + 1];
731 uint32_t local_i = i - block_id * bit_vector_type::k_block_size;
734 const uint8_t * header_ptr8 = ((
const uint8_t *)(m_v->m_sblock_header.data())) +
735 (sblock_id * bit_vector_type::k_sblock_header_size);
736 uint32_t * header_ptr32 = (uint32_t *)header_ptr8;
737 size_type trunk_ptr = trunk_base + ((*header_ptr32) & 0x3fffffff);
738 size_type sblock_rank = *(header_ptr32 + 1);
741 uint16_t * header_ptr16 = (uint16_t *)header_ptr8;
744 if ((*header_ptr32) & 0x80000000)
747 ((*(header_ptr8 + 1)) &
749 sblock_id * bit_vector_type::k_sblock_size),
755 for (
size_type j = sblock_id * k_sblock_rate; j != block_id; ++j)
757 trunk_ptr += ((*header_ptr16) >> 10);
758 block_rank += ((*header_ptr16) & 0x1ff);
762 const uint8_t * trunk_p = ((uint8_t *)m_v->m_trunk.data()) + trunk_ptr;
764 uint32_t encoding_size = ((*header_ptr16) >> 10);
765 uint32_t ones = ((*header_ptr16) & 0x1ff);
766 uint32_t zeros = bit_vector_type::k_block_size - ones;
769 uint32_t special_bit = (((*header_ptr16) & 0x200) >> 9);
772 uint32_t first_run_length = special_bit * ones + (1 - special_bit) * zeros;
773 uint32_t local_rank = std::min(local_i, first_run_length);
776 (special_bit * local_rank +
778 special_bit) * (local_i -
784 if (encoding_size < bit_vector_type::k_block_bytes)
786 if (std::min(ones, zeros) == encoding_size)
790 while (tot < encoding_size && (*trunk_p++) < local_i) ++tot;
794 special_bit) * (local_i -
806 while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
808 acc += *trunk_p - last;
815 if (j + 1 >= encoding_size)
817 if (j < encoding_size)
819 if (*trunk_p >= local_i)
820 acc += local_i - last - 1;
823 acc += (*trunk_p) - last;
824 acc += (ones - acc) - std::min(ones - acc, bit_vector_type::k_block_size - local_i);
828 acc += std::min(ones - acc, local_i - last - 1);
831 acc += std::min((int32_t)(*trunk_p), (int32_t)local_i - 1) - last;
841 while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
843 acc += *trunk_p - last;
850 if (j + 1 >= encoding_size)
852 if (j < encoding_size)
854 if (*trunk_p >= local_i)
855 acc += local_i - last - 1;
858 acc += (*trunk_p) - last;
859 acc += (zeros - acc) - std::min(zeros - acc, bit_vector_type::k_block_size - local_i);
863 acc += std::min(zeros - acc, local_i - last - 1);
866 acc += std::min((int32_t)(*trunk_p), (int32_t)local_i - 1) - last;
874 uint64_t * trunk_ptr64 = (uint64_t *)(((uint8_t *)m_v->m_trunk.data()) + trunk_ptr);
876 for (bit = 0; bit + 64 <= local_i; bit += 64) block_rank +=
bits::cnt(*trunk_ptr64++);
877 if (bit != local_i) block_rank +=
bits::cnt((*trunk_ptr64) & (((uint64_t)1 << (local_i - bit)) - 1));
910 template <
typename archive_t>
914 template <
typename archive_t>
929template <u
int8_t t_b, u
int32_t k_sblock_rate>
954 fprintf(stderr,
"\nhyb_vector: select queries are not currently supported\n");
955 std::exit(EXIT_FAILURE);
985 template <
typename archive_t>
989 template <
typename archive_t>
A hybrid-encoded compressed bitvector representation.
hyb_vector()=default
Default constructor.
select_support_hyb< 0, k_sblock_rate > select_0_type
hyb_vector & operator=(hyb_vector &&hybrid)=default
uint64_t get_int(size_type idx, const uint8_t len=64) const
Get the integer value of the binary string of length len starting at position idx.
void load(std::istream &in)
Loads the data structure from the given istream.
hyb_vector & operator=(const hyb_vector &hybrid)=default
hyb_vector(hyb_vector &&hybrid)=default
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
hyb_vector(const hyb_vector &hybrid)=default
rank_support_hyb< 0, k_sblock_rate > rank_0_type
bool operator==(const hyb_vector &v) const
select_support_hyb< 1, k_sblock_rate > select_1_type
random_access_const_iterator< hyb_vector > iterator
size_type size() const
Returns the size of the original bitvector.
bit_vector::difference_type difference_type
bit_vector::size_type size_type
bool operator!=(const hyb_vector &v) const
hyb_vector(const bit_vector &bv)
Constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
value_type operator[](size_type i) const
Accessing the i-th element of the original bitvector.
rank_support_hyb< 1, k_sblock_rate > rank_1_type
bit_vector::value_type value_type
int_vector_size_type size_type
ptrdiff_t difference_type
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
int_vector_trait< t_width >::value_type value_type
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
Generic iterator for a random access container.
Rank_support for the hyb_vector class.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
const size_type size() const
Return the size of the original vector.
bool operator==(const rank_support_hyb &other) const noexcept
rank_support_hyb(const bit_vector_type *v=nullptr)
Standard constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
size_type serialize(std::ostream &, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into a stream.
bit_vector_type::size_type size_type
const size_type rank(size_type i) const
Answers rank queries.
bool operator!=(const rank_support_hyb &other) const noexcept
void set_vector(const bit_vector_type *v=nullptr)
Set the supported vector.
rank_support_hyb & operator=(const rank_support_hyb &rs)
Assignment operator.
const size_type operator()(size_type i) const
Shorthand for rank(i)
void load(std::istream &, const bit_vector_type *v=nullptr)
Load the data structure from a stream and set the supported vector.
hyb_vector< k_sblock_rate > bit_vector_type
Select support for the hyb_vector class.
bool operator!=(const select_support_hyb &other) const noexcept
bit_vector_type::size_type size_type
bool operator==(const select_support_hyb &other) const noexcept
void set_vector(const bit_vector_type *v=nullptr)
Set the supported vector.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &)
size_type serialize(std::ostream &, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into a stream.
select_support_hyb(const bit_vector_type *v=nullptr)
Standard constructor.
size_type select(size_type) const
Answers select queries.
select_support_hyb & operator=(const select_support_hyb &rs)
Assignment operator.
const size_type operator()(size_type i) const
Shorthand for select(i)
const size_type size() const
Return the size of the original vector.
void load(std::istream &, const bit_vector_type *v=nullptr)
Load the data structure from a stream and set the supported vector.
hyb_vector< k_sblock_rate > bit_vector_type
void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
io.hpp contains some methods for reading/writing sdsl structures.
iterators.hpp contains an generic iterator for random access containers.
Namespace for the succinct data structure library.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
void read_member(T &t, std::istream &in)
static constexpr uint64_t cnt(uint64_t x)
Counts the number of set bits in x.
bit_vector::size_type size_type
static size_type adapt(size_type res, size_type i)
static size_type adapt(size_type res, size_type)
bit_vector::size_type size_type
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.