SDSL 3.0.1
Succinct Data Structure Library
rank_support_int.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef INCLUDED_SDSL_RANK_SUPPORT_INT
10#define INCLUDED_SDSL_RANK_SUPPORT_INT
11
16#include <sdsl/int_vector.hpp>
17#include <sdsl/uint128_t.hpp>
18
19// TODO: benchmark the use of compiler hints for branch prediction
20#define likely(x) __builtin_expect((x), 1)
21#define unlikely(x) __builtin_expect((x), 0)
22
23namespace sdsl
24{
25
26constexpr size_t floor_log2(size_t const n)
27{
28 return (n == 1) ? 0 : 1 + floor_log2(n >> 1);
29}
30
31constexpr size_t ceil_log2(size_t const n)
32{
33 return (n == 1) ? 0 : floor_log2(n - 1) + 1;
34}
35
37template <uint8_t alphabet_size>
39{
40
41 public:
44
45 static_assert(alphabet_size > 2, "Rank support is only implemented on int_vectors with an alphabet size of > 2.");
46
47 protected:
51 template <typename uintX_t>
52 static constexpr uintX_t bm_rec(const uintX_t w, const uint8_t length, const uint8_t max_length)
53 {
54 return (length >= max_length) ? w : bm_rec(w | (w << length), length << 1, max_length);
55 }
56
57 static std::array<uint64_t, alphabet_size> generate_mask_array()
58 {
59 std::array<uint64_t, alphabet_size> masks{};
60
61 for (value_type v = 0; v < alphabet_size; ++v)
62 {
63 masks[v] = v;
64 for (uint8_t i = sigma_bits * 2; i < 64; i <<= 1) masks[v] |= masks[v] << i;
65 }
66
67 uint64_t tmp_carry = masks[1];
68 for (value_type v = 0; v < alphabet_size; ++v) masks[v] |= tmp_carry << sigma_bits;
69
70 return masks;
71 }
72
73 protected:
74 static constexpr uint8_t sigma{ alphabet_size };
75 static constexpr uint8_t sigma_bits{ ceil_log2(alphabet_size) };
76 static constexpr uint8_t bits_per_word{ (64 / sigma_bits) * sigma_bits };
77 static constexpr uint64_t even_mask{ bm_rec<uint64_t>(bits::lo_set[sigma_bits], sigma_bits * 2, 64) };
78 static constexpr uint64_t carry_select_mask{ bm_rec<uint64_t>(1ULL << sigma_bits, sigma_bits * 2, 64) };
79 static const std::array<uint64_t, alphabet_size> masks;
80
81 const int_vector<> * m_v;
82
83 public:
87 rank_support_int(const int_vector<> * v = nullptr)
88 { // Check that the actual width of the vector has same size as sigma_bits.
89 assert((v != nullptr) ? sigma_bits == v->width() : true);
90 m_v = v;
91 }
92
99 virtual ~rank_support_int() {}
100
108 virtual size_type rank(const size_type i, const value_type v) const = 0;
109
111 virtual size_type operator()(const size_type idx, const value_type v) const = 0;
112
122 virtual size_type prefix_rank(const size_type i, const value_type v) const = 0;
123
127 virtual size_type serialize(std::ostream & out, structure_tree_node * v, const std::string name) const = 0;
128
133 virtual void load(std::istream & in, const int_vector<> * v = nullptr) = 0;
134
140 virtual void set_vector(const int_vector<> * v = nullptr) = 0;
141
142 protected:
144 static constexpr uint64_t mask_prefix(value_type const v, uint64_t const w_even, uint64_t const w_odd) noexcept
145 {
146 // every bit that will be set corresponds to an element <= v
147 // because the preset bit to the left in the precomputed bitmask is not eliminated by the carry bit during the
148 // subtraction.
149 // since alphabet_size is > 2 and an element uses at least 2 bits, we can shift the odd positions by one to the
150 // left and it is guaranteed that when adding both with OR that no bits that are set will overlap.
151 return ((masks[v] - w_even) & carry_select_mask) | (((masks[v] - w_odd) & carry_select_mask) << 1);
152 }
153
155 static constexpr uint64_t set_positions_prefix(const uint64_t w, const value_type v) noexcept
156 {
157 uint64_t const w_even = even_mask & w; // retrieve even positions
158 uint64_t const w_odd = even_mask & (w >> sigma_bits); // retrieve odd positions
159 return mask_prefix(v, w_even, w_odd);
160 }
161
165 static constexpr uint64_t set_positions(const uint64_t w, const value_type v) noexcept
166 {
167 assert(v > 0);
168 // optimiyed version of set_positions(w, v) - set_positions(w, v - 1)
169 uint64_t const w_even = even_mask & w; // retrieve even positions
170 uint64_t const w_odd = even_mask & (w >> sigma_bits); // retrieve odd positions
171 uint64_t res = ((masks[v] - w_even) & ~(masks[v - 1] - w_even)) & carry_select_mask;
172 res |= (((masks[v] - w_odd) & ~(masks[v - 1] - w_odd)) & carry_select_mask) << 1;
173 return res;
174 }
175
177 template <typename... value_t>
178 static constexpr std::array<uint64_t, sizeof...(value_t)> word_prefix_rank(const uint64_t word,
179 const size_type bit_pos,
180 const value_t... values) noexcept
181 {
182 uint64_t const mask = bits::lo_set[(bit_pos % bits_per_word) + 1];
183
184 uint64_t const w_even = even_mask & word; // retrieve even positions
185 uint64_t const w_odd = even_mask & (word >> sigma_bits); // retrieve odd positions
186
187 return { (bits::cnt(mask_prefix(values, w_even, w_odd) & mask))... };
188 }
189
193 static constexpr uint32_t word_rank(const uint64_t word, const size_type bit_pos, const value_type v) noexcept
194 {
195 return bits::cnt(set_positions(word, v) & bits::lo_set[(bit_pos & 0x3F) + 1]);
196 }
197
199 static constexpr uint32_t full_word_prefix_rank(const uint64_t word, const value_type v) noexcept
200 {
201 return bits::cnt(set_positions_prefix(word, v));
202 }
203
207 static constexpr uint32_t full_word_rank(const uint64_t word, const value_type v) noexcept
208 {
209
210 return bits::cnt(set_positions(word, v));
211 }
212
214 static constexpr uint64_t extract_word(const uint64_t * data, const size_type word_position) noexcept
215 {
216 return *(data + word_position);
217 }
218};
219
220template <uint8_t alphabet_size>
221const std::array<uint64_t, alphabet_size> rank_support_int<alphabet_size>::masks = generate_mask_array();
222
223} // end namespace sdsl
224
227
228#endif // end file
A generic vector class for integers of width .
Definition: int_vector.hpp:216
The base class of classes supporting rank_queries for a sdsl::int_vector in constant time.
static constexpr uint64_t carry_select_mask
static constexpr uint64_t set_positions(const uint64_t w, const value_type v) noexcept
Count how often value v occurs in the word w.
static constexpr uint64_t set_positions_prefix(const uint64_t w, const value_type v) noexcept
Count how often value v or smaller occurs in the word w.
static constexpr uintX_t bm_rec(const uintX_t w, const uint8_t length, const uint8_t max_length)
Constructs a bit mask with the pattern w of a given length.
rank_support_int(const int_vector<> *v=nullptr)
Constructor.
static std::array< uint64_t, alphabet_size > generate_mask_array()
virtual size_type prefix_rank(const size_type i, const value_type v) const =0
Answers rank queries for the supported int_vector.
virtual void load(std::istream &in, const int_vector<> *v=nullptr)=0
Loads the rank_support_int.
rank_support_int & operator=(rank_support_int &&)=default
static constexpr uint64_t extract_word(const uint64_t *data, const size_type word_position) noexcept
Returns the word a the given word position.
rank_support_int(rank_support_int &&)=default
virtual size_type rank(const size_type i, const value_type v) const =0
Answers rank queries for the supported int_vector.
static constexpr uint8_t sigma_bits
rank_support_int(const rank_support_int &)=default
Copy constructor.
rank_support_int & operator=(const rank_support_int &)=default
static constexpr std::array< uint64_t, sizeof...(value_t)> word_prefix_rank(const uint64_t word, const size_type bit_pos, const value_t... values) noexcept
Counts the occurrences of elements smaller or equal to v in the word starting at data up to position ...
static constexpr uint8_t sigma
const int_vector * m_v
Pointer to the rank supported bit_vector.
static constexpr uint8_t bits_per_word
int_vector ::value_type value_type
static constexpr uint64_t mask_prefix(value_type const v, uint64_t const w_even, uint64_t const w_odd) noexcept
Mask the set prefix positions.
static constexpr uint32_t full_word_prefix_rank(const uint64_t word, const value_type v) noexcept
Counts the occurrences of v in the word starting at data up to position idx.
static constexpr uint64_t even_mask
static constexpr uint32_t word_rank(const uint64_t word, const size_type bit_pos, const value_type v) noexcept
Counts the occurrences of elements smaller or equal to v in the word starting at data up to position ...
static const std::array< uint64_t, alphabet_size > masks
int_vector ::size_type size_type
virtual void set_vector(const int_vector<> *v=nullptr)=0
Sets the supported int_vector to the given pointer.
static constexpr uint32_t full_word_rank(const uint64_t word, const value_type v) noexcept
Counts the occurrences of v in the word starting at data up to position idx.
virtual size_type operator()(const size_type idx, const value_type v) const =0
Alias for rank(idx, v)
virtual size_type serialize(std::ostream &out, structure_tree_node *v, const std::string name) const =0
Serializes rank_support_int.
virtual ~rank_support_int()
Destructor.
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
constexpr size_t ceil_log2(size_t const n)
constexpr size_t floor_log2(size_t const n)
rank_support_int_scan.hpp contains rank_support_int_scan that support a sdsl::int_vector with linear ...
rank_support_int_v.hpp contains rank_support_int_v.
static constexpr uint64_t cnt(uint64_t x)
Counts the number of set bits in x.
Definition: bits.hpp:484
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:187
uint128_t.hpp contains contains the definition of a 128-bit unsigned integer type.