SDSL 3.0.1
Succinct Data Structure Library
enc_vector.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef SDSL_ENC_VECTOR
9#define SDSL_ENC_VECTOR
10
11#include <sdsl/coder.hpp>
12#include <sdsl/int_vector.hpp>
13#include <sdsl/iterators.hpp>
14
16namespace sdsl
17{
18
19template <uint8_t t_width>
21{
23};
24
25template <>
27{
29};
30
31template <>
33{
35};
36
38
48template <class t_coder = coder::elias_delta<>, uint32_t t_dens = 128, uint8_t t_width = 0>
50{
51 private:
52 static_assert(t_dens > 1, "enc_vector: sample density must be larger than `1`");
53
54 public:
55 typedef uint64_t value_type;
58 typedef const value_type reference;
60 typedef const value_type * const_pointer;
61 typedef ptrdiff_t difference_type;
63 typedef t_coder coder;
66 static const uint32_t sample_dens = t_dens;
68
69 int_vector<0> m_z; // storage for encoded deltas
70 private:
71 int_vector_type m_sample_vals_and_pointer; // samples and pointers
72 size_type m_size = 0; // number of vector elements
73
74 void clear()
75 {
76 m_z.resize(0);
78 m_size = 0;
79 m_sample_vals_and_pointer.resize(0);
80 m_sample_vals_and_pointer.shrink_to_fit();
81 }
82
83 public:
84 enc_vector() = default;
85 enc_vector(const enc_vector &) = default;
86 enc_vector(enc_vector &&) = default;
87 enc_vector & operator=(const enc_vector &) = default;
89
91
93 template <class Container>
94 enc_vector(const Container & c);
95
97 /*
98 * \param v_buf A int_vector_buf.
99 */
100 template <uint8_t int_width>
102
105
107 size_type size() const { return m_size; }
108
110 static size_type max_size() { return int_vector<>::max_size() / 2; }
111
113 bool empty() const { return 0 == m_size; }
114
116 const const_iterator begin() const { return const_iterator(this, 0); }
117
119 const const_iterator end() const { return const_iterator(this, this->m_size); }
120
121 bool operator==(const enc_vector & v) const
122 {
123 return m_size && v.m_size && m_z == v.m_z && m_sample_vals_and_pointer == v.m_sample_vals_and_pointer;
124 }
125
126 bool operator!=(const enc_vector & v) const { return !(*this == v); }
127
129
132
134
137 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const;
138
140 void load(std::istream & in);
141
142 template <typename archive_t>
143 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const;
144
145 template <typename archive_t>
146 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar);
147
149
152 value_type sample(const size_type i) const;
153
154 uint32_t get_sample_dens() const { return t_dens; }
155
160 void get_inter_sampled_values(const size_type i, uint64_t * it) const
161 {
162 *(it++) = 0;
163 if (i * t_dens + t_dens - 1 < size())
164 {
165 t_coder::template decode<true, true>(m_z.data(), m_sample_vals_and_pointer[(i << 1) + 1], t_dens - 1, it);
166 }
167 else
168 {
169 assert(i * t_dens < size());
170 t_coder::template decode<true, true>(m_z.data(),
171 m_sample_vals_and_pointer[(i << 1) + 1],
172 size() - i * t_dens - 1,
173 it);
174 }
175 };
176};
177
178template <class t_coder, uint32_t t_dens, uint8_t t_width>
180 const size_type i) const
181{
182 assert(i + 1 != 0);
183 assert(i < m_size);
184 size_type idx = i / get_sample_dens();
185 return m_sample_vals_and_pointer[idx << 1] +
186 t_coder::decode_prefix_sum(m_z.data(), m_sample_vals_and_pointer[(idx << 1) + 1], i - t_dens * idx);
187}
188
189template <class t_coder, uint32_t t_dens, uint8_t t_width>
191 const size_type i) const
192{
193 assert(i * get_sample_dens() + 1 != 0);
194 assert(i * get_sample_dens() < m_size);
195 return m_sample_vals_and_pointer[i << 1];
196}
197
198template <class t_coder, uint32_t t_dens, uint8_t t_width>
199template <class Container>
201{
202 // clear bit_vectors
203 clear();
204
205 if (c.empty()) // if c is empty there is nothing to do...
206 return;
207 typename Container::const_iterator it = c.begin(), end = c.end();
208 typename Container::value_type v1 = *it, v2, max_sample_value = 0, x;
209 size_type samples = 0;
210 size_type z_size = 0;
211 // (1) Calculate maximal value of samples and of deltas
212 for (size_type i = 0, no_sample = 0; it != end; ++it, ++i, --no_sample)
213 {
214 v2 = *it;
215 if (!no_sample)
216 { // add a sample
217 no_sample = get_sample_dens();
218 if (max_sample_value < v2) max_sample_value = v2;
219 ++samples;
220 }
221 else
222 {
223 z_size += t_coder::encoding_length(v2 - v1);
224 }
225 v1 = v2;
226 }
227 // (2) Write sample values and deltas
228 {
229 if (max_sample_value > z_size + 1)
230 m_sample_vals_and_pointer.width(bits::hi(max_sample_value) + 1);
231 else
232 m_sample_vals_and_pointer.width(bits::hi(z_size + 1) + 1);
233 m_sample_vals_and_pointer.resize(2 * samples + 2); // add 2 for last entry
234 util::set_to_value(m_sample_vals_and_pointer, 0);
235 typename int_vector_type::iterator sv_it = m_sample_vals_and_pointer.begin();
236 z_size = 0;
237 size_type no_sample = 0;
238 for (it = c.begin(); it != end; ++it, --no_sample)
239 {
240 v2 = *it;
241 if (!no_sample)
242 { // add a sample
243 no_sample = get_sample_dens();
244 *sv_it = v2;
245 ++sv_it;
246 *sv_it = z_size;
247 ++sv_it;
248 }
249 else
250 {
251 x = v2 - v1;
252 z_size += t_coder::encoding_length(x);
253 }
254 v1 = v2;
255 }
256 *sv_it = 0;
257 ++sv_it; // initialize
258 *sv_it = z_size + 1;
259 ++sv_it; // last entry
260
261 m_z = int_vector<>(z_size, 0, 1);
262 uint64_t * z_data = t_coder::raw_data(m_z);
263 uint8_t offset = 0;
264 no_sample = 0;
265 for (it = c.begin(); it != end; ++it, --no_sample)
266 {
267 v2 = *it;
268 if (!no_sample)
269 { // add a sample
270 no_sample = get_sample_dens();
271 }
272 else
273 {
274 t_coder::encode(v2 - v1, z_data, offset);
275 }
276 v1 = v2;
277 }
278 }
279 m_size = c.size();
280}
281
282template <class t_coder, uint32_t t_dens, uint8_t t_width>
283template <uint8_t int_width>
285{
286 // clear bit_vectors
287 clear();
288 size_type n = v_buf.size();
289 if (n == 0) // if c is empty there is nothing to do...
290 return;
291 value_type v1 = 0, v2 = 0, max_sample_value = 0;
292 size_type samples = 0, z_size = 0;
293 const size_type sd = get_sample_dens();
294 // (1) Calculate maximal value of samples and of deltas
295 for (size_type i = 0, no_sample = 0; i < n; ++i, --no_sample)
296 {
297 v2 = v_buf[i];
298 if (!no_sample)
299 { // is sample
300 no_sample = sd;
301 if (max_sample_value < v2) max_sample_value = v2;
302 ++samples;
303 }
304 else
305 {
306 z_size += t_coder::encoding_length(v2 - v1);
307 }
308 v1 = v2;
309 }
310
311 // (2) Write sample values and deltas
312 // (a) Initialize array for sample values and pointers
313 if (max_sample_value > z_size + 1)
314 m_sample_vals_and_pointer.width(bits::hi(max_sample_value) + 1);
315 else
316 m_sample_vals_and_pointer.width(bits::hi(z_size + 1) + 1);
317 m_sample_vals_and_pointer.resize(2 * samples + 2); // add 2 for last entry
318 util::set_to_value(m_sample_vals_and_pointer, 0);
319
320 // (b) Initilize bit_vector for encoded data
321 m_z = int_vector<>(z_size, 0, 1);
322 uint64_t * z_data = t_coder::raw_data(m_z);
323 uint8_t offset = 0;
324
325 // (c) Write sample values and deltas
326 z_size = 0;
327 for (size_type i = 0, j = 0, no_sample = 0; i < n; ++i, --no_sample)
328 {
329 v2 = v_buf[i];
330 if (!no_sample)
331 { // is sample
332 no_sample = sd;
333 m_sample_vals_and_pointer[j++] = v2; // write samples
334 m_sample_vals_and_pointer[j++] = z_size; // write pointers
335 }
336 else
337 {
338 z_size += t_coder::encoding_length(v2 - v1);
339 t_coder::encode(v2 - v1, z_data, offset); // write encoded values
340 }
341 v1 = v2;
342 }
343 m_size = n;
344}
345
346template <class t_coder, uint32_t t_dens, uint8_t t_width>
349 std::string name) const
350{
351 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
352 size_type written_bytes = 0;
353 written_bytes += write_member(m_size, out, child, "size");
354 written_bytes += m_z.serialize(out, child, "encoded deltas");
355 written_bytes += m_sample_vals_and_pointer.serialize(out, child, "samples_and_pointers");
356 structure_tree::add_size(child, written_bytes);
357 return written_bytes;
358}
359
360template <class t_coder, uint32_t t_dens, uint8_t t_width>
362{
363 read_member(m_size, in);
364 m_z.load(in);
365 m_sample_vals_and_pointer.load(in);
366}
367
368template <class t_coder, uint32_t t_dens, uint8_t t_width>
369template <typename archive_t>
371{
372 ar(CEREAL_NVP(m_size));
373 ar(CEREAL_NVP(m_z));
374 ar(CEREAL_NVP(m_sample_vals_and_pointer));
375}
376
377template <class t_coder, uint32_t t_dens, uint8_t t_width>
378template <typename archive_t>
380{
381 ar(CEREAL_NVP(m_size));
382 ar(CEREAL_NVP(m_z));
383 ar(CEREAL_NVP(m_sample_vals_and_pointer));
384}
385
386} // end namespace sdsl
387#endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
A generic immutable space-saving vector class for unsigned integers.
Definition: enc_vector.hpp:50
enc_vector & operator=(enc_vector &&)=default
enc_vector & operator=(const enc_vector &)=default
enc_vector enc_vec_type
Definition: enc_vector.hpp:67
const value_type const_reference
Definition: enc_vector.hpp:59
uint32_t get_sample_dens() const
Definition: enc_vector.hpp:154
const value_type * const_pointer
Definition: enc_vector.hpp:60
enc_vector()=default
random_access_const_iterator< enc_vector > iterator
Definition: enc_vector.hpp:56
enc_vector_trait< t_width >::int_vector_type int_vector_type
Definition: enc_vector.hpp:64
const const_iterator begin() const
Iterator that points to the first element of the enc_vector.
Definition: enc_vector.hpp:116
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize the enc_vector to a stream.
Definition: enc_vector.hpp:347
void load(std::istream &in)
Load the enc_vector from a stream.
Definition: enc_vector.hpp:361
static size_type max_size()
Return the largest size that this container can ever have.
Definition: enc_vector.hpp:110
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: enc_vector.hpp:370
bool operator==(const enc_vector &v) const
Definition: enc_vector.hpp:121
value_type operator[](size_type i) const
operator[]
Definition: enc_vector.hpp:179
const value_type reference
Definition: enc_vector.hpp:58
uint64_t value_type
Definition: enc_vector.hpp:55
bool operator!=(const enc_vector &v) const
Definition: enc_vector.hpp:126
value_type sample(const size_type i) const
Returns the i-th sample of enc_vector.
Definition: enc_vector.hpp:190
enc_vector(const enc_vector &)=default
int_vector ::size_type size_type
Definition: enc_vector.hpp:62
void get_inter_sampled_values(const size_type i, uint64_t *it) const
Definition: enc_vector.hpp:160
const const_iterator end() const
Iterator that points to the position after the last element of the enc_vector.
Definition: enc_vector.hpp:119
~enc_vector()
Default Destructor.
Definition: enc_vector.hpp:104
static const uint32_t sample_dens
Definition: enc_vector.hpp:66
iv_tag index_category
Definition: enc_vector.hpp:65
size_type size() const
The number of elements in the enc_vector.
Definition: enc_vector.hpp:107
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: enc_vector.hpp:379
ptrdiff_t difference_type
Definition: enc_vector.hpp:61
int_vector< 0 > m_z
Definition: enc_vector.hpp:69
iterator const_iterator
Definition: enc_vector.hpp:57
enc_vector(enc_vector &&)=default
bool empty() const
Returns if the enc_vector is empty.
Definition: enc_vector.hpp:113
uint64_t size() const
Returns the number of elements currently stored.
void shrink_to_fit()
Free unused allocated memory.
Definition: int_vector.hpp:506
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:566
static size_type max_size() noexcept
Maximum size of the int_vector.
Definition: int_vector.hpp:542
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:521
Generic iterator for a random access container.
Definition: iterators.hpp:24
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
coder.hpp contains the coder namespace and includes the header files of sdsl::coder::fibonacci,...
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
void set_to_value(t_int_vec &v, uint64_t k)
Set all entries of int_vector to value k.
Definition: util.hpp:563
Namespace for the succinct data structure library.
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:651
int_vector< 32 > int_vector_type
Definition: enc_vector.hpp:28
int_vector< 64 > int_vector_type
Definition: enc_vector.hpp:34
int_vector< 0 > int_vector_type
Definition: enc_vector.hpp:22