SDSL 3.0.1
Succinct Data Structure Library
coder_elias_gamma.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef SDSL_CODER_ELIAS_GAMMA
9#define SDSL_CODER_ELIAS_GAMMA
10
11#include <sdsl/int_vector.hpp>
12
13namespace sdsl
14{
15
16namespace coder
17{
18
20template <typename T = void>
22{
23 public:
24 typedef uint64_t size_type;
25
26 static struct impl
27 {
29
33 uint32_t prefixsum[1 << 16];
34
35 uint16_t prefixsum_8bit[(1 << 8) * 8];
36
37 impl()
38 {
39 // initialize prefixsum
40 for (uint64_t x = 0; x < (1 << 16); ++x)
41 {
42 const uint64_t * w = &x; // copy of x
43 uint64_t value = 0;
44 uint16_t numbers = 0, offset = 0, offset2 = 0;
45 while ((x >> offset) != 0)
46 {
47 uint64_t len_1 = bits::read_unary_bounded(w, offset);
48 if (len_1 == 0)
49 {
50 offset += 1;
51 value += 1;
52 ++numbers;
53 }
54 else
55 {
56 offset2 = offset + len_1 + 1;
57 if (offset2 + len_1 <= 16)
58 {
59 value += bits::read_int_bounded(w, offset2, len_1) + (1ULL << len_1);
60 offset = offset2 + len_1;
61 ++numbers;
62 }
63 else
64 break;
65 }
66 }
67 uint32_t result = 0; // the highest 8 bit equal the shift/offset, the second highest 8 bit equal the
68 // number of decoded values,
69 // and the last 16 bit equals value of decoded prefix sum
70 result = (offset << 24) | (numbers << 16) | value;
71 if (value > 0) assert(offset > 0 and numbers > 0 and offset <= 16 and numbers <= 16);
72 prefixsum[x] = result;
73 }
74 // initialize prefixsum_8bit
75
76 for (uint32_t maxi = 1, idx = 0; maxi <= 8; ++maxi)
77 {
78 for (uint64_t x = 0; x < (1 << 8); ++x)
79 {
80 const uint64_t * w = &x; // copy of x
81 uint64_t value = 0;
82 uint32_t numbers = 0, offset = 0, offset2 = 0;
83 while ((x >> offset) != 0 and numbers < maxi)
84 {
85 uint64_t len_1 = bits::read_unary_bounded(w, offset);
86 if (len_1 == 0)
87 {
88 offset += 1;
89 value += 1;
90 ++numbers;
91 }
92 else
93 {
94 offset2 = offset + len_1 + 1;
95 if (offset2 + len_1 <= 8)
96 {
97 value += bits::read_int_bounded(w, offset2, len_1) + (1ULL << len_1);
98 offset = offset2 + len_1;
99 ++numbers;
100 }
101 else
102 break;
103 }
104 }
105 uint16_t result = 0; // the highest 8 bit equal the shift/offset, the second highest 8 bit equal the
106 // number of decoded values,
107 // and the last 16 bit equals value of decoded prefix sum
108 result = (offset << 12) | (numbers << 8) | value;
109 prefixsum_8bit[idx++] = result;
110 }
111 }
112 }
113 } data;
114
115 static const uint8_t min_codeword_length = 1; // 1 represents 1 and is the code word with minimum length
116 static uint8_t encoding_length(uint64_t);
118 /* \param data Bitstring
119 * \param start_idx Starting index of the decoding.
120 * \param n Number of values to decode from the bitstring.
121 * \param it Iterator to decode the values.
122 */
123 template <bool t_sumup, bool t_inc, class t_iter>
124 static uint64_t decode(const uint64_t * data, const size_type start_idx, size_type n, t_iter it = (t_iter) nullptr);
125
128
133 static uint64_t decode_prefix_sum(const uint64_t * d, const size_type start_idx, size_type n);
134 static uint64_t decode_prefix_sum(const uint64_t * d,
135 const size_type start_idx,
136 const size_type end_idx,
137 size_type n);
138
139 template <class int_vector>
140 static bool encode(const int_vector & v, int_vector & z);
141 template <class int_vector>
142 static bool decode(const int_vector & z, int_vector & v);
143
145 /* \param x Positive integer to encode.
146 * \param z Raw data of vector to write the encoded form of x.
147 * \param start_idx Beginning bit index to write the encoded form ox x in z.
148 */
149 static void encode(uint64_t x, uint64_t *& z, uint8_t & offset);
150
151 template <class int_vector>
152 static uint64_t * raw_data(int_vector & v)
153 {
154 return v.m_data;
155 }
156};
157
158// \sa coder::elias_gamma::encoding_length
159template <typename T>
160inline uint8_t elias_gamma<T>::encoding_length(uint64_t w)
161{
162 uint8_t len_1 = w ? bits::hi(w) : 64;
163 return 2 * len_1 + 1;
164}
165
166template <typename T>
167template <class int_vector>
169{
170 typedef typename int_vector::size_type size_type;
171 z.width(v.width());
172 size_type z_bit_size = 0;
173 uint64_t w;
174 const uint64_t zero_val = v.width() < 64 ? (1ULL) << v.width() : 0;
175 for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
176 {
177 if ((w = *it) == 0) { w = zero_val; }
178 z_bit_size += encoding_length(w);
179 }
180 z.bit_resize(z_bit_size); // Initial size of z
181 z.shrink_to_fit();
182 if (z_bit_size & 0x3F)
183 { // if z_bit_size % 64 != 0
184 *(z.m_data + (z_bit_size >> 6)) = 0; // initialize last word
185 }
186 z_bit_size = 0;
187 uint64_t * z_data = z.m_data;
188 uint8_t offset = 0;
189 size_type len_1; // TODO: change to uint8_t and test it
190 for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
191 {
192 w = *it;
193 if (w == 0) { w = zero_val; }
194 // (number of bits to represent w)-1
195 if (!w)
196 {
197 len_1 = 64;
198 bits::write_int_and_move(z_data, 0ULL, offset, 64);
199 bits::write_int_and_move(z_data, 1ULL, offset, 1);
200 }
201 else
202 {
203 len_1 = bits::hi(w);
204 bits::write_int_and_move(z_data, 1ULL << len_1, offset, len_1 + 1);
205 }
206 if (len_1) { bits::write_int_and_move(z_data, w, offset, len_1); }
207 }
208 return true;
209}
210
211template <typename T>
212inline void elias_gamma<T>::encode(uint64_t x, uint64_t *& z, uint8_t & offset)
213{
214 uint8_t len_1 = 0;
215 if (!x)
216 {
217 len_1 = 64;
218 bits::write_int_and_move(z, 0, offset, 64);
219 bits::write_int_and_move(z, 1, offset, 1);
220 }
221 else
222 {
223 len_1 = bits::hi(x);
224 bits::write_int_and_move(z, 1ULL << len_1, offset, len_1 + 1);
225 }
226 if (len_1) { bits::write_int_and_move(z, x, offset, len_1); }
227}
228
229template <typename T>
230template <class int_vector>
232{
233 typename int_vector::size_type len_1, n = 0;
234 const uint64_t * z_data = z.data();
235 const uint64_t * z_end = z.data() + (z.bit_size() >> 6);
236 uint8_t offset = 0;
237 while ((z_data < z_end) or (z_data == z_end and offset < (z.bit_size() & 0x3F)))
238 {
239 len_1 = bits::read_unary_and_move(z_data, offset);
240 if (len_1) { bits::move_right(z_data, offset, len_1); }
241 ++n;
242 }
243 v.width(z.width());
244 v.resize(n);
245 v.shrink_to_fit();
246 return decode<false, true>(z.data(), 0, n, v.begin());
247}
248
249template <typename T>
250template <bool t_sumup, bool t_inc, class t_iter>
251inline uint64_t elias_gamma<T>::decode(const uint64_t * d, const size_type start_idx, size_type n, t_iter it)
252{
253 d += (start_idx >> 6);
254 uint64_t value = 0;
255 size_type i = 0;
256 size_type len_1;
257 uint8_t offset = start_idx & 0x3F;
258 while (i++ < n)
259 { // while not all values are decoded
260 if (!t_sumup) value = 0;
261 len_1 = bits::read_unary_and_move(d, offset); // read length of x-1
262 if (!len_1) { value += 1; }
263 else
264 {
265 value += bits::read_int_and_move(d, offset, len_1) + (len_1 < 64) * (1ULL << len_1);
266 }
267 if (t_inc) *(it++) = value;
268 }
269 return value;
270}
271
272template <typename T>
273inline uint64_t elias_gamma<T>::decode_prefix_sum(const uint64_t * d,
274 const size_type start_idx,
275 const size_type end_idx,
276 size_type n)
277{
278 if (n == 0) return 0;
279 const uint64_t * lastdata = d + ((end_idx + 63) >> 6);
280 d += (start_idx >> 6);
281 uint64_t w = 0, value = 0;
282 int16_t buffered = 0, read = start_idx & 0x3F;
283 size_type i = 0;
284 if (n + read <= 64)
285 {
286 if (((*d >> read) & bits::lo_set[n]) == bits::lo_set[n]) return n;
287 }
288 else
289 { // n+read > 64
290 if ((*d >> read) == bits::lo_set[64 - read])
291 { // all bits are set to 1
292 value = 64 - read;
293 ++d;
294 n -= (64 - read);
295 read = 0;
296 while (n >= 64)
297 {
298 if (*d == 0xFFFFFFFFFFFFFFFFULL)
299 {
300 value += 64;
301 ++d;
302 n -= 64;
303 }
304 else
305 goto start_decoding;
306 }
307 // 0 <= n <= 64
308 if ((*d & bits::lo_set[n]) == bits::lo_set[n]) return value + n;
309 }
310 }
311
312start_decoding:
313 while (i < n)
314 {
315 while (buffered < 64 and d < lastdata)
316 {
317 fill_buffer:
318 w |= (((*d) >> read) << buffered);
319 if (read >= buffered)
320 {
321 ++d;
322 buffered += 64 - read;
323 read = 0;
324 }
325 else
326 { // read buffered
327 read += 64 - buffered;
328 buffered = 64;
329 }
330 }
331 uint32_t rbp = bits::lo(~w);
332 if (rbp > 0)
333 {
334 i += rbp;
335 value += rbp;
336 if (i >= n)
337 { // decoded too much
338 return value - (i - n); // return corrected value
339 }
340 assert((int64_t)buffered >= rbp);
341 buffered -= rbp;
342 w >>= rbp;
343 if (buffered < 16) goto fill_buffer;
344 }
345 {
346 // i < n
347 begin_decode:
348 uint32_t psum = elias_gamma<T>::data.prefixsum[w & 0x0000FFFF];
349 if (!psum or i + ((psum >> 16) & 0x00FF) > n)
350 {
351 if (w == 0)
352 { // buffer is not full
353 w |= (((*d) >> read) << buffered);
354 if (read >= buffered)
355 {
356 ++d;
357 buffered += 64 - read;
358 read = 0;
359 }
360 else
361 {
362 read += 64 - buffered;
363 buffered = 64;
364 };
365 if (!w)
366 {
367 w |= (((*d) >> read) << buffered);
368 if (read >= buffered)
369 {
370 ++d;
371 buffered += 64 - read;
372 read = 0;
373 }
374 else
375 {
376 read += 64 - buffered;
377 buffered = 64;
378 };
379 }
380 }
381 // assert(w>0);
382 uint16_t len_1 = bits::lo(w); // read length of length
383 buffered -= (len_1 + 1);
384 w >>= (len_1 + 1);
385 if (len_1 > buffered)
386 { // buffer is not full
387 w |= (((*d) >> read) << buffered);
388 if (read >= buffered)
389 {
390 ++d;
391 buffered += 64 - read;
392 read = 0;
393 }
394 else
395 {
396 read += 64 - buffered;
397 buffered = 64;
398 };
399 if (len_1 > buffered)
400 {
401 w |= (((*d) >> read) << buffered);
402 if (read >= buffered)
403 {
404 ++d;
405 buffered += 64 - read;
406 read = 0;
407 }
408 else
409 {
410 read += 64 - buffered;
411 buffered = 64;
412 };
413 }
414 }
415 value += (w & bits::lo_set[len_1]) + (len_1 < 64) * (1ULL << len_1);
416 buffered -= len_1;
417 if (len_1 < 64) { w >>= len_1; }
418 else
419 {
420 w = 0;
421 }
422 ++i;
423 if (i == n) return value;
424 if (buffered >= 16) goto begin_decode;
425 }
426 else
427 {
428 value += (psum & 0x0000FFFF);
429 i += ((psum >> 16) & 0x00FF);
430 if (i == n) return value;
431 buffered -= (psum >> 24);
432 w >>= (psum >> 24);
433 if (buffered >= 16) goto begin_decode;
434 }
435 }
436 };
437 return value;
438}
439
440template <typename T>
441inline uint64_t elias_gamma<T>::decode_prefix_sum(const uint64_t * d, const size_type start_idx, size_type n)
442{
443 if (n == 0) return 0;
444 d += (start_idx >> 6);
445 uint64_t value = 0;
446 size_type i = 0;
447 uint8_t offset = start_idx & 0x3F;
448
449 if (n < 24)
450 {
451 if (n + offset <= 64)
452 {
453 if (((*d >> offset) & bits::lo_set[n]) == bits::lo_set[n]) return n;
454 }
455 else
456 { // n+offset > 64
457 if ((*d >> offset) == bits::lo_set[64 - offset])
458 { // all bits are set to 1
459 value = 64 - offset;
460 ++d;
461 n -= (64 - offset);
462 offset = 0;
463 while (n >= 64)
464 {
465 if (*d == 0xFFFFFFFFFFFFFFFFULL)
466 {
467 value += 64;
468 ++d;
469 n -= 64;
470 }
471 else
472 {
473 uint8_t temp = bits::lo(~(*d));
474 value += temp;
475 n -= temp;
476 offset = temp;
477 goto start_decoding;
478 }
479 }
480 // 0 <= n <= 64
481 if ((*d & bits::lo_set[n]) == bits::lo_set[n]) return value + n;
482 }
483 }
484 }
485
486start_decoding:
487 while (i < n)
488 { // while not all values are decoded
489 // n-i values to decode
490
491 if (((*d >> offset) & 0xF) == 0xF)
492 {
493 uint8_t maxdecode = n - i > 63 ? 63 : n - i;
494 uint8_t rbp = bits::lo(~bits::read_int(d, offset, maxdecode));
495 i += rbp;
496 value += rbp;
497 if (rbp + offset >= 64)
498 {
499 ++d;
500 offset = (rbp + offset) & 0x3F;
501 }
502 else
503 {
504 offset += rbp;
505 }
506 if (rbp == maxdecode) continue;
507 }
508
509 while (i < n)
510 {
511 uint32_t psum = elias_gamma<T>::data.prefixsum[bits::read_int(d, offset, 16)];
512 if (psum == 0)
513 { // value does not fit in 16 bits
514 goto decode_single;
515 }
516 else if (i + ((psum >> 16) & 0x00FF) > n)
517 { // decoded too much
518 if (n - i <= 8)
519 {
520 psum = elias_gamma<T>::data.prefixsum_8bit[bits::read_int(d, offset, 8) | ((n - i - 1) << 8)];
521 if (psum > 0)
522 {
523 value += (psum & 0xFF);
524 i += ((psum >> 8) & 0xF);
525 offset += (psum >> 12);
526 if (offset >= 64)
527 {
528 offset &= 0x3F;
529 ++d;
530 }
531 }
532 }
533 break;
534 }
535 else
536 {
537 value += (psum & 0x0000FFFF);
538 i += ((psum >> 16) & 0x00FF);
539 offset += (psum >> 24);
540 if (offset >= 64)
541 {
542 offset &= 0x3F;
543 ++d;
544 }
545 }
546 }
547 if (i < n)
548 {
549 decode_single:
550 i++;
551 uint16_t len_1 = bits::read_unary_and_move(d, offset); // read length of length of x
552 value += bits::read_int_and_move(d, offset, len_1) + (len_1 < 64) * (1ULL << len_1);
553 }
554 }
555 return value;
556}
557
558template <typename T>
560
561} // end namespace coder
562
563} // end namespace sdsl
564#endif
A class to encode and decode between Elias- and binary code.
static bool encode(const int_vector &v, int_vector &z)
static struct sdsl::coder::elias_gamma::impl data
static uint64_t * raw_data(int_vector &v)
static uint64_t decode_prefix_sum(const uint64_t *d, const size_type start_idx, size_type n)
Decode n Elias gamma encoded integers beginning at start_idx in the bitstring "data" and return the s...
static uint8_t encoding_length(uint64_t)
static const uint8_t min_codeword_length
static uint64_t decode(const uint64_t *data, const size_type start_idx, size_type n, t_iter it=(t_iter) nullptr)
Decode n Elias-delta encoded bits beginning at start_idx in the bitstring "data".
A generic vector class for integers of width .
Definition: int_vector.hpp:216
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
Definition: int_vector.hpp:764
int_vector_size_type size_type
Definition: int_vector.hpp:229
size_type bit_size() const noexcept
The number of bits in the int_vector.
Definition: int_vector.hpp:547
void shrink_to_fit()
Free unused allocated memory.
Definition: int_vector.hpp:506
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
Definition: int_vector.hpp:595
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:566
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:521
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
Definition: int_vector.hpp:759
int_vector.hpp contains the sdsl::int_vector class.
int_vector ::size_type size_type
Namespace for the succinct data structure library.
static constexpr uint64_t read_int(const uint64_t *word, uint8_t offset=0, const uint8_t len=64)
Reads a value from a bit position in an array.
Definition: bits.hpp:770
static constexpr uint32_t lo(uint64_t x)
Calculates the position of the rightmost 1-bit in the 64bit integer x if it exists.
Definition: bits.hpp:686
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:651
static constexpr void write_int_and_move(uint64_t *&word, uint64_t x, uint8_t &offset, const uint8_t len)
Writes value x to an bit position in an array and moves the bit-pointer.
Definition: bits.hpp:742
static constexpr uint64_t read_int_bounded(const uint64_t *word, uint8_t offset=0, const uint8_t len=64)
Definition: bits.hpp:786
static constexpr uint64_t read_unary_and_move(const uint64_t *&word, uint8_t &offset)
Reads an unary decoded value from a bit position in an array and moves the bit-pointer.
Definition: bits.hpp:842
static constexpr uint64_t read_unary_bounded(const uint64_t *word, uint8_t offset=0)
Definition: bits.hpp:831
static constexpr uint64_t read_int_and_move(const uint64_t *&word, uint8_t &offset, const uint8_t len=64)
Reads a value from a bit position in an array and moved the bit-pointer.
Definition: bits.hpp:792
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:187
static constexpr void move_right(const uint64_t *&word, uint8_t &offset, const uint8_t len)
Move the bit-pointer (=uint64_t word and offset) len to the right.
Definition: bits.hpp:877