SDSL 3.0.1
Succinct Data Structure Library
wt_int.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
10#ifndef INCLUDED_SDSL_INT_WAVELET_TREE
11#define INCLUDED_SDSL_INT_WAVELET_TREE
12
13#include <algorithm> // for std::swap
14#include <map> // for mapping a symbol to its lexicographical index
15#include <queue>
16#include <set> // for calculating the alphabet size
17#include <stdexcept>
18#include <utility>
19#include <vector>
20
21#include <sdsl/int_vector.hpp>
25#include <sdsl/util.hpp>
26#include <sdsl/wt_helper.hpp>
27
29namespace sdsl
30{
31
33
44template <class t_bitvector = bit_vector,
45 class t_rank = typename t_bitvector::rank_1_type,
46 class t_select = typename t_bitvector::select_1_type,
47 class t_select_zero = typename t_bitvector::select_0_type>
48class wt_int
49{
50 public:
53 typedef typename t_bitvector::difference_type difference_type;
56 typedef t_bitvector bit_vector_type;
57 typedef t_rank rank_1_type;
58 typedef t_select select_1_type;
59 typedef t_select_zero select_0_type;
62 enum
63 {
64 lex_ordered = 1
65 };
66
67 typedef std::pair<value_type, size_type> point_type;
68 typedef std::vector<point_type> point_vec_type;
69 typedef std::pair<size_type, point_vec_type> r2d_res_type;
70
71 protected:
73 size_type m_sigma = 0; //<- \f$ |\Sigma| \f$
74 bit_vector_type m_tree; // bit vector to store the wavelet tree
75 rank_1_type m_tree_rank; // rank support for the wavelet tree bit vector
76 select_1_type m_tree_select1; // select support for the wavelet tree bit vector
78 uint32_t m_max_level = 0;
79
80 private:
81 // recursive internal version of the method interval_symbols
82 void _interval_symbols(size_type i,
83 size_type j,
84 size_type & k,
85 std::vector<value_type> & cs,
86 std::vector<size_type> & rank_c_i,
87 std::vector<size_type> & rank_c_j,
88 size_type level,
90 size_type node_size,
91 size_type offset) const
92 {
93 // invariant: j>i
94
95 if (level >= m_max_level)
96 {
97 rank_c_i[k] = i;
98 rank_c_j[k] = j;
99 cs[k++] = path;
100 return;
101 }
102
103 size_type ones_before_o = m_tree_rank(offset);
104 size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
105 size_type ones_before_j = m_tree_rank(offset + j) - ones_before_o;
106 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
107
108 // goto left child
109 if ((j - i) - (ones_before_j - ones_before_i) > 0)
110 {
111 size_type new_offset = offset + m_size;
112 size_type new_node_size = node_size - ones_before_end;
113 size_type new_i = i - ones_before_i;
114 size_type new_j = j - ones_before_j;
115 _interval_symbols(new_i, new_j, k, cs, rank_c_i, rank_c_j, level + 1, path << 1, new_node_size, new_offset);
116 }
117
118 // goto right child
119 if ((ones_before_j - ones_before_i) > 0)
120 {
121 size_type new_offset = offset + (node_size - ones_before_end) + m_size;
122 size_type new_node_size = ones_before_end;
123 size_type new_i = ones_before_i;
124 size_type new_j = ones_before_j;
125 _interval_symbols(new_i,
126 new_j,
127 k,
128 cs,
129 rank_c_i,
130 rank_c_j,
131 level + 1,
132 (path << 1) | 1,
133 new_node_size,
134 new_offset);
135 }
136 }
137
138 public:
141 const uint32_t & max_level = m_max_level;
142
144 wt_int() = default;
145
147
155 template <typename t_it>
156 wt_int(t_it begin, t_it end, std::string tmp_dir = ram_file_name(""))
157 : m_size(std::distance(begin, end))
158 {
159 if (0 == m_size) return;
160 m_sigma = 0;
161
162 value_type max_elem = 1; // variable for the biggest value in rac
163 for (auto it = begin; it != end; ++it)
164 {
165 value_type value = *it;
166 if (value > max_elem) max_elem = value;
167 }
168 m_max_level = bits::hi(max_elem) + 1;
169 std::string tree_out_buf_file_name = tmp_file(tmp_dir, "_m_tree");
170 {
172 std::copy(begin, end, rac.begin());
173
174 // buffer for elements in the right node
175 int_vector_buffer<> buf1(tmp_file(tmp_dir, "_wt_constr_buf"), std::ios::out, 10 * (1 << 20), m_max_level);
176 osfstream tree_out_buf(tree_out_buf_file_name, std::ios::binary | std::ios::trunc | std::ios::out);
177
178 size_type bit_size = m_size * m_max_level;
179 int_vector<1>::write_header(bit_size, 1, tree_out_buf); // write bv header
180
181 size_type tree_pos = 0;
182 uint64_t tree_word = 0;
183
184 uint64_t mask_old = 1ULL << (m_max_level);
185 for (uint32_t k = 0; k < m_max_level; ++k)
186 {
187 size_type start = 0;
188 const uint64_t mask_new = 1ULL << (m_max_level - k - 1);
189 do {
190 size_type i = start;
191 size_type cnt0 = 0;
192 size_type cnt1 = 0;
193 uint64_t start_value = (rac[i] & mask_old);
194 uint64_t x;
195 while (i < m_size and ((x = rac[i]) & mask_old) == start_value)
196 {
197 if (x & mask_new)
198 {
199 tree_word |= (1ULL << (tree_pos & 0x3FULL));
200 buf1[cnt1++] = x;
201 }
202 else
203 {
204 rac[start + cnt0++] = x;
205 }
206 ++tree_pos;
207 if ((tree_pos & 0x3FULL) == 0)
208 { // if tree_pos % 64 == 0 write old word
209 tree_out_buf.write((char *)&tree_word, sizeof(tree_word));
210 tree_word = 0;
211 }
212 ++i;
213 }
214 if (k + 1 < m_max_level)
215 { // inner node
216 for (size_type j = 0; j < cnt1; ++j) { rac[start + cnt0 + j] = buf1[j]; }
217 }
218 else
219 { // leaf node
220 m_sigma += (cnt0 > 0) + (cnt1 > 0); // increase sigma for each leaf
221 }
222 start += cnt0 + cnt1;
223 } while (start < m_size);
224 mask_old += mask_new;
225 }
226 if ((tree_pos & 0x3FULL) != 0)
227 { // if tree_pos % 64 > 0 => there are remaining entries we have to write
228 tree_out_buf.write((char *)&tree_word, sizeof(tree_word));
229 }
230 buf1.close(true); // remove temporary file
231 tree_out_buf.close();
232 } // destruct rac
233
235 load_from_file(tree, tree_out_buf_file_name);
236 sdsl::remove(tree_out_buf_file_name);
237 m_tree = bit_vector_type(std::move(tree));
238 util::init_support(m_tree_rank, &m_tree);
239 util::init_support(m_tree_select0, &m_tree);
240 util::init_support(m_tree_select1, &m_tree);
241 }
242
244 wt_int(const wt_int & wt)
245 {
246 m_size = wt.m_size;
247 m_sigma = wt.m_sigma;
248 m_tree = wt.m_tree;
250 m_tree_rank.set_vector(&m_tree);
252 m_tree_select1.set_vector(&m_tree);
254 m_tree_select0.set_vector(&m_tree);
256 }
257
260 : m_size(wt.m_size)
261 , m_sigma(wt.m_sigma)
262 , m_tree(std::move(wt.m_tree))
263 , m_tree_rank(std::move(wt.m_tree_rank))
264 , m_tree_select1(std::move(wt.m_tree_select1))
265 , m_tree_select0(std::move(wt.m_tree_select0))
267 {
268 m_tree_rank.set_vector(&m_tree);
269 m_tree_select1.set_vector(&m_tree);
270 m_tree_select0.set_vector(&m_tree);
271 }
272
274 wt_int & operator=(const wt_int & wt)
275 {
276 if (this != &wt)
277 {
278 wt_int tmp(wt); // re-use copy-constructor
279 *this = std::move(tmp); // re-use move-assignment
280 }
281 return *this;
282 }
283
286 {
287 if (this != &wt)
288 {
289 m_size = wt.m_size;
290 m_sigma = wt.m_sigma;
291 m_tree = std::move(wt.m_tree);
292 m_tree_rank = std::move(wt.m_tree_rank);
293 m_tree_rank.set_vector(&m_tree);
294 m_tree_select1 = std::move(wt.m_tree_select1);
295 m_tree_select1.set_vector(&m_tree);
296 m_tree_select0 = std::move(wt.m_tree_select0);
297 m_tree_select0.set_vector(&m_tree);
298 m_max_level = std::move(wt.m_max_level);
299 }
300 return *this;
301 }
302
304 size_type size() const { return m_size; }
305
307 bool empty() const { return m_size == 0; }
308
310
316 {
317 assert(i < size());
318 size_type offset = 0;
319 value_type res = 0;
320 size_type node_size = m_size;
321 for (uint32_t k = 0; k < m_max_level; ++k)
322 {
323 res <<= 1;
324 size_type ones_before_o = m_tree_rank(offset);
325 size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
326 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
327 if (m_tree[offset + i])
328 { // one at position i => follow right child
329 offset += (node_size - ones_before_end);
330 node_size = ones_before_end;
331 i = ones_before_i;
332 res |= 1;
333 }
334 else
335 { // zero at position i => follow left child
336 node_size = (node_size - ones_before_end);
337 i = (i - ones_before_i);
338 }
339 offset += m_size;
340 }
341 return res;
342 };
343
345
355 {
356 assert(i <= size());
357 if (((1ULL) << (m_max_level)) <= c)
358 { // c is greater than any symbol in wt
359 return 0;
360 }
361 size_type offset = 0;
362 uint64_t mask = (1ULL) << (m_max_level - 1);
363 size_type node_size = m_size;
364 for (uint32_t k = 0; k < m_max_level and i; ++k)
365 {
366 size_type ones_before_o = m_tree_rank(offset);
367 size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
368 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
369 if (c & mask)
370 { // search for a one at this level
371 offset += (node_size - ones_before_end);
372 node_size = ones_before_end;
373 i = ones_before_i;
374 }
375 else
376 { // search for a zero at this level
377 node_size = (node_size - ones_before_end);
378 i = (i - ones_before_i);
379 }
380 offset += m_size;
381 mask >>= 1;
382 }
383 return i;
384 };
385
387
393 std::pair<size_type, value_type> inverse_select(size_type i) const
394 {
395 assert(i < size());
396
397 value_type c = 0;
398 size_type node_size = m_size, offset = 0;
399 for (uint32_t k = 0; k < m_max_level; ++k)
400 {
401 size_type ones_before_o = m_tree_rank(offset);
402 size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
403 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
404 c <<= 1;
405 if (m_tree[offset + i])
406 { // go to the right child
407 offset += (node_size - ones_before_end);
408 node_size = ones_before_end;
409 i = ones_before_i;
410 c |= 1;
411 }
412 else
413 { // go to the left child
414 node_size = (node_size - ones_before_end);
415 i = (i - ones_before_i);
416 }
417 offset += m_size;
418 }
419 return std::make_pair(i, c);
420 }
421
423
432 {
433 assert(1 <= i and i <= rank(size(), c));
434 // possible optimization: if the array is a permutation we can start at the bottom of the tree
435 size_type offset = 0;
436 uint64_t mask = (1ULL) << (m_max_level - 1);
437 size_type node_size = m_size;
438 int_vector<64> m_path_off(max_level + 1);
439 int_vector<64> m_path_rank_off(max_level + 1);
440 m_path_off[0] = m_path_rank_off[0] = 0;
441
442 for (uint32_t k = 0; k < m_max_level and node_size; ++k)
443 {
444 size_type ones_before_o = m_tree_rank(offset);
445 m_path_rank_off[k] = ones_before_o;
446 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
447 if (c & mask)
448 { // search for a one at this level
449 offset += (node_size - ones_before_end);
450 node_size = ones_before_end;
451 }
452 else
453 { // search for a zero at this level
454 node_size = (node_size - ones_before_end);
455 }
456 offset += m_size;
457 m_path_off[k + 1] = offset;
458 mask >>= 1;
459 }
460 if (0ULL == node_size or node_size < i)
461 {
462 throw std::logic_error("select(" + util::to_string(i) + "," + util::to_string(c) +
463 "): c does not occur i times in the WT");
464 return m_size;
465 }
466 mask = 1ULL;
467 for (uint32_t k = m_max_level; k > 0; --k)
468 {
469 offset = m_path_off[k - 1];
470 size_type ones_before_o = m_path_rank_off[k - 1];
471 if (c & mask)
472 { // right child => search i'th
473 i = m_tree_select1(ones_before_o + i) - offset + 1;
474 }
475 else
476 { // left child => search i'th zero
477 i = m_tree_select0(offset - ones_before_o + i) - offset + 1;
478 }
479 mask <<= 1;
480 }
481 return i - 1;
482 };
483
485
506 size_type j,
507 size_type & k,
508 std::vector<value_type> & cs,
509 std::vector<size_type> & rank_c_i,
510 std::vector<size_type> & rank_c_j) const
511 {
512 assert(i <= j and j <= size());
513 k = 0;
514 if (i == j) { return; }
515 if ((i + 1) == j)
516 {
517 auto res = inverse_select(i);
518 cs[0] = res.second;
519 rank_c_i[0] = res.first;
520 rank_c_j[0] = res.first + 1;
521 k = 1;
522 return;
523 }
524
525 _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0, 0, m_size, 0);
526 }
527
529
541 template <class t_ret_type = std::tuple<size_type, size_type, size_type>>
542 t_ret_type lex_count(size_type i, size_type j, value_type c) const
543 {
544 assert(i <= j and j <= size());
545 if (((1ULL) << (m_max_level)) <= c)
546 { // c is greater than any symbol in wt
547 return t_ret_type{ 0, j - i, 0 };
548 }
549 size_type offset = 0;
550 size_type smaller = 0;
551 size_type greater = 0;
552 uint64_t mask = (1ULL) << (m_max_level - 1);
553 size_type node_size = m_size;
554 for (uint32_t k = 0; k < m_max_level; ++k)
555 {
556 size_type ones_before_o = m_tree_rank(offset);
557 size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
558 size_type ones_before_j = m_tree_rank(offset + j) - ones_before_o;
559 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
560 if (c & mask)
561 { // search for a one at this level
562 offset += (node_size - ones_before_end);
563 node_size = ones_before_end;
564 smaller += j - i - ones_before_j + ones_before_i;
565 i = ones_before_i;
566 j = ones_before_j;
567 }
568 else
569 { // search for a zero at this level
570 node_size -= ones_before_end;
571 greater += ones_before_j - ones_before_i;
572 i -= ones_before_i;
573 j -= ones_before_j;
574 }
575 offset += m_size;
576 mask >>= 1;
577 }
578 return t_ret_type{ i, smaller, greater };
579 }
580
582
591 template <class t_ret_type = std::tuple<size_type, size_type>>
593 {
594 assert(i <= size());
595 if (((1ULL) << (m_max_level)) <= c)
596 { // c is greater than any symbol in wt
597 return t_ret_type{ 0, i };
598 }
599 size_type offset = 0;
600 size_type result = 0;
601 uint64_t mask = (1ULL) << (m_max_level - 1);
602 size_type node_size = m_size;
603 for (uint32_t k = 0; k < m_max_level and i; ++k)
604 {
605 size_type ones_before_o = m_tree_rank(offset);
606 size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o;
607 size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o;
608 if (c & mask)
609 { // search for a one at this level
610 offset += (node_size - ones_before_end);
611 node_size = ones_before_end;
612 result += i - ones_before_i;
613 i = ones_before_i;
614 }
615 else
616 { // search for a zero at this level
617 node_size = (node_size - ones_before_end);
618 i -= ones_before_i;
619 }
620 offset += m_size;
621 mask >>= 1;
622 }
623 return t_ret_type{ i, result };
624 }
625
627
635 std::pair<size_type, std::vector<std::pair<value_type, size_type>>> range_search_2d(size_type lb,
636 size_type rb,
637 value_type vlb,
638 value_type vrb,
639 bool report = true) const
640 {
641 std::vector<size_type> offsets(m_max_level + 1);
642 std::vector<size_type> ones_before_os(m_max_level + 1);
643 offsets[0] = 0;
644 if (vrb > (1ULL << m_max_level)) vrb = (1ULL << m_max_level);
645 if (vlb > vrb) return make_pair(0, point_vec_type());
646 size_type cnt_answers = 0;
647 point_vec_type point_vec;
648 _range_search_2d(lb, rb, vlb, vrb, 0, 0, m_size, offsets, ones_before_os, 0, point_vec, report, cnt_answers);
649 return make_pair(cnt_answers, point_vec);
650 }
651
653 size_type rb,
654 value_type vlb,
655 value_type vrb,
656 size_type level,
657 size_type ilb,
658 size_type node_size,
659 std::vector<size_type> & offsets,
660 std::vector<size_type> & ones_before_os,
662 point_vec_type & point_vec,
663 bool report,
664 size_type & cnt_answers) const
665 {
666 if (lb > rb) return;
667 if (level == m_max_level)
668 {
669 if (report)
670 {
671 for (size_type j = lb + 1; j <= rb + 1; ++j)
672 {
673 size_type i = j;
674 size_type c = path;
675 for (uint32_t k = m_max_level; k > 0; --k)
676 {
677 size_type offset = offsets[k - 1];
678 size_type ones_before_o = ones_before_os[k - 1];
679 if (c & 1) { i = m_tree_select1(ones_before_o + i) - offset + 1; }
680 else
681 {
682 i = m_tree_select0(offset - ones_before_o + i) - offset + 1;
683 }
684 c >>= 1;
685 }
686 point_vec.emplace_back(i - 1, path);
687 }
688 }
689 cnt_answers += rb - lb + 1;
690 return;
691 }
692 size_type irb = ilb + (1ULL << (m_max_level - level));
693 size_type mid = (irb + ilb) >> 1;
694
695 size_type offset = offsets[level];
696
697 size_type ones_before_o = m_tree_rank(offset);
698 ones_before_os[level] = ones_before_o;
699 size_type ones_before_lb = m_tree_rank(offset + lb);
700 size_type ones_before_rb = m_tree_rank(offset + rb + 1);
701 size_type ones_before_end = m_tree_rank(offset + node_size);
702 size_type zeros_before_o = offset - ones_before_o;
703 size_type zeros_before_lb = offset + lb - ones_before_lb;
704 size_type zeros_before_rb = offset + rb + 1 - ones_before_rb;
705 size_type zeros_before_end = offset + node_size - ones_before_end;
706 if (vlb < mid and mid)
707 {
708 size_type nlb = zeros_before_lb - zeros_before_o;
709 size_type nrb = zeros_before_rb - zeros_before_o;
710 offsets[level + 1] = offset + m_size;
711 if (nrb)
713 nrb - 1,
714 vlb,
715 std::min(vrb, mid - 1),
716 level + 1,
717 ilb,
718 zeros_before_end - zeros_before_o,
719 offsets,
720 ones_before_os,
721 path << 1,
722 point_vec,
723 report,
724 cnt_answers);
725 }
726 if (vrb >= mid)
727 {
728 size_type nlb = ones_before_lb - ones_before_o;
729 size_type nrb = ones_before_rb - ones_before_o;
730 offsets[level + 1] = offset + m_size + (zeros_before_end - zeros_before_o);
731 if (nrb)
733 nrb - 1,
734 std::max(mid, vlb),
735 vrb,
736 level + 1,
737 mid,
738 ones_before_end - ones_before_o,
739 offsets,
740 ones_before_os,
741 (path << 1) + 1,
742 point_vec,
743 report,
744 cnt_answers);
745 }
746 }
747
749 const_iterator begin() const { return const_iterator(this, 0); }
750
752 const_iterator end() const { return const_iterator(this, size()); }
753
755 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
756 {
757 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
758 size_type written_bytes = 0;
759 written_bytes += write_member(m_size, out, child, "size");
760 written_bytes += write_member(m_sigma, out, child, "sigma");
761 written_bytes += m_tree.serialize(out, child, "tree");
762 written_bytes += m_tree_rank.serialize(out, child, "tree_rank");
763 written_bytes += m_tree_select1.serialize(out, child, "tree_select_1");
764 written_bytes += m_tree_select0.serialize(out, child, "tree_select_0");
765 written_bytes += write_member(m_max_level, out, child, "max_level");
766 structure_tree::add_size(child, written_bytes);
767 return written_bytes;
768 }
769
771 void load(std::istream & in)
772 {
773 read_member(m_size, in);
774 read_member(m_sigma, in);
775 m_tree.load(in);
776 m_tree_rank.load(in, &m_tree);
777 m_tree_select1.load(in, &m_tree);
778 m_tree_select0.load(in, &m_tree);
780 }
781
782 template <typename archive_t>
783 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
784 {
785 ar(CEREAL_NVP(m_size));
786 ar(CEREAL_NVP(m_sigma));
788 ar(CEREAL_NVP(m_tree));
792 }
793
794 template <typename archive_t>
795 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
796 {
797 ar(CEREAL_NVP(m_size));
798 ar(CEREAL_NVP(m_sigma));
800 ar(CEREAL_NVP(m_tree));
802 m_tree_rank.set_vector(&m_tree);
804 m_tree_select1.set_vector(&m_tree);
806 m_tree_select0.set_vector(&m_tree);
807 }
808
810 bool operator==(wt_int const & other) const noexcept
811 {
812 return (m_size == other.m_size) && (m_sigma == other.m_sigma) && (m_tree == other.m_tree) &&
813 (m_tree_rank == other.m_tree_rank) && (m_tree_select1 == other.m_tree_select1) &&
814 (m_tree_select0 == other.m_tree_select0) && (m_max_level == other.m_max_level);
815 }
816
818 bool operator!=(wt_int const & other) const noexcept { return !(*this == other); }
819
822 {
827
828 // Default constructor
829 node_type(size_type o = 0, size_type sz = 0, size_type l = 0, value_type sy = 0)
830 : offset(o)
831 , size(sz)
832 , level(l)
833 , sym(sy)
834 {}
835
836 // Copy constructor
837 node_type(const node_type &) = default;
838
839 // Move copy constructor
840 node_type(node_type &&) = default;
841
842 // Assignment operator
843 node_type & operator=(const node_type &) = default;
844
845 // Move assignment operator
847
848 // Comparator operator
849 bool operator==(const node_type & v) const { return offset == v.offset; }
850
851 // Smaller operator
852 bool operator<(const node_type & v) const { return offset < v.offset; }
853
854 // Greater operator
855 bool operator>(const node_type & v) const { return offset > v.offset; }
856 };
857
859 bool is_leaf(const node_type & v) const { return v.level == m_max_level; }
860
862 value_type sym(const node_type & v) const { return v.sym; }
863
866 {
868 }
869
871 auto seq(const node_type & v) const -> random_access_container<std::function<value_type(size_type)>>
872 {
873 return random_access_container<std::function<value_type(size_type)>>(
874 [&v, this](size_type i) {
875 node_type vv = v;
876 while (!is_leaf(vv))
877 {
878 auto vs = expand(vv);
879 auto rs = expand(vv, range_type{ { 0, i } });
880 bool bit = *(begin(vv) + i);
881 i = std::get<1>(rs[bit]);
882 vv = vs[bit];
883 }
884 return sym(vv);
885 },
886 size(v));
887 }
888
890 bool empty(const node_type & v) const { return v.size == (size_type)0; }
891
893 auto size(const node_type & v) const -> decltype(v.size) { return v.size; }
894
896 node_type root() const { return node_type(0, m_size, 0, 0); }
897
899
903 std::array<node_type, 2> expand(const node_type & v) const
904 {
905 node_type v_right = v;
906 return expand(std::move(v_right));
907 }
908
910
914 std::array<node_type, 2> expand(node_type && v) const
915 {
916 node_type v_left;
917 size_type offset_rank = m_tree_rank(v.offset);
918 size_type ones = m_tree_rank(v.offset + v.size) - offset_rank;
919
920 v_left.offset = v.offset + m_size;
921 v_left.size = v.size - ones;
922 v_left.level = v.level + 1;
923 v_left.sym = v.sym << 1;
924
925 v.offset = v.offset + m_size + v_left.size;
926 v.size = ones;
927 v.level = v.level + 1;
928 v.sym = (v.sym << 1) | 1;
929
930 return { { std::move(v_left), v } };
931 }
932
934
943 std::array<range_vec_type, 2> expand(const node_type & v, const range_vec_type & ranges) const
944 {
945 auto ranges_copy = ranges;
946 return expand(v, std::move(ranges_copy));
947 }
948
950
959 std::array<range_vec_type, 2> expand(const node_type & v, range_vec_type && ranges) const
960 {
961 auto v_sp_rank = m_tree_rank(v.offset); // this is already calculated in expand(v)
962 range_vec_type res(ranges.size());
963 size_t i = 0;
964 for (auto & r : ranges)
965 {
966 auto sp_rank = m_tree_rank(v.offset + r[0]);
967 auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank;
968 auto left_size = (r[1] - r[0] + 1) - right_size;
969
970 auto right_sp = sp_rank - v_sp_rank;
971 auto left_sp = r[0] - right_sp;
972
973 r = { { left_sp, left_sp + left_size - 1 } };
974 res[i++] = { { right_sp, right_sp + right_size - 1 } };
975 }
976 return { { ranges, std::move(res) } };
977 }
978
980
989 std::array<range_type, 2> expand(const node_type & v, const range_type & r) const
990 {
991 auto v_sp_rank = m_tree_rank(v.offset); // this is already calculated in expand(v)
992 auto sp_rank = m_tree_rank(v.offset + r[0]);
993 auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank;
994 auto left_size = (r[1] - r[0] + 1) - right_size;
995
996 auto right_sp = sp_rank - v_sp_rank;
997 auto left_sp = r[0] - right_sp;
998
999 return { { { { left_sp, left_sp + left_size - 1 } }, { { right_sp, right_sp + right_size - 1 } } } };
1000 }
1001
1003 std::pair<uint64_t, uint64_t> path(value_type c) const { return { m_max_level, c }; }
1004
1005 private:
1007 auto begin(const node_type & v) const -> decltype(m_tree.begin() + v.offset) { return m_tree.begin() + v.offset; }
1008
1010 auto end(const node_type & v) const -> decltype(m_tree.begin() + v.offset + v.size)
1011 {
1012 return m_tree.begin() + v.offset + v.size;
1013 }
1014};
1015
1016} // end namespace sdsl
1017#endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
void close(bool remove_file=false)
Close the int_vector_buffer.
A generic vector class for integers of width .
Definition: int_vector.hpp:216
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
Definition: int_vector.hpp:806
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
Definition: int_vector.hpp:759
void close()
Close the stream.
Definition: sfstream.hpp:87
Generic iterator for a random access container.
Definition: iterators.hpp:24
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
A wavelet tree class for integer sequences.
Definition: wt_int.hpp:49
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
Definition: wt_int.hpp:315
wt_tag index_category
Definition: wt_int.hpp:60
bool operator!=(wt_int const &other) const noexcept
Inequality operator.
Definition: wt_int.hpp:818
bool operator==(wt_int const &other) const noexcept
Equality operator.
Definition: wt_int.hpp:810
std::array< range_type, 2 > expand(const node_type &v, const range_type &r) const
Returns for a range its left and right child ranges.
Definition: wt_int.hpp:989
std::array< range_vec_type, 2 > expand(const node_type &v, const range_vec_type &ranges) const
Returns for each range its left and right child ranges.
Definition: wt_int.hpp:943
select_0_type m_tree_select0
Definition: wt_int.hpp:77
std::array< node_type, 2 > expand(node_type &&v) const
Returns the two child nodes of an inner node.
Definition: wt_int.hpp:914
auto bit_vec(const node_type &v) const -> node_bv_container< t_bitvector >
Random access container to bitvector of node v.
Definition: wt_int.hpp:865
node_type root() const
Return the root node.
Definition: wt_int.hpp:896
wt_int & operator=(wt_int &&wt)
Assignment move operator.
Definition: wt_int.hpp:285
const bit_vector_type & tree
A concatenation of all bit vectors of the wavelet tree.
Definition: wt_int.hpp:140
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: wt_int.hpp:783
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: wt_int.hpp:755
size_type size() const
Returns the size of the original vector.
Definition: wt_int.hpp:304
std::pair< size_type, point_vec_type > r2d_res_type
Definition: wt_int.hpp:69
const size_type & sigma
Effective alphabet size of the wavelet tree.
Definition: wt_int.hpp:139
t_select_zero select_0_type
Definition: wt_int.hpp:59
uint32_t m_max_level
Definition: wt_int.hpp:78
std::pair< value_type, size_type > point_type
Definition: wt_int.hpp:67
wt_int()=default
Default constructor.
value_type sym(const node_type &v) const
Returns the symbol of leaf node v.
Definition: wt_int.hpp:862
size_type m_size
Definition: wt_int.hpp:72
int_vector ::value_type value_type
Definition: wt_int.hpp:52
wt_int & operator=(const wt_int &wt)
Assignment operator.
Definition: wt_int.hpp:274
const uint32_t & max_level
Maximal level of the wavelet tree.
Definition: wt_int.hpp:141
const_iterator iterator
Definition: wt_int.hpp:55
std::pair< size_type, std::vector< std::pair< value_type, size_type > > > range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, bool report=true) const
range_search_2d searches points in the index interval [lb..rb] and value interval [vlb....
Definition: wt_int.hpp:635
t_rank rank_1_type
Definition: wt_int.hpp:57
t_bitvector bit_vector_type
Definition: wt_int.hpp:56
rank_1_type m_tree_rank
Definition: wt_int.hpp:75
size_type m_sigma
Definition: wt_int.hpp:73
void interval_symbols(size_type i, size_type j, size_type &k, std::vector< value_type > &cs, std::vector< size_type > &rank_c_i, std::vector< size_type > &rank_c_j) const
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
Definition: wt_int.hpp:505
wt_int(const wt_int &wt)
Copy constructor.
Definition: wt_int.hpp:244
random_access_const_iterator< wt_int > const_iterator
Definition: wt_int.hpp:54
wt_int(t_it begin, t_it end, std::string tmp_dir=ram_file_name(""))
Construct the wavelet tree from a sequence defined by two interators.
Definition: wt_int.hpp:156
int_alphabet_tag alphabet_category
Definition: wt_int.hpp:61
wt_int(wt_int &&wt)
Move constructor.
Definition: wt_int.hpp:259
auto seq(const node_type &v) const -> random_access_container< std::function< value_type(size_type)> >
Random access container to sequence of node v.
Definition: wt_int.hpp:871
std::vector< point_type > point_vec_type
Definition: wt_int.hpp:68
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
Definition: wt_int.hpp:431
int_vector ::size_type size_type
Definition: wt_int.hpp:51
std::array< range_vec_type, 2 > expand(const node_type &v, range_vec_type &&ranges) const
Returns for each range its left and right child ranges.
Definition: wt_int.hpp:959
t_select select_1_type
Definition: wt_int.hpp:58
bool empty(const node_type &v) const
Indicates if node v is empty.
Definition: wt_int.hpp:890
void _range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, size_type level, size_type ilb, size_type node_size, std::vector< size_type > &offsets, std::vector< size_type > &ones_before_os, size_type path, point_vec_type &point_vec, bool report, size_type &cnt_answers) const
Definition: wt_int.hpp:652
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: wt_int.hpp:795
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1] of the supported vector.
Definition: wt_int.hpp:354
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition: wt_int.hpp:752
std::pair< uint64_t, uint64_t > path(value_type c) const
return the path to the leaf for a given symbol
Definition: wt_int.hpp:1003
t_bitvector::difference_type difference_type
Definition: wt_int.hpp:53
const_iterator begin() const
Returns a const_iterator to the first element.
Definition: wt_int.hpp:749
bit_vector_type m_tree
Definition: wt_int.hpp:74
std::array< node_type, 2 > expand(const node_type &v) const
Returns the two child nodes of an inner node.
Definition: wt_int.hpp:903
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many occurrences of symbol wt[i] are in the prefix [0..i-1] of the original sequence.
Definition: wt_int.hpp:393
bool is_leaf(const node_type &v) const
Checks if the node is a leaf node.
Definition: wt_int.hpp:859
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: wt_int.hpp:771
auto size(const node_type &v) const -> decltype(v.size)
Return the size of node v.
Definition: wt_int.hpp:893
t_ret_type lex_count(size_type i, size_type j, value_type c) const
How many symbols are lexicographic smaller/greater than c in [i..j-1].
Definition: wt_int.hpp:542
t_ret_type lex_smaller_count(size_type i, value_type c) const
How many symbols are lexicographic smaller than c in [0..i-1].
Definition: wt_int.hpp:592
select_1_type m_tree_select1
Definition: wt_int.hpp:76
bool empty() const
Returns whether the wavelet tree contains no data.
Definition: wt_int.hpp:307
int_vector.hpp contains the sdsl::int_vector class.
std::string to_string(const T &t, int w=1)
Namespace for the succinct data structure library.
std::string ram_file_name(const std::string &file)
Returns the corresponding RAM-file name for file.
Definition: ram_fs.hpp:174
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
std::string tmp_file(const cache_config &config, std::string name_part="")
Returns a name for a temporary file. I.e. the name was not used before.
Definition: io.hpp:698
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:53
std::array< int_vector<>::size_type, 2 > range_type
Definition: wt_helper.hpp:20
bool load_from_file(T &v, const std::string &file)
Load sdsl-object v from a file.
Definition: io.hpp:901
std::vector< range_type > range_vec_type
Definition: wt_helper.hpp:21
int remove(const std::string &)
Remove a file.
Definition: ram_fs.hpp:194
rank_support_v.hpp contains rank_support_v.
Contains declarations and definitions of data structure concepts.
select_support_mcl.hpp contains classes that support a sdsl::bit_vector with constant time select inf...
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:651
Represents a node in the wavelet tree.
Definition: wt_int.hpp:822
bool operator>(const node_type &v) const
Definition: wt_int.hpp:855
node_type(size_type o=0, size_type sz=0, size_type l=0, value_type sy=0)
Definition: wt_int.hpp:829
node_type(node_type &&)=default
node_type & operator=(node_type &&)=default
node_type & operator=(const node_type &)=default
node_type(const node_type &)=default
bool operator==(const node_type &v) const
Definition: wt_int.hpp:849
bool operator<(const node_type &v) const
Definition: wt_int.hpp:852
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.