SDSL 3.0.1
Succinct Data Structure Library
wm_int.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef INCLUDED_SDSL_WM_INT
10#define INCLUDED_SDSL_WM_INT
11
12#include <algorithm> // for std::swap
13#include <map> // for mapping a symbol to its lexicographical index
14#include <queue>
15#include <set> // for calculating the alphabet size
16#include <stdexcept>
17#include <utility>
18#include <vector>
19
20#include <sdsl/int_vector.hpp>
24#include <sdsl/util.hpp>
25#include <sdsl/wt_helper.hpp>
26
28namespace sdsl
29{
30
32
47template <class t_bitvector = bit_vector,
48 class t_rank = typename t_bitvector::rank_1_type,
49 class t_select = typename t_bitvector::select_1_type,
50 class t_select_zero = typename t_bitvector::select_0_type>
51class wm_int
52{
53 public:
56 typedef typename t_bitvector::difference_type difference_type;
59 typedef t_bitvector bit_vector_type;
60 typedef t_rank rank_1_type;
61 typedef t_select select_1_type;
62 typedef t_select_zero select_0_type;
65 enum
66 {
67 lex_ordered = 0
68 };
69
70 typedef std::pair<value_type, size_type> point_type;
71 typedef std::vector<point_type> point_vec_type;
72 typedef std::pair<size_type, point_vec_type> r2d_res_type;
73
74 struct node_type;
75
76 protected:
78 size_type m_sigma = 0; //<- \f$ |\Sigma| \f$
79 bit_vector_type m_tree; // bit vector to store the wavelet tree
80 rank_1_type m_tree_rank; // rank support for the wavelet tree bit vector
81 select_1_type m_tree_select1; // select support for the wavelet tree bit vector
83 uint32_t m_max_level = 0;
84 int_vector<64> m_zero_cnt; // m_zero_cnt[i] contains the number of zeros in level i
85 int_vector<64> m_rank_level; // m_rank_level[i] contains m_tree_rank(i*size())
86
87 public:
90 const uint32_t & max_level = m_max_level;
91
93 wm_int() = default;
94
96
104 template <typename t_it>
105 wm_int(t_it begin, t_it end, std::string tmp_dir = ram_file_name(""))
106 : m_size(std::distance(begin, end))
107 {
108 if (0 == m_size) return;
109 m_sigma = 0;
110
111 value_type max_elem = 1; // variable for the biggest value in rac
112 for (auto it = begin; it != end; ++it)
113 {
114 value_type value = *it;
115 if (value > max_elem) max_elem = value;
116 }
117 m_max_level = bits::hi(max_elem) + 1;
118 std::string tree_out_buf_file_name = tmp_file(tmp_dir, "_m_tree");
119 {
121 std::copy(begin, end, rac.begin());
122
123 // buffer for elements in the right node
124 std::string zero_buf_file_name = tmp_file(tmp_dir, "_zero_buf");
125 osfstream tree_out_buf(tree_out_buf_file_name,
126 std::ios::binary | std::ios::trunc | std::ios::out); // open buffer for tree
127 size_type bit_size = m_size * m_max_level;
128 int_vector<1>::write_header(bit_size, 1, tree_out_buf); // write bv header
129
130 size_type tree_pos = 0;
131 uint64_t tree_word = 0;
132
133 m_zero_cnt = int_vector<64>(m_max_level, 0); // zeros at level i
134
135 for (uint32_t k = 0; k < m_max_level; ++k)
136 {
137 uint8_t width = m_max_level - k - 1;
138 const uint64_t mask = 1ULL << width;
139 uint64_t x = 0;
140 size_type zeros = 0;
141 int_vector_buffer<> zero_buf(zero_buf_file_name, std::ios::out, 1024 * 1024, m_max_level);
142 for (size_t i = 0; i < m_size; ++i)
143 {
144 x = rac[i];
145 if (x & mask)
146 {
147 tree_word |= (1ULL << (tree_pos & 0x3FULL));
148 zero_buf.push_back(x);
149 }
150 else
151 {
152 rac[zeros++] = x;
153 }
154 ++tree_pos;
155 if ((tree_pos & 0x3FULL) == 0)
156 { // if tree_pos % 64 == 0 write old word
157 tree_out_buf.write((char *)&tree_word, sizeof(tree_word));
158 tree_word = 0;
159 }
160 }
161 m_zero_cnt[k] = zeros;
162 for (size_t i = zeros; i < m_size; ++i) { rac[i] = zero_buf[i - zeros]; }
163 }
164 if ((tree_pos & 0x3FULL) != 0)
165 { // if tree_pos % 64 > 0 => there are remaining entries we have to write
166 tree_out_buf.write((char *)&tree_word, sizeof(tree_word));
167 }
168 sdsl::remove(zero_buf_file_name);
169 tree_out_buf.close();
170 m_sigma = std::unique(rac.begin(), rac.end()) - rac.begin();
171 }
173 load_from_file(tree, tree_out_buf_file_name);
174 sdsl::remove(tree_out_buf_file_name);
175 m_tree = bit_vector_type(std::move(tree));
176 util::init_support(m_tree_rank, &m_tree);
177 util::init_support(m_tree_select0, &m_tree);
178 util::init_support(m_tree_select1, &m_tree);
180 for (uint32_t k = 0; k < m_rank_level.size(); ++k) { m_rank_level[k] = m_tree_rank(k * m_size); }
181 }
182
184 wm_int(const wm_int & wt)
185 : m_size(wt.m_size)
186 , m_sigma(wt.m_sigma)
187 , m_tree(wt.m_tree)
194 {
195 m_tree_rank.set_vector(&m_tree);
196 m_tree_select1.set_vector(&m_tree);
197 m_tree_select0.set_vector(&m_tree);
198 }
199
202 : m_size(wt.m_size)
203 , m_sigma(wt.m_sigma)
204 , m_tree(std::move(wt.m_tree))
205 , m_tree_rank(std::move(wt.m_tree_rank))
206 , m_tree_select1(std::move(wt.m_tree_select1))
207 , m_tree_select0(std::move(wt.m_tree_select0))
211 {
212 m_tree_rank.set_vector(&m_tree);
213 m_tree_select1.set_vector(&m_tree);
214 m_tree_select0.set_vector(&m_tree);
215 }
216
218 wm_int & operator=(const wm_int & wt)
219 {
220 if (this != &wt)
221 {
222 m_size = wt.m_size;
223 m_sigma = wt.m_sigma;
224 m_tree = wt.m_tree;
226 m_tree_rank.set_vector(&m_tree);
228 m_tree_select1.set_vector(&m_tree);
230 m_tree_select0.set_vector(&m_tree);
234 }
235 return *this;
236 }
237
240 {
241 if (this != &wt)
242 {
243 m_size = wt.m_size;
244 m_sigma = wt.m_sigma;
245 m_tree = std::move(wt.m_tree);
246 m_tree_rank = std::move(wt.m_tree_rank);
247 m_tree_rank.set_vector(&m_tree);
248 m_tree_select1 = std::move(wt.m_tree_select1);
249 m_tree_select1.set_vector(&m_tree);
250 m_tree_select0 = std::move(wt.m_tree_select0);
251 m_tree_select0.set_vector(&m_tree);
252 m_max_level = std::move(wt.m_max_level);
253 m_zero_cnt = std::move(wt.m_zero_cnt);
254 m_rank_level = std::move(wt.m_rank_level);
255 }
256 return *this;
257 }
258
260 size_type size() const { return m_size; }
261
263 bool empty() const { return m_size == 0; }
264
266
272 {
273 assert(i < size());
274 value_type res = 0;
275 for (uint32_t k = 0; k < m_max_level; ++k)
276 {
277 res <<= 1;
278 size_type rank_ones = m_tree_rank(i) - m_rank_level[k];
279 if (m_tree[i])
280 { // one at position i => follow right child
281 i = (k + 1) * m_size + m_zero_cnt[k] + rank_ones;
282 res |= 1;
283 }
284 else
285 { // zero at position i => follow left child
286 auto rank_zeros = (i - k * m_size) - rank_ones;
287 i = (k + 1) * m_size + rank_zeros;
288 }
289 }
290 return res;
291 };
292
294
304 {
305 assert(i <= size());
306 if (((1ULL) << (m_max_level)) <= c)
307 { // c is greater than any symbol in wt
308 return 0;
309 }
310 size_type b = 0; // start position of the interval
311 uint64_t mask = (1ULL) << (m_max_level - 1);
312 for (uint32_t k = 0; k < m_max_level and i; ++k)
313 {
314 size_type rank_b = m_tree_rank(b);
315 size_type ones = m_tree_rank(b + i) - rank_b; // ones in [b..i)
316 size_type ones_p = rank_b - m_rank_level[k]; // ones in [level_b..b)
317 if (c & mask)
318 { // search for a one at this level
319 i = ones;
320 b = (k + 1) * m_size + m_zero_cnt[k] + ones_p;
321 }
322 else
323 { // search for a zero at this level
324 i = i - ones;
325 b = (k + 1) * m_size + (b - k * m_size - ones_p);
326 }
327 mask >>= 1;
328 }
329 return i;
330 };
331
333
339 std::pair<size_type, value_type> inverse_select(size_type i) const
340 {
341 assert(i < size());
342 value_type c = 0;
343 size_type b = 0; // start position of the interval
344 for (uint32_t k = 0; k < m_max_level; ++k)
345 {
346 size_type rank_b = m_tree_rank(b);
347 size_type ones = m_tree_rank(b + i) - rank_b; // ones in [b..i)
348 size_type ones_p = rank_b - m_rank_level[k]; // ones in [level_b..b)
349 c <<= 1;
350 if (m_tree[b + i])
351 { // go to the right child
352 i = ones;
353 b = (k + 1) * m_size + m_zero_cnt[k] + ones_p;
354 c |= 1;
355 }
356 else
357 { // go to the left child
358 i = i - ones;
359 b = (k + 1) * m_size + (b - k * m_size - ones_p);
360 }
361 }
362 return std::make_pair(i, c);
363 }
364
366
375 {
376 assert(1 <= i and i <= rank(size(), c));
377 uint64_t mask = 1ULL << (m_max_level - 1);
378 int_vector<64> m_path_off(max_level + 1);
379 int_vector<64> m_path_rank_off(max_level + 1);
380 m_path_off[0] = m_path_rank_off[0] = 0;
381 size_type b = 0; // start position of the interval
382 size_type r = i;
383 for (uint32_t k = 0; k < m_max_level and i; ++k)
384 {
385 size_type rank_b = m_tree_rank(b);
386 size_type ones = m_tree_rank(b + r) - rank_b; // ones in [b..i)
387 size_type ones_p = rank_b - m_rank_level[k]; // ones in [0..b)
388 if (c & mask)
389 { // search for a one at this level
390 r = ones;
391 b = (k + 1) * m_size + m_zero_cnt[k] + ones_p;
392 }
393 else
394 { // search for a zero at this level
395 r = r - ones;
396 b = (k + 1) * m_size + (b - k * m_size - ones_p);
397 }
398 mask >>= 1;
399 m_path_off[k + 1] = b;
400 m_path_rank_off[k] = rank_b;
401 }
402 mask = 1ULL;
403 for (uint32_t k = m_max_level; k > 0; --k)
404 {
405 b = m_path_off[k - 1];
406 size_type rank_b = m_path_rank_off[k - 1];
407 if (c & mask)
408 { // right child => search i'th one
409 i = m_tree_select1(rank_b + i) - b + 1;
410 }
411 else
412 { // left child => search i'th zero
413 i = m_tree_select0(b - rank_b + i) - b + 1;
414 }
415 mask <<= 1;
416 }
417 return i - 1;
418 };
419
421
429 std::pair<size_type, std::vector<std::pair<value_type, size_type>>> range_search_2d(size_type lb,
430 size_type rb,
431 value_type vlb,
432 value_type vrb,
433 bool report = true) const
434 {
435
436 if (vrb > (1ULL << m_max_level)) vrb = (1ULL << m_max_level);
437 if (vlb > vrb) return make_pair(0, point_vec_type());
438 size_type cnt_answers = 0;
439 point_vec_type point_vec;
440 if (lb <= rb)
441 {
442 std::vector<size_type> is(m_max_level + 1);
443 std::vector<size_type> rank_off(m_max_level + 1);
444 _range_search_2d(root(), { { lb, rb } }, vlb, vrb, 0, is, rank_off, point_vec, report, cnt_answers);
445 }
446 return make_pair(cnt_answers, point_vec);
447 }
448
450 range_type r,
451 value_type vlb,
452 value_type vrb,
453 size_type ilb,
454 std::vector<size_type> & is,
455 std::vector<size_type> & rank_off,
456 point_vec_type & point_vec,
457 bool report,
458 size_type & cnt_answers) const
459 {
460 using std::get;
461 if (get<0>(r) > get<1>(r)) return;
462 is[v.level] = v.offset + get<0>(r);
463
464 if (v.level == m_max_level)
465 {
466 for (size_type j = 1; j <= sdsl::size(r) and report; ++j)
467 {
468 size_type i = j;
469 size_type c = v.sym;
470 for (uint32_t k = m_max_level; k > 0; --k)
471 {
472 size_type offset = is[k - 1];
473 size_type rank_offset = rank_off[k - 1];
474 if (c & 1) { i = m_tree_select1(rank_offset + i) - offset + 1; }
475 else
476 {
477 i = m_tree_select0(offset - rank_offset + i) - offset + 1;
478 }
479 c >>= 1;
480 }
481 point_vec.emplace_back(is[0] + i - 1, v.sym);
482 }
483 cnt_answers += sdsl::size(r);
484 return;
485 }
486 else
487 {
488 rank_off[v.level] = m_tree_rank(is[v.level]);
489 }
490 size_type irb = ilb + (1ULL << (m_max_level - v.level));
491 size_type mid = (irb + ilb) >> 1;
492
493 auto c_v = expand(v);
494 auto c_r = expand(v, r);
495
496 if (!sdsl::empty(get<0>(c_r)) and vlb < mid and mid)
497 {
498 _range_search_2d(get<0>(c_v),
499 get<0>(c_r),
500 vlb,
501 std::min(vrb, mid - 1),
502 ilb,
503 is,
504 rank_off,
505 point_vec,
506 report,
507 cnt_answers);
508 }
509 if (!sdsl::empty(get<1>(c_r)) and vrb >= mid)
510 {
511 _range_search_2d(get<1>(c_v),
512 get<1>(c_r),
513 std::max(mid, vlb),
514 vrb,
515 mid,
516 is,
517 rank_off,
518 point_vec,
519 report,
520 cnt_answers);
521 }
522 }
523
525 const_iterator begin() const { return const_iterator(this, 0); }
526
528 const_iterator end() const { return const_iterator(this, size()); }
529
531 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
532 {
533 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
534 size_type written_bytes = 0;
535 written_bytes += write_member(m_size, out, child, "size");
536 written_bytes += write_member(m_sigma, out, child, "sigma");
537 written_bytes += m_tree.serialize(out, child, "tree");
538 written_bytes += m_tree_rank.serialize(out, child, "tree_rank");
539 written_bytes += m_tree_select1.serialize(out, child, "tree_select_1");
540 written_bytes += m_tree_select0.serialize(out, child, "tree_select_0");
541 written_bytes += write_member(m_max_level, out, child, "max_level");
542 written_bytes += m_zero_cnt.serialize(out, child, "zero_cnt");
543 written_bytes += m_rank_level.serialize(out, child, "rank_level");
544 structure_tree::add_size(child, written_bytes);
545 return written_bytes;
546 }
547
549 void load(std::istream & in)
550 {
551 read_member(m_size, in);
552 read_member(m_sigma, in);
553 m_tree.load(in);
554 m_tree_rank.load(in, &m_tree);
555 m_tree_select1.load(in, &m_tree);
556 m_tree_select0.load(in, &m_tree);
558 m_zero_cnt.load(in);
559 m_rank_level.load(in);
560 }
561
563 template <typename archive_t>
564 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
565 {
566 ar(CEREAL_NVP(m_size));
567 ar(CEREAL_NVP(m_sigma));
569 ar(CEREAL_NVP(m_tree));
575 }
576
578 template <typename archive_t>
579 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
580 {
581 ar(CEREAL_NVP(m_size));
582 ar(CEREAL_NVP(m_sigma));
584 ar(CEREAL_NVP(m_tree));
586 m_tree_rank.set_vector(&m_tree);
588 m_tree_select1.set_vector(&m_tree);
590 m_tree_select0.set_vector(&m_tree);
593 }
594
596 bool operator==(wm_int const & other) const noexcept
597 {
598 return (m_size == other.m_size) && (m_sigma == other.m_sigma) && (m_max_level == other.m_max_level) &&
599 (m_tree == other.m_tree) && (m_tree_rank == other.m_tree_rank) &&
600 (m_tree_select1 == other.m_tree_select1) && (m_tree_select0 == other.m_tree_select0) &&
601 (m_zero_cnt == other.m_zero_cnt) && (m_rank_level == other.m_rank_level);
602 }
603
605 bool operator!=(wm_int const & other) const noexcept { return !(*this == other); }
606
609 {
614
615 // Default constructor
616 node_type(size_type o = 0, size_type sz = 0, size_type l = 0, value_type sy = 0)
617 : offset(o)
618 , size(sz)
619 , level(l)
620 , sym(sy)
621 {}
622
623 // Copy constructor
624 node_type(const node_type &) = default;
625
626 // Move copy constructor
627 node_type(node_type &&) = default;
628
629 // Assignment operator
630 node_type & operator=(const node_type &) = default;
631
632 // Move assignment operator
634
635 // Comparator operator
636 bool operator==(const node_type & v) const { return offset == v.offset; }
637
638 // Smaller operator
639 bool operator<(const node_type & v) const { return offset < v.offset; }
640
641 // Greater operator
642 bool operator>(const node_type & v) const { return offset > v.offset; }
643 };
644
646 bool is_leaf(const node_type & v) const { return v.level == m_max_level; }
647
649 value_type sym(const node_type & v) const { return v.sym; }
650
653 {
655 }
656
658 auto seq(const node_type & v) const -> random_access_container<std::function<value_type(size_type)>>
659 {
660 return random_access_container<std::function<value_type(size_type)>>(
661 [&v, this](size_type i) {
662 node_type vv = v;
663 while (!is_leaf(vv))
664 {
665 auto vs = expand(vv);
666 auto rs = expand(vv, { 0, i });
667 bool bit = *(begin(vv) + i);
668 i = std::get<1>(rs[bit]);
669 vv = vs[bit];
670 }
671 return sym(vv);
672 },
673 size(v));
674 }
675
677 bool empty(const node_type & v) const { return v.size == (size_type)0; }
678
680 auto size(const node_type & v) const -> decltype(v.size) { return v.size; }
681
683 node_type root() const { return node_type(0, m_size, 0, 0); }
684
686
690 std::array<node_type, 2> expand(const node_type & v) const
691 {
692 node_type v_right = v;
693 return expand(std::move(v_right));
694 }
695
697
701 std::array<node_type, 2> expand(node_type && v) const
702 {
703 node_type v_left;
704 size_type rank_b = m_tree_rank(v.offset);
705 size_type ones = m_tree_rank(v.offset + v.size) - rank_b; // ones in [b..size)
706 size_type ones_p = rank_b - m_rank_level[v.level]; // ones in [level_b..b)
707
708 v_left.offset = (v.level + 1) * m_size + (v.offset - v.level * m_size) - ones_p;
709 v_left.size = v.size - ones;
710 v_left.level = v.level + 1;
711 v_left.sym = v.sym << 1;
712
713 v.offset = (v.level + 1) * m_size + m_zero_cnt[v.level] + ones_p;
714 v.size = ones;
715 v.level = v.level + 1;
716 v.sym = (v.sym << 1) | 1;
717
718 return { { std::move(v_left), v } };
719 }
720
722
731 std::array<range_vec_type, 2> expand(const node_type & v, const range_vec_type & ranges) const
732 {
733 auto ranges_copy = ranges;
734 return expand(v, std::move(ranges_copy));
735 }
736
738
747 std::array<range_vec_type, 2> expand(const node_type & v, range_vec_type && ranges) const
748 {
749 auto v_sp_rank = m_tree_rank(v.offset); // this is already calculated in expand(v)
750 range_vec_type res(ranges.size());
751 size_t i = 0;
752 for (auto & r : ranges)
753 {
754 auto sp_rank = m_tree_rank(v.offset + r[0]);
755 auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank;
756 auto left_size = (r[1] - r[0] + 1) - right_size;
757
758 auto right_sp = sp_rank - v_sp_rank;
759 auto left_sp = r[0] - right_sp;
760
761 r = { { left_sp, left_sp + left_size - 1 } };
762 res[i++] = { { right_sp, right_sp + right_size - 1 } };
763 }
764 return { { ranges, std::move(res) } };
765 }
766
768
777 std::array<range_type, 2> expand(const node_type & v, const range_type & r) const
778 {
779 auto v_sp_rank = m_tree_rank(v.offset); // this is already calculated in expand(v)
780 auto sp_rank = m_tree_rank(v.offset + r[0]);
781 auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank;
782 auto left_size = (r[1] - r[0] + 1) - right_size;
783
784 auto right_sp = sp_rank - v_sp_rank;
785 auto left_sp = r[0] - right_sp;
786
787 return { { { { left_sp, left_sp + left_size - 1 } }, { { right_sp, right_sp + right_size - 1 } } } };
788 }
789
791 std::pair<uint64_t, uint64_t> path(value_type c) const { return { m_max_level, c }; }
792
793 private:
795 auto begin(const node_type & v) const -> decltype(m_tree.begin() + v.offset) { return m_tree.begin() + v.offset; }
796
798 auto end(const node_type & v) const -> decltype(m_tree.begin() + v.offset + v.size)
799 {
800 return m_tree.begin() + v.offset + v.size;
801 }
802};
803
804} // end namespace sdsl
805#endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
void push_back(const uint64_t value)
Appends the given element value to the end of the int_vector_buffer.
A generic vector class for integers of width .
Definition: int_vector.hpp:216
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
Definition: int_vector.hpp:764
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
Definition: int_vector.hpp:806
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
Definition: int_vector.hpp:759
void close()
Close the stream.
Definition: sfstream.hpp:87
Generic iterator for a random access container.
Definition: iterators.hpp:24
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
A wavelet tree class for integer sequences.
Definition: wm_int.hpp:52
uint32_t m_max_level
Definition: wm_int.hpp:83
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition: wm_int.hpp:528
t_bitvector::difference_type difference_type
Definition: wm_int.hpp:56
std::array< node_type, 2 > expand(const node_type &v) const
Returns the two child nodes of an inner node.
Definition: wm_int.hpp:690
wm_int(const wm_int &wt)
Copy constructor.
Definition: wm_int.hpp:184
std::pair< size_type, point_vec_type > r2d_res_type
Definition: wm_int.hpp:72
const_iterator iterator
Definition: wm_int.hpp:58
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: wm_int.hpp:549
value_type operator[](size_type i) const
Recovers the i-th symbol of the original vector.
Definition: wm_int.hpp:271
void _range_search_2d(node_type v, range_type r, value_type vlb, value_type vrb, size_type ilb, std::vector< size_type > &is, std::vector< size_type > &rank_off, point_vec_type &point_vec, bool report, size_type &cnt_answers) const
Definition: wm_int.hpp:449
int_alphabet_tag alphabet_category
Definition: wm_int.hpp:64
std::array< range_vec_type, 2 > expand(const node_type &v, range_vec_type &&ranges) const
Returns for each range its left and right child ranges.
Definition: wm_int.hpp:747
const size_type & sigma
Effective alphabet size of the wavelet tree.
Definition: wm_int.hpp:88
t_rank rank_1_type
Definition: wm_int.hpp:60
t_bitvector bit_vector_type
Definition: wm_int.hpp:59
value_type sym(const node_type &v) const
Symbol of leaf node v.
Definition: wm_int.hpp:649
std::pair< uint64_t, uint64_t > path(value_type c) const
return the path to the leaf for a given symbol
Definition: wm_int.hpp:791
std::pair< size_type, value_type > inverse_select(size_type i) const
Calculates how many occurrences of symbol wt[i] are in the prefix [0..i-1] of the original sequence.
Definition: wm_int.hpp:339
std::array< node_type, 2 > expand(node_type &&v) const
Returns the two child nodes of an inner node.
Definition: wm_int.hpp:701
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Load via cereal.
Definition: wm_int.hpp:579
rank_1_type m_tree_rank
Definition: wm_int.hpp:80
size_type m_size
Definition: wm_int.hpp:77
wt_tag index_category
Definition: wm_int.hpp:63
std::pair< size_type, std::vector< std::pair< value_type, size_type > > > range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, bool report=true) const
range_search_2d searches points in the index interval [lb..rb] and value interval [vlb....
Definition: wm_int.hpp:429
size_type size() const
Returns the size of the original vector.
Definition: wm_int.hpp:260
int_vector< 64 > m_rank_level
Definition: wm_int.hpp:85
bool operator==(wm_int const &other) const noexcept
Equality operator.
Definition: wm_int.hpp:596
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal.
Definition: wm_int.hpp:564
const_iterator begin() const
Returns a const_iterator to the first element.
Definition: wm_int.hpp:525
bool empty() const
Returns whether the wavelet tree contains no data.
Definition: wm_int.hpp:263
wm_int()=default
Default constructor.
int_vector< 64 > m_zero_cnt
Definition: wm_int.hpp:84
size_type m_sigma
Definition: wm_int.hpp:78
int_vector ::value_type value_type
Definition: wm_int.hpp:55
auto seq(const node_type &v) const -> random_access_container< std::function< value_type(size_type)> >
Random access container to sequence of node v.
Definition: wm_int.hpp:658
auto bit_vec(const node_type &v) const -> node_bv_container< t_bitvector >
Random access container to bitvector of node v.
Definition: wm_int.hpp:652
wm_int & operator=(const wm_int &wt)
Assignment operator.
Definition: wm_int.hpp:218
std::vector< point_type > point_vec_type
Definition: wm_int.hpp:71
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: wm_int.hpp:531
std::pair< value_type, size_type > point_type
Definition: wm_int.hpp:70
std::array< range_vec_type, 2 > expand(const node_type &v, const range_vec_type &ranges) const
Returns for each range its left and right child ranges.
Definition: wm_int.hpp:731
random_access_const_iterator< wm_int > const_iterator
Definition: wm_int.hpp:57
bool operator!=(wm_int const &other) const noexcept
Inequality operator.
Definition: wm_int.hpp:605
t_select_zero select_0_type
Definition: wm_int.hpp:62
node_type root() const
Return the root node.
Definition: wm_int.hpp:683
t_select select_1_type
Definition: wm_int.hpp:61
size_type rank(size_type i, value_type c) const
Calculates how many symbols c are in the prefix [0..i-1] of the supported vector.
Definition: wm_int.hpp:303
bit_vector_type m_tree
Definition: wm_int.hpp:79
const bit_vector_type & tree
A concatenation of all bit vectors of the wavelet tree.
Definition: wm_int.hpp:89
select_1_type m_tree_select1
Definition: wm_int.hpp:81
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
Definition: wm_int.hpp:374
select_0_type m_tree_select0
Definition: wm_int.hpp:82
wm_int & operator=(wm_int &&wt)
Assignment move operator.
Definition: wm_int.hpp:239
bool is_leaf(const node_type &v) const
Checks if the node is a leaf node.
Definition: wm_int.hpp:646
std::array< range_type, 2 > expand(const node_type &v, const range_type &r) const
Returns for a range its left and right child ranges.
Definition: wm_int.hpp:777
const uint32_t & max_level
Maximal level of the wavelet tree.
Definition: wm_int.hpp:90
int_vector ::size_type size_type
Definition: wm_int.hpp:54
bool empty(const node_type &v) const
Indicates if node v is empty.
Definition: wm_int.hpp:677
wm_int(t_it begin, t_it end, std::string tmp_dir=ram_file_name(""))
Construct the wavelet tree from a sequence defined by two interators.
Definition: wm_int.hpp:105
auto size(const node_type &v) const -> decltype(v.size)
Return the size of node v.
Definition: wm_int.hpp:680
wm_int(wm_int &&wt)
Move copy constructor.
Definition: wm_int.hpp:201
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
std::string ram_file_name(const std::string &file)
Returns the corresponding RAM-file name for file.
Definition: ram_fs.hpp:174
bool empty(const range_type &r)
Empty range check.
Definition: wt_helper.hpp:782
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
std::string tmp_file(const cache_config &config, std::string name_part="")
Returns a name for a temporary file. I.e. the name was not used before.
Definition: io.hpp:698
int_vector< 1 > bit_vector
bit_vector is a specialization of the int_vector.
Definition: int_vector.hpp:53
std::array< int_vector<>::size_type, 2 > range_type
Definition: wt_helper.hpp:20
bool load_from_file(T &v, const std::string &file)
Load sdsl-object v from a file.
Definition: io.hpp:901
std::vector< range_type > range_vec_type
Definition: wt_helper.hpp:21
int remove(const std::string &)
Remove a file.
Definition: ram_fs.hpp:194
int_vector ::size_type size(const range_type &r)
Size of a range.
Definition: wt_helper.hpp:787
rank_support_v.hpp contains rank_support_v.
Contains declarations and definitions of data structure concepts.
select_support_mcl.hpp contains classes that support a sdsl::bit_vector with constant time select inf...
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:651
Represents a node in the wavelet tree.
Definition: wm_int.hpp:609
node_type(const node_type &)=default
bool operator==(const node_type &v) const
Definition: wm_int.hpp:636
bool operator>(const node_type &v) const
Definition: wm_int.hpp:642
node_type & operator=(const node_type &)=default
node_type(node_type &&)=default
node_type & operator=(node_type &&)=default
node_type(size_type o=0, size_type sz=0, size_type l=0, value_type sy=0)
Definition: wm_int.hpp:616
bool operator<(const node_type &v) const
Definition: wm_int.hpp:639
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.