SDSL 3.0.1
Succinct Data Structure Library
csa_sampling_strategy.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
9#ifndef INCLUDED_CSA_SAMPLING_STRATEGY
10#define INCLUDED_CSA_SAMPLING_STRATEGY
11
12/*
13 * Text = ABCDEFABCDEF$
14 * 0123456789012
15 * sa_sample_dens = 2
16 * *1 SA *2
17 * * 12 * $
18 * 06 * ABCDEF$
19 * * 00 * ABCDEFABCDEF$
20 * 07 BCDEF$
21 * * 01 BCDEFABCDEF$
22 * 08 * CDEF$
23 * * 02 * CDEFABCDEF$
24 * 09 DEF$
25 * * 03 DEFABCDEF$
26 * 10 * EF$
27 * * 04 * EFABCDEF$
28 * 11 F$
29 * * 05 FABCDEF$
30 *
31 * The first sampling (*1) is called suffix order sampling. It has the advantage, that
32 * we don't need to store a bitvector, which marks the sampled suffixes, since a suffix
33 * at index \‍(i\‍) in the suffix array is marked if \‍( 0 \equiv i \mod sa_sample_dens \‍).
34 *
35 * The second sampling (*2) is called text order sampling. It is also called regular in [1].
36 *
37 * [1] P.Ferragina, J. Siren, R. Venturini: Distribution-Aware Compressed Full-Text Indexes, ESA 2011
38 */
39
40#include <set>
41#include <tuple>
42
44#include <sdsl/int_vector.hpp>
47
48namespace sdsl
49{
50
51template <class t_csa, uint8_t t_width = 0>
52class _sa_order_sampling : public int_vector<t_width>
53{
54 public:
56 typedef typename base_type::size_type size_type; // make typedefs of base_type visible
57 typedef typename base_type::value_type value_type; //
58 enum
59 {
60 sample_dens = t_csa::sa_sample_dens
61 };
62 enum
63 {
64 text_order = false
65 };
67
70
72 /*
73 * \param cconfig Cache configuration (SA is expected to be cached.).
74 * \param csa Pointer to the corresponding CSA. Not used in this class.
75 * \par Time complexity
76 * Linear in the size of the suffix array.
77 */
78 _sa_order_sampling(const cache_config & cconfig, SDSL_UNUSED const t_csa * csa = nullptr)
79 {
81 size_type n = sa_buf.size();
82 this->width(bits::hi(n) + 1);
83 this->resize((n + sample_dens - 1) / sample_dens);
84
85 for (size_type i = 0, cnt_mod = sample_dens, cnt_sum = 0; i < n; ++i, ++cnt_mod)
86 {
87 size_type sa = sa_buf[i];
88 if (sample_dens == cnt_mod)
89 {
90 cnt_mod = 0;
91 base_type::operator[](cnt_sum++) = sa;
92 }
93 }
94 }
95
97 inline bool is_sampled(size_type i) const { return 0 == (i % sample_dens); }
98
101};
102
103template <uint8_t t_width = 0>
105{
106 template <class t_csa>
109};
110
111template <class t_csa, class t_bv = bit_vector, class t_rank = typename t_bv::rank_1_type, uint8_t t_width = 0>
112class _text_order_sampling : public int_vector<t_width>
113{
114 private:
115 t_bv m_marked;
116 t_rank m_rank_marked;
117
118 public:
120 typedef typename base_type::size_type size_type; // make typedefs of base_type visible
122 typedef t_bv bv_type;
123 enum
124 {
125 sample_dens = t_csa::sa_sample_dens
126 };
127 enum
128 {
129 text_order = true
130 };
132
133 const bv_type & marked = m_marked;
134 const t_rank & rank_marked = m_rank_marked;
135
138
140 /*
141 * \param cconfig Cache configuration (SA is expected to be cached.).
142 * \param csa Pointer to the corresponding CSA. Not used in this class.
143 * \par Time complexity
144 * Linear in the size of the suffix array.
145 */
146 _text_order_sampling(const cache_config & cconfig, SDSL_UNUSED const t_csa * csa = nullptr)
147 {
149 size_type n = sa_buf.size();
150 bit_vector marked(n, 0); // temporary bitvector for the marked text positions
151 this->width(bits::hi(n / sample_dens) + 1);
152 this->resize((n + sample_dens - 1) / sample_dens);
153
154 for (size_type i = 0, sa_cnt = 0; i < n; ++i)
155 {
156 size_type sa = sa_buf[i];
157 if (0 == (sa % sample_dens))
158 {
159 marked[i] = 1;
160 base_type::operator[](sa_cnt++) = sa / sample_dens;
161 }
162 }
163 m_marked = std::move(t_bv(marked));
164 util::init_support(m_rank_marked, &m_marked);
165 }
166
169 : base_type(st)
170 {
171 m_marked = st.m_marked;
172 m_rank_marked = st.m_rank_marked;
173 m_rank_marked.set_vector(&m_marked);
174 }
175
177 inline bool is_sampled(size_type i) const { return m_marked[i]; }
178
180 inline value_type operator[](size_type i) const { return base_type::operator[](m_rank_marked(i)) * sample_dens; }
181
183
186 {
187 if (this != &st)
188 {
190 m_marked = st.m_marked;
191 m_rank_marked = st.m_rank_marked;
192 m_rank_marked.set_vector(&m_marked);
193 }
194 return *this;
195 }
196
199 {
200 base_type::swap(st);
201 m_marked.swap(st.m_marked);
202 util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked));
203 }
204
205 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
206 {
207 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
208 size_type written_bytes = 0;
209 written_bytes += base_type::serialize(out, child, "samples");
210 written_bytes += m_marked.serialize(out, child, "marked");
211 written_bytes += m_rank_marked.serialize(out, child, "rank_marked");
212 structure_tree::add_size(child, written_bytes);
213 return written_bytes;
214 }
215
216 void load(std::istream & in)
217 {
218 base_type::load(in);
219 m_marked.load(in);
220 m_rank_marked.load(in);
221 m_rank_marked.set_vector(&m_marked);
222 }
223
224 template <typename archive_t>
225 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
226 {
228 ar(CEREAL_NVP(m_marked));
229 ar(CEREAL_NVP(m_rank_marked));
230 }
231
232 template <typename archive_t>
233 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
234 {
236 ar(CEREAL_NVP(m_marked));
237 ar(CEREAL_NVP(m_rank_marked));
238 m_rank_marked.set_vector(&m_marked);
239 }
240};
241
242template <class t_bit_vec = sd_vector<>, class t_rank_sup = typename t_bit_vec::rank_1_type, uint8_t t_width = 0>
244{
245 template <class t_csa>
248};
249
250template <class t_csa,
251 class t_bv_sa = sd_vector<>,
252 class t_bv_isa = sd_vector<>,
253 class t_rank_sa = typename t_bv_sa::rank_1_type,
254 class t_select_isa = typename t_bv_isa::select_1_type>
256{
257 private:
258 t_bv_sa m_marked_sa;
259 t_rank_sa m_rank_marked_sa;
260 t_bv_isa m_marked_isa;
261 t_select_isa m_select_marked_isa;
262 wt_int<rrr_vector<63>> m_inv_perm;
263
264 public:
265 typedef typename bit_vector::size_type size_type; // make typedefs of base_type visible
267 typedef t_bv_sa bv_sa_type;
268 enum
269 {
270 sample_dens = t_csa::sa_sample_dens
271 };
272 enum
273 {
274 text_order = true
275 };
277
278 const t_bv_sa & marked_sa = m_marked_sa;
279 const t_rank_sa & rank_marked_sa = m_rank_marked_sa;
280 const t_bv_isa & marked_isa = m_marked_isa;
281 const t_select_isa & select_marked_isa = m_select_marked_isa;
282
285
287 /*
288 * \param cconfig Cache configuration (SA is expected to be cached.).
289 * \param csa Pointer to the corresponding CSA. Not used in this class.
290 * \par Time complexity
291 * Linear in the size of the suffix array.
292 */
293 _fuzzy_sa_sampling(cache_config & cconfig, SDSL_UNUSED const t_csa * csa = nullptr)
294 {
295 {
296 // (2) check, if the suffix array is cached
297 if (!cache_file_exists(conf::KEY_ISA, cconfig))
298 {
299 auto event = memory_monitor::event("ISA");
300 construct_isa(cconfig);
301 }
303 }
304 {
306 size_type n = isa_buf.size();
307 bit_vector marked_isa(n, 0); // temporary bitvector for marked ISA positions
308 bit_vector marked_sa(n, 0); // temporary bitvector for marked SA positions
309 int_vector<> inv_perm((n + sample_dens - 1) / sample_dens, 0, bits::hi(n) + 1);
310 size_type cnt = 0;
311 size_type runs = 1;
312
313 uint64_t min_prev_val = 0;
314 for (size_type i = 0; i < n; i += sample_dens)
315 {
316 size_type pos_min = i;
317 size_type pos_cnd = isa_buf[i] >= min_prev_val ? i : n;
318 for (size_type j = i + 1; j < i + sample_dens and j < n; ++j)
319 {
320 if (isa_buf[j] < isa_buf[pos_min]) pos_min = j;
321 if (isa_buf[j] >= min_prev_val)
322 {
323 if (pos_cnd == n) { pos_cnd = j; }
324 else if (isa_buf[j] < isa_buf[pos_cnd])
325 {
326 pos_cnd = j;
327 }
328 }
329 }
330 if (pos_cnd == n)
331 { // increasing sequence can not be extended
332 pos_cnd = pos_min;
333 ++runs;
334 }
335 min_prev_val = isa_buf[pos_cnd];
336 marked_isa[pos_cnd] = 1;
337 inv_perm[cnt++] = min_prev_val;
338 marked_sa[min_prev_val] = 1;
339 }
340 m_marked_isa = std::move(t_bv_isa(marked_isa));
341 util::init_support(m_select_marked_isa, &m_marked_isa);
342 {
344 for (size_type i = 0; i < inv_perm.size(); ++i) { inv_perm[i] = rank_marked_sa(inv_perm[i]); }
345 }
346 util::bit_compress(inv_perm);
347
348 m_marked_sa = std::move(t_bv_sa(marked_sa));
349 util::init_support(m_rank_marked_sa, &m_marked_sa);
350
351 std::string tmp_key = "fuzzy_isa_samples_" + util::to_string(util::pid()) + "_" +
353 std::string tmp_file_name = cache_file_name(tmp_key, cconfig);
354 store_to_file(inv_perm, tmp_file_name);
355 construct(m_inv_perm, tmp_file_name, 0);
356 sdsl::remove(tmp_file_name);
357 }
358 }
359
362 : m_marked_sa(st.m_marked_sa)
363 , m_rank_marked_sa(st.m_rank_marked_sa)
364 , m_marked_isa(st.m_marked_isa)
365 , m_select_marked_isa(st.m_select_marked_isa)
366 , m_inv_perm(st.m_inv_perm)
367 {
368 m_rank_marked_sa.set_vector(&m_marked_sa);
369 m_select_marked_isa.set_vector(&m_marked_isa);
370 }
371
374 : m_marked_sa(std::move(st.m_marked_sa))
375 , m_rank_marked_sa(std::move(st.m_rank_marked_sa))
376 , m_marked_isa(std::move(st.m_marked_isa))
377 , m_select_marked_isa(std::move(st.m_select_marked_isa))
378 , m_inv_perm(std::move(st.m_inv_perm))
379 {
380 m_rank_marked_sa.set_vector(&m_marked_sa);
381 m_select_marked_isa.set_vector(&m_marked_isa);
382 }
383
385 inline bool is_sampled(size_type i) const { return m_marked_sa[i]; }
386
389 {
390 return m_select_marked_isa(m_inv_perm.select(1, m_rank_marked_sa(i)) + 1);
391 }
392
394 inline value_type inv(size_type i) const { return m_inv_perm[i]; }
395
396 size_type size() const { return m_inv_perm.size(); }
397
400 {
401 if (this != &st)
402 {
403 _fuzzy_sa_sampling tmp(st);
404 *this = std::move(tmp);
405 }
406 return *this;
407 }
408
411 {
412 m_marked_sa = std::move(st.m_marked_sa);
413 m_rank_marked_sa = std::move(st.m_rank_marked_sa);
414 m_marked_isa = std::move(st.m_marked_isa);
415 m_select_marked_isa = std::move(st.m_select_marked_isa);
416 m_inv_perm = std::move(st.m_inv_perm);
417 m_rank_marked_sa.set_vector(&m_marked_sa);
418 m_select_marked_isa.set_vector(&m_marked_isa);
419 return *this;
420 }
421
422 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
423 {
424 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
425 size_type written_bytes = 0;
426 written_bytes += m_marked_sa.serialize(out, child, "marked_sa");
427 written_bytes += m_rank_marked_sa.serialize(out, child, "rank_marked_sa");
428 written_bytes += m_marked_isa.serialize(out, child, "marked_isa");
429 written_bytes += m_select_marked_isa.serialize(out, child, "select_marked_isa");
430 written_bytes += m_inv_perm.serialize(out, child, "inv_perm");
431 structure_tree::add_size(child, written_bytes);
432 return written_bytes;
433 }
434
435 void load(std::istream & in)
436 {
437 m_marked_sa.load(in);
438 m_rank_marked_sa.load(in);
439 m_rank_marked_sa.set_vector(&m_marked_sa);
440 m_marked_isa.load(in);
441 m_select_marked_isa.load(in);
442 m_select_marked_isa.set_vector(&m_marked_isa);
443 m_inv_perm.load(in);
444 }
445
446 template <typename archive_t>
447 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
448 {
449 ar(CEREAL_NVP(m_marked_sa));
450 ar(CEREAL_NVP(m_rank_marked_sa));
451 ar(CEREAL_NVP(m_marked_isa));
452 ar(CEREAL_NVP(m_select_marked_isa));
453 ar(CEREAL_NVP(m_inv_perm));
454 }
455
456 template <typename archive_t>
457 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
458 {
459 ar(CEREAL_NVP(m_marked_sa));
460 ar(CEREAL_NVP(m_rank_marked_sa));
461 m_rank_marked_sa.set_vector(&m_marked_sa);
462 ar(CEREAL_NVP(m_marked_isa));
463 ar(CEREAL_NVP(m_select_marked_isa));
464 m_select_marked_isa.set_vector(&m_marked_isa);
465 ar(CEREAL_NVP(m_inv_perm));
466 }
467
469 bool operator==(_fuzzy_sa_sampling const & other) const noexcept
470 {
471 return (m_marked_sa == other.m_marked_sa) && (m_rank_marked_sa == other.m_rank_marked_sa) &&
472 (m_marked_isa == other.m_marked_isa) && (m_select_marked_isa == other.m_select_marked_isa) &&
473 (m_inv_perm == other.m_inv_perm);
474 }
475
477 bool operator!=(_fuzzy_sa_sampling const & other) const noexcept { return !(*this == other); }
478};
479template <class t_bv_sa = sd_vector<>,
480 class t_bv_isa = sd_vector<>,
481 class t_rank_sa = typename t_bv_sa::rank_1_type,
482 class t_select_isa = typename t_bv_isa::select_1_type>
484{
485 template <class t_csa>
488};
489
490/*
491 * Text = ABCDEFABCDEF$
492 * 0123456789012
493 * sa_sample_dens = 4
494 * sa_sample_chars = {B,E}
495 * SA BWT (1)
496 * 12 F * $
497 * 06 F ABCDEF$
498 * 00 $ * ABCDEFABCDEF$
499 * 07 A BCDEF$
500 * 01 A BCDEFABCDEF$
501 * 08 B * CDEF$
502 * 02 B * CDEFABCDEF$
503 * 09 C DEF$
504 * 03 C DEFABCDEF$
505 * 10 D EF$
506 * 04 D * EFABCDEF$
507 * 11 E * F$
508 * 05 E * FABCDEF$
509 *
510 * In this sampling a suffix x=SA[i] is marked if x \‍( 0 \equiv x \mod sa_sample_dens \‍) or
511 * BWT[i] is contained in sa_sample_chars.
512 */
513
514template <class t_csa, class t_bv = bit_vector, class t_rank = typename t_bv::rank_1_type, uint8_t t_width = 0>
515class _bwt_sampling : public int_vector<t_width>
516{
517 private:
518 t_bv m_marked;
519 t_rank m_rank_marked;
520
521 public:
523 typedef typename base_type::size_type size_type; // make typedefs of base_type visible
525 enum
526 {
527 sample_dens = t_csa::sa_sample_dens
528 };
529 enum
530 {
531 text_order = false
532 };
534
537
539 /*
540 * \param cconfig Cache configuration (BWT,SA, and SAMPLE_CHARS are expected to be cached.).
541 * \param csa Pointer to the corresponding CSA. Not used in this class.
542 * \par Time complexity
543 * Linear in the size of the suffix array.
544 */
545 _bwt_sampling(const cache_config & cconfig, SDSL_UNUSED const t_csa * csa = nullptr)
546 {
549 key_bwt<t_csa::alphabet_type::int_width>(), cconfig));
550 size_type n = sa_buf.size();
551 bit_vector marked(n, 0); // temporary bitvector for the marked text positions
552 this->width(bits::hi(n) + 1);
553 int_vector<> sample_char;
554 typedef typename t_csa::char_type char_type;
555 std::set<char_type> char_map;
556 if (load_from_cache(sample_char, conf::KEY_SAMPLE_CHAR, cconfig))
557 {
558 for (uint64_t i = 0; i < sample_char.size(); ++i) { char_map.insert((char_type)sample_char[i]); }
559 }
560 size_type sa_cnt = 0;
561 for (size_type i = 0; i < n; ++i)
562 {
563 size_type sa = sa_buf[i];
564 char_type bwt = bwt_buf[i];
565 if (0 == (sa % sample_dens))
566 {
567 marked[i] = 1;
568 ++sa_cnt;
569 }
570 else if (char_map.find(bwt) != char_map.end())
571 {
572 marked[i] = 1;
573 ++sa_cnt;
574 }
575 }
576 this->resize(sa_cnt);
577 sa_cnt = 0;
578 for (size_type i = 0; i < n; ++i)
579 {
580 size_type sa = sa_buf[i];
581 if (marked[i]) { base_type::operator[](sa_cnt++) = sa; }
582 }
583 m_marked = std::move(marked);
584 util::init_support(m_rank_marked, &m_marked);
585 }
586
589 : base_type(st)
590 {
591 m_marked = st.m_marked;
592 m_rank_marked = st.m_rank_marked;
593 m_rank_marked.set_vector(&m_marked);
594 }
595
597 inline bool is_sampled(size_type i) const { return m_marked[i]; }
598
600 inline value_type operator[](size_type i) const { return base_type::operator[](m_rank_marked(i)) * sample_dens; }
601
604 {
605 if (this != &st)
606 {
608 m_marked = st.m_marked;
609 m_rank_marked = st.m_rank_marked;
610 m_rank_marked.set_vector(&m_marked);
611 }
612 return *this;
613 }
614
617 {
618 base_type::swap(st);
619 m_marked.swap(st.m_marked);
620 util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked));
621 }
622
623 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
624 {
625 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
626 size_type written_bytes = 0;
627 written_bytes += base_type::serialize(out, child, "samples");
628 written_bytes += m_marked.serialize(out, child, "marked");
629 written_bytes += m_rank_marked.serialize(out, child, "rank_marked");
630 structure_tree::add_size(child, written_bytes);
631 return written_bytes;
632 }
633
634 void load(std::istream & in)
635 {
636 base_type::load(in);
637 m_marked.load(in);
638 m_rank_marked.load(in);
639 m_rank_marked.set_vector(&m_marked);
640 }
641
642 template <typename archive_t>
643 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
644 {
646 ar(CEREAL_NVP(m_marked));
647 ar(CEREAL_NVP(m_rank_marked));
648 }
649
650 template <typename archive_t>
651 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
652 {
654 ar(CEREAL_NVP(m_marked));
655 ar(CEREAL_NVP(m_rank_marked));
656 m_rank_marked.set_vector(&m_marked);
657 }
658};
659
660template <class t_bit_vec = bit_vector, class t_rank_sup = typename t_bit_vec::rank_1_type, uint8_t t_width = 0>
662{
663 template <class t_csa>
666};
667
668template <class t_csa, uint8_t t_width = 0>
669class _isa_sampling : public int_vector<t_width>
670{
671 public:
673 typedef typename base_type::size_type size_type; // make typedefs of base_type visible
675 typedef typename t_csa::sa_sample_type sa_type; // sa sample type
676 enum
677 {
678 sample_dens = t_csa::isa_sample_dens
679 };
681
684
686 /*
687 * \param cconfig Cache configuration (SA is expected to be cached.).
688 * \param sa_sample Pointer to the corresponding SA sampling. Not used in this class.
689 * \par Time complexity
690 * Linear in the size of the suffix array.
691 */
692 _isa_sampling(const cache_config & cconfig, SDSL_UNUSED const sa_type * sa_sample = nullptr)
693 {
695 size_type n = sa_buf.size();
696 if (n >= 1)
697 { // so n+t_csa::isa_sample_dens >= 2
698 this->width(bits::hi(n) + 1);
699 this->resize((n - 1) / sample_dens + 1);
700 }
701 for (size_type i = 0; i < this->size(); ++i) base_type::operator[](i) = 0;
702
703 for (size_type i = 0; i < n; ++i)
704 {
705 size_type sa = sa_buf[i];
706 if ((sa % sample_dens) == 0) { base_type::operator[](sa / sample_dens) = i; }
707 }
708 }
709
712
714 inline std::tuple<value_type, size_type> sample_leq(size_type i) const
715 {
716 size_type ci = i / sample_dens;
717 return std::make_tuple(base_type::operator[](ci), ci * sample_dens);
718 }
719
721 inline std::tuple<value_type, size_type> sample_qeq(size_type i) const
722 {
723 size_type ci = (i / sample_dens + 1) % this->size();
724 return std::make_tuple(base_type::operator[](ci), ci * sample_dens);
725 }
726
728 void load(std::istream & in, SDSL_UNUSED const sa_type * sa_sample = nullptr) { base_type::load(in); }
729
730 template <typename archive_t>
731 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
732 {
734 }
735
736 template <typename archive_t>
737 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
738 {
740 }
741
743};
744
745template <uint8_t t_width = 0>
747{
748 template <class t_csa>
751};
752
753template <class t_csa, class t_inv_perm, class t_sel>
755{
756 static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens,
757 "ISA sampling requires: sa_sample_dens == isa_sample_dens");
758
759 public:
762 typedef typename t_csa::sa_sample_type sa_type; // sa sample type
763 typedef typename sa_type::bv_type bv_type; // bitvector type used to mark SA samples
764 enum
765 {
766 sample_dens = t_csa::isa_sample_dens
767 };
769
770 private:
771 t_sel m_select_marked;
772 t_inv_perm m_inv_perm;
773
774 public:
775 const t_sel & select_marked = m_select_marked;
776
779
781 /*
782 * \param cconfig Cache configuration. (Not used in this class)
783 * \param sa_sample Pointer to the corresponding SA sampling..
784 * \par Time complexity
785 * Linear in the size of the suffix array.
786 */
788 const typename std::enable_if<sa_type::text_order, sa_type *>::type sa_sample)
789 {
790 // and initialize the select support on bitvector marked
791 m_select_marked = t_sel(&(sa_sample->marked));
792 const int_vector<> * perm = (const int_vector<> *)sa_sample;
793 m_inv_perm = t_inv_perm(perm);
794 m_inv_perm.set_vector(perm);
795 }
796
799 {
800 m_inv_perm = st.m_inv_perm;
801 m_select_marked = st.m_select_marked;
802 }
803
805 inline value_type operator[](size_type i) const { return m_select_marked(m_inv_perm[i / sample_dens] + 1); }
806
808 inline std::tuple<value_type, size_type> sample_leq(size_type i) const
809 {
810 size_type ci = i / sample_dens;
811 return std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci * sample_dens);
812 }
813
815 inline std::tuple<value_type, size_type> sample_qeq(size_type i) const
816 {
817 size_type ci = (i / sample_dens + 1) % m_inv_perm.size();
818 return std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci * sample_dens);
819 }
820
823 {
824 if (this != &st)
825 {
826 m_inv_perm = st.m_inv_perm;
827 m_select_marked = st.m_select_marked;
828 }
829 return *this;
830 }
831
834 {
835 if (this != &st)
836 {
837 m_inv_perm.swap(st.m_inv_perm);
838 m_select_marked.swap(st.m_select_marked);
839 }
840 }
841
842 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
843 {
844 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
845 size_type written_bytes = 0;
846 written_bytes += m_inv_perm.serialize(out, child, "inv_perm");
847 written_bytes += m_select_marked.serialize(out, child, "select_marked");
848 structure_tree::add_size(child, written_bytes);
849 return written_bytes;
850 }
851
853 void load(std::istream & in, const sa_type * sa_sample = nullptr)
854 {
855 m_inv_perm.load(in);
856 m_select_marked.load(in);
857 set_vector(sa_sample);
858 }
859
860 template <typename archive_t>
861 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
862 {
863 ar(CEREAL_NVP(m_inv_perm));
864 ar(CEREAL_NVP(m_select_marked));
865 }
866
867 template <typename archive_t>
868 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar, const sa_type * sa_sample = nullptr)
869 {
870 ar(CEREAL_NVP(m_inv_perm));
871 ar(CEREAL_NVP(m_select_marked));
872 set_vector(sa_sample);
873 }
874
876 bool operator==(_text_order_isa_sampling_support const & other) const noexcept
877 {
878 return (m_inv_perm == other.m_inv_perm) && (m_select_marked == other.m_select_marked);
879 }
880
882 bool operator!=(_text_order_isa_sampling_support const & other) const noexcept { return !(*this == other); }
883
884 void set_vector(const sa_type * sa_sample = nullptr)
885 {
886 if (sa_sample == nullptr)
887 {
888 m_select_marked.set_vector(nullptr);
889 m_inv_perm.set_vector(nullptr);
890 }
891 else
892 {
893 m_select_marked.set_vector(&(sa_sample->marked));
894 m_inv_perm.set_vector((const int_vector<> *)sa_sample);
895 }
896 }
897};
898
899template <class t_inv_perm = inv_perm_support<8>, class t_sel = void>
901{
902 template <class t_csa>
904 t_inv_perm,
905 typename std::conditional<std::is_void<t_sel>::value,
906 typename t_csa::sa_sample_type::bv_type::select_1_type,
907 t_sel>::type>;
909};
910
911template <class t_csa, class t_select_sa>
913{
914 static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens,
915 "ISA sampling requires: sa_sample_dens==isa_sample_dens");
916
917 public:
920 typedef typename t_csa::sa_sample_type sa_type; // sa sample type
921 enum
922 {
923 sample_dens = t_csa::isa_sample_dens
924 };
926
927 private:
928 const sa_type * m_sa_p = nullptr; // pointer to sa_sample_strategy
929 t_select_sa m_select_marked_sa;
930
931 public:
934
936 /*
937 * \param cconfig Cache configuration. (Not used in this class)
938 * \param sa_sample Pointer to the corresponding SA sampling..
939 * \par Time complexity
940 * Linear in the size of the suffix array.
941 */
943 : m_sa_p(sa_sample)
944 {
945 util::init_support(m_select_marked_sa, &(sa_sample->marked_sa));
946 }
947
950 : m_select_marked_sa(st.m_select_marked_sa)
951 {
952 set_vector(st.m_sa_p);
953 }
954
956 inline value_type operator[](size_type i) const { return m_sa_p->inv(i); }
957
959 inline std::tuple<value_type, size_type> sample_leq(size_type i) const
960 {
961 size_type ci = i / sample_dens;
962 size_type j = m_sa_p->select_marked_isa(ci + 1);
963 if (j > i)
964 {
965 if (ci > 0) { ci = ci - 1; }
966 else
967 {
968 ci = m_sa_p->size() - 1;
969 }
970 j = m_sa_p->select_marked_isa(ci + 1);
971 }
972 return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), j);
973 }
974
976 inline std::tuple<value_type, size_type> sample_qeq(size_type i) const
977 {
978 size_type ci = i / sample_dens;
979 size_type j = m_sa_p->select_marked_isa(ci + 1);
980 if (j < i)
981 {
982 if (ci < m_sa_p->size() - 1) { ci = ci + 1; }
983 else
984 {
985 ci = 0;
986 }
987 j = m_sa_p->select_marked_isa(ci + 1);
988 }
989 return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), j);
990 }
991
994 {
995 if (this != &st)
996 {
997 m_select_marked_sa = st.m_select_marked_sa;
998 set_vector(st.m_sa_p);
999 }
1000 return *this;
1001 }
1002
1004 void swap(_fuzzy_isa_sampling_support & st) { m_select_marked_sa.swap(st.m_select_marked_sa); }
1005
1006 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
1007 {
1008 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
1009 size_type written_bytes = 0;
1010 written_bytes += m_select_marked_sa.serialize(out, child, "select_marked_sa");
1011 structure_tree::add_size(child, written_bytes);
1012 return written_bytes;
1013 }
1014
1016 void load(std::istream & in, const sa_type * sa_sample = nullptr)
1017 {
1018 m_select_marked_sa.load(in);
1019 set_vector(sa_sample);
1020 }
1021
1022 template <typename archive_t>
1023 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
1024 {
1025 ar(CEREAL_NVP(m_select_marked_sa));
1026 }
1027
1028 template <typename archive_t>
1029 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar, const sa_type * sa_sample = nullptr)
1030 {
1031 ar(CEREAL_NVP(m_select_marked_sa));
1032 set_vector(sa_sample);
1033 }
1034
1036 bool operator==(_fuzzy_isa_sampling_support const & other) const noexcept
1037 {
1038 return (m_select_marked_sa == other.m_select_marked_sa);
1039 }
1040
1042 bool operator!=(_fuzzy_isa_sampling_support const & other) const noexcept { return !(*this == other); }
1043
1044 void set_vector(const sa_type * sa_sample = nullptr)
1045 {
1046 m_sa_p = sa_sample;
1047 if (nullptr != m_sa_p) { m_select_marked_sa.set_vector(&(sa_sample->marked_sa)); }
1048 }
1049};
1050
1051template <class t_select_sa = void>
1053{
1054 template <class t_csa>
1056 typename std::conditional<std::is_void<t_select_sa>::value,
1057 typename t_csa::sa_sample_type::bv_sa_type::select_1_type,
1058 t_select_sa>::type>;
1060};
1061
1062} // namespace sdsl
1063
1064#endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
_bwt_sampling()
Default constructor.
base_type::value_type value_type
_bwt_sampling & operator=(const _bwt_sampling &st)
Assignment operation.
void load(std::istream &in)
_bwt_sampling(const _bwt_sampling &st)
Copy constructor.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
_bwt_sampling(const cache_config &cconfig, SDSL_UNUSED const t_csa *csa=nullptr)
Constructor.
int_vector< t_width > base_type
base_type::size_type size_type
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
void swap(_bwt_sampling &st)
Swap operation.
sa_sampling_tag sampling_category
_fuzzy_isa_sampling_support & operator=(const _fuzzy_isa_sampling_support &st)
Assignment operation.
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
bool operator!=(_fuzzy_isa_sampling_support const &other) const noexcept
Inequality operator.
_fuzzy_isa_sampling_support(SDSL_UNUSED const cache_config &cconfig, const sa_type *sa_sample)
Constructor.
void load(std::istream &in, const sa_type *sa_sample=nullptr)
Load sampling from disk.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar, const sa_type *sa_sample=nullptr)
_fuzzy_isa_sampling_support(const _fuzzy_isa_sampling_support &st)
Copy constructor.
void swap(_fuzzy_isa_sampling_support &st)
Swap operation.
value_type operator[](size_type i) const
Return the inverse suffix array value for the sampled index i.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
bool operator==(_fuzzy_isa_sampling_support const &other) const noexcept
Equality operator.
_fuzzy_isa_sampling_support()
Default constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
void set_vector(const sa_type *sa_sample=nullptr)
_fuzzy_sa_sampling(const _fuzzy_sa_sampling &st)
Copy constructor.
bool operator==(_fuzzy_sa_sampling const &other) const noexcept
Equality operator.
_fuzzy_sa_sampling & operator=(_fuzzy_sa_sampling &&st)
Move assignment operation.
_fuzzy_sa_sampling(_fuzzy_sa_sampling &&st)
Move constructor.
const t_select_isa & select_marked_isa
value_type inv(size_type i) const
Return the inv permutation at position i (already condensed!!!)
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
bit_vector::value_type value_type
_fuzzy_sa_sampling & operator=(const _fuzzy_sa_sampling &st)
Assignment operation.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_fuzzy_sa_sampling()
Default constructor.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
_fuzzy_sa_sampling(cache_config &cconfig, SDSL_UNUSED const t_csa *csa=nullptr)
Constructor.
bit_vector::size_type size_type
bool operator!=(_fuzzy_sa_sampling const &other) const noexcept
Inequality operator.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
_isa_sampling()
Default constructor.
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
value_type operator[](size_type i) const
Returns the ISA value at position j, where.
isa_sampling_tag sampling_category
base_type::value_type value_type
_isa_sampling(const cache_config &cconfig, SDSL_UNUSED const sa_type *sa_sample=nullptr)
Constructor.
int_vector< t_width > base_type
void set_vector(SDSL_UNUSED const sa_type *)
base_type::size_type size_type
t_csa::sa_sample_type sa_type
void load(std::istream &in, SDSL_UNUSED const sa_type *sa_sample=nullptr)
Load sampling from disk.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
int_vector< t_width > base_type
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
base_type::size_type size_type
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
_sa_order_sampling(const cache_config &cconfig, SDSL_UNUSED const t_csa *csa=nullptr)
Constructor.
_sa_order_sampling()
Default constructor.
base_type::value_type value_type
_text_order_isa_sampling_support(const _text_order_isa_sampling_support &st)
Copy constructor.
value_type operator[](size_type i) const
Return the inverse suffix array value for the sampled index i.
bool operator==(_text_order_isa_sampling_support const &other) const noexcept
Equality operator.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
_text_order_isa_sampling_support & operator=(const _text_order_isa_sampling_support &st)
Assignment operation.
void load(std::istream &in, const sa_type *sa_sample=nullptr)
Load sampling from disk.
bool operator!=(_text_order_isa_sampling_support const &other) const noexcept
Inequality operator.
std::tuple< value_type, size_type > sample_leq(size_type i) const
Returns the rightmost ISA sample <= i and its position.
_text_order_isa_sampling_support(SDSL_UNUSED const cache_config &cconfig, const typename std::enable_if< sa_type::text_order, sa_type * >::type sa_sample)
Constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar, const sa_type *sa_sample=nullptr)
void swap(_text_order_isa_sampling_support &st)
Swap operation.
void set_vector(const sa_type *sa_sample=nullptr)
std::tuple< value_type, size_type > sample_qeq(size_type i) const
Returns the leftmost ISA sample >= i and its position.
value_type condensed_sa(size_type i) const
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
_text_order_sampling(const cache_config &cconfig, SDSL_UNUSED const t_csa *csa=nullptr)
Constructor.
_text_order_sampling(const _text_order_sampling &st)
Copy constructor.
value_type operator[](size_type i) const
Return the suffix array value for the sampled index i.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
_text_order_sampling & operator=(const _text_order_sampling &st)
Assignment operation.
bool is_sampled(size_type i) const
Determine if index i is sampled or not.
void swap(_text_order_sampling &st)
Swap operation.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
_text_order_sampling()
Default constructor.
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
Definition: int_vector.hpp:216
reference operator[](const size_type &i) noexcept
non const version of [] operator
std::enable_if<!cereal::traits::is_output_serializable< cereal::BinaryData< int_vector< t_width > >, archive_t >::value, void >::type CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal if archive is not binary.
void swap(int_vector &v) noexcept
Swap method for int_vector.
Definition: int_vector.hpp:503
int_vector_size_type size_type
Definition: int_vector.hpp:229
int_vector & operator=(const int_vector &v)
Assignment operator.
void load(std::istream &in)
Load the int_vector for a stream.
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
Definition: int_vector.hpp:595
size_type size() const noexcept
The number of elements in the int_vector.
iterator insert(const_iterator it, value_type value)
Insert an element before the element that the iterator is pointing to.
Definition: int_vector.hpp:379
std::enable_if<!cereal::traits::is_input_serializable< cereal::BinaryData< int_vector< t_width > >, archive_t >::value, void >::type CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Serialise (load) via cereal if archive is not binary.
int_vector_trait< t_width >::value_type value_type
Definition: int_vector.hpp:221
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:521
static mm_event_proxy event(const std::string &name)
A rank structure proposed by Sebastiano Vigna.
A bit vector which compresses very sparse populated bit vectors by.
Definition: sd_vector.hpp:115
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
A wavelet tree class for integer sequences.
Definition: wt_int.hpp:49
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the data structure into the given ostream.
Definition: wt_int.hpp:755
size_type size() const
Returns the size of the original vector.
Definition: wt_int.hpp:304
size_type select(size_type i, value_type c) const
Calculates the i-th occurrence of the symbol c in the supported vector.
Definition: wt_int.hpp:431
void load(std::istream &in)
Loads the data structure from the given istream.
Definition: wt_int.hpp:771
#define SDSL_UNUSED
Definition: config.hpp:13
csa_alphabet_strategy.hpp includes different strategy classes for representing an alphabet of a CSA.
int_vector.hpp contains the sdsl::int_vector class.
inv_perm_support.hpp contains a class which adds access to the inverse of a permutation.
constexpr char KEY_SAMPLE_CHAR[]
Definition: config.hpp:45
constexpr char KEY_SA[]
Definition: config.hpp:37
constexpr char KEY_ISA[]
Definition: config.hpp:40
uint64_t id()
uint64_t pid()
void bit_compress(t_int_vec &v)
Bit compress the int_vector.
Definition: util.hpp:484
std::string to_string(const T &t, int w=1)
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
Definition: io.hpp:630
bool load_from_cache(T &v, const std::string &key, const cache_config &config, bool add_type_hash=false)
Definition: io.hpp:711
void construct(t_index &idx, std::string file, uint8_t num_bytes=0, bool move_input=false)
Definition: construct.hpp:45
void construct_isa(cache_config &config)
void register_cache_file(const std::string &key, cache_config &config)
Register the existing resource specified by the key to the cache.
Definition: io.hpp:656
bool store_to_file(const T &v, const std::string &file)
Store a data structure to a file.
Definition: io.hpp:798
bool cache_file_exists(const std::string &key, const cache_config &config)
Checks if the resource specified by the key exists in the cache.
Definition: io.hpp:672
int remove(const std::string &)
Remove a file.
Definition: ram_fs.hpp:194
int_vector ::size_type size(const range_type &r)
Size of a range.
Definition: wt_helper.hpp:787
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:651
Helper class for construction process.
Definition: config.hpp:67
_fuzzy_isa_sampling_support< t_csa, typename std::conditional< std::is_void< t_select_sa >::value, typename t_csa::sa_sample_type::bv_sa_type::select_1_type, t_select_sa >::type > type
_text_order_isa_sampling_support< t_csa, t_inv_perm, typename std::conditional< std::is_void< t_sel >::value, typename t_csa::sa_sample_type::bv_type::select_1_type, t_sel >::type > type
wavelet_trees.hpp contains wavelet tree implementations.