SDSL 3.0.1
Succinct Data Structure Library
suffix_array_helper.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_SDSL_SUFFIX_ARRAY_HELPER
9#define INCLUDED_SDSL_SUFFIX_ARRAY_HELPER
10
11#include <cassert>
12#include <cstdlib>
13#include <stdint.h>
14
15#include <sdsl/iterators.hpp>
16
17namespace sdsl
18{
19
21/*
22 * \param i Position in the first row.
23 * \param csa CSA
24 * \par Time complexity
25 * \f$ \Order{\log \sigma} \f$
26 * TODO: add hinted binary search? Two way binary search?
27 */
28template <typename t_csa>
29typename t_csa::char_type first_row_symbol(const typename t_csa::size_type i, const t_csa & csa)
30{
31 assert(i < csa.size());
32 if (csa.sigma < 16)
33 { //<- if sigma is small search linear
34 typename t_csa::size_type res = 1;
35 while (res < csa.sigma and csa.C[res] <= i) ++res;
36 return csa.comp2char[res - 1];
37 }
38 else
39 {
40 // binary search the character with C
41 typename t_csa::size_type upper_c = csa.sigma,
42 lower_c = 0; // lower_c inclusive, upper_c exclusive
43 typename t_csa::size_type res = 0;
44 do {
45 res = (upper_c + lower_c) / 2;
46 if (i < csa.C[res]) { upper_c = res; }
47 else if (i >= csa.C[res + 1])
48 {
49 lower_c = res + 1;
50 }
51 } while (i < csa.C[res] or i >= csa.C[res + 1]); // i is not in the interval
52 return csa.comp2char[res];
53 }
54}
55
56// psi[] trait
57template <typename t_csa, bool t_direction>
59{
60 typedef typename t_csa::value_type value_type;
61 typedef typename t_csa::size_type size_type;
62 static value_type access(const t_csa & csa, size_type i) { return csa.psi[i]; }
63};
64
65// lf[] trait
66template <typename t_csa>
67struct traverse_csa_psi_trait<t_csa, false>
68{
69 typedef typename t_csa::value_type value_type;
70 typedef typename t_csa::size_type size_type;
71 static value_type access(const t_csa & csa, size_type i)
72 {
73 // TODO: in case of a very sparse sampling of SA it may be faster to
74 // use \sigma binary searches on PSI function to determine the
75 // LF values.
76 return csa.isa[(csa[i] + csa.size() - 1) % csa.size()];
77 }
78};
79
80template <typename t_csa, bool t_direction>
82{
83 public:
84 typedef typename t_csa::value_type value_type;
85 typedef typename t_csa::size_type size_type;
86 typedef typename t_csa::difference_type difference_type;
90
91 private:
92 const t_csa & m_csa;
93
94 public:
96 traverse_csa_psi(const t_csa & csa_psi)
97 : m_csa(csa_psi)
98 {}
101 : m_csa(tcsa.m_csa)
102 {}
103
105
108 {
109 assert(i < size());
111 }
112
114 size_type size() const { return m_csa.size(); }
115
117 size_type empty() const { return m_csa.empty(); }
118
119 const_iterator begin() const { return const_iterator(this, 0); }
120
122
125 const_iterator end() const { return const_iterator(this, size()); }
126};
127
128// psi[] trait
129template <typename t_csa, bool t_direction>
131{
132 typedef typename t_csa::value_type value_type;
133 typedef typename t_csa::size_type size_type;
134 static value_type access(const t_csa & csa, size_type i)
135 {
136 // \f$\Psi[i] = SA^{-1}[SA[i]+1 \mod n]\f$, where \f$n\f$ is the length of the suffix array SA
137 return csa.isa[(csa[i] + 1) % csa.size()];
138 }
139};
140
141// lf[] trait
142template <typename t_csa>
143struct traverse_csa_saisa_trait<t_csa, false>
144{
145 typedef typename t_csa::value_type value_type;
146 typedef typename t_csa::size_type size_type;
147 static value_type access(const t_csa & csa, size_type i)
148 {
149 // TODO: in case of a very sparse sampling of SA it may be faster to
150 // use \sigma binary searches on PSI function to determine the
151 // LF values.
152 return csa.isa[(csa[i] + csa.size() - 1) % csa.size()];
153 }
154};
155
158template <typename t_csa, bool t_direction>
160{
161 public:
162 typedef typename t_csa::value_type value_type;
163 typedef typename t_csa::size_type size_type;
164 typedef typename t_csa::difference_type difference_type;
168
169 private:
170 const t_csa & m_csa;
171
172 public:
174 traverse_csa_saisa(const t_csa & csa)
175 : m_csa(csa)
176 {}
177
178 // Copy constructor
180 : m_csa(tcsa.m_csa)
181 {}
182
184
189 {
190 assert(i < size());
192 }
193
195 size_type size() const { return m_csa.size(); }
196
198 size_type empty() const { return m_csa, empty(); }
199
201
204 const_iterator begin() const { return const_iterator(this, 0); }
205
207
210 const_iterator end() const { return const_iterator(this, size()); }
211};
212
214template <typename t_csa>
216{
217 public:
218 typedef typename t_csa::char_type value_type;
219 typedef typename t_csa::size_type size_type;
220 typedef typename t_csa::char_type char_type;
221 typedef typename t_csa::difference_type difference_type;
224 typedef typename t_csa::alphabet_category alphabet_category;
225
226 private:
227 const t_csa & m_csa; //<- pointer to the (compressed) suffix array that is based on the \f$\Psi\f$ function.
228 public:
230 bwt_of_csa_psi(const t_csa & csa)
231 : m_csa(csa)
232 {}
233
235
240 {
241 assert(i < size());
242 size_type pos = m_csa.lf[i];
243 return first_row_symbol(pos, m_csa);
244 }
245
247
254 size_type rank(size_type i, const char_type c) const { return m_csa.rank_bwt(i, c); }
255
257
264 size_type select(size_type i, const char_type c) const { return m_csa.select_bwt(i, c); }
265
267 size_type size() const { return m_csa.size(); }
268
270 size_type empty() const { return m_csa.empty(); }
271
273
276 const_iterator begin() const { return const_iterator(this, 0); }
277
279
282 const_iterator end() const { return const_iterator(this, size()); }
283};
284
285// psi[] trait
286template <typename t_csa, bool t_direction>
288{
289 typedef typename t_csa::value_type value_type;
290 typedef typename t_csa::char_type char_type;
291 typedef typename t_csa::size_type size_type;
292 static value_type access(const t_csa & csa, size_type i)
293 {
294 char_type c = csa.F[i];
295 return csa.wavelet_tree.select(i - csa.C[csa.char2comp[c]] + 1, c);
296 }
297};
298
299// lf[] trait
300template <typename t_csa>
301struct traverse_csa_wt_traits<t_csa, false>
302{
303 typedef typename t_csa::value_type value_type;
304 typedef typename t_csa::char_type char_type;
305 typedef typename t_csa::size_type size_type;
306 static value_type access(const t_csa & csa, size_type i)
307 {
308 typename t_csa::char_type c;
309 auto rc = csa.wavelet_tree.inverse_select(i);
310 size_type j = rc.first;
311 c = rc.second;
312 return csa.C[csa.char2comp[c]] + j;
313 }
314};
315
318template <typename t_csa, bool t_direction>
320{
321 public:
322 typedef typename t_csa::value_type value_type;
323 typedef typename t_csa::size_type size_type;
324 typedef typename t_csa::char_type char_type;
325 typedef typename t_csa::difference_type difference_type;
329
330 private:
331 const t_csa & m_csa; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
332 traverse_csa_wt(){}; // disable default constructor
333 public:
335 traverse_csa_wt(const t_csa & csa_wt)
336 : m_csa(csa_wt)
337 {}
339
344 {
345 assert(i < m_csa.size());
347 }
348
350 size_type size() const { return m_csa.size(); }
352 size_type empty() const { return m_csa.empty(); }
354 const_iterator begin() const { return const_iterator(this, 0); }
356 const_iterator end() const { return const_iterator(this, size()); }
357};
358
359template <typename t_csa>
361{
362 public:
363 typedef const typename t_csa::char_type value_type;
364 typedef typename t_csa::size_type size_type;
365 typedef typename t_csa::char_type char_type;
366 typedef typename t_csa::difference_type difference_type;
369 typedef typename t_csa::alphabet_category alphabet_category;
370
371 private:
372 const t_csa & m_csa; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
373 bwt_of_csa_wt(){}; // disable default constructor
374 public:
376 bwt_of_csa_wt(const t_csa & csa_wt)
377 : m_csa(csa_wt)
378 {}
380
385 {
386 assert(i < size());
387 return m_csa.wavelet_tree[i];
388 }
390 size_type size() const { return m_csa.size(); }
391
393
400 size_type rank(size_type i, const char_type c) const { return m_csa.rank_bwt(i, c); }
401
403
410 size_type select(size_type i, const char_type c) const { return m_csa.select(i, c); }
411
413 size_type empty() const { return m_csa.empty(); }
415 const_iterator begin() const { return const_iterator(this, 0); }
417 const_iterator end() const { return const_iterator(this, size()); }
418};
419
420template <typename t_csa>
422{
423 public:
424 typedef typename t_csa::value_type value_type;
425 typedef typename t_csa::size_type size_type;
426 typedef typename t_csa::difference_type difference_type;
430
431 private:
432 const t_csa & m_csa; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
433 isa_of_csa_wt(){}; // disable default constructor
434 public:
436 isa_of_csa_wt(const t_csa & csa_wt)
437 : m_csa(csa_wt)
438 {}
439
441
444 {
445 assert(i < size());
446 auto sample = m_csa.isa_sample.sample_qeq(i);
447 value_type result = std::get<0>(sample);
448 if (std::get<1>(sample) < i) { i = std::get<1>(sample) + m_csa.size() - i; }
449 else
450 {
451 i = std::get<1>(sample) - i;
452 }
453 while (i--) { result = m_csa.lf[result]; }
454 return result;
455 }
456
458 size_type size() const { return m_csa.size(); }
460 size_type empty() const { return m_csa.empty(); }
462 const_iterator begin() const { return const_iterator(this, 0); }
464 const_iterator end() const { return const_iterator(this, size()); }
465};
466
467template <typename t_csa>
469{
470 public:
471 typedef typename t_csa::value_type value_type;
472 typedef typename t_csa::size_type size_type;
473 typedef typename t_csa::difference_type difference_type;
477
478 private:
479 const t_csa & m_csa; //<- pointer to the (compressed) suffix array that is based on a wavelet tree
480 isa_of_csa_psi(){}; // disable default constructor
481 public:
483 isa_of_csa_psi(const t_csa & csa_wt)
484 : m_csa(csa_wt)
485 {}
486
488
491 {
492 assert(i < size());
493 // get the rightmost sampled isa value to the left of i
494 auto sample = m_csa.isa_sample.sample_leq(i);
495 value_type result = std::get<0>(sample);
496 i = i - std::get<1>(sample);
497 while (i--) { result = m_csa.psi[result]; }
498 return result;
499 }
501 size_type size() const { return m_csa.size(); }
503 size_type empty() const { return m_csa.empty(); }
505 const_iterator begin() const { return const_iterator(this, 0); }
507 const_iterator end() const { return const_iterator(this, size()); }
508};
509
510template <typename t_csa>
512{
513 public:
514 typedef const typename t_csa::char_type value_type;
515 typedef typename t_csa::size_type size_type;
516 typedef typename t_csa::difference_type difference_type;
519 typedef typename t_csa::alphabet_category alphabet_category;
520
521 private:
522 const t_csa & m_csa;
523
524 public:
526 first_row_of_csa(const t_csa & csa)
527 : m_csa(csa)
528 {}
530
535 {
536 assert(i < size());
537 return first_row_symbol(i, m_csa);
538 }
540 size_type size() const { return m_csa.size(); }
542 size_type empty() const { return m_csa.empty(); }
544 const_iterator begin() const { return const_iterator(this, 0); }
546 const_iterator end() const { return const_iterator(this, size()); }
547};
548
549template <typename t_csa>
551{
552 public:
553 typedef typename t_csa::char_type value_type;
554 typedef typename t_csa::size_type size_type;
555 typedef typename t_csa::difference_type difference_type;
558 typedef typename t_csa::alphabet_category alphabet_category;
559
560 private:
561 const t_csa & m_csa;
562 text_of_csa() {}
563
564 public:
566 text_of_csa(const t_csa & csa)
567 : m_csa(csa)
568 {}
569
571
576 {
577 assert(i < size());
578 return first_row_symbol(m_csa.isa[i], m_csa);
579 }
580
582 size_type size() const { return m_csa.size(); }
583
585 size_type empty() const { return m_csa.empty(); }
586
588
591 const_iterator begin() const { return const_iterator(this, 0); }
592
594
597 const_iterator end() const { return const_iterator(this, size()); }
598};
599} // namespace sdsl
600
601#endif
A wrapper for the bwt of a compressed suffix array that is based on the function.
size_type size() const
Returns the size of the function.
value_type operator[](size_type i) const
Calculate the Burrows Wheeler Transform (BWT) at position i.
random_access_const_iterator< bwt_of_csa_psi > const_iterator
t_csa::alphabet_category alphabet_category
bwt_of_csa_psi(const t_csa &csa)
Constructor.
size_type rank(size_type i, const char_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
size_type empty() const
Returns if the bwt is empty.
size_type select(size_type i, const char_type c) const
Calculates the position of the i-th c.
const_iterator end() const
Returns a const_iterator to the element after the last element.
const_iterator begin() const
Returns a const_iterator to the first element.
t_csa::difference_type difference_type
bwt_of_csa_wt(const t_csa &csa_wt)
Constructor.
size_type rank(size_type i, const char_type c) const
Calculates how many symbols c are in the prefix [0..i-1].
size_type select(size_type i, const char_type c) const
Calculates the position of the i-th c.
random_access_const_iterator< bwt_of_csa_wt > const_iterator
const t_csa::char_type value_type
size_type empty() const
Returns if the BWT function is empty.
size_type size() const
Returns the size of the BWT function.
const_iterator begin() const
Returns a const_iterator to the first element.
value_type operator[](size_type i) const
Calculate the Burrows Wheeler Transform (BWT) at position i.
t_csa::char_type char_type
t_csa::difference_type difference_type
t_csa::size_type size_type
t_csa::alphabet_category alphabet_category
const_iterator end() const
Returns a const_iterator to the element after the last element.
A class for the Compressed Suffix Array (CSA) based on a Wavelet Tree (WT) of the Burrow Wheeler Tran...
Definition: csa_wt.hpp:56
const_iterator end() const
Returns a const_iterator to the element after the last element.
random_access_const_iterator< first_row_of_csa > const_iterator
first_row_of_csa(const t_csa &csa)
Constructor.
const t_csa::char_type value_type
t_csa::alphabet_category alphabet_category
size_type empty() const
Returns if the F column is empty.
t_csa::difference_type difference_type
value_type operator[](size_type i) const
Calculate F[i].
const_iterator begin() const
Returns a const_iterator to the first element.
size_type size() const
Returns the size of the F column.
int_alphabet_tag alphabet_category
isa_of_csa_psi(const t_csa &csa_wt)
Constructor.
const_iterator begin() const
Returns a const_iterator to the first element.
size_type empty() const
Returns if the CSA is empty.
const_iterator end() const
Returns a const_iterator to the element after the last element.
size_type size() const
Returns the size of the CSA.
value_type operator[](size_type i) const
Access operator to ISA.
random_access_const_iterator< isa_of_csa_psi > const_iterator
t_csa::difference_type difference_type
t_csa::value_type value_type
size_type empty() const
Returns if the CSA is empty.
size_type size() const
Returns the size of the CSA.
t_csa::difference_type difference_type
const_iterator begin() const
Returns a const_iterator to the first element.
isa_of_csa_wt(const t_csa &csa_wt)
Constructor.
t_csa::size_type size_type
t_csa::value_type value_type
int_alphabet_tag alphabet_category
const_iterator end() const
Returns a const_iterator to the element after the last element.
value_type operator[](size_type i) const
Access operator to ISA.
random_access_const_iterator< isa_of_csa_wt > const_iterator
Generic iterator for a random access container.
Definition: iterators.hpp:24
value_type operator[](size_type i) const
Character at index of the original text.
size_type empty() const
Returns if text text has size 0.
t_csa::difference_type difference_type
t_csa::char_type value_type
t_csa::alphabet_category alphabet_category
text_of_csa(const t_csa &csa)
Constructor.
size_type size() const
Returns the size of the original text.
random_access_const_iterator< text_of_csa > const_iterator
const_iterator begin() const
Returns a const_iterator to the first element.
const_iterator end() const
Returns a const_iterator to the element after the last element.
t_csa::size_type size_type
random_access_const_iterator< traverse_csa_psi > const_iterator
t_csa::difference_type difference_type
size_type size() const
Returns the size of the function.
const_iterator begin() const
const_iterator end() const
Returns a const_iterator to the element after the last element.
size_type empty() const
Returns if the function is empty.
value_type operator[](size_type i) const
Calculate the or value at position i.
traverse_csa_psi(const traverse_csa_psi &tcsa)
Copy constructor.
int_alphabet_tag alphabet_category
t_csa::value_type value_type
traverse_csa_psi(const t_csa &csa_psi)
Constructor.
A helper class for the function for (compressed) suffix arrays which provide also the inverse suffix...
const_iterator end() const
Returns a const_iterator to the element after the last element.
const_iterator begin() const
Returns a const_iterator to the first element.
traverse_csa_saisa(const traverse_csa_saisa &tcsa)
size_type empty() const
Returns if the function is empty.
traverse_csa_saisa(const t_csa &csa)
Constructor.
size_type size() const
Returns the size of the function.
value_type operator[](size_type i) const
Calculate the value at position i.
t_csa::difference_type difference_type
random_access_const_iterator< traverse_csa_saisa > const_iterator
A wrapper class for the and LF function for (compressed) suffix arrays that are based on a wavelet t...
value_type operator[](size_type i) const
Calculate the value at position i.
t_csa::difference_type difference_type
t_csa::value_type value_type
const_iterator end() const
Returns a const_iterator to the element after the last element.
const_iterator begin() const
Returns a const_iterator to the first element.
traverse_csa_wt(const t_csa &csa_wt)
Constructor.
size_type size() const
Returns the size of the function.
int_alphabet_tag alphabet_category
random_access_const_iterator< traverse_csa_wt > const_iterator
size_type empty() const
Returns if the function is empty.
iterators.hpp contains an generic iterator for random access containers.
int_vector ::size_type size_type
Namespace for the succinct data structure library.
t_csa::char_type first_row_symbol(const typename t_csa::size_type i, const t_csa &csa)
Get the symbol at position i in the first row of the sorted suffixes of CSA.
static value_type access(const t_csa &csa, size_type i)
static value_type access(const t_csa &csa, size_type i)
static value_type access(const t_csa &csa, size_type i)
static value_type access(const t_csa &csa, size_type i)
static value_type access(const t_csa &csa, size_type i)
static value_type access(const t_csa &csa, size_type i)