8#ifndef INCLUDED_SDSL_CST_SCT3
9#define INCLUDED_SDSL_CST_SCT3
76template <
class t_csa = csa_wt<>,
77 class t_lcp = lcp_dac<>,
78 class t_bp_support = bp_support_sada<>,
79 class t_bv = bit_vector,
80 class t_rank =
typename std::conditional<std::is_same<t_bv, bit_vector>::value,
82 typename t_bv::rank_1_type>::type,
84 t_sel =
typename std::conditional<std::is_same<t_bv,
85 bit_vector>::value and std::is_same<
typename t_csa::alphabet_category,
86 byte_alphabet_tag>::value,
87 select_support_scan<>,
88 typename t_bv::select_1_type>::type>
91 static_assert(std::is_same<typename index_tag<t_csa>::type,
csa_tag>::value,
92 "First template argument has to be a compressed suffix array.");
100 typedef typename t_lcp::template type<cst_sct3>
lcp_type;
110 typedef typename t_csa::alphabet_type::sigma_type
sigma_type;
141 assert(m_bp[ckpos] == 0);
142 kpos = m_bp_support.find_open(ckpos);
143 return m_bp_support.rank(kpos) - 1;
155 if (v.cipos > v.jp1pos)
157 ckpos = v.jp1pos - 1;
163 assert(m_bp[ckpos] == 0);
166 kpos = m_bp_support.find_open(ckpos);
167 return m_bp_support.rank(kpos) - 1;
172 size_type r = ckpos - m_bp_support.rank(ckpos);
179 assert(m_bp[ckpos] == 0);
180 kpos = m_bp_support.find_open(ckpos);
181 return m_bp_support.rank(kpos) - 1;
186 if (kpos < m_bp.
size())
187 ckpos = m_bp_support.find_close(kpos);
223 size_type cipos = m_bp_support.find_close(ipos);
224 size_type result = m_bp_support.rank(cipos);
247 psvcpos = m_bp.
size() - 1;
252 psvpos = m_bp_support.enclose(ipos);
253 psvcpos = m_bp_support.find_close(psvpos);
254 return m_bp_support.rank(psvpos) - 1;
257 size_type r0 = cipos - m_bp_support.rank(cipos);
259 const uint64_t * p = m_first_child.data() + (r0 >> 6);
260 uint64_t w = (*p) >> (r0 & 0x3F);
263 next_first_child = cipos +
bits::lo(w);
264 if (cipos == next_first_child and m_bp[next_first_child + 1])
266 psvpos = m_bp_support.enclose(ipos);
267 psvcpos = m_bp_support.find_close(psvpos);
268 return m_bp_support.rank(psvpos) - 1;
276 while (!(w = *p) and steps-- > 0)
281 if (w != 0) { delta +=
bits::lo(w) + 1; }
284 auto pos = m_first_child_select(m_first_child_rank(r0 + 1) + 1);
287 next_first_child = cipos + delta;
289 if (!m_bp[next_first_child + 1])
291 psvcpos = next_first_child + 1;
292 psvpos = m_bp_support.find_open(psvcpos);
293 return m_bp_support.rank(psvpos) - 1;
297 psvpos = m_bp_support.enclose(m_bp_support.find_open(next_first_child));
298 psvcpos = m_bp_support.find_close(psvpos);
299 return m_bp_support.rank(psvpos) - 1;
309 size_type i = m_bp_support.select(l + 1);
310 size_type j = m_bp_support.select(r + 1);
311 size_type fc_i = m_bp_support.find_close(i);
318 size_type ec = m_bp_support.rr_enclose(i, j);
319 if (ec == m_bp_support.size())
325 return m_bp_support.rank(ec) - 1;
358 , m_bp_support(cst.m_bp_support)
359 , m_first_child(cst.m_first_child)
360 , m_first_child_rank(cst.m_first_child_rank)
361 , m_first_child_select(cst.m_first_child_select)
362 , m_nodes(cst.m_nodes)
365 m_bp_support.set_vector(&m_bp);
366 m_first_child_rank.set_vector(&m_first_child);
367 m_first_child_select.set_vector(&m_first_child);
375 : m_csa(std::move(cst.m_csa))
376 , m_bp(std::move(cst.m_bp))
377 , m_bp_support(std::move(cst.m_bp_support))
378 , m_first_child(std::move(cst.m_first_child))
379 , m_first_child_rank(std::move(cst.m_first_child_rank))
380 , m_first_child_select(std::move(cst.m_first_child_select))
381 , m_nodes(cst.m_nodes)
384 m_bp_support.set_vector(&m_bp);
385 m_first_child_rank.set_vector(&m_first_child);
386 m_first_child_select.set_vector(&m_first_child);
407 bool empty()
const {
return m_csa.empty(); }
415 if (0 == m_bp.
size())
423 if (0 == m_bp.
size() and
root() == v)
return end();
443 if (0 == m_bp.
size())
460 *
this = std::move(tmp);
473 m_csa = std::move(cst.m_csa);
475 m_bp = std::move(cst.m_bp);
476 m_bp_support = std::move(cst.m_bp_support);
477 m_bp_support.set_vector(&m_bp);
478 m_first_child = std::move(cst.m_first_child);
479 m_first_child_rank = std::move(cst.m_first_child_rank);
480 m_first_child_rank.set_vector(&m_first_child);
481 m_first_child_select = std::move(cst.m_first_child_select);
482 m_first_child_select.set_vector(&m_first_child);
483 m_nodes = std::move(cst.m_nodes);
497 void load(std::istream & in);
500 template <
typename archive_t>
504 template <
typename archive_t>
510 return (m_csa == other.m_csa) && (m_lcp == other.m_lcp) && (m_bp == other.m_bp) &&
511 (m_bp_support == other.m_bp_support) && (m_first_child == other.m_first_child) &&
512 (m_first_child_rank == other.m_first_child_rank) &&
513 (m_first_child_select == other.m_first_child_select) ;
548 assert(i > 0 and i <=
size());
552 jp1pos = m_bp.
size();
553 else if (m_bp[ipos + 1])
556 jp1pos = m_bp_support.select(i + 1);
557 return node_type(i - 1, i - 1, ipos, m_bp_support.find_close(ipos), jp1pos);
614 size_type psv_pos, psv_cpos, psv_v, nsv_v, nsv_p1pos;
615 psv_v = psv(v.
j + 1, v.
jp1pos, m_bp_support.find_close(v.
jp1pos), psv_pos, psv_cpos);
616 nsv_v = nsv(v.
j + 1, v.
jp1pos) - 1;
617 if (nsv_v ==
size() - 1) { nsv_p1pos = m_bp.
size(); }
620 nsv_p1pos = m_bp_support.select(nsv_v + 2);
622 return node_type(psv_v, nsv_v, psv_pos, psv_cpos, nsv_p1pos);
627 psv_v = psv(v.
i, v.
ipos, v.
cipos, psv_pos, psv_cpos);
661 bool last_child = m_bp[cjp1posm1];
665 size_type first_child_idx = cjp1posm1 - m_bp_support.rank(cjp1posm1);
666 last_child = m_first_child[first_child_idx];
672 if (nsv_v ==
size() - 1) { nsv_p1pos = m_bp.
size(); }
675 nsv_p1pos = m_bp_support.select(nsv_v + 2);
681 size_type new_j = m_bp_support.rank(m_bp_support.find_open(cjp1posm1)) - 2;
685 m_bp_support.find_close(v.
jp1pos),
686 m_bp_support.select(new_j + 2));
708 size_type k = 0, kpos = 0, k_find_close = 0;
710 k = select_l_index(v, 1, kpos, k_find_close);
716 k1 = select_l_index(v, i - 1, kpos1, k_find_close1);
717 if (k1 == v.
j + 1)
return root();
719 k2 = select_l_index(v, i, kpos2, k_find_close2);
720 return node_type(k1, k2 - 1, kpos1, k_find_close1, kpos2);
737 size_type r = closing_pos_of_first_l_index(v);
739 const uint64_t * p = m_first_child.data() + (r0 >> 6);
740 uint8_t offset = r0 & 0x3F;
747 else if (m_first_child.data() == p)
756 while (p > m_first_child.data() and steps-- > 0)
767 auto goal_rank = m_first_child_rank(r0);
768 if (goal_rank == 0) {
return r0 + 2; }
771 return r0 - m_first_child_select(goal_rank) + 1;
793 if (cc == 0 and c != 0)
795 size_type char_ex_max_pos = m_csa.C[((
size_type)1) + cc], char_inc_min_pos = m_csa.C[cc];
801 if (char_pos >= char_ex_max_pos)
806 else if (char_pos >= char_inc_min_pos)
815 if (char_pos < char_inc_min_pos)
820 else if (char_pos < char_ex_max_pos)
826 size_type l_bound = 2, r_bound = child_cnt, mid, kpos, ckpos, l_index;
827 while (l_bound < r_bound)
829 mid = (l_bound + r_bound) >> 1;
831 l_index = select_l_index(v, mid - 1, kpos, ckpos);
834 if (char_inc_min_pos > char_pos) { l_bound = mid + 1; }
835 else if (char_ex_max_pos <= char_pos)
843 size_type lp1_index = m_bp_support.rank(m_bp_support.find_open(ckpos - 1)) - 1;
845 if (lp1_index - 1 <
size() - 1) { jp1pos = m_bp_support.select(lp1_index + 1); }
846 return node_type(l_index, lp1_index - 1, kpos, ckpos, jp1pos);
857 return child(v, c, char_pos);
872 assert(d <=
depth(v));
875 while (c_begin < c_end)
877 mid = (c_begin + c_end) >> 1;
878 if (m_csa.C[mid] <= order) { c_begin = mid + 1; }
884 return m_csa.comp2char[c_begin - 1];
906 size_type min_index_pos = m_bp_support.select(min_index + 1);
907 size_type min_index_cpos = m_bp_support.find_close(min_index_pos);
909 if (min_index_cpos >= (m_bp.
size() - m_csa.sigma))
913 size_type new_j = nsv(min_index, min_index_pos) - 1;
915 size_type new_i = psv(min_index, min_index_pos, min_index_cpos, new_ipos, new_icpos);
917 if (new_j <
size() - 1) { jp1pos = m_bp_support.select(new_j + 2); }
918 return node_type(new_i, new_j, new_ipos, new_icpos, jp1pos);
931 if (v ==
root()) {
return 0; }
934 return size() - m_csa[v.
i];
939 size_type l = select_l_index(v, 1, kpos, ckpos);
981 if (v.
i == 0 and v.
j == 0)
989 size_type min_index_pos = m_bp_support.select(min_index + 1);
990 size_type min_index_cpos = m_bp_support.find_close(min_index_pos);
991 if (min_index_cpos >= (m_bp.
size() - m_csa.sigma))
995 size_type new_j = nsv(min_index, min_index_pos) - 1;
997 size_type new_i = psv(min_index, min_index_pos, min_index_cpos, new_ipos, new_icpos);
999 if (new_j <
size() - 1) { jp1pos = m_bp_support.select(new_j + 2); }
1000 return node_type(new_i, new_j, new_ipos, new_icpos, jp1pos);
1015 size_type c_right = m_csa.bwt.rank(v.
j + 1, c);
1016 if (c_left == c_right)
1018 if (c_left + 1 == c_right)
1019 return select_leaf(m_csa.C[m_csa.char2comp[c]] + c_left + 1);
1022 size_type left = m_csa.C[m_csa.char2comp[c]] + c_left;
1023 size_type right = m_csa.C[m_csa.char2comp[c]] + c_right - 1;
1024 assert(left < right);
1026 size_type ipos = m_bp_support.select(left + 1);
1028 if (right <
size() - 1) { jp1pos = m_bp_support.select(right + 2); }
1029 return node_type(left, right, ipos, m_bp_support.find_close(ipos), jp1pos);
1065 ckpos = v.
cipos - 1;
1067 assert(m_bp[ckpos] == 0);
1068 size_type r0ckpos = ckpos - m_bp_support.rank(ckpos);
1069 return size() + m_first_child_rank(r0ckpos);
1092 id =
id -
size() + 1;
1098 size_type arg = m_first_child_rank(mid);
1099 if (arg <
id) {
lb = mid; }
1116 size_type arg = mid - m_bp_support.rank(mid - 1);
1117 if (arg < r0ckpos + 1) {
lb = mid; }
1125 if (ckpos == m_bp.
size() - 1) {
return root(); }
1126 if (m_bp[ckpos + 1])
1129 size_type j = m_bp_support.rank(jp1pos - 1) - 1;
1130 size_type kpos = m_bp_support.find_open(ckpos);
1131 size_type ipos = m_bp_support.enclose(kpos);
1132 size_type cipos = m_bp_support.find_close(ipos);
1133 size_type i = m_bp_support.rank(ipos - 1);
1134 return node_type(i, j, ipos, cipos, jp1pos);
1139 size_type ipos = m_bp_support.find_open(cipos);
1140 size_type i = m_bp_support.rank(ipos - 1);
1143 if (j !=
size() - 1) { jp1pos = m_bp_support.select(j + 2); }
1144 return node_type(i, j, ipos, cipos, jp1pos);
1163 if (
rb ==
size() - 1) { jp1pos = m_bp.
size(); }
1166 jp1pos = m_bp_support.select(
rb + 2);
1168 return node_type(
lb,
rb, ipos, m_bp_support.find_close(ipos), jp1pos);
1178 size_type ipos = m_bp_support.select(i + 1);
1179 size_type cipos = m_bp_support.find_close(ipos);
1180 return m_first_child_rank.rank(((ipos + cipos - 1) >> 1) - i);
1187template <
class t_csa,
class t_lcp,
class t_bp_support,
class t_bv,
class t_rank,
class t_sel>
1194 m_nodes += m_bp.
size() / 2;
1195 if (m_bp.
size() == 2)
1202 util::init_support(m_bp_support, &m_bp);
1203 util::init_support(m_first_child_rank, &m_first_child);
1204 util::init_support(m_first_child_select, &m_first_child);
1206 if (!build_only_bps)
1213 if (!build_only_bps)
1220template <
class t_csa,
class t_lcp,
class t_bp_support,
class t_bv,
class t_rank,
class t_sel>
1227 written_bytes += m_csa.serialize(out, child,
"csa");
1228 written_bytes += m_lcp.serialize(out, child,
"lcp");
1229 written_bytes += m_bp.
serialize(out, child,
"bp");
1230 written_bytes += m_bp_support.serialize(out, child,
"bp_support");
1231 written_bytes += m_first_child.serialize(out, child,
"mark_child");
1232 written_bytes += m_first_child_rank.serialize(out, child,
"mark_child_rank");
1233 written_bytes += m_first_child_select.serialize(out, child,
"mark_child_select");
1234 written_bytes +=
write_member(m_nodes, out, child,
"node_cnt");
1236 return written_bytes;
1239template <
class t_csa,
class t_lcp,
class t_bp_support,
class t_bv,
class t_rank,
class t_sel>
1245 m_bp_support.load(in, &m_bp);
1246 m_first_child.load(in);
1247 m_first_child_rank.load(in, &m_first_child);
1248 m_first_child_select.load(in, &m_first_child);
1252template <
class t_csa,
class t_lcp,
class t_bp_support,
class t_bv,
class t_rank,
class t_sel>
1253template <
typename archive_t>
1266template <
class t_csa,
class t_lcp,
class t_bp_support,
class t_bv,
class t_rank,
class t_sel>
1267template <
typename archive_t>
1275 m_bp_support.set_vector(&m_bp);
1278 m_first_child_rank.set_vector(&m_first_child);
1280 m_first_child_select.set_vector(&m_first_child);
1284template <
class t_
int>
1309 if (
i != interval.
i)
return i < interval.
i;
1310 return j < interval.
j;
1330template <
class t_
int>
1333 os <<
"-[" << interval.
i <<
"," << interval.
j <<
"](" << interval.
ipos <<
"," << interval.
cipos <<
","
1334 << interval.
jp1pos <<
")";
bp_support.hpp contains several classed which support find_open, find_close, enclose and rr-enclose q...
A forward iterator for a bottom up traversal of a suffix tree.
An forward iterator for (compressed) suffix trees.
A class for the Compressed Suffix Tree (CST) proposed by Ohlebusch and Gog.
t_csa::alphabet_type::sigma_type sigma_type
size_type id(const node_type &v) const
Computes a unique identification number for a node of the suffx tree in the range [0....
node_type parent(const node_type &v) const
Calculate the parent node of a node v.
node_type lca(node_type v, node_type w) const
Calculate the LCA of two nodes v and w
const_iterator begin() const
Returns a const_iterator to the first element of a depth first traversal of the tree.
const_bottom_up_iterator end_bottom_up() const
Returns an iterator to the element after the last element of a bottom-up traversal of the tree.
node_type rightmost_leaf(const node_type &v) const
Calculates the rightmost leaf in the subtree rooted at node v.
size_type size(const node_type &v) const
Calculate the number of leaves in the subtree rooted at node v.
node_type wl(const node_type &v, const char_type c) const
Compute the Weiner link of node v and character c.
const sel_type & first_child_select
t_bp_support bp_support_type
static size_type max_size()
Returns the largest size that cst_sct3 can ever have.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Serialise (load) via cereal.
node_type select_leaf(size_type i) const
Return the i-th leaf (1-based from left to right).
node_type root() const
Return the root of the suffix tree.
node_type inv_id(size_type id)
Computes the node for such that id(v)=id.
size_type node_depth(node_type v) const
Returns the node depth of node v.
const_bottom_up_iterator begin_bottom_up() const
Returns an iterator to the first element of a bottom-up traversal of the tree.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
node_type node(size_type lb, size_type rb) const
Get the node in the suffix tree which corresponds to the lcp-interval [lb..rb].
size_type nodes() const
Get the number of nodes of the suffix tree.
cst_bottom_up_const_forward_iterator< cst_sct3 > const_bottom_up_iterator
const_iterator begin(const node_type &v) const
Returns a const_iterator to the first element of a depth first traversal of the subtree rooted at nod...
t_csa::string_type string_type
const bv_type & first_child_bv
bool empty() const
Returns if the data structure is empty.
cst_sct3(cst_sct3 &&cst)
Move constructor.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Serialise (save) via cereal.
t_csa::char_type char_type
bool operator==(cst_sct3 const &other) const noexcept
Equality operator.
bool operator!=(cst_sct3 const &other) const noexcept
Inequality operator.
size_type size() const
Number of leaves of the suffix tree.
node_type child(const node_type &v, const char_type c) const
Get the child w of node v which edge label (v,w) starts with character c.
const rank_type & first_child_rank
const bp_support_type & bp_support
char_type edge(const node_type &v, size_type d) const
Returns the d-th character (1-based indexing) of the edge-label pointing to v.
node_type sl(const node_type &v) const
Compute the suffix link of node v.
node_type child(const node_type &v, const char_type c, size_type &char_pos) const
Get the child w of node v which edge label (v,w) starts with character c.
bool is_leaf(const node_type &v) const
Decide if a node is a leaf.
const_iterator end(const node_type &v) const
Returns a const_iterator to the element past the end of a depth first traversal of the subtree rooted...
const_iterator end() const
Returns a const_iterator to the element after the last element of a depth first traversal of the tree...
cst_dfs_const_forward_iterator< cst_sct3 > const_iterator
t_csa::size_type size_type
bp_interval< size_type > node_type
Type for the nodes in the tree.
node_type sibling(const node_type &v) const
Returns the next sibling of node v.
node_type leftmost_leaf(const node_type &v) const
Calculates the leftmost leaf in the subtree rooted at node v.
size_type depth(const node_type &v) const
Returns the string depth of node v.
cst_sct3 & operator=(const cst_sct3 &cst)
Assignment Operator.
cst_sct3 & operator=(cst_sct3 &&cst)
Assignment Move Operator.
t_lcp::template type< cst_sct3 > lcp_type
ptrdiff_t difference_type
cst_node_child_proxy< cst_sct3 > children(const node_type &v) const
Return a proxy object which allows iterating over the children of a node.
size_type sn(const node_type &v) const
Computes the suffix number of a leaf node v.
size_type rb(const node_type &v) const
Calculates the index of the rightmost leaf in the corresponding suffix array.
cst_sct3(const cst_sct3 &cst)
Copy constructor.
t_csa::alphabet_category alphabet_category
t_csa::alphabet_type::comp_char_type comp_char_type
void load(std::istream &in)
Load from a stream.
node_type select_child(const node_type &v, size_type i) const
Get the i-th child of a node v.
size_type lb(const node_type &v) const
Calculates the index of the leftmost leaf in the corresponding suffix array.
size_type tlcp_idx(size_type i) const
Maps an index i to the position in TLCP where LCP[i] can be found.
size_type degree(const node_type &v) const
Get the number of children of a node v.
cst_sct3()=default
Default constructor.
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
static mm_event_proxy event(const std::string &name)
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
construct.hpp contains methods to construct indexes (compressed suffix arrays and trees).
csa_wt.hpp contains an implementation of the compressed suffix array based on a wavelet tree.
cst_iterators.hpp contains iterator classes for traversing (compressed) suffix arrays.
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
lcp.hpp contains classes for lcp information.
int_vector ::size_type size_type
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
t_csa::size_type get_char_pos(typename t_csa::size_type idx, typename t_csa::size_type d, const t_csa &csa)
bool load_from_cache(T &v, const std::string &key, const cache_config &config, bool add_type_hash=false)
void move_lcp(t_lcp &&lcp, t_lcp &&lcp_c, const t_cst &cst)
void swap(int_vector_reference< t_int_vector > x, int_vector_reference< t_int_vector > y) noexcept
void copy_lcp(t_lcp &lcp, const t_lcp &lcp_c, const t_cst &cst)
bit_vector::size_type construct_supercartesian_tree_bp_succinct_and_first_child(int_vector_buffer< t_width > &lcp_buf, bit_vector &bp, bit_vector &bp_fc, const bool minimum=true)
Calculate the balanced parentheses of the Super-Cartesian tree, described in Ohlebusch and Gog (SPIRE...
void construct_lcp(t_lcp &lcp, const t_cst &cst, cache_config &config)
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
void read_member(T &t, std::istream &in)
void set_lcp_pointer(t_lcp &lcp, const t_cst &cst)
void load_lcp(t_lcp &lcp, std::istream &in, const t_cst &cst)
std::ostream & operator<<(std::ostream &os, const bp_interval< t_int > &interval)
rank_support.hpp contains classes that support a sdsl::bit_vector with constant time rank information...
Contains declarations and definitions of data structure concepts.
select_support.hpp contains classes that support a sdsl::bit_vector with constant time select informa...
static constexpr uint32_t lo(uint64_t x)
Calculates the position of the rightmost 1-bit in the 64bit integer x if it exists.
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
t_int i
The left border of the lcp-interval .
bp_interval & operator=(bp_interval &&interval)=default
Move assignment.
bp_interval(t_int i=0, t_int j=0, t_int ipos=0, t_int cipos=0, t_int jp1pos=0)
Constructor.
bp_interval(const bp_interval &iv)=default
Copy constructor.
t_int j
The right border of the lcp-interval .
bool operator!=(const bp_interval &interval) const
Inequality operator.
bool operator==(const bp_interval &interval) const
Equality operator.
bp_interval & operator=(const bp_interval &interval)=default
Assignment operator.
bool operator<(const bp_interval &interval) const
bp_interval(bp_interval &&iv)=default
Move copy constructor.
Helper class for construction process.
suffix_tree_algorithm.hpp contains algorithms on CSTs
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.