8#ifndef INCLUDED_SDSL_SUFFIX_TREE_ALGORITHM
9#define INCLUDED_SDSL_SUFFIX_TREE_ALGORITHM
34 typename t_cst::node_type & v,
36 const typename t_cst::char_type c,
39 typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value,
cst_tag>::type x =
42 auto cc = cst.csa.char2comp[c];
43 if (cc == 0 and cc != c)
48 char_pos = cst.csa.psi[char_pos];
49 if (char_pos < cst.csa.C[cc] or char_pos >= cst.csa.C[cc + 1])
return 0;
52 else if (d == depth_node)
54 v = cst.child(v, c, char_pos);
78template <
class t_cst,
class t_pat_iter>
81 typename t_cst::node_type & v,
87 typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value,
cst_tag>::type x =
90 if (begin == end)
return cst.size(v);
92 t_pat_iter it = begin;
114template <
class t_cst,
class t_pat_iter>
117 return count(cst.csa, begin, end);
135template <
class t_cst,
class t_pat_iter,
class t_rac =
int_vector<64>>
140 typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value,
cst_tag>::type x =
143 return locate(cst.csa, begin, end);
157template <
class t_cst,
class t_text_iter>
159 const typename t_cst::node_type & v,
162 typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value,
173 extract(cst.csa, begin, begin + cst.depth(v) - 1, text);
185template <
class t_cst>
186typename t_cst::csa_type::string_type
188 const typename t_cst::node_type & v,
190 typename std::enable_if<std::is_same<cst_tag, typename t_cst::index_category>::value,
cst_tag>::type x =
193 typedef typename t_cst::csa_type::string_type t_rac;
194 if (v == cst.root()) {
return t_rac{}; }
198 return extract(cst.csa, begin, begin + cst.depth(v) - 1);
208template <
class t_cst>
209double H0(
const typename t_cst::node_type & v,
const t_cst & cst)
211 if (cst.is_leaf(v)) {
return 0; }
215 auto n = cst.size(v);
216 for (
const auto & child : cst.children(v))
218 double p = ((double)cst.size(child)) / n;
231template <
class t_cst>
236 std::set<typename t_cst::size_type> leafs_with_d_smaller_k;
239 leafs_with_d_smaller_k.insert(cst.csa.isa[cst.csa.size() - d]);
241 for (
typename t_cst::const_iterator it = cst.begin(), end = cst.end(); it != end; ++it)
245 if (!cst.is_leaf(*it))
250 if (d == k) { hk += cst.
size(*it) *
H0(*it, cst); }
258 if (leafs_with_d_smaller_k.find(cst.lb(*it)) == leafs_with_d_smaller_k.end()) { ++context; }
263 return { hk, context };
size_type size() const noexcept
The number of elements in the int_vector.
int_vector ::size_type size_type
Namespace for the succinct data structure library.
std::pair< double, size_t > Hk(const t_cst &cst, typename t_cst::size_type k)
Calculate the k-th order entropy of a text.
uint64_t count(const t_k2_treap &treap, k2_treap_ns::point_type p1, k2_treap_ns::point_type p2)
Count how many points are in the rectangle (p1,p2)
t_csa::size_type forward_search(const t_csa &csa, typename t_csa::size_type l, typename t_csa::size_type r, t_pat_iter begin, t_pat_iter end, typename t_csa::size_type &l_res, typename t_csa::size_type &r_res, SDSL_UNUSED typename std::enable_if< std::is_same< csa_tag, typename t_csa::index_category >::value, csa_tag >::type x=csa_tag())
Forward search for a pattern in an -interval in the CSA.
t_rac locate(const t_csa &csa, t_pat_iter begin, t_pat_iter end, SDSL_UNUSED typename std::enable_if< std::is_same< csa_tag, typename t_csa::index_category >::value, csa_tag >::type x=csa_tag())
Calculates all occurrences of a pattern pat in a CSA.
t_csa::size_type extract(const t_csa &csa, typename t_csa::size_type begin, typename t_csa::size_type end, t_text_iter text, SDSL_UNUSED typename std::enable_if< std::is_same< csa_tag, typename t_csa::index_category >::value, csa_tag >::type x=csa_tag())
Writes the substring T[begin..end] of the original text T to text[0..end-begin+1].
double H0(const typename t_cst::node_type &v, const t_cst &cst)
Calculate the zeroth order entropy of the text that follows a certain substring s.
int_vector ::size_type size(const range_type &r)
Size of a range.
suffix_array_algorithm.hpp contains algorithms on CSAs