SDSL 3.0.1
Succinct Data Structure Library
lcp_byte.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_SDSL_LCP_BYTE
9#define INCLUDED_SDSL_LCP_BYTE
10
11#include <algorithm> // for lower_bound
12#include <cassert>
13#include <iomanip>
14#include <iostream>
15#include <iterator>
16#include <utility> // for pair
17#include <vector>
18
19#include <sdsl/int_vector.hpp>
20#include <sdsl/iterators.hpp>
21#include <sdsl/lcp.hpp>
22
23namespace sdsl
24{
25
27
38template <uint8_t t_width = 0>
40{
41 public:
48 typedef const pointer const_pointer;
50 typedef ptrdiff_t difference_type;
51
54
55 enum
56 {
59 sa_order = 1
60 }; // as the lcp_byte is not fast for texts with long repetition
61
62 template <class Cst>
63 using type = lcp_byte;
64
65 private:
66 int_vector<8> m_small_lcp; // vector for LCP values < 255
67 int_vector<t_width> m_big_lcp; // vector for LCP values > 254
68 int_vector<t_width> m_big_lcp_idx; // index of LCP entries in the LCP array
69
70 typedef std::pair<size_type, size_type> tPII;
71 typedef std::vector<tPII> tVPII;
72
73 public:
75 lcp_byte() = default;
76 lcp_byte(const lcp_byte &) = default;
77 lcp_byte(lcp_byte &&) = default;
78 lcp_byte & operator=(const lcp_byte &) = default;
79 lcp_byte & operator=(lcp_byte &&) = default;
80
83 {
84 std::string lcp_file = cache_file_name(conf::KEY_LCP, config);
85 int_vector_buffer<> lcp_buf(lcp_file);
86 m_small_lcp = int_vector<8>(lcp_buf.size());
87 size_type l = 0, max_l = 0, max_big_idx = 0, big_sum = 0;
88
89 for (size_type i = 0; i < m_small_lcp.size(); ++i)
90 {
91 if ((l = lcp_buf[i]) < 255) { m_small_lcp[i] = l; }
92 else
93 {
94 m_small_lcp[i] = 255;
95 if (l > max_l) max_l = l;
96 max_big_idx = i;
97 ++big_sum;
98 }
99 }
100 m_big_lcp = int_vector<>(big_sum, 0, bits::hi(max_l) + 1);
101 m_big_lcp_idx = int_vector<>(big_sum, 0, bits::hi(max_big_idx) + 1);
102
103 for (size_type i = 0, ii = 0; i < m_small_lcp.size(); ++i)
104 {
105 if ((l = lcp_buf[i]) >= 255)
106 {
107 m_big_lcp[ii] = l;
108 m_big_lcp_idx[ii] = i;
109 ++ii;
110 }
111 }
112 }
113
115 size_type size() const { return m_small_lcp.size(); }
116
119
121 bool empty() const { return m_small_lcp.empty(); }
122
124 const_iterator begin() const { return const_iterator(this, 0); }
125
127 const_iterator end() const { return const_iterator(this, size()); }
128
130
134 {
135 if (m_small_lcp[i] != 255) { return m_small_lcp[i]; }
136 else
137 {
138 size_type idx = std::lower_bound(m_big_lcp_idx.begin(), m_big_lcp_idx.end(), i) - m_big_lcp_idx.begin();
139 return m_big_lcp[idx];
140 }
141 }
142
144 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
145 {
146 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
147 size_type written_bytes = 0;
148 written_bytes += m_small_lcp.serialize(out, child, "small_lcp");
149 written_bytes += m_big_lcp.serialize(out, child, "large_lcp");
150 written_bytes += m_big_lcp_idx.serialize(out, child, "large_lcp_idx");
151 structure_tree::add_size(child, written_bytes);
152 return written_bytes;
153 }
154
156 void load(std::istream & in)
157 {
158 m_small_lcp.load(in);
159 m_big_lcp.load(in);
160 m_big_lcp_idx.load(in);
161 }
162
163 template <typename archive_t>
164 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
165 {
166 ar(CEREAL_NVP(m_small_lcp));
167 ar(CEREAL_NVP(m_big_lcp));
168 ar(CEREAL_NVP(m_big_lcp_idx));
169 }
170
171 template <typename archive_t>
172 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
173 {
174 ar(CEREAL_NVP(m_small_lcp));
175 ar(CEREAL_NVP(m_big_lcp));
176 ar(CEREAL_NVP(m_big_lcp_idx));
177 }
178
180 bool operator==(lcp_byte const & other) const noexcept
181 {
182 return (m_small_lcp == other.m_small_lcp) && (m_big_lcp == other.m_big_lcp) &&
183 (m_big_lcp_idx == other.m_big_lcp_idx);
184 }
185
187 bool operator!=(lcp_byte const & other) const noexcept { return !(*this == other); }
188};
189
190} // end namespace sdsl
191#endif
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
uint64_t size() const
Returns the number of elements currently stored.
A generic vector class for integers of width .
Definition: int_vector.hpp:216
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
Definition: int_vector.hpp:764
bool empty() const noexcept
Equivalent to size() == 0.
Definition: int_vector.hpp:500
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
int_vector_trait< t_width >::value_type value_type
Definition: int_vector.hpp:221
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
static size_type max_size() noexcept
Maximum size of the int_vector.
Definition: int_vector.hpp:542
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
Definition: int_vector.hpp:759
A class for a simple compressed version of LCP information.
Definition: lcp_byte.hpp:40
lcp_tag index_tag
Definition: lcp_byte.hpp:53
lcp_byte(cache_config &config)
Constructor.
Definition: lcp_byte.hpp:82
const_iterator end() const
Returns a const_iterator to the element after the last element.
Definition: lcp_byte.hpp:127
lcp_byte(const lcp_byte &)=default
void load(std::istream &in)
Load from a stream.
Definition: lcp_byte.hpp:156
int_vector< t_width >::value_type value_type
Definition: lcp_byte.hpp:42
lcp_byte()=default
Default Constructor.
lcp_byte & operator=(lcp_byte &&)=default
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
Definition: lcp_byte.hpp:172
const_reference reference
Definition: lcp_byte.hpp:46
lcp_byte & operator=(const lcp_byte &)=default
int_vector ::size_type size_type
Definition: lcp_byte.hpp:49
bool operator==(lcp_byte const &other) const noexcept
Equality operator.
Definition: lcp_byte.hpp:180
random_access_const_iterator< lcp_byte > const_iterator
Definition: lcp_byte.hpp:43
value_type operator[](size_type i) const
[]-operator
Definition: lcp_byte.hpp:133
bool operator!=(lcp_byte const &other) const noexcept
Inequality operator.
Definition: lcp_byte.hpp:187
ptrdiff_t difference_type
Definition: lcp_byte.hpp:50
lcp_byte(lcp_byte &&)=default
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serialize to a stream.
Definition: lcp_byte.hpp:144
const_reference * pointer
Definition: lcp_byte.hpp:47
const value_type const_reference
Definition: lcp_byte.hpp:45
lcp_plain_tag lcp_category
Definition: lcp_byte.hpp:52
const_iterator iterator
Definition: lcp_byte.hpp:44
const pointer const_pointer
Definition: lcp_byte.hpp:48
size_type size() const
Number of elements in the instance.
Definition: lcp_byte.hpp:115
static size_type max_size()
Returns the largest size that lcp_byte can ever have.
Definition: lcp_byte.hpp:118
const_iterator begin() const
Returns a const_iterator to the first element.
Definition: lcp_byte.hpp:124
bool empty() const
Returns if the data strucutre is empty.
Definition: lcp_byte.hpp:121
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
Definition: lcp_byte.hpp:164
Generic iterator for a random access container.
Definition: iterators.hpp:24
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
lcp.hpp contains classes for lcp information.
constexpr char KEY_LCP[]
Definition: config.hpp:44
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
Definition: io.hpp:630
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:651
Helper class for construction process.
Definition: config.hpp:67