SDSL 3.0.1
Succinct Data Structure Library
construct_lcp_helper.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
4#ifndef INCLUDED_SDSL_CONSTRUCT_LCP_HELPER
5#define INCLUDED_SDSL_CONSTRUCT_LCP_HELPER
6
7#include <list>
8#include <queue>
9#include <vector>
10
11#include <sdsl/int_vector.hpp>
12
13namespace sdsl
14{
15
17
26inline void insert_lcp_values(int_vector<> & partial_lcp,
27 bit_vector & index_done,
28 std::string lcp_file,
29 uint64_t max_lcp_value,
30 uint64_t lcp_value_offset)
31{
32 std::string tmp_lcp_file = lcp_file + "_TMP";
33 const uint64_t buffer_size = 1000000; // has to be a multiple of 64
35 int_vector_buffer<> lcp_buffer(lcp_file, std::ios::in, buffer_size); // open lcp_file
36 uint64_t n = lcp_buffer.size();
37
38 // open tmp_lcp_file
39 uint8_t int_width = bits::hi(max_lcp_value) + 1;
40 int_vector_buffer<> out_buf(tmp_lcp_file, std::ios::out, buffer_size, int_width); // Output buffer
41 // Write values into buffer
42 for (size_type i = 0, calc_idx = 0; i < n; ++i)
43 {
44 if (index_done[i])
45 { // If value was already calculated
46 out_buf[i] = lcp_buffer[i]; // Copy value
47 }
48 else
49 {
50 if (partial_lcp[calc_idx])
51 { // If value was calculated now
52 // Insert value
53 out_buf[i] = partial_lcp[calc_idx] + lcp_value_offset;
54 index_done[i] = true;
55 }
56 ++calc_idx;
57 }
58 }
59 lcp_buffer.close();
60 out_buf.close();
61 // Close file and replace old file with new one
62 sdsl::rename(tmp_lcp_file, lcp_file);
63}
64
65template <class tWT>
66void create_C_array(std::vector<uint64_t> & C, const tWT & wt)
67{
68 uint64_t quantity; // quantity of characters in interval
69 std::vector<unsigned char> cs(wt.sigma); // list of characters in the interval
70 std::vector<uint64_t> rank_c_i(wt.sigma); // number of occurrence of character in [0 .. i-1]
71 std::vector<uint64_t> rank_c_j(wt.sigma); // number of occurrence of character in [0 .. j-1]
72
73 C = std::vector<uint64_t>(257, 0);
74 interval_symbols(wt, 0, wt.size(), quantity, cs, rank_c_i, rank_c_j);
75 for (uint64_t i = 0; i < quantity; ++i)
76 {
77 unsigned char c = cs[i];
78 C[c + 1] = rank_c_j[i];
79 }
80 for (uint64_t i = 1; i < C.size() - 1; ++i) { C[i + 1] += C[i]; }
81}
82
84{
85 typedef bit_vector::size_type size_type;
86 typedef std::queue<uint8_t> tQ;
87
88 private:
89 static const uint32_t m_buffer_size = 10000; // 409600;
90 uint8_t m_write_buf[m_buffer_size];
91 uint8_t m_read_buf[m_buffer_size];
92 size_type m_widx; // write index
93 size_type m_ridx; // read index
94 bool m_sync; // are read and write buffer the same?
95 size_type m_disk_buffered_blocks; // number of blocks written to disk and not read again yet
96 char m_c;
97 size_type m_rb; // read blocks
98 size_type m_wb; // written blocks
99
100 std::string m_file_name;
101
102 std::fstream m_stream;
103
104 public:
106 : m_widx(0)
107 , m_ridx(0)
108 , m_sync(true)
109 , m_disk_buffered_blocks(0)
110 , m_c('?')
111 , m_rb(0)
112 , m_wb(0)
113 {}
114
115 void init(const std::string & dir, char c)
116 {
117 m_c = c;
118 m_file_name = dir + "buffered_char_queue_" + util::to_string(util::pid());
119 // m_stream.rdbuf()->pubsetbuf(0, 0);
120 }
121
123 {
124 m_stream.close();
125 sdsl::remove(m_file_name);
126 }
127
128 void push_back(uint8_t x)
129 {
130 m_write_buf[m_widx] = x;
131 if (m_sync) { m_read_buf[m_widx] = x; }
132 ++m_widx;
133 if (m_widx == m_buffer_size)
134 {
135 if (!m_sync)
136 { // if not sync, write block to disk
137 if (!m_stream.is_open())
138 {
139 m_stream.open(m_file_name, std::ios::in | std::ios::out | std::ios::binary | std::ios::trunc);
140 }
141 m_stream.seekp(m_buffer_size * (m_wb++), std::ios::beg);
142 m_stream.write((char *)m_write_buf, m_buffer_size);
143 ++m_disk_buffered_blocks;
144 }
145 m_sync = 0;
146 m_widx = 0;
147 }
148 }
149
150 uint8_t pop_front()
151 {
152 uint8_t x = m_read_buf[m_ridx];
153 ++m_ridx;
154 if (m_ridx == m_buffer_size)
155 {
156 if (m_disk_buffered_blocks > 0)
157 {
158 m_stream.seekg(m_buffer_size * (m_rb++), std::ios::beg);
159 m_stream.read((char *)m_read_buf, m_buffer_size);
160 --m_disk_buffered_blocks;
161 }
162 else
163 { // m_disk_buffered_blocks == 0
164 m_sync = 1;
165 memcpy(m_read_buf, m_write_buf, m_widx + 1);
166 }
167 m_ridx = 0;
168 }
169 return x;
170 }
171};
172
175
176template <class size_type_class>
177void push_front_m_index(size_type_class i,
178 uint8_t c,
179 tLI (&m_list)[256],
180 uint8_t (&m_chars)[256],
181 size_type_class & m_char_count)
182{
183 if (m_list[c].empty()) { m_chars[m_char_count++] = c; }
184 m_list[c].push_front(i);
185}
186
187template <class size_type_class>
188void push_back_m_index(size_type_class i,
189 uint8_t c,
190 tLI (&m_list)[256],
191 uint8_t (&m_chars)[256],
192 size_type_class & m_char_count)
193{
194 if (m_list[c].empty()) { m_chars[m_char_count++] = c; }
195 m_list[c].push_back(i);
196}
197
198} // namespace sdsl
199
200#endif
void init(const std::string &dir, char c)
uint64_t size() const
Returns the number of elements currently stored.
void close(bool remove_file=false)
Close the int_vector_buffer.
int_vector_size_type size_type
Definition: int_vector.hpp:229
int_vector.hpp contains the sdsl::int_vector class.
int_vector ::size_type size_type
uint64_t pid()
std::string to_string(const T &t, int w=1)
Namespace for the succinct data structure library.
void push_back_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t(&m_chars)[256], size_type_class &m_char_count)
void interval_symbols(const t_wt &wt, typename t_wt::size_type i, typename t_wt::size_type j, typename t_wt::size_type &k, std::vector< typename t_wt::value_type > &cs, std::vector< typename t_wt::size_type > &rank_c_i, std::vector< typename t_wt::size_type > &rank_c_j)
For each symbol c in wt[i..j-1] get rank(i,c) and rank(j,c).
void create_C_array(std::vector< uint64_t > &C, const tWT &wt)
void insert_lcp_values(int_vector<> &partial_lcp, bit_vector &index_done, std::string lcp_file, uint64_t max_lcp_value, uint64_t lcp_value_offset)
Merges a partial LCP array into the LCP array on disk.
bool empty(const range_type &r)
Empty range check.
Definition: wt_helper.hpp:782
int rename(const std::string &old_filename, const std::string &new_filename)
Rename a file.
Definition: ram_fs.hpp:204
std::vector< int_vector<>::size_type > tVI
int remove(const std::string &)
Remove a file.
Definition: ram_fs.hpp:194
std::list< int_vector<>::size_type > tLI
void push_front_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t(&m_chars)[256], size_type_class &m_char_count)
static constexpr uint32_t hi(uint64_t x)
Position of the most significant set bit the 64-bit word x.
Definition: bits.hpp:651