SDSL 3.0.1
Succinct Data Structure Library
int_vector_buffer.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_INT_VECTOR_BUFFER
9#define INCLUDED_INT_VECTOR_BUFFER
10
11#include <cassert>
12#include <fstream>
13#include <iostream>
14#include <stdio.h>
15#include <string>
16
17#include <sdsl/int_vector.hpp>
18#include <sdsl/iterators.hpp>
19
20namespace sdsl
21{
22
23template <uint8_t t_width = 0>
25{
26 public:
27 class iterator;
30
31 private:
32 static_assert(t_width <= 64, "int_vector_buffer: width must be at most 64 bits.");
33 sdsl::isfstream m_ifile;
34 sdsl::osfstream m_ofile;
35 std::string m_filename;
36 int_vector<t_width> m_buffer;
37 bool m_need_to_write = false;
38 // length of int_vector header in bytes: 0 for plain, 8 for int_vector<t_width> (0 < t_width), 9 for int_vector<0>
39 uint64_t m_offset = 0;
40 uint64_t m_buffersize = 8; // in elements! m_buffersize*width() must be a multiple of 8!
41 uint64_t m_size = 0; // size of int_vector_buffer
42 uint64_t m_begin = 0; // number in elements
43
45 void read_block(const uint64_t idx)
46 {
47 m_begin = (idx / m_buffersize) * m_buffersize;
48 if (m_begin >= m_size) { util::set_to_value(m_buffer, 0); }
49 else
50 {
51 m_ifile.seekg(m_offset + (m_begin * width()) / 8);
52 assert(m_ifile.good());
53 m_ifile.read((char *)m_buffer.data(), (m_buffersize * width()) / 8);
54 if ((uint64_t)m_ifile.gcount() < (m_buffersize * width()) / 8) { m_ifile.clear(); }
55 assert(m_ifile.good());
56 for (uint64_t i = m_size - m_begin; i < m_buffersize; ++i) { m_buffer[i] = 0; }
57 }
58 }
59
61 void write_block()
62 {
63 if (m_need_to_write)
64 {
65 m_ofile.seekp(m_offset + (m_begin * width()) / 8);
66 assert(m_ofile.good());
67 if (m_begin + m_buffersize >= m_size)
68 {
69 // last block in file
70 uint64_t wb = ((m_size - m_begin) * width() + 7) / 8;
71 m_ofile.write((char *)m_buffer.data(), wb);
72 }
73 else
74 {
75 m_ofile.write((char *)m_buffer.data(), (m_buffersize * width()) / 8);
76 }
77 m_ofile.flush();
78 assert(m_ofile.good());
79 m_need_to_write = false;
80 }
81 }
82
84 uint64_t read(const uint64_t idx)
85 {
86 assert(is_open());
87 assert(idx < m_size);
88 if (idx < m_begin or m_begin + m_buffersize <= idx)
89 {
90 write_block();
91 read_block(idx);
92 }
93 return m_buffer[idx - m_begin];
94 }
95
97 void write(const uint64_t idx, const uint64_t value)
98 {
99 assert(is_open());
100 // If idx is not in current block, write current block and load needed block
101 if (idx < m_begin or m_begin + m_buffersize <= idx)
102 {
103 write_block();
104 read_block(idx);
105 }
106 if (m_size <= idx) { m_size = idx + 1; }
107 m_need_to_write = true;
108 m_buffer[idx - m_begin] = value;
109 }
110
111 public:
114
116
125 int_vector_buffer(const std::string filename,
126 std::ios::openmode mode = std::ios::in,
127 const uint64_t buffer_size = 1024 * 1024,
128 const uint8_t int_width = t_width,
129 const bool is_plain = false)
130 {
131 m_filename = filename;
132 assert(!(mode & std::ios::app));
133 mode &= ~std::ios::app;
134 m_buffer.width(int_width);
135 if (is_plain)
136 {
137 m_offset = 0;
138 // is_plain is only allowed with width() in {8, 16, 32, 64}
139 assert(8 == width() or 16 == width() or 32 == width() or 64 == width());
140 }
141 else
142 {
143 m_offset = 8; // TODO: make this dependent on header size of int_vector<t_width>
144 }
145
146 // Open file for IO
147 m_ofile.open(m_filename, mode | std::ios::out | std::ios::binary);
148 assert(m_ofile.good());
149 m_ifile.open(m_filename, std::ios::in | std::ios::binary);
150 assert(m_ifile.good());
151 if (mode & std::ios::in)
152 {
153 uint64_t size = 0;
154 if (is_plain)
155 {
156 m_ifile.seekg(0, std::ios_base::end);
157 size = m_ifile.tellg() * 8;
158 }
159 else
160 {
161 uint8_t width = 0;
163 m_buffer.width(width);
164 }
165 assert(m_ifile.good());
166 m_size = size / width();
167 }
168 buffersize(buffer_size);
169 }
170
173 : m_filename(std::move(ivb.m_filename))
174 , m_buffer(std::move(ivb.m_buffer))
175 , m_need_to_write(ivb.m_need_to_write)
176 , m_offset(ivb.m_offset)
177 , m_buffersize(ivb.m_buffersize)
178 , m_size(ivb.m_size)
179 , m_begin(ivb.m_begin)
180 {
181 ivb.m_ifile.close();
182 ivb.m_ofile.close();
183 m_ifile.open(m_filename, std::ios::in | std::ios::binary);
184 m_ofile.open(m_filename, std::ios::in | std::ios::out | std::ios::binary);
185 assert(m_ifile.good());
186 assert(m_ofile.good());
187 // set ivb to default-constructor state
188 ivb.m_filename = "";
189 ivb.m_buffer = int_vector<t_width>();
190 ivb.m_need_to_write = false;
191 ivb.m_offset = 0;
192 ivb.m_buffersize = 8;
193 ivb.m_size = 0;
194 ivb.m_begin = 0;
195 }
196
199
202 {
203 close();
204 ivb.m_ifile.close();
205 ivb.m_ofile.close();
206 m_filename = ivb.m_filename;
207 m_ifile.open(m_filename, std::ios::in | std::ios::binary);
208 m_ofile.open(m_filename, std::ios::in | std::ios::out | std::ios::binary);
209 assert(m_ifile.good());
210 assert(m_ofile.good());
211 // assign the values of ivb to this
212 m_buffer = (int_vector<t_width> &&) ivb.m_buffer;
213 m_need_to_write = ivb.m_need_to_write;
214 m_offset = ivb.m_offset;
215 m_buffersize = ivb.m_buffersize;
216 m_size = ivb.m_size;
217 m_begin = ivb.m_begin;
218 // set ivb to default-constructor state
219 ivb.m_filename = "";
220 ivb.m_buffer = int_vector<t_width>();
221 ivb.m_need_to_write = false;
222 ivb.m_offset = 0;
223 ivb.m_buffersize = 8;
224 ivb.m_size = 0;
225 ivb.m_begin = 0;
226 return *this;
227 }
228
230 uint8_t width() const { return m_buffer.width(); }
231
233 uint64_t size() const { return m_size; }
234
236 std::string filename() const { return m_filename; }
237
239 uint64_t buffersize() const
240 {
241 assert(m_buffersize * width() % 8 == 0);
242 return (m_buffersize * width()) / 8;
243 }
244
246 void buffersize(uint64_t buffersize)
247 {
248 if (0ULL == buffersize) buffersize = 8;
249 write_block();
250 if (0 == (buffersize * 8) % width())
251 {
252 m_buffersize = buffersize * 8 /
253 width(); // m_buffersize might not be multiple of 8, but m_buffersize*width() is.
254 }
255 else
256 {
257 uint64_t element_buffersize = (buffersize * 8) / width() +
258 1; // one more element than fits into given buffersize in byte
259 m_buffersize = element_buffersize + 7 - (element_buffersize + 7) % 8; // take next multiple of 8
260 }
261 m_buffer = int_vector<t_width>(m_buffersize, 0, width());
262 if (0 != m_buffersize) read_block(0);
263 }
264
266 bool good() { return m_ifile.good() and m_ofile.good(); }
267
269 bool is_open()
270 {
271 return m_ifile.is_open() and m_ofile.is_open();
272 ;
273 }
274
276 void reset()
277 {
278 // reset file
279 assert(m_ifile.good());
280 assert(m_ofile.good());
281 m_ifile.close();
282 m_ofile.close();
283 m_ofile.open(m_filename, std::ios::out | std::ios::binary);
284 assert(m_ofile.good());
285 m_ifile.open(m_filename, std::ios::in | std::ios::binary);
286 assert(m_ifile.good());
287 assert(m_ofile.good());
288 // reset member variables
289 m_need_to_write = false;
290 m_size = 0;
291 // reset buffer
292 read_block(0);
293 }
294
295 // Forward declaration
296 class reference;
297
299
302 reference operator[](uint64_t idx) { return reference(this, idx); }
303
305 void push_back(const uint64_t value) { write(m_size, value); }
306
308
311 void close(bool remove_file = false)
312 {
313 if (is_open())
314 {
315 if (!remove_file)
316 {
317 write_block();
318 if (0 < m_offset)
319 { // in case of int_vector, write header and trailing zeros
320 uint64_t size = m_size * width();
321 m_ofile.seekp(0, std::ios::beg);
323 assert(m_ofile.good());
324 uint64_t wb = (size + 7) / 8;
325 if (wb % 8)
326 {
327 m_ofile.seekp(m_offset + wb);
328 assert(m_ofile.good());
329 m_ofile.write("\0\0\0\0\0\0\0\0", 8 - wb % 8);
330 assert(m_ofile.good());
331 }
332 }
333 }
334 m_ifile.close();
335 assert(m_ifile.good());
336 m_ofile.close();
337 assert(m_ofile.good());
338 if (remove_file) { sdsl::remove(m_filename); }
339 }
340 }
341
342 iterator begin() { return iterator(*this, 0); }
343
344 iterator end() { return iterator(*this, size()); }
345
347 {
348 friend class int_vector_buffer<t_width>;
349
350 private:
351 int_vector_buffer<t_width> * const m_int_vector_buffer = nullptr;
352 uint64_t m_idx = 0;
353
354 reference() {}
355
356 reference(int_vector_buffer<t_width> * _int_vector_buffer, uint64_t _idx)
357 : m_int_vector_buffer(_int_vector_buffer)
358 , m_idx(_idx)
359 {}
360
361 public:
363 operator uint64_t() const { return m_int_vector_buffer->read(m_idx); }
364
366 reference & operator=(const uint64_t & val)
367 {
368 m_int_vector_buffer->write(m_idx, val);
369 return *this;
370 }
371
373 reference & operator=(reference & x) { return *this = (uint64_t)(x); };
374
375 reference(reference const &) = default;
376
379 {
380 uint64_t x = m_int_vector_buffer->read(m_idx);
381 m_int_vector_buffer->write(m_idx, x + 1);
382 return *this;
383 }
384
386 uint64_t operator++(int)
387 {
388 uint64_t val = (uint64_t) * this;
389 ++(*this);
390 return val;
391 }
392
395 {
396 uint64_t x = m_int_vector_buffer->read(m_idx);
397 m_int_vector_buffer->write(m_idx, x - 1);
398 return *this;
399 }
400
402 uint64_t operator--(int)
403 {
404 uint64_t val = (uint64_t) * this;
405 --(*this);
406 return val;
407 }
408
410 reference & operator+=(const uint64_t x)
411 {
412 uint64_t w = m_int_vector_buffer->read(m_idx);
413 m_int_vector_buffer->write(m_idx, w + x);
414 return *this;
415 }
416
418 reference & operator-=(const uint64_t x)
419 {
420 uint64_t w = m_int_vector_buffer->read(m_idx);
421 m_int_vector_buffer->write(m_idx, w - x);
422 return *this;
423 }
424
425 bool operator==(const reference & x) const { return (uint64_t) * this == (uint64_t)x; }
426
427 bool operator<(const reference & x) const { return (uint64_t) * this < (uint64_t)x; }
428 };
429
431 {
432 private:
434 uint64_t m_idx = 0;
435
436 public:
437 using iterator_category = std::random_access_iterator_tag;
442
443 iterator() = delete;
444 iterator(int_vector_buffer<t_width> & ivb, uint64_t idx = 0)
445 : m_ivb(ivb)
446 , m_idx(idx)
447 {}
448
450 {
451 ++m_idx;
452 return *this;
453 }
454
456 {
457 iterator it = *this;
458 ++(*this);
459 return it;
460 }
461
463 {
464 --m_idx;
465 return *this;
466 }
467
469 {
470 iterator it = *this;
471 --(*this);
472 return it;
473 }
474
475 reference operator*() const { return m_ivb[m_idx]; }
476
478 {
479 if (i < 0) return *this -= (-i);
480 m_idx += i;
481 return *this;
482 }
483
485 {
486 if (i < 0) return *this += (-i);
487 m_idx -= i;
488 return *this;
489 }
490
492 {
493 iterator it = *this;
494 return it += i;
495 }
496
498 {
499 iterator it = *this;
500 return it -= i;
501 }
502
503 bool operator==(const iterator & it) const { return &m_ivb == &(it.m_ivb) and m_idx == it.m_idx; }
504
505 bool operator!=(const iterator & it) const { return !(*this == it); }
506 inline difference_type operator-(const iterator & it) { return (m_idx - it.m_idx); }
507 };
508};
509
510} // namespace sdsl
511
512#endif // include guard
iterator operator-(difference_type i) const
bool operator!=(const iterator &it) const
sdsl::int_vector_buffer< t_width >::difference_type difference_type
sdsl::int_vector_buffer< t_width >::value_type value_type
iterator(int_vector_buffer< t_width > &ivb, uint64_t idx=0)
std::random_access_iterator_tag iterator_category
difference_type operator-(const iterator &it)
iterator & operator-=(difference_type i)
iterator operator+(difference_type i) const
bool operator==(const iterator &it) const
iterator & operator+=(difference_type i)
uint64_t operator++(int)
Postfix increment of the proxy object.
bool operator==(const reference &x) const
reference & operator-=(const uint64_t x)
Subtract assign from the proxy object.
reference & operator--()
Prefix decrement of the proxy object.
reference & operator=(reference &x)
Assignment operator.
reference(reference const &)=default
reference & operator+=(const uint64_t x)
Add assign from the proxy object.
reference & operator=(const uint64_t &val)
Assignment operator for write operations.
uint64_t operator--(int)
Postfix decrement of the proxy object.
reference & operator++()
Prefix increment of the proxy object.
bool operator<(const reference &x) const
uint64_t size() const
Returns the number of elements currently stored.
void reset()
Delete all content and set size to 0.
void close(bool remove_file=false)
Close the int_vector_buffer.
reference operator[](uint64_t idx)
[] operator
int_vector_buffer(const std::string filename, std::ios::openmode mode=std::ios::in, const uint64_t buffer_size=1024 *1024, const uint8_t int_width=t_width, const bool is_plain=false)
Constructor for int_vector_buffer.
int_vector_buffer< t_width > & operator=(int_vector_buffer &&ivb)
Move assignment operator.
int_vector< t_width >::difference_type difference_type
int_vector< t_width >::value_type value_type
bool good()
Returns whether state of underlying streams are good.
void buffersize(uint64_t buffersize)
Set the buffersize in bytes.
std::string filename() const
Returns the filename.
uint64_t buffersize() const
Returns the buffersize in bytes.
void push_back(const uint64_t value)
Appends the given element value to the end of the int_vector_buffer.
uint8_t width() const
Returns the width of the integers which are accessed via the [] operator.
int_vector_buffer(int_vector_buffer &&ivb)
Move constructor.
bool is_open()
Returns whether underlying streams are currently associated to a file.
A generic vector class for integers of width .
Definition: int_vector.hpp:216
ptrdiff_t difference_type
Definition: int_vector.hpp:228
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
Definition: int_vector.hpp:595
int_vector_trait< t_width >::value_type value_type
Definition: int_vector.hpp:221
static size_t read_header(int_vector_size_type &size, int_width_type &int_width, std::istream &in)
Read the size and int_width of a int_vector.
Definition: int_vector.hpp:789
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:566
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
Definition: int_vector.hpp:806
isfstream & seekg(pos_type pos)
Definition: sfstream.hpp:257
bool is_open()
Is the stream close?
Definition: sfstream.hpp:222
std::streampos tellg()
Definition: sfstream.hpp:303
buf_ptr_type open(const std::string &file, std::ios_base::openmode mode=std::ios_base::in)
Open the stream.
Definition: sfstream.hpp:194
void close()
Close the stream.
Definition: sfstream.hpp:233
osfstream & seekp(pos_type pos)
Definition: sfstream.hpp:111
void close()
Close the stream.
Definition: sfstream.hpp:87
bool is_open()
Is the stream close?
Definition: sfstream.hpp:76
buf_ptr_type open(const std::string &file, std::ios_base::openmode mode=std::ios_base::out)
Open the stream.
Definition: sfstream.hpp:48
int_vector.hpp contains the sdsl::int_vector class.
iterators.hpp contains an generic iterator for random access containers.
void set_to_value(t_int_vec &v, uint64_t k)
Set all entries of int_vector to value k.
Definition: util.hpp:563
Namespace for the succinct data structure library.
int remove(const std::string &)
Remove a file.
Definition: ram_fs.hpp:194