SDSL 3.0.1
Succinct Data Structure Library
int_vector_mapper.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
4#ifndef SDSL_INT_VECTOR_MAPPER
5#define SDSL_INT_VECTOR_MAPPER
6
#include <cinttypes>
#include <cstdio>
#include <ios>
#include <utility>

#include <sdsl/int_vector.hpp>
13
14namespace sdsl
15{
16
17template <uint8_t t_width = 0, std::ios_base::openmode t_mode = std::ios_base::out | std::ios_base::in>
19{
20 static_assert(t_width <= 64, "int_vector_mapper: width must be at most 64 bits.");
21
22 public:
27 static constexpr uint8_t fixed_int_width = t_width;
28
29 public:
31
32 private:
33 uint8_t * m_mapped_data = nullptr;
34 uint64_t m_file_size_bytes = 0;
35 off_t m_data_offset = 0;
36 int m_fd = -1;
37 int_vector<t_width> m_wrapper;
38 std::string m_file_name;
39 bool m_delete_on_close;
40
41 public:
45
46 public:
48 {
49 if (m_mapped_data)
50 {
51 auto ret = memory_manager::mem_unmap(m_fd, m_mapped_data, m_file_size_bytes);
52 if (ret != 0)
53 {
54 std::cerr << "int_vector_mapper: error unmapping file mapping'" << m_file_name << "': " << ret
55 << std::endl;
56 }
57
58 if (t_mode & std::ios_base::out)
59 { // write was possible
60 if (m_data_offset)
61 { // if the file is not a plain file
62 // set std::ios::in to not truncate the file
63 osfstream out(m_file_name, std::ios::in);
64 if (out)
65 {
66 out.seekp(0, std::ios::beg);
67 int_vector<t_width>::write_header(m_wrapper.m_size, m_wrapper.m_width, out);
68
69 // out.seekp(0, std::ios::end);
70 }
71 else
72 {
73 std::cerr << "int_vector_mapper: could not open file for header update" << std::endl;
74 /*
75 * throw std::runtime_error("int_vector_mapper: \
76 * could not open file for header update");
77 */
78 }
79 }
80 }
81
82 if (t_mode & std::ios_base::out)
83 {
84 // do we have to truncate?
85 size_type current_bit_size = m_wrapper.m_size;
86 size_type data_size_in_bytes = ((current_bit_size + 63) >> 6) << 3;
87 if (m_file_size_bytes != data_size_in_bytes + m_data_offset)
88 {
89 int tret = memory_manager::truncate_file_mmap(m_fd, data_size_in_bytes + m_data_offset);
90 if (tret == -1)
91 {
92 std::string truncate_error = std::string("int_vector_mapper: truncate error. ") +
93 std::string(util::str_from_errno());
94 std::cerr << truncate_error;
95 }
96 }
97 }
98 }
99 if (m_fd != -1)
100 {
102 if (ret != 0)
103 {
104 std::cerr << "int_vector_mapper: error closing file mapping'" << m_file_name << "': " << ret
105 << std::endl;
106 }
107 if (m_delete_on_close)
108 {
109 int ret_code = sdsl::remove(m_file_name);
110 if (ret_code != 0)
111 {
112 std::cerr << "int_vector_mapper: error deleting file '" << m_file_name << "': " << ret_code
113 << std::endl;
114 }
115 }
116 }
117 m_wrapper.m_data = nullptr;
118 m_wrapper.m_size = 0;
119 }
120
122 {
123 m_wrapper.m_data = ivm.m_wrapper.m_data;
124 m_wrapper.m_size = ivm.m_wrapper.m_size;
125 m_wrapper.width(ivm.m_wrapper.width());
126 m_file_name = ivm.m_file_name;
127 m_delete_on_close = ivm.m_delete_on_close;
128 ivm.m_wrapper.m_data = nullptr;
129 ivm.m_wrapper.m_size = 0;
130 ivm.m_mapped_data = nullptr;
131 ivm.m_fd = -1;
132 }
133
135 {
136 m_wrapper.m_data = ivm.m_wrapper.m_data;
137 m_wrapper.m_size = ivm.m_wrapper.m_size;
138 m_wrapper.width(ivm.m_wrapper.width());
139 m_file_name = ivm.m_file_name;
140 m_delete_on_close = ivm.m_delete_on_close;
141 ivm.m_wrapper.m_data = nullptr;
142 ivm.m_wrapper.m_size = 0;
143 ivm.m_mapped_data = nullptr;
144 ivm.m_fd = -1;
145 return (*this);
146 }
147
148 int_vector_mapper(const std::string & key, const cache_config & config)
149 : int_vector_mapper(cache_file_name(key, config))
150 {}
151
152 int_vector_mapper(const std::string filename, bool is_plain = false, bool delete_on_close = false)
153 : m_data_offset(0)
154 , m_file_name(filename)
155 , m_delete_on_close(delete_on_close)
156 {
157 size_type size_in_bits = 0;
158 uint8_t int_width = t_width;
159 {
160 isfstream f(filename, std::ifstream::binary);
161 if (!f.is_open())
162 {
163 throw std::runtime_error("int_vector_mapper: file " + m_file_name + " does not exist.");
164 }
165 if (!is_plain) { m_data_offset = int_vector<t_width>::read_header(size_in_bits, int_width, f); }
166 }
167
168 m_file_size_bytes = util::file_size(m_file_name);
169
170 if (is_plain)
171 {
172 if (8 != t_width and 16 != t_width and 32 != t_width and 64 != t_width)
173 {
174 throw std::runtime_error("int_vector_mapper: plain vector can "
175 "only be of width 8, 16, 32, 64.");
176 }
177 else
178 {
179 uint8_t byte_width = t_width / 8;
180 // if( m_file_size_bytes % (t_width/8) != 0)
181 if ((m_file_size_bytes & bits::lo_set[bits::cnt(byte_width - 1)]) != 0)
182 {
183 throw std::runtime_error("int_vector_mapper: plain vector not a multiple of byte: " +
184 std::to_string(m_file_size_bytes) + " mod " + std::to_string(byte_width) +
185 " != 0");
186 }
187 }
188 size_in_bits = m_file_size_bytes * 8;
189 }
190
191 // open backend file depending on mode
192 m_fd = memory_manager::open_file_for_mmap(m_file_name, t_mode);
193 if (m_fd == -1)
194 {
195 std::string open_error = std::string("int_vector_mapper: open file error.") +
196 std::string(util::str_from_errno());
197 throw std::runtime_error(open_error);
198 }
199
200 // prepare for mmap
201 m_wrapper.width(int_width);
202 // mmap data
203 m_mapped_data = (uint8_t *)memory_manager::mmap_file(m_fd, m_file_size_bytes, t_mode);
204 if (m_mapped_data == nullptr)
205 {
206 std::string mmap_error = std::string("int_vector_mapper: mmap error. ") +
207 std::string(util::str_from_errno());
208 throw std::runtime_error(mmap_error);
209 }
210
211 m_wrapper.m_size = size_in_bits;
212 free(m_wrapper.m_data);
213 m_wrapper.m_data = (uint64_t *)(m_mapped_data + m_data_offset);
214 }
215
216 std::string file_name() const { return m_file_name; }
217 width_type width() const { return m_wrapper.width(); }
218 void width(const uint8_t new_int_width)
219 {
220 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'width'");
221 m_wrapper.width(new_int_width);
222 }
223 size_type size() const { return m_wrapper.size(); }
225 {
226 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'bit_resize'");
227 size_type new_size_in_bytes = ((bit_size + 63) >> 6) << 3;
228 if (m_file_size_bytes != new_size_in_bytes + m_data_offset)
229 {
230 if (m_mapped_data)
231 {
232 auto ret = memory_manager::mem_unmap(m_fd, m_mapped_data, m_file_size_bytes);
233 if (ret != 0)
234 {
235 std::cerr << "int_vector_mapper: error unmapping file mapping'" << m_file_name << "': " << ret
236 << std::endl;
237 }
238 }
239 int tret = memory_manager::truncate_file_mmap(m_fd, new_size_in_bytes + m_data_offset);
240 if (tret == -1)
241 {
242 std::string truncate_error = std::string("int_vector_mapper: truncate error. ") +
243 std::string(util::str_from_errno());
244 throw std::runtime_error(truncate_error);
245 }
246 m_file_size_bytes = new_size_in_bytes + m_data_offset;
247
248 // perform the actual mapping
249 m_mapped_data = (uint8_t *)memory_manager::mmap_file(m_fd, m_file_size_bytes, t_mode);
250 if (m_mapped_data == nullptr)
251 {
252 std::string mmap_error = std::string("int_vector_mapper: mmap error. ") +
253 std::string(util::str_from_errno());
254 throw std::runtime_error(mmap_error);
255 }
256
257 // update wrapper
258 m_wrapper.m_data = (uint64_t *)(m_mapped_data + m_data_offset);
259 }
260 m_wrapper.m_size = bit_size;
261 }
262
264 {
265 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'resize'");
266 size_type size_in_bits = size * width();
267 bit_resize(size_in_bits);
268 }
269
271 {
272 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'begin'");
273 return m_wrapper.begin();
274 }
276 {
277 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'end'");
278 return m_wrapper.end();
279 }
280 auto begin() const -> typename int_vector<t_width>::const_iterator { return m_wrapper.begin(); }
281 auto end() const -> typename int_vector<t_width>::const_iterator { return m_wrapper.end(); }
282 auto cbegin() const -> typename int_vector<t_width>::const_iterator { return m_wrapper.begin(); }
283 auto cend() const -> typename int_vector<t_width>::const_iterator { return m_wrapper.end(); }
285 {
286 return m_wrapper[idx];
287 }
289 {
290 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'operator[]'");
291 return m_wrapper[idx];
292 }
293 const uint64_t * data() const { return m_wrapper.data(); }
294 uint64_t * data()
295 {
296 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'data'");
297 return m_wrapper.data();
298 }
299 value_type get_int(size_type idx, const uint8_t len = 64) const { return m_wrapper.get_int(idx, len); }
300 void set_int(size_type idx, value_type x, const uint8_t len = 64)
301 {
302 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'set_int'");
303 m_wrapper.set_int(idx, x, len);
304 }
306 {
307 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'push_back'");
308 if (capacity() < size() + 1)
309 {
310 size_type old_size = m_wrapper.m_size;
311 size_type size_in_bits = (size() + append_block_size) * width();
312 bit_resize(size_in_bits);
313 m_wrapper.m_size = old_size;
314 }
315 // update size in wrapper only
316 m_wrapper.m_size += width();
317 m_wrapper[size() - 1] = x;
318 }
320 {
321 size_t data_size_in_bits = 8 * (m_file_size_bytes - m_data_offset);
322 return data_size_in_bits / width();
323 }
324 size_type bit_size() const { return m_wrapper.bit_size(); }
325 template <class container>
326 bool operator==(const container & v) const
327 {
328 return std::equal(begin(), end(), v.begin());
329 }
330 bool operator==(const int_vector<t_width> & v) const { return m_wrapper == v; }
331 bool operator==(const int_vector_mapper & v) const { return m_wrapper == v.m_wrapper; }
332 template <class container>
333 bool operator!=(const container & v) const
334 {
335 return !(*this == v);
336 }
337 void flip()
338 {
339 static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'flip'");
340 m_wrapper.flip();
341 }
342 bool empty() const { return m_wrapper.empty(); }
343};
344
345template <uint8_t t_width = 0>
347{
348 private:
349 static std::string tmp_file(const std::string & dir)
350 {
351 char tmp_file_name[1024] = { 0 };
352#ifdef _WIN32
353 auto ret = GetTempFileName(dir.c_str(), "tmp_mapper_file_", 0, tmp_file_name);
354 if (ret == 0) { throw std::runtime_error("could not create temporary file."); }
355#else
356 sprintf(tmp_file_name, "%s/tmp_mapper_file_%" PRIu64 "_XXXXXX.sdsl", dir.c_str(), util::pid());
357 int fd = mkstemps(tmp_file_name, 5);
358 if (fd == -1) { throw std::runtime_error("could not create temporary file."); }
359 close(fd);
360#endif
361 return std::string(tmp_file_name, strlen(tmp_file_name));
362 }
363
364 public:
366 {
367#ifdef MSVC_COMPILER
368 char tmp_dir_name[1024] = { 0 };
369 auto tmp_dir = GetTempPath(1024, tmp_dir_name);
370 auto file_name = tmp_file(tmp_dir_name);
371#else
372 auto file_name = tmp_file("/tmp");
373#endif
374 return create(file_name);
375 }
377 {
378 auto file_name = tmp_file(config.dir);
379 return create(file_name);
380 }
381 static int_vector_mapper<t_width> create(const std::string & file_name)
382 {
383 // write empty int_vector to init the file
384 int_vector<t_width> tmp_vector;
385 store_to_file(tmp_vector, file_name);
387 }
388};
389
390// creates an empty int_vector<> that will not be deleted
391template <uint8_t t_width = 0>
393{
394 public:
395 static int_vector_mapper<t_width> create(const std::string & key, cache_config & config)
396 {
397 auto file_name = cache_file_name(key, config);
398 auto tmp = create(file_name);
399 register_cache_file(key, config);
400 return std::move(tmp);
401 }
402 static int_vector_mapper<t_width> create(const std::string & file_name)
403 {
404 // write empty int_vector to init the file
405 int_vector<t_width> tmp_vector;
406 store_to_file(tmp_vector, file_name);
408 }
409 static int_vector_mapper<t_width> create(const std::string & file_name, size_t size, uint8_t int_width = t_width)
410 {
411 // write empty int_vector to init the file
412 int_vector<t_width> tmp_vector(0, 0, int_width);
413 store_to_file(tmp_vector, file_name);
415 mapper.resize(size);
416 return mapper;
417 }
418};
419
420template <std::ios_base::openmode t_mode = std::ios_base::out | std::ios_base::in>
422
423template <uint8_t t_width = 0>
425
426} // namespace sdsl
427
428#endif
int_vector_mapper(const std::string filename, bool is_plain=false, bool delete_on_close=false)
bool operator!=(const container &v) const
auto cbegin() const -> typename int_vector< t_width >::const_iterator
value_type get_int(size_type idx, const uint8_t len=64) const
int_vector_mapper(const std::string &key, const cache_config &config)
std::string file_name() const
void bit_resize(const size_type bit_size)
const uint64_t * data() const
bool operator==(const int_vector< t_width > &v) const
auto begin() -> typename int_vector< t_width >::iterator
const size_type append_block_size
void width(const uint8_t new_int_width)
void set_int(size_type idx, value_type x, const uint8_t len=64)
int_vector< t_width >::int_width_type width_type
int_vector_mapper(int_vector_mapper &&ivm)
static constexpr uint8_t fixed_int_width
auto operator[](const size_type &idx) -> typename int_vector< t_width >::reference
int_vector_mapper(const int_vector_mapper &)=delete
int_vector_mapper & operator=(int_vector_mapper &&ivm)
bool operator==(const int_vector_mapper &v) const
auto end() const -> typename int_vector< t_width >::const_iterator
int_vector_mapper & operator=(const int_vector_mapper &)=delete
void push_back(value_type x)
auto cend() const -> typename int_vector< t_width >::const_iterator
auto end() -> typename int_vector< t_width >::iterator
auto operator[](const size_type &idx) const -> typename int_vector< t_width >::const_reference
bool operator==(const container &v) const
int_vector< t_width >::size_type size_type
auto begin() const -> typename int_vector< t_width >::const_iterator
int_vector< t_width >::difference_type difference_type
void resize(const size_type size)
int_vector< t_width >::value_type value_type
A proxy class that acts as a reference to an integer of length len bits in a int_vector.
Definition: int_vector.hpp:845
A generic vector class for integers of width $w \in [1..64]$.
Definition: int_vector.hpp:216
iterator end() noexcept
Iterator that points to the element after the last element of int_vector.
Definition: int_vector.hpp:764
void flip()
Flip all bits of bit_vector.
Definition: int_vector.hpp:779
bool empty() const noexcept
Equivalent to size() == 0.
Definition: int_vector.hpp:500
value_type get_int(size_type idx, const uint8_t len=64) const
Get the integer value of the binary string of length len starting at position idx in the int_vector.
int_vector_size_type size_type
Definition: int_vector.hpp:229
ptrdiff_t difference_type
Definition: int_vector.hpp:228
int_vector_trait< t_width >::const_reference const_reference
Definition: int_vector.hpp:225
int_vector_trait< t_width >::int_width_type int_width_type
Definition: int_vector.hpp:230
size_type bit_size() const noexcept
The number of bits in the int_vector.
Definition: int_vector.hpp:547
uint8_t width() const noexcept
Returns the width of the integers which are accessed via the [] operator.
Definition: int_vector.hpp:595
size_type size() const noexcept
The number of elements in the int_vector.
int_vector_trait< t_width >::value_type value_type
Definition: int_vector.hpp:221
static size_t read_header(int_vector_size_type &size, int_width_type &int_width, std::istream &in)
Read the size and int_width of a int_vector.
Definition: int_vector.hpp:789
const uint64_t * data() const noexcept
Pointer to the raw data of the int_vector.
Definition: int_vector.hpp:566
static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream &out)
Write the size and int_width of a int_vector.
Definition: int_vector.hpp:806
void set_int(size_type idx, value_type x, const uint8_t len=64)
Set the bits from position idx to idx+len-1 to the binary representation of integer x.
iterator begin() noexcept
Iterator that points to the first element of the int_vector.
Definition: int_vector.hpp:759
bool is_open()
Is the stream open?
Definition: sfstream.hpp:222
static void * mmap_file(int fd, uint64_t file_size, std::ios_base::openmode mode)
static int close_file_for_mmap(int fd)
static int mem_unmap(int fd, void *addr, const uint64_t size)
static int open_file_for_mmap(std::string &filename, std::ios_base::openmode mode)
static int truncate_file_mmap(int fd, const uint64_t new_size)
osfstream & seekp(pos_type pos)
Definition: sfstream.hpp:111
static int_vector_mapper< t_width > create()
static int_vector_mapper< t_width > create(const std::string &file_name)
static int_vector_mapper< t_width > create(const cache_config &config)
static int_vector_mapper< t_width > create(const std::string &file_name)
static int_vector_mapper< t_width > create(const std::string &key, cache_config &config)
static int_vector_mapper< t_width > create(const std::string &file_name, size_t size, uint8_t int_width=t_width)
int_vector.hpp contains the sdsl::int_vector class.
memory_management.hpp contains two functions for allocating and deallocating memory
int close(const int fd)
Get fd for file.
Definition: ram_fs.hpp:110
Get the size of a file in bytes size_t file_size(const std::string &file)
Definition: util.hpp:183
uint64_t pid()
std::string to_string(const T &t, int w=1)
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
Definition: io.hpp:630
void register_cache_file(const std::string &key, cache_config &config)
Register the existing resource specified by the key to the cache.
Definition: io.hpp:656
bool store_to_file(const T &v, const std::string &file)
Store a data structure to a file.
Definition: io.hpp:798
int remove(const std::string &)
Remove a file.
Definition: ram_fs.hpp:194
int_vector ::size_type size(const range_type &r)
Size of a range.
Definition: wt_helper.hpp:787
static constexpr uint64_t cnt(uint64_t x)
Counts the number of set bits in x.
Definition: bits.hpp:484
static constexpr uint64_t lo_set[65]
lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set.
Definition: bits.hpp:187
Helper class for construction process.
Definition: config.hpp:67
std::string dir
Definition: config.hpp:71