SDSL 3.0.1
Succinct Data Structure Library
construct_bwt.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_SDSL_CONSTRUCT_BWT
9#define INCLUDED_SDSL_CONSTRUCT_BWT
10
11#include <iostream>
12#include <list>
13#include <stdexcept>
14
15#include <sdsl/config.hpp> // for cache_config
16#include <sdsl/int_vector.hpp>
17#include <sdsl/sfstream.hpp>
18#include <sdsl/util.hpp>
19
20namespace sdsl
21{
22
24
35template <uint8_t t_width>
37{
38 static_assert(t_width == 0 or t_width == 8,
39 "construct_bwt: width must be `0` for integer alphabet and `8` for byte alphabet");
40
44
45 // (1) Load text from disk
47 size_type n = text.size();
48 uint8_t bwt_width = text.width();
49 std::string bwt_file = cache_file_name(KEY_BWT, config);
50
51 auto gen_bwt = [&n](auto & bwt, auto & text, auto & sa) {
52 size_type to_add[2] = { (size_type)-1, n - 1 };
53 for (size_type i = 0; i < n; ++i) { bwt[i] = text[sa[i] + to_add[sa[i] == 0]]; }
54 };
55 // (2) Prepare to stream SA from disc and BWT to disc
56 if (is_ram_file(bwt_file))
57 {
59 auto bwt = write_out_mapper<t_width>::create(bwt_file, n, bwt_width);
60 gen_bwt(bwt, text, sa);
61 }
62 else
63 {
64 size_type buffer_size = 1000000; // buffer_size is a multiple of 8!
65 std::string sa_file = cache_file_name(conf::KEY_SA, config);
66 int_vector_buffer<> sa_buf(sa_file, std::ios::in, buffer_size);
67 auto bwt = write_out_mapper<t_width>::create(bwt_file, n, bwt_width);
68 // (3) Construct BWT sequentially by streaming SA and random access to text
69 gen_bwt(bwt, text, sa_buf);
70 }
72}
73
74} // namespace sdsl
75
76#endif
A generic vector class for integers of width .
Definition: int_vector.hpp:216
static int_vector_mapper< t_width > create(const std::string &key, cache_config &config)
int_vector.hpp contains the sdsl::int_vector class.
constexpr char KEY_SA[]
Definition: config.hpp:37
constexpr char KEY_TEXT[]
Definition: config.hpp:41
constexpr char KEY_BWT[]
Definition: config.hpp:35
int_vector ::size_type size_type
Namespace for the succinct data structure library.
std::string cache_file_name(const std::string &key, const cache_config &config)
Returns the file name of the resource.
Definition: io.hpp:630
bool is_ram_file(const std::string &file)
Determines if the given file is a RAM-file.
Definition: ram_fs.hpp:158
void register_cache_file(const std::string &key, cache_config &config)
Register the existing resource specified by the key to the cache.
Definition: io.hpp:656
void construct_bwt(cache_config &config)
Constructs the Burrows and Wheeler Transform (BWT) from text over byte- or integer-alphabet and suffi...
sfstream.hpp contains a two stream class which can be used to read/write from/to files or strings.
Helper class for construction process.
Definition: config.hpp:67
Helper classes to transform width=0 and width=8 to corresponding bwt key.
Definition: config.hpp:110
Helper classes to transform width=0 and width=8 to corresponding text key.
Definition: config.hpp:91
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.