SDSL 3.0.1
Succinct Data Structure Library
bp_support_gg.hpp
Go to the documentation of this file.
1// Copyright (c) 2016, the SDSL Project Authors. All rights reserved.
2// Please see the AUTHORS file for details. Use of this source code is governed
3// by a BSD license that can be found in the LICENSE file.
8#ifndef INCLUDED_SDSL_BP_SUPPORT_GG
9#define INCLUDED_SDSL_BP_SUPPORT_GG
10
11#include <map>
12#include <set>
13#include <stack>
14#include <stdexcept>
15#include <utility>
16
18#include <sdsl/int_vector.hpp>
20#include <sdsl/rank_support.hpp>
22#include <sdsl/util.hpp>
23
24namespace sdsl
25{
26
28
53// TODO: can rrr_vector replace nearest_neighbour_dictionary?
54template <class t_nnd = nearest_neighbour_dictionary<30>,
55 class t_rank = rank_support_v5<>,
56 class t_select = select_support_mcl<>,
57 uint32_t t_bs = 840>
59{
60 static_assert(t_bs > 2, "bp_support_gg: block size must be greater than 2.");
61
62 public:
64 typedef t_nnd nnd_type;
65 typedef t_rank rank_type;
66 typedef t_select select_type;
68
69 private:
70 const bit_vector * m_bp = nullptr; // balanced parentheses sequence
71 rank_type m_rank_bp; // rank support for m_bp => see excess() and rank()
72 select_type m_select_bp; // select support for m_bp => see select()
73
74 nnd_type m_nnd; // nearest neighbour dictionary for pioneers bit_vector
75
76 bit_vector m_pioneer_bp; // pioneer sequence
77 std::unique_ptr<bp_support_type> m_pioneer_bp_support = nullptr;
78
79 size_type m_size = 0;
80 size_type m_blocks = 0; // number of blocks
81
82 public:
83 const rank_type & bp_rank = m_rank_bp;
84 const select_type & bp_select = m_select_bp;
85
86 bp_support_gg() = default;
87
89 explicit bp_support_gg(const bit_vector * bp)
90 : m_bp(bp)
91 , m_size(bp == nullptr ? 0 : bp->size())
92 , m_blocks((m_size + t_bs - 1) / t_bs)
93 , bp_rank(m_rank_bp)
94 , bp_select(m_select_bp)
95 {
96 if (bp == nullptr)
97 { // -> m_bp == nullptr
98 return;
99 }
100
101 util::init_support(m_rank_bp, bp);
102 util::init_support(m_select_bp, bp);
103 {
104 bit_vector pioneer;
105 pioneer = calculate_pioneers_bitmap_succinct(*m_bp, t_bs);
106 m_nnd = nnd_type(pioneer);
107 }
108
109 m_pioneer_bp.resize(m_nnd.ones());
110 if (m_nnd.ones() > 0 and m_nnd.ones() == m_bp->size())
111 { // m_bp != nullptr see above
112 throw std::logic_error(util::demangle(typeid(this).name()) +
113 ": recursion in the construction does not terminate!");
114 }
115
116 for (size_type i = 1; i <= m_nnd.ones(); ++i) { m_pioneer_bp[i - 1] = (*m_bp)[m_nnd.select(i)]; }
117
118 if (m_bp->size() > 0)
119 { // m_bp != nullptr see above
120 m_pioneer_bp_support = std::unique_ptr<bp_support_type>(new bp_support_type(&m_pioneer_bp));
121 }
122 }
123
126 : m_rank_bp(v.m_rank_bp)
127 , m_select_bp(v.m_select_bp)
128 , m_nnd(v.m_nnd)
129 , m_pioneer_bp(v.m_pioneer_bp)
130 , m_size(v.m_size)
131 , m_blocks(v.m_blocks)
132 {
133
134 m_rank_bp.set_vector(m_bp);
135 m_select_bp.set_vector(m_bp);
136
137 m_pioneer_bp_support.reset(nullptr);
138 if (v.m_pioneer_bp_support != nullptr)
139 {
140 m_pioneer_bp_support.reset(new bp_support_type(*(v.m_pioneer_bp_support)));
141 m_pioneer_bp_support->set_vector(&m_pioneer_bp);
142 }
143 }
144
146 bp_support_gg(bp_support_gg && bp_support) { *this = std::move(bp_support); }
147
149 ~bp_support_gg() = default;
150
153 {
154 if (this != &v)
155 {
156 bp_support_gg tmp(v);
157 *this = std::move(tmp);
158 }
159 return *this;
160 }
161
164 {
165 if (this != &bp_support)
166 {
167 m_bp = bp_support.m_bp;
168 bp_support.m_bp = nullptr;
169
170 m_rank_bp = std::move(bp_support.m_rank_bp);
171 m_rank_bp.set_vector(m_bp);
172 m_select_bp = std::move(bp_support.m_select_bp);
173 m_select_bp.set_vector(m_bp);
174
175 m_nnd = std::move(bp_support.m_nnd);
176
177 m_size = bp_support.m_size;
178 m_blocks = bp_support.m_blocks;
179
180 m_pioneer_bp = std::move(bp_support.m_pioneer_bp);
181
182 m_pioneer_bp_support.reset(nullptr);
183 if (bp_support.m_pioneer_bp_support != nullptr)
184 {
185 std::swap(m_pioneer_bp_support, bp_support.m_pioneer_bp_support);
186 m_pioneer_bp_support->set_vector(&m_pioneer_bp);
187 }
188 }
189 return *this;
190 }
191
192 void set_vector(const bit_vector * bp)
193 {
194 m_bp = bp;
195 m_rank_bp.set_vector(bp);
196 m_select_bp.set_vector(bp);
197 }
198
202 inline size_type excess(size_type i) const { return (m_rank_bp(i + 1) << 1) - i - 1; }
203
207 size_type rank(size_type i) const { return m_rank_bp(i + 1); }
208
214 size_type select(size_type i) const { return m_select_bp(i); }
215
223 {
224 assert(i < m_size);
225 if (!(*m_bp)[i])
226 { // if there is a closing parenthesis at index i return i
227 return i;
228 }
229 size_type mi = 0; // match for i
230 if ((mi = near_find_closing(*m_bp, i + 1, 1, t_bs)) == i)
231 {
232 const size_type i_ = m_nnd.rank(i + 1) - 1; // lemma that this gives us an opening pioneer
233 assert(m_pioneer_bp[i_] == 1); // assert that i2 is an opening parenthesis
234 size_type mi_ = m_pioneer_bp_support->find_close(i_);
235 assert(m_pioneer_bp[mi_] == 0);
236 mi = m_nnd.select(mi_ + 1); /* matching pioneer position in bp */
237 assert((*m_bp)[mi] == 0);
238 mi = (mi / t_bs) * t_bs;
239 size_type epb2 = excess(mi - 1); // excess of first parenthesis in the pioneer block
240 const size_type ei = excess(i); // excess at position i
241 /* invariant: epb >= ei-1 */ // assert( epb+1 >= ei );
242 return near_find_closing(*m_bp, mi, epb2 - ei + 1, t_bs);
243 }
244 return mi;
245 }
246
248
255 {
256 assert(i < m_size);
257 if ((*m_bp)[i])
258 { // if there is a opening parenthesis
259 return i; // return i
260 }
261 size_type mi = 0; // match for i
262 if ((mi = near_find_opening(*m_bp, i - 1, 1, t_bs)) == i)
263 {
264 const size_type i_ = m_nnd.rank(i); // lemma that this gives us an closing pioneer
265 assert(m_pioneer_bp[i_] == 0); // assert that i' is an opening parenthesis
266 const size_type mi_ = m_pioneer_bp_support->find_open(i_);
267 assert(m_pioneer_bp[mi_] == 1);
268 mi = m_nnd.select(mi_ + 1); /* matching pioneer position in bp */
269 assert((*m_bp)[mi] == 1);
270 mi = (mi / t_bs) * t_bs + t_bs - 1;
271 assert(mi < i);
272 size_type epb2 = excess(mi + 1); // excess of last parenthesis in the pioneer block
273 const size_type ei = excess(i); // excess at position i
274 /*invariant: epb >= ei+1*/ // assert( epb >= ei+1 );
275 return near_find_opening(*m_bp, mi, epb2 - ei + 1 - 2 * ((*m_bp)[mi + 1]), t_bs);
276 }
277 return mi;
278 }
279
281
287 {
288 assert(i < m_size);
289 if (!(*m_bp)[i])
290 { // if there is closing parenthesis at position i
291 return find_open(i);
292 }
293 const size_type exi = excess(i);
294 if (exi == 1) // if i is not enclosed by a parentheses pair..
295 return size();
296 size_type ei; // enclose for i
297 if ((ei = near_find_opening(*m_bp, i - 1, 1, t_bs)) == i)
298 {
299 const size_type i_ = m_nnd.rank(i); // next parenthesis in the pioneer bitmap
300 size_type ei_; // enclose for i'
301 ei_ = m_pioneer_bp_support->enclose(i_);
302 assert(m_pioneer_bp[ei_] == 1);
303 ei = m_nnd.select(ei_ + 1);
304 assert((*m_bp)[ei] == 1);
305 ei = (ei / t_bs) * t_bs + t_bs - 1;
306 assert(ei < i);
307 size_type epb2 = excess(ei + 1); // excess of last parenthesis in the pioneer block
308 /* invariant epb+1 >= exi */ // assert( epb+1 >= exi );
309 return near_find_opening(*m_bp, ei, epb2 - exi + 1 + 2 * ((*m_bp)[ei + 1] == 0), t_bs);
310 }
311 return ei;
312 }
313
315
325 size_type rr_enclose(const size_type i, const size_type j) const
326 {
327 assert(j < m_size);
328 assert((*m_bp)[i] == 1 and (*m_bp)[j] == 1);
329 const size_type mip1 = find_close(i) + 1;
330 if (mip1 >= j) return size();
331 return rmq_open(mip1, j);
332 }
333
342 size_type rmq_open(const size_type l, const size_type r) const
343 {
344 if (l >= r) return size();
345 size_type min_ex_pos = r;
346
347 if (l / t_bs == r / t_bs) { min_ex_pos = near_rmq_open(*m_bp, l, r); }
348 else
349 { // parentheses pair does not start in the same block
350 // note: l and r are not in the same block
351 size_type k, ex; // helper variables
352 size_type min_ex = excess(r) + 2 * ((*m_bp)[r] == 0); // minimal excess
353
354 // 1.2
355 size_type l_ = m_nnd.rank(l); // l_ inclusive
356 size_type r_ = m_nnd.rank(r); // r_ exclusive
357
358 size_type min_ex_pos_ = m_pioneer_bp_support->rmq_open(l_, r_);
359 if (min_ex_pos_ < r_)
360 {
361 k = m_nnd.select(min_ex_pos_ + 1);
362 min_ex = excess(k);
363 min_ex_pos = k;
364 }
365 else
366 {
367 // 1.1
368 k = near_rmq_open(*m_bp, (r / t_bs) * t_bs, r);
369 if (k < r)
370 {
371 assert(excess(k) < min_ex);
372 min_ex = excess(k);
373 min_ex_pos = k;
374 }
375 }
376 // 1.3
377 k = near_rmq_open(*m_bp, l, (l / t_bs + 1) * t_bs);
378 if (k < (l / t_bs + 1) * t_bs and (ex = excess(k)) < min_ex)
379 {
380 min_ex = ex;
381 min_ex_pos = k;
382 }
383 }
384 // 1.4
385 if (min_ex_pos < r)
386 return min_ex_pos;
387 else
388 return size();
389 }
390
392
400 {
401 assert(j > i and j < m_size);
402 assert((*m_bp)[i] == 1 and (*m_bp)[j] == 1);
403 size_type mi = find_close(i); // matching parenthesis to i
404 assert(mi > i and mi < j);
405 assert(find_close(j) > j);
406 size_type k = enclose(j);
407 if (k == m_size or k < i) // there exists no opening parenthesis at position mi<k<j.
408 return m_size;
409 size_type kk;
410 do {
411 kk = k;
412 k = enclose(k);
413 } while (k != m_size and k > mi);
414 return kk;
415 }
416
418
423 {
424 assert(l <= r);
425 size_type m = rmq_open(l, r + 1);
426 if (m == size())
427 return r;
428 else if (m == l)
429 return l;
430 else
431 { // m>l and m<=r
432 assert(0 == (*m_bp)[m - 1]);
433 return m - 1;
434 }
435 }
436
438
444 {
445 assert(j > i);
446 assert((*m_bp)[i] == 1 and (*m_bp)[j] == 1);
447 size_type k = rr_enclose(i, j);
448 if (k == size())
449 return enclose(j);
450 else
451 return enclose(k);
452 }
453
455
458 {
459 assert(i < m_size);
460 if (!i) return 0;
461 size_type ones = m_rank_bp(i);
462 if (ones)
463 { // ones > 0
464 assert(m_select_bp(ones) < i);
465 return i - m_select_bp(ones) - 1;
466 }
467 else
468 {
469 return i;
470 }
471 }
472
476 size_type size() const { return m_size; }
477
479
483 size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const
484 {
485 structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
486 size_type written_bytes = 0;
487 written_bytes += write_member(m_size, out, child, "size");
488 written_bytes += write_member(m_blocks, out, child, "block_cnt");
489
490 written_bytes += m_rank_bp.serialize(out, child, "bp_rank");
491 written_bytes += m_select_bp.serialize(out, child, "bp_select");
492 written_bytes += m_nnd.serialize(out, child, "nearest_neighbour_dictionary");
493
494 written_bytes += m_pioneer_bp.serialize(out, child, "pioneer_bp");
495 if (m_bp != nullptr and m_bp->size() > 0)
496 written_bytes += m_pioneer_bp_support->serialize(out, child, "pioneer_bp_support");
497 structure_tree::add_size(child, written_bytes);
498 return written_bytes;
499 }
500
502
506 void load(std::istream & in, const bit_vector * bp)
507 {
508 m_bp = bp;
509 read_member(m_size, in);
510 read_member(m_blocks, in);
511
512 m_rank_bp.load(in, m_bp);
513 m_select_bp.load(in, m_bp);
514 m_nnd.load(in);
515
516 m_pioneer_bp.load(in);
517 m_pioneer_bp_support.reset(nullptr);
518 if (m_bp != nullptr and m_bp->size() > 0)
519 {
520 m_pioneer_bp_support.reset(new bp_support_type());
521 m_pioneer_bp_support->load(in, &m_pioneer_bp);
522 }
523 }
524
525 template <typename archive_t>
526 void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
527 {
528 ar(CEREAL_NVP(m_size));
529 ar(CEREAL_NVP(m_blocks));
530 ar(CEREAL_NVP(m_rank_bp));
531 ar(CEREAL_NVP(m_select_bp));
532 ar(CEREAL_NVP(m_nnd));
533 ar(CEREAL_NVP(m_pioneer_bp));
534 ar(CEREAL_NVP(m_pioneer_bp_support));
535 }
536
537 template <typename archive_t>
538 void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
539 {
540 ar(CEREAL_NVP(m_size));
541 ar(CEREAL_NVP(m_blocks));
542 ar(CEREAL_NVP(m_rank_bp));
543 ar(CEREAL_NVP(m_select_bp));
544 ar(CEREAL_NVP(m_nnd));
545 ar(CEREAL_NVP(m_pioneer_bp));
546 ar(CEREAL_NVP(m_pioneer_bp_support));
547 if (m_pioneer_bp_support != nullptr) { m_pioneer_bp_support->set_vector(&m_pioneer_bp); }
548 }
549
551 bool operator==(bp_support_gg const & other) const noexcept
552 {
553 return (m_size == other.m_size) && (m_blocks == other.m_blocks) && (m_rank_bp == other.m_rank_bp) &&
554 (m_select_bp == other.m_select_bp) && (m_nnd == other.m_nnd) && (m_pioneer_bp == other.m_pioneer_bp) &&
555 ((m_pioneer_bp_support == other.m_pioneer_bp_support) ||
556 (*m_pioneer_bp_support == *other.m_pioneer_bp_support));
557 }
558
560 bool operator!=(bp_support_gg const & other) const noexcept { return !(*this == other); }
561};
562
563} // namespace sdsl
564
565#endif
bp_support_algorithm.hpp contains algorithms for balanced parentheses sequences.
#define CEREAL_NVP(X)
Definition: cereal.hpp:31
A class that provides support for bit_vectors that represent a BP sequence.
size_type rr_enclose(const size_type i, const size_type j) const
Range restricted enclose operation.
size_type preceding_closing_parentheses(size_type i) const
Return the number of zeros which procede position i in the balanced parentheses sequence.
bp_support_gg & operator=(const bp_support_gg &v)
Assignment operator.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the bp_support_gg to a stream.
const select_type & bp_select
size_type size() const
The size of the supported balanced parentheses sequence.
void CEREAL_LOAD_FUNCTION_NAME(archive_t &ar)
size_type rmq_open(const size_type l, const size_type r) const
Search the interval [l,r-1] for an opening parenthesis, say i, such that find_close(i) >= r.
void set_vector(const bit_vector *bp)
size_type double_enclose(size_type i, size_type j) const
The double enclose operation.
size_type find_open(size_type i) const
Calculate the matching opening parenthesis.
size_type rank(size_type i) const
Returns the number of opening parentheses up to and including index i.
bp_support_gg & operator=(bp_support_gg &&bp_support)
Assignment Move operator.
void CEREAL_SAVE_FUNCTION_NAME(archive_t &ar) const
size_type rr_enclose_naive(size_type i, size_type j) const
The range restricted enclose operation.
size_type find_close(size_type i) const
Calculate the index of the matching closing parenthesis to the parenthesis at index i.
bp_support_gg< nnd_type, rank_type, select_support_scan<>, t_bs > bp_support_type
size_type rmq(size_type l, size_type r) const
The range minimum query (rmq) returns the index of the parenthesis with minimal excess in the range .
const rank_type & bp_rank
size_type excess(size_type i) const
Calculates the excess value at index i.
void load(std::istream &in, const bit_vector *bp)
Load the bp_support_gg for a bit_vector v.
bp_support_gg()=default
bp_support_gg(const bit_vector *bp)
Constructor.
bool operator==(bp_support_gg const &other) const noexcept
Equality operator.
bit_vector::size_type size_type
bool operator!=(bp_support_gg const &other) const noexcept
Inequality operator.
bp_support_gg(bp_support_gg &&bp_support)
Move constructor.
size_type enclose(size_type i) const
Calculate enclose.
~bp_support_gg()=default
Destructor.
size_type select(size_type i) const
Returns the index of the i-th opening parenthesis.
bp_support_gg(const bp_support_gg &v)
Copy constructor.
int_vector_size_type size_type
Definition: int_vector.hpp:229
void load(std::istream &in)
Load the int_vector for a stream.
size_type size() const noexcept
The number of elements in the int_vector.
size_type serialize(std::ostream &out, structure_tree_node *v=nullptr, std::string name="") const
Serializes the int_vector to a stream.
void resize(const size_type size)
Resize the int_vector in terms of elements.
Definition: int_vector.hpp:521
static structure_tree_node * add_child(structure_tree_node *v, const std::string &name, const std::string &type)
static void add_size(structure_tree_node *v, uint64_t value)
int_vector.hpp contains the sdsl::int_vector class.
Namespace for the succinct data structure library.
uint64_t near_find_closing(const bit_vector &bp, uint64_t i, uint64_t closings, const uint64_t block_size)
uint64_t near_rmq_open(const bit_vector &bp, const uint64_t begin, const uint64_t end)
void swap(int_vector_reference< t_int_vector > x, int_vector_reference< t_int_vector > y) noexcept
Definition: int_vector.hpp:946
size_t write_member(const T &t, std::ostream &out, sdsl::structure_tree_node *v=nullptr, std::string name="")
Definition: io.hpp:84
void read_member(T &t, std::istream &in)
Definition: io.hpp:111
uint64_t near_find_opening(const bit_vector &bp, uint64_t i, const uint64_t openings, const uint64_t block_size)
bit_vector calculate_pioneers_bitmap_succinct(const bit_vector &bp, uint64_t block_size)
Space-efficient version of calculate_pioneers_bitmap.
nearest_neighbour_dictionary.hpp contains a class which supports rank/select for sparse populated sds...
rank_support.hpp contains classes that support a sdsl::bit_vector with constant time rank information...
select_support.hpp contains classes that support a sdsl::bit_vector with constant time select informa...
util.hpp contains some helper methods for int_vector and other stuff like demangle class names.