SeqAn3 3.3.0-rc.1
The Modern C++ library for sequence analysis.
bitpacked_sequence.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <concepts>
16#include <iterator>
17#include <ranges>
18#include <type_traits>
19
20#include <sdsl/int_vector.hpp>
21
29
30namespace seqan3
31{
32
63template <writable_semialphabet alphabet_type>
64 requires std::regular<alphabet_type>
66{
67private:
69 static constexpr size_t bits_per_letter = detail::ceil_log2(alphabet_size<alphabet_type>);
70
71 static_assert(bits_per_letter <= 64, "alphabet must be representable in at most 64bit.");
72
74 using data_type = sdsl::int_vector<bits_per_letter>;
75
78
80 class reference_proxy_type : public alphabet_proxy<reference_proxy_type, alphabet_type>
81 {
82 private:
86 friend base_t;
87
89 std::ranges::range_reference_t<data_type> internal_proxy;
90
92 constexpr void on_update() noexcept
93 {
95 }
96
97 public:
98 // Import from base:
99 using base_t::operator=;
100
106 constexpr reference_proxy_type(reference_proxy_type const &) noexcept = default;
107 constexpr reference_proxy_type(reference_proxy_type &&) noexcept = default;
108 constexpr reference_proxy_type & operator=(reference_proxy_type const &) noexcept = default;
109 constexpr reference_proxy_type & operator=(reference_proxy_type &&) noexcept = default;
110 ~reference_proxy_type() noexcept = default;
111
113 reference_proxy_type(std::ranges::range_reference_t<data_type> const internal) noexcept :
114 internal_proxy{internal}
115 {
116 // Call alphabet_base's assign_rank to prevent calling on_update() during construction
117 // which is not necessary, because internal_proxy is already correctly initialised!
118 base_t::base_t::assign_rank(static_cast<alphabet_rank_t<alphabet_type>>(internal));
119 }
121 };
122
125 //NOTE(h-2): it is entirely unclear to me why we need this
126 template <typename t>
127 requires std::is_same_v<std::ranges::range_value_t<std::remove_cvref_t<t>>, alphabet_type>
128 static constexpr bool has_same_value_type_v = true;
130
131public:
139 using value_type = alphabet_type;
150 using const_reference = alphabet_type;
165 using difference_type = std::ranges::range_difference_t<data_type>;
170 using size_type = std::ranges::range_size_t<data_type>;
172
176 bitpacked_sequence() = default;
177 constexpr bitpacked_sequence(bitpacked_sequence const &) = default;
178 constexpr bitpacked_sequence(bitpacked_sequence &&) = default;
179 constexpr bitpacked_sequence & operator=(bitpacked_sequence const &) = default;
182
198 template <typename other_range_t>
199 requires (!std::same_as<bitpacked_sequence, std::remove_cvref_t<other_range_t>>)
200 && std::ranges::input_range<other_range_t> && has_same_value_type_v<other_range_t>
201 explicit bitpacked_sequence(other_range_t && range) :
202 bitpacked_sequence{std::ranges::begin(range), std::ranges::end(range)}
203 {}
204
220 {}
221
239 template <std::forward_iterator begin_iterator_type, typename end_iterator_type>
240 bitpacked_sequence(begin_iterator_type begin_it, end_iterator_type end_it)
241 requires std::sentinel_for<end_iterator_type, begin_iterator_type>
242 && std::common_reference_with<std::iter_value_t<begin_iterator_type>, value_type>
243 {
244 insert(cend(), begin_it, end_it);
245 }
246
261 {}
262
277 {
278 assign(std::begin(ilist), std::end(ilist));
279 return *this;
280 }
281
297 template <std::ranges::input_range other_range_t>
298 void assign(other_range_t && range)
299 requires std::common_reference_with<std::ranges::range_value_t<other_range_t>, value_type>
300 {
301 bitpacked_sequence rhs{std::forward<other_range_t>(range)};
302 swap(rhs);
303 }
304
319 void assign(size_type const count, value_type const value)
320 {
321 bitpacked_sequence rhs{count, value};
322 swap(rhs);
323 }
324
342 template <std::forward_iterator begin_iterator_type, typename end_iterator_type>
343 void assign(begin_iterator_type begin_it, end_iterator_type end_it)
344 requires std::sentinel_for<end_iterator_type, begin_iterator_type>
345 && std::common_reference_with<std::iter_value_t<begin_iterator_type>, value_type>
346 {
347 bitpacked_sequence rhs{begin_it, end_it};
348 swap(rhs);
349 }
350
365 {
366 assign(std::begin(ilist), std::end(ilist));
367 }
368
370
389 iterator begin() noexcept
390 {
391 return iterator{*this};
392 }
393
395 const_iterator begin() const noexcept
396 {
397 return const_iterator{*this};
398 }
399
401 const_iterator cbegin() const noexcept
402 {
403 return const_iterator{*this};
404 }
405
421 iterator end() noexcept
422 {
423 return iterator{*this, size()};
424 }
425
427 const_iterator end() const noexcept
428 {
429 return const_iterator{*this, size()};
430 }
431
433 const_iterator cend() const noexcept
434 {
435 return const_iterator{*this, size()};
436 }
438
458 {
459 if (i >= size()) // [[unlikely]]
460 {
461 throw std::out_of_range{"Trying to access element behind the last in bitpacked_sequence."};
462 }
463 return (*this)[i];
464 }
465
468 {
469 if (i >= size()) // [[unlikely]]
470 {
471 throw std::out_of_range{"Trying to access element behind the last in bitpacked_sequence."};
472 }
473 return (*this)[i];
474 }
475
494 {
495 assert(i < size());
496 return data[i];
497 }
498
500 const_reference operator[](size_type const i) const noexcept
501 {
502 assert(i < size());
503 return assign_rank_to(data[i], const_reference{});
504 }
505
521 reference front() noexcept
522 {
523 assert(size() > 0);
524 return (*this)[0];
525 }
526
528 const_reference front() const noexcept
529 {
530 assert(size() > 0);
531 return (*this)[0];
532 }
533
549 reference back() noexcept
550 {
551 assert(size() > 0);
552 return (*this)[size() - 1];
553 }
554
556 const_reference back() const noexcept
557 {
558 assert(size() > 0);
559 return (*this)[size() - 1];
560 }
561
571 constexpr data_type & raw_data() noexcept
572 {
573 return data;
574 }
575
577 constexpr data_type const & raw_data() const noexcept
578 {
579 return data;
580 }
582
599 bool empty() const noexcept
600 {
601 return size() == 0;
602 }
603
617 size_type size() const noexcept
618 {
619 return data.size();
620 }
621
638 size_type max_size() const noexcept
639 {
640 return data.max_size();
641 }
642
656 size_type capacity() const noexcept
657 {
658 return data.capacity();
659 }
660
681 void reserve(size_type const new_cap)
682 {
683 data.reserve(new_cap);
684 }
685
704 {
705 data.shrink_to_fit();
706 }
708
724 void clear() noexcept
725 {
726 data.clear();
727 }
728
750 {
751 return insert(pos, 1, value);
752 }
753
776 {
777 auto const pos_as_num = std::distance(cbegin(), pos); // we want to insert BEFORE this position
778
779 data.insert(data.begin() + pos_as_num, count, to_rank(value));
780
781 return begin() + pos_as_num;
782 }
783
810 template <std::forward_iterator begin_iterator_type, typename end_iterator_type>
811 iterator insert(const_iterator pos, begin_iterator_type begin_it, end_iterator_type end_it)
812 requires std::sentinel_for<end_iterator_type, begin_iterator_type>
813 && std::common_reference_with<std::iter_value_t<begin_iterator_type>, value_type>
814 {
815 auto const pos_as_num = std::distance(cbegin(), pos);
816
817 auto v = std::ranges::subrange<begin_iterator_type, end_iterator_type>{begin_it, end_it}
818 | views::convert<value_type> | views::to_rank;
819 data.insert(data.begin() + pos_as_num, std::ranges::begin(v), std::ranges::end(v));
820
821 return begin() + pos_as_num;
822 }
823
845 {
846 return insert(pos, ilist.begin(), ilist.end());
847 }
848
871 {
872 if (begin_it >= end_it) // [[unlikely]]
873 return begin() + std::distance(cbegin(), end_it);
874
875 auto const begin_it_pos = std::distance(cbegin(), begin_it);
876 auto const end_it_pos = std::distance(cbegin(), end_it);
877
878 data.erase(data.cbegin() + begin_it_pos, data.cbegin() + end_it_pos);
879
880 return begin() + begin_it_pos;
881 }
882
905 {
906 return erase(pos, pos + 1);
907 }
908
926 void push_back(value_type const value)
927 {
928 data.push_back(to_rank(value));
929 }
930
949 void pop_back()
950 {
951 assert(size() > 0);
952 data.pop_back();
953 }
954
984 {
985 assert(count < max_size());
986 data.resize(count);
987 }
988
993 void resize(size_type const count, value_type const value)
994 {
995 assert(count < max_size());
996 data.resize(count, to_rank(value));
997 }
998
1012 constexpr void swap(bitpacked_sequence & rhs) noexcept
1013 {
1014 std::swap(data, rhs.data);
1015 }
1016
1018 constexpr void swap(bitpacked_sequence && rhs) noexcept
1019 {
1020 std::swap(data, rhs.data);
1021 }
1022
1037 friend constexpr void swap(bitpacked_sequence & lhs, bitpacked_sequence & rhs) noexcept
1038 {
1039 std::swap(lhs, rhs);
1040 }
1041
1043 friend constexpr void swap(bitpacked_sequence && lhs, bitpacked_sequence && rhs) noexcept
1044 {
1045 std::swap(lhs, rhs);
1046 }
1048
1057 constexpr bool operator==(bitpacked_sequence const & rhs) const noexcept
1058 {
1059 return data == rhs.data;
1060 }
1061
1066 constexpr bool operator!=(bitpacked_sequence const & rhs) const noexcept
1067 {
1068 return data != rhs.data;
1069 }
1070
1075 constexpr bool operator<(bitpacked_sequence const & rhs) const noexcept
1076 {
1077 return data < rhs.data;
1078 }
1079
1084 constexpr bool operator>(bitpacked_sequence const & rhs) const noexcept
1085 {
1086 return data > rhs.data;
1087 }
1088
1093 constexpr bool operator<=(bitpacked_sequence const & rhs) const noexcept
1094 {
1095 return data <= rhs.data;
1096 }
1097
1102 constexpr bool operator>=(bitpacked_sequence const & rhs) const noexcept
1103 {
1104 return data >= rhs.data;
1105 }
1107
1115 template <cereal_archive archive_t>
1116 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
1117 {
1118 archive(data);
1119 }
1121};
1122
1123} // namespace seqan3
Provides seqan3::alphabet_proxy.
T begin(T... args)
Adaptions of concepts from the Cereal library.
A CRTP-base that eases the definition of proxy types returned in place of regular alphabets.
Definition: alphabet_proxy.hpp:65
constexpr auto to_rank() const noexcept
Returns the rank.
Definition: alphabet_proxy.hpp:207
Proxy data type returned by seqan3::bitpacked_sequence as reference to element.
Definition: bitpacked_sequence.hpp:81
reference_proxy_type()=delete
Deleted, because using this proxy without a parent would be undefined behaviour.
friend base_t
Befriend the base type so it can call our on_update().
Definition: bitpacked_sequence.hpp:86
constexpr void on_update() noexcept
Update the sdsl-proxy.
Definition: bitpacked_sequence.hpp:92
constexpr reference_proxy_type(reference_proxy_type const &) noexcept=default
Defaulted.
std::ranges::range_reference_t< data_type > internal_proxy
The proxy of the underlying data type.
Definition: bitpacked_sequence.hpp:89
constexpr reference_proxy_type(reference_proxy_type &&) noexcept=default
Defaulted.
A space-optimised version of std::vector that compresses multiple letters into a single byte.
Definition: bitpacked_sequence.hpp:66
void assign(size_type const count, value_type const value)
Assign with count times value.
Definition: bitpacked_sequence.hpp:319
alphabet_type value_type
Equals the alphabet_type.
Definition: bitpacked_sequence.hpp:139
std::ranges::range_difference_t< data_type > difference_type
A signed integer type (usually std::ptrdiff_t)
Definition: bitpacked_sequence.hpp:165
iterator insert(const_iterator pos, begin_iterator_type begin_it, end_iterator_type end_it)
Inserts elements from range [begin_it, end_it) before position in the container.
Definition: bitpacked_sequence.hpp:811
constexpr bitpacked_sequence(bitpacked_sequence &&)=default
Defaulted.
iterator erase(const_iterator pos)
Removes specified elements from the container.
Definition: bitpacked_sequence.hpp:904
constexpr bool operator>(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is greater than rhs.
Definition: bitpacked_sequence.hpp:1084
size_type max_size() const noexcept
Returns the maximum number of elements the container is able to hold due to system or library impleme...
Definition: bitpacked_sequence.hpp:638
bool empty() const noexcept
Checks whether the container is empty.
Definition: bitpacked_sequence.hpp:599
reference operator[](size_type const i) noexcept
Return the i-th element.
Definition: bitpacked_sequence.hpp:493
const_reference back() const noexcept
Return the last element.
Definition: bitpacked_sequence.hpp:556
const_iterator cbegin() const noexcept
Returns an iterator to the first element of the container.
Definition: bitpacked_sequence.hpp:401
iterator erase(const_iterator begin_it, const_iterator end_it)
Removes specified elements from the container.
Definition: bitpacked_sequence.hpp:870
iterator insert(const_iterator pos, value_type const value)
Inserts value before position in the container.
Definition: bitpacked_sequence.hpp:749
~bitpacked_sequence()=default
Defaulted.
void resize(size_type const count, value_type const value)
Resizes the container to contain count elements.
Definition: bitpacked_sequence.hpp:993
void clear() noexcept
Removes all elements from the container.
Definition: bitpacked_sequence.hpp:724
constexpr bool operator<=(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is less than or equal to rhs.
Definition: bitpacked_sequence.hpp:1093
const_iterator end() const noexcept
Returns an iterator to the element following the last element of the container.
Definition: bitpacked_sequence.hpp:427
constexpr bool operator>=(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is greater than or equal to rhs.
Definition: bitpacked_sequence.hpp:1102
constexpr bitpacked_sequence(bitpacked_sequence const &)=default
Defaulted.
void push_back(value_type const value)
Appends the given element value to the end of the container.
Definition: bitpacked_sequence.hpp:926
const_reference at(size_type const i) const
Return the i-th element.
Definition: bitpacked_sequence.hpp:467
constexpr bool operator!=(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is not equal to rhs.
Definition: bitpacked_sequence.hpp:1066
static constexpr size_t bits_per_letter
The number of bits needed to represent a single letter of the alphabet_type.
Definition: bitpacked_sequence.hpp:69
iterator end() noexcept
Returns an iterator to the element following the last element of the container.
Definition: bitpacked_sequence.hpp:421
void assign(std::initializer_list< value_type > ilist)
Assign from std::initializer_list.
Definition: bitpacked_sequence.hpp:364
void reserve(size_type const new_cap)
Increase the capacity to a value that's greater or equal to new_cap.
Definition: bitpacked_sequence.hpp:681
const_iterator begin() const noexcept
Returns an iterator to the first element of the container.
Definition: bitpacked_sequence.hpp:395
bitpacked_sequence(std::initializer_list< value_type > ilist)
Construct from std::initializer_list.
Definition: bitpacked_sequence.hpp:260
void shrink_to_fit()
Requests the removal of unused capacity.
Definition: bitpacked_sequence.hpp:703
const_reference operator[](size_type const i) const noexcept
Return the i-th element.
Definition: bitpacked_sequence.hpp:500
iterator begin() noexcept
Returns an iterator to the first element of the container.
Definition: bitpacked_sequence.hpp:389
std::ranges::range_size_t< data_type > size_type
An unsigned integer type (usually std::size_t)
Definition: bitpacked_sequence.hpp:170
data_type data
The data storage.
Definition: bitpacked_sequence.hpp:77
size_type capacity() const noexcept
Returns the number of elements that the container has currently allocated space for.
Definition: bitpacked_sequence.hpp:656
void assign(other_range_t &&range)
Assign from a different range.
Definition: bitpacked_sequence.hpp:298
alphabet_type const_reference
Equals the alphabet_type / value_type.
Definition: bitpacked_sequence.hpp:150
iterator insert(const_iterator pos, std::initializer_list< value_type > const &ilist)
Inserts elements from initializer list before position in the container.
Definition: bitpacked_sequence.hpp:844
reference at(size_type const i)
Return the i-th element.
Definition: bitpacked_sequence.hpp:457
bitpacked_sequence(begin_iterator_type begin_it, end_iterator_type end_it)
Construct from pair of iterators.
Definition: bitpacked_sequence.hpp:240
friend constexpr void swap(bitpacked_sequence &lhs, bitpacked_sequence &rhs) noexcept
Swap contents with another instance.
Definition: bitpacked_sequence.hpp:1037
const_reference front() const noexcept
Return the first element. Calling front on an empty container is undefined.
Definition: bitpacked_sequence.hpp:528
constexpr bitpacked_sequence & operator=(bitpacked_sequence const &)=default
Defaulted.
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to underlying data structures.
Definition: bitpacked_sequence.hpp:577
constexpr bool operator==(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is equal to rhs.
Definition: bitpacked_sequence.hpp:1057
friend constexpr void swap(bitpacked_sequence &&lhs, bitpacked_sequence &&rhs) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: bitpacked_sequence.hpp:1043
sdsl::int_vector< bits_per_letter > data_type
Type of the underlying SDSL vector.
Definition: bitpacked_sequence.hpp:74
const_iterator cend() const noexcept
Returns an iterator to the element following the last element of the container.
Definition: bitpacked_sequence.hpp:433
bitpacked_sequence(size_type const count, value_type const value)
Construct with count times value.
Definition: bitpacked_sequence.hpp:219
bitpacked_sequence & operator=(std::initializer_list< value_type > ilist)
Assign from std::initializer_list.
Definition: bitpacked_sequence.hpp:276
void resize(size_type const count)
Resizes the container to contain count elements.
Definition: bitpacked_sequence.hpp:983
void pop_back()
Removes the last element of the container.
Definition: bitpacked_sequence.hpp:949
reference back() noexcept
Return the last element.
Definition: bitpacked_sequence.hpp:549
iterator insert(const_iterator pos, size_type const count, value_type const value)
Inserts count copies of value before position in the container.
Definition: bitpacked_sequence.hpp:775
bitpacked_sequence()=default
Defaulted.
constexpr void swap(bitpacked_sequence &&rhs) noexcept
Swap contents with another instance.
Definition: bitpacked_sequence.hpp:1018
bitpacked_sequence(other_range_t &&range)
Construct from a different range.
Definition: bitpacked_sequence.hpp:201
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to underlying data structures.
Definition: bitpacked_sequence.hpp:571
constexpr bool operator<(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is less than rhs.
Definition: bitpacked_sequence.hpp:1075
constexpr void swap(bitpacked_sequence &rhs) noexcept
Swap contents with another instance.
Definition: bitpacked_sequence.hpp:1012
size_type size() const noexcept
Returns the number of elements in the container, i.e. std::distance(begin(), end()).
Definition: bitpacked_sequence.hpp:617
reference front() noexcept
Return the first element. Calling front on an empty container is undefined.
Definition: bitpacked_sequence.hpp:521
constexpr bitpacked_sequence & operator=(bitpacked_sequence &&)=default
Defaulted.
void assign(begin_iterator_type begin_it, end_iterator_type end_it)
Assign from pair of iterators.
Definition: bitpacked_sequence.hpp:343
A generic random access iterator that delegates most operations to the range.
Definition: random_access_iterator.hpp:294
T distance(T... args)
T end(T... args)
auto const to_rank
A view that calls seqan3::to_rank() on each element in the input range.
Definition: to_rank.hpp:66
decltype(seqan3::to_rank(std::declval< semi_alphabet_type >())) alphabet_rank_t
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:169
constexpr auto assign_rank_to
Assign a rank to an alphabet object.
Definition: alphabet/concept.hpp:293
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: alphabet/concept.hpp:155
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition: type_pack/traits.hpp:164
constexpr unsigned_t ceil_log2(unsigned_t const n) noexcept
Computes the ceil of the logarithm to the base of two for unsigned integers.
Definition: math.hpp:88
Refines seqan3::alphabet and adds assignability.
Provides math related functionality.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
SeqAn specific customisations in the standard namespace.
#define CEREAL_SERIALIZE_FUNCTION_NAME
Macro for Cereal's serialize function.
Definition: platform.hpp:163
Provides the seqan3::detail::random_access_iterator class.
T swap(T... args)
Provides seqan3::views::to_char.
Provides seqan3::views::to_rank.
Provides seqan3::views::convert.