SeqAn3 3.3.0-rc.1
The Modern C++ library for sequence analysis.
interleaved_bloom_filter.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <algorithm>
16#include <bit>
17
18#include <sdsl/bit_vectors.hpp>
19
22
23namespace seqan3
24{
27enum data_layout : bool
28{
31};
32
35struct bin_count : public detail::strong_type<size_t, bin_count, detail::strong_type_skill::convert>
36{
37 using detail::strong_type<size_t, bin_count, detail::strong_type_skill::convert>::strong_type;
38};
39
42struct bin_size : public detail::strong_type<size_t, bin_size, detail::strong_type_skill::convert>
43{
44 using detail::strong_type<size_t, bin_size, detail::strong_type_skill::convert>::strong_type;
45};
46
49struct hash_function_count : public detail::strong_type<size_t, hash_function_count, detail::strong_type_skill::convert>
50{
51 using detail::strong_type<size_t, hash_function_count, detail::strong_type_skill::convert>::strong_type;
52};
53
56struct bin_index : public detail::strong_type<size_t, bin_index, detail::strong_type_skill::convert>
57{
58 using detail::strong_type<size_t, bin_index, detail::strong_type_skill::convert>::strong_type;
59};
60
132template <data_layout data_layout_mode_ = data_layout::uncompressed>
134{
135private:
137 template <data_layout data_layout_mode>
138 friend class interleaved_bloom_filter;
140
142 using data_type =
144
146 size_t bins{};
150 size_t bin_size_{};
152 size_t hash_shift{};
154 size_t bin_words{};
156 size_t hash_funs{};
160 static constexpr std::array<size_t, 5> hash_seeds{13572355802537770549ULL, // 2**64 / (e/2)
161 13043817825332782213ULL, // 2**64 / sqrt(2)
162 10650232656628343401ULL, // 2**64 / sqrt(3)
163 16499269484942379435ULL, // 2**64 / (sqrt(5)/2)
164 4893150838803335377ULL}; // 2**64 / (3*pi/5)
165
173 inline constexpr size_t hash_and_fit(size_t h, size_t const seed) const
174 {
175 h *= seed;
176 assert(hash_shift < 64);
177 h ^= h >> hash_shift; // XOR and shift higher bits into lower bits
178 h *= 11400714819323198485ULL; // = 2^64 / golden_ration, to expand h to 64 bit range
179 // Use fastrange (integer modulo without division) if possible.
180#ifdef __SIZEOF_INT128__
181 h = static_cast<uint64_t>((static_cast<__uint128_t>(h) * static_cast<__uint128_t>(bin_size_)) >> 64);
182#else
183 h %= bin_size_;
184#endif
185 h *= technical_bins;
186 return h;
187 }
188
189public:
191 static constexpr data_layout data_layout_mode = data_layout_mode_;
192
193 class membership_agent_type; // documented upon definition below
194
195 template <std::integral value_t>
196 class counting_agent_type; // documented upon definition below
197
207
225 {
226 bins = bins_.get();
227 bin_size_ = size.get();
228 hash_funs = funs.get();
229
230 if (bins == 0)
231 throw std::logic_error{"The number of bins must be > 0."};
232 if (hash_funs == 0 || hash_funs > 5)
233 throw std::logic_error{"The number of hash functions must be > 0 and <= 5."};
234 if (bin_size_ == 0)
235 throw std::logic_error{"The size of a bin must be > 0."};
236
238 bin_words = (bins + 63) >> 6; // = ceil(bins/64)
239 technical_bins = bin_words << 6; // = bin_words * 64
240 data = sdsl::bit_vector(technical_bins * bin_size_);
241 }
242
253 {
255 std::tie(ibf.bins, ibf.technical_bins, ibf.bin_size_, ibf.hash_shift, ibf.bin_words, ibf.hash_funs);
256
257 data = sdsl::bit_vector{ibf.data.begin(), ibf.data.end()};
258 }
259
273 {
275 std::tie(ibf.bins, ibf.technical_bins, ibf.bin_size_, ibf.hash_shift, ibf.bin_words, ibf.hash_funs);
276
277 data = sdsl::sd_vector<>{ibf.data};
278 }
280
296 void emplace(size_t const value, bin_index const bin) noexcept
298 {
299 assert(bin.get() < bins);
300 for (size_t i = 0; i < hash_funs; ++i)
301 {
302 size_t idx = hash_and_fit(value, hash_seeds[i]);
303 idx += bin.get();
304 assert(idx < data.size());
305 data[idx] = 1;
306 };
307 }
308
320 void clear(bin_index const bin) noexcept
322 {
323 assert(bin.get() < bins);
324 for (size_t idx = bin.get(), i = 0; i < bin_size_; idx += technical_bins, ++i)
325 data[idx] = 0;
326 }
327
341 template <typename rng_t>
343 void clear(rng_t && bin_range) noexcept
344 {
345 static_assert(std::ranges::forward_range<rng_t>, "The range of bins to clear must model a forward_range.");
346 static_assert(std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<rng_t>>, bin_index>,
347 "The reference type of the range to clear must be seqan3::bin_index.");
348#ifndef NDEBUG
349 for (auto && bin : bin_range)
350 assert(bin.get() < bins);
351#endif // NDEBUG
352
353 for (size_t offset = 0, i = 0; i < bin_size_; offset += technical_bins, ++i)
354 for (auto && bin : bin_range)
355 data[bin.get() + offset] = 0;
356 }
357
381 void increase_bin_number_to(bin_count const new_bins_)
383 {
384 size_t new_bins = new_bins_.get();
385
386 if (new_bins < bins)
387 throw std::invalid_argument{"The number of new bins must be >= the current number of bins."};
388
389 // Equivalent to ceil(new_bins / 64)
390 size_t new_bin_words = (new_bins + 63) >> 6;
391
392 bins = new_bins;
393
394 if (new_bin_words == bin_words) // No need for internal resize if bin_words does not change.
395 return;
396
397 size_t new_technical_bins = new_bin_words << 6;
398 size_t new_bits = bin_size_ * new_technical_bins;
399
400 size_t idx_{new_bits}, idx{data.size()};
401 size_t delta = new_technical_bins - technical_bins + 64;
402
403 data.resize(new_bits);
404
405 for (size_t i = idx_, j = idx; j > 0; i -= new_technical_bins, j -= technical_bins)
406 {
407 size_t stop = i - new_technical_bins;
408
409 for (size_t ii = i - delta, jj = j - 64; stop && ii >= stop; ii -= 64, jj -= 64)
410 {
411 uint64_t old = data.get_int(jj);
412 data.set_int(jj, 0);
413 data.set_int(ii, old);
414 }
415 }
416
417 bin_words = new_bin_words;
418 technical_bins = new_technical_bins;
419 }
421
437 {
438 return membership_agent_type{*this};
439 }
440
452 template <typename value_t = uint16_t>
454 {
455 return counting_agent_type<value_t>{*this};
456 }
458
465 size_t hash_function_count() const noexcept
466 {
467 return hash_funs;
468 }
469
473 size_t bin_count() const noexcept
474 {
475 return bins;
476 }
477
481 size_t bin_size() const noexcept
482 {
483 return bin_size_;
484 }
485
489 size_t bit_size() const noexcept
490 {
491 return data.size();
492 }
494
503 friend bool operator==(interleaved_bloom_filter const & lhs, interleaved_bloom_filter const & rhs) noexcept
504 {
505 return std::tie(lhs.bins,
506 lhs.technical_bins,
507 lhs.bin_size_,
508 lhs.hash_shift,
509 lhs.bin_words,
510 lhs.hash_funs,
511 lhs.data)
512 == std::tie(rhs.bins,
513 rhs.technical_bins,
514 rhs.bin_size_,
515 rhs.hash_shift,
516 rhs.bin_words,
517 rhs.hash_funs,
518 rhs.data);
519 }
520
526 friend bool operator!=(interleaved_bloom_filter const & lhs, interleaved_bloom_filter const & rhs) noexcept
527 {
528 return !(lhs == rhs);
529 }
531
542 constexpr data_type & raw_data() noexcept
543 {
544 return data;
545 }
546
548 constexpr data_type const & raw_data() const noexcept
549 {
550 return data;
551 }
553
561 template <cereal_archive archive_t>
562 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
563 {
564 archive(bins);
565 archive(technical_bins);
566 archive(bin_size_);
567 archive(hash_shift);
568 archive(bin_words);
569 archive(hash_funs);
570 archive(data);
571 }
573};
574
585template <data_layout data_layout_mode>
587{
588private:
591
593 ibf_t const * ibf_ptr{nullptr};
594
595public:
596 class binning_bitvector;
597
607
612 explicit membership_agent_type(ibf_t const & ibf) : ibf_ptr(std::addressof(ibf)), result_buffer(ibf.bin_count())
613 {}
615
618
639 [[nodiscard]] binning_bitvector const & bulk_contains(size_t const value) & noexcept
640 {
641 assert(ibf_ptr != nullptr);
642 assert(result_buffer.size() == ibf_ptr->bin_count());
643
644 std::array<size_t, 5> bloom_filter_indices;
645 std::memcpy(&bloom_filter_indices, &ibf_ptr->hash_seeds, sizeof(size_t) * ibf_ptr->hash_funs);
646
647 for (size_t i = 0; i < ibf_ptr->hash_funs; ++i)
648 bloom_filter_indices[i] = ibf_ptr->hash_and_fit(value, bloom_filter_indices[i]);
649
650 for (size_t batch = 0; batch < ibf_ptr->bin_words; ++batch)
651 {
652 size_t tmp{-1ULL};
653 for (size_t i = 0; i < ibf_ptr->hash_funs; ++i)
654 {
655 assert(bloom_filter_indices[i] < ibf_ptr->data.size());
656 tmp &= ibf_ptr->data.get_int(bloom_filter_indices[i]);
657 bloom_filter_indices[i] += 64;
658 }
659
660 result_buffer.data.set_int(batch << 6, tmp);
661 }
662
663 return result_buffer;
664 }
665
666 // `bulk_contains` cannot be called on a temporary, since the object the returned reference points to
667 // is immediately destroyed.
668 [[nodiscard]] binning_bitvector const & bulk_contains(size_t const value) && noexcept = delete;
670};
671
673template <data_layout data_layout_mode>
675{
676private:
678 using data_type = sdsl::bit_vector;
681
682 friend class membership_agent_type;
683
684public:
688 binning_bitvector() = default;
693 ~binning_bitvector() = default;
694
696 explicit binning_bitvector(size_t const size) : data(size)
697 {}
699
701 size_t size() const noexcept
702 {
703 return data.size();
704 }
705
710 auto begin() noexcept
711 {
712 return data.begin();
713 }
714
716 auto begin() const noexcept
717 {
718 return data.begin();
719 }
720
722 auto end() noexcept
723 {
724 return data.end();
725 }
726
728 auto end() const noexcept
729 {
730 return data.end();
731 }
733
738 friend bool operator==(binning_bitvector const & lhs, binning_bitvector const & rhs) noexcept
739 {
740 return lhs.data == rhs.data;
741 }
742
744 friend bool operator!=(binning_bitvector const & lhs, binning_bitvector const & rhs) noexcept
745 {
746 return !(lhs == rhs);
747 }
749
754 auto operator[](size_t const i) noexcept
755 {
756 assert(i < size());
757 return data[i];
758 }
759
761 auto operator[](size_t const i) const noexcept
762 {
763 assert(i < size());
764 return data[i];
765 }
766
774 constexpr data_type & raw_data() noexcept
775 {
776 return data;
777 }
778
780 constexpr data_type const & raw_data() const noexcept
781 {
782 return data;
783 }
785};
786
810template <std::integral value_t>
811class counting_vector : public std::vector<value_t>
812{
813private:
816
818 template <typename binning_bitvector_t>
819 static constexpr bool is_binning_bitvector =
820 std::same_as<binning_bitvector_t,
822 || std::same_as<binning_bitvector_t,
824
825public:
829 counting_vector() = default;
830 counting_vector(counting_vector const &) = default;
834 ~counting_vector() = default;
835
836 using base_t::base_t;
838
851 template <typename binning_bitvector_t>
852 requires is_binning_bitvector<binning_bitvector_t>
853 counting_vector & operator+=(binning_bitvector_t const & binning_bitvector)
854 {
855 for_each_set_bin(binning_bitvector,
856 [this](size_t const bin)
857 {
858 ++(*this)[bin];
859 });
860 return *this;
861 }
862
870 template <typename binning_bitvector_t>
871 requires is_binning_bitvector<binning_bitvector_t>
872 counting_vector & operator-=(binning_bitvector_t const & binning_bitvector)
873 {
874 for_each_set_bin(binning_bitvector,
875 [this](size_t const bin)
876 {
877 assert((*this)[bin] > 0);
878 --(*this)[bin];
879 });
880 return *this;
881 }
882
894 {
895 assert(this->size() >= rhs.size()); // The counting vector may be bigger than what we need.
896
897 std::transform(this->begin(), this->end(), rhs.begin(), this->begin(), std::plus<value_t>());
898
899 return *this;
900 }
901
907 {
908 assert(this->size() >= rhs.size()); // The counting vector may be bigger than what we need.
909
910 std::transform(this->begin(),
911 this->end(),
912 rhs.begin(),
913 this->begin(),
914 [](auto a, auto b)
915 {
916 assert(a >= b);
917 return a - b;
918 });
919
920 return *this;
921 }
922
923private:
925 template <typename binning_bitvector_t, typename on_bin_fn_t>
926 void for_each_set_bin(binning_bitvector_t && binning_bitvector, on_bin_fn_t && on_bin_fn)
927 {
928 assert(this->size() >= binning_bitvector.size()); // The counting vector may be bigger than what we need.
929
930 // Jump to the next 1 and return the number of places jumped in the bit_sequence
931 auto jump_to_next_1bit = [](size_t & x)
932 {
933 auto const zeros = std::countr_zero(x);
934 x >>= zeros; // skip number of zeros
935 return zeros;
936 };
937
938 // Each iteration can handle 64 bits
939 for (size_t bit_pos = 0; bit_pos < binning_bitvector.size(); bit_pos += 64)
940 {
941 // get 64 bits starting at position `bit_pos`
942 size_t bit_sequence = binning_bitvector.raw_data().get_int(bit_pos);
943
944 // process each relative bin inside the bit_sequence
945 for (size_t bin = bit_pos; bit_sequence != 0u; ++bin, bit_sequence >>= 1)
946 {
947 // Jump to the next 1 and
948 bin += jump_to_next_1bit(bit_sequence);
949
950 on_bin_fn(bin);
951 }
952 }
953 }
954};
955
965template <data_layout data_layout_mode>
966template <std::integral value_t>
968{
969private:
972
974 ibf_t const * ibf_ptr{nullptr};
975
978
979public:
989
994 explicit counting_agent_type(ibf_t const & ibf) :
995 ibf_ptr(std::addressof(ibf)),
996 membership_agent(ibf),
997 result_buffer(ibf.bin_count())
998 {}
1000
1003
1026 template <std::ranges::range value_range_t>
1027 [[nodiscard]] counting_vector<value_t> const & bulk_count(value_range_t && values) & noexcept
1028 {
1029 assert(ibf_ptr != nullptr);
1030 assert(result_buffer.size() == ibf_ptr->bin_count());
1031
1032 static_assert(std::ranges::input_range<value_range_t>, "The values must model input_range.");
1033 static_assert(std::unsigned_integral<std::ranges::range_value_t<value_range_t>>,
1034 "An individual value must be an unsigned integral.");
1035
1036 std::ranges::fill(result_buffer, 0);
1037
1038 for (auto && value : values)
1039 result_buffer += membership_agent.bulk_contains(value);
1040
1041 return result_buffer;
1042 }
1043
1044 // `bulk_count` cannot be called on a temporary, since the object the returned reference points to
1045 // is immediately destroyed.
1046 template <std::ranges::range value_range_t>
1047 [[nodiscard]] counting_vector<value_t> const & bulk_count(value_range_t && values) && noexcept = delete;
1049};
1050
1051} // namespace seqan3
Adaptions of concepts from the Cereal library.
A data structure that behaves like a std::vector and can be used to consolidate the results of multip...
Definition: interleaved_bloom_filter.hpp:812
static constexpr bool is_binning_bitvector
Is binning_bitvector_t a seqan3::interleaved_bloom_filter::membership_agent_type::binning_bitvector?
Definition: interleaved_bloom_filter.hpp:819
counting_vector & operator+=(counting_vector const &rhs)
Bin-wise addition of two seqan3::counting_vectors.
Definition: interleaved_bloom_filter.hpp:893
void for_each_set_bin(binning_bitvector_t &&binning_bitvector, on_bin_fn_t &&on_bin_fn)
Enumerates all bins of a seqan3::interleaved_bloom_filter::membership_agent_type::binning_bitvector.
Definition: interleaved_bloom_filter.hpp:926
counting_vector & operator=(counting_vector const &)=default
Defaulted.
counting_vector & operator=(counting_vector &&)=default
Defaulted.
counting_vector & operator+=(binning_bitvector_t const &binning_bitvector)
Bin-wise adds the bits of a seqan3::interleaved_bloom_filter::membership_agent_type::binning_bitvecto...
Definition: interleaved_bloom_filter.hpp:853
counting_vector(counting_vector const &)=default
Defaulted.
~counting_vector()=default
Defaulted.
counting_vector(counting_vector &&)=default
Defaulted.
counting_vector & operator-=(binning_bitvector_t const &binning_bitvector)
Bin-wise subtracts the bits of a seqan3::interleaved_bloom_filter::membership_agent_type::binning_bit...
Definition: interleaved_bloom_filter.hpp:872
counting_vector()=default
Defaulted.
counting_vector & operator-=(counting_vector const &rhs)
Bin-wise substraction of two seqan3::counting_vectors.
Definition: interleaved_bloom_filter.hpp:906
CRTP base class to declare a strong typedef for a regular type to avoid ambiguous parameter settings ...
Definition: strong_type.hpp:177
constexpr value_t & get() &noexcept
Returns the underlying value.
Definition: strong_type.hpp:204
Manages counting ranges of values for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:968
counting_vector< value_t > const & bulk_count(value_range_t &&values) &noexcept
Counts the occurrences in each bin for all values in a range.
Definition: interleaved_bloom_filter.hpp:1027
membership_agent_type membership_agent
Store a seqan3::interleaved_bloom_filter::membership_agent to call bulk_contains.
Definition: interleaved_bloom_filter.hpp:977
counting_agent_type(counting_agent_type &&)=default
Defaulted.
counting_agent_type(counting_agent_type const &)=default
Defaulted.
counting_vector< value_t > const & bulk_count(value_range_t &&values) &&noexcept=delete
Counts the occurrences in each bin for all values in a range.
counting_agent_type & operator=(counting_agent_type const &)=default
Defaulted.
counting_vector< value_t > result_buffer
Stores the result of bulk_count().
Definition: interleaved_bloom_filter.hpp:1002
counting_agent_type(ibf_t const &ibf)
Construct a counting_agent_type for an existing seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:994
counting_agent_type & operator=(counting_agent_type &&)=default
Defaulted.
A bitvector representing the result of a call to bulk_contains of the seqan3::interleaved_bloom_filte...
Definition: interleaved_bloom_filter.hpp:675
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:774
auto end() noexcept
Returns an iterator to the element following the last element of the container.
Definition: interleaved_bloom_filter.hpp:722
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:780
binning_bitvector(binning_bitvector const &)=default
Defaulted.
auto end() const noexcept
Returns an iterator to the element following the last element of the container.
Definition: interleaved_bloom_filter.hpp:728
data_type data
The bitvector.
Definition: interleaved_bloom_filter.hpp:680
auto operator[](size_t const i) const noexcept
Return the i-th element.
Definition: interleaved_bloom_filter.hpp:761
binning_bitvector(size_t const size)
Construct with given size.
Definition: interleaved_bloom_filter.hpp:696
size_t size() const noexcept
Returns the number of elements.
Definition: interleaved_bloom_filter.hpp:701
binning_bitvector & operator=(binning_bitvector &&)=default
Defaulted.
auto begin() noexcept
Returns an iterator to the first element of the container.
Definition: interleaved_bloom_filter.hpp:710
auto begin() const noexcept
Returns an iterator to the first element of the container.
Definition: interleaved_bloom_filter.hpp:716
auto operator[](size_t const i) noexcept
Return the i-th element.
Definition: interleaved_bloom_filter.hpp:754
binning_bitvector & operator=(binning_bitvector const &)=default
Defaulted.
sdsl::bit_vector data_type
The underlying datatype to use.
Definition: interleaved_bloom_filter.hpp:678
friend bool operator==(binning_bitvector const &lhs, binning_bitvector const &rhs) noexcept
Test for equality.
Definition: interleaved_bloom_filter.hpp:738
friend bool operator!=(binning_bitvector const &lhs, binning_bitvector const &rhs) noexcept
Test for inequality.
Definition: interleaved_bloom_filter.hpp:744
Manages membership queries for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:587
binning_bitvector const & bulk_contains(size_t const value) &noexcept
Determines set membership of a given value.
Definition: interleaved_bloom_filter.hpp:639
membership_agent_type(ibf_t const &ibf)
Construct a membership_agent_type from a seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:612
membership_agent_type & operator=(membership_agent_type const &)=default
Defaulted.
binning_bitvector const & bulk_contains(size_t const value) &&noexcept=delete
Determines set membership of a given value.
membership_agent_type(membership_agent_type &&)=default
Defaulted.
membership_agent_type(membership_agent_type const &)=default
Defaulted.
membership_agent_type & operator=(membership_agent_type &&)=default
Defaulted.
binning_bitvector result_buffer
Stores the result of bulk_contains().
Definition: interleaved_bloom_filter.hpp:617
The IBF binning directory. A data structure that efficiently answers set-membership queries for multi...
Definition: interleaved_bloom_filter.hpp:134
interleaved_bloom_filter(interleaved_bloom_filter< data_layout::uncompressed > const &ibf)
Construct a compressed Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:271
void emplace(size_t const value, bin_index const bin) noexcept
Inserts a value into a specific bin.
Definition: interleaved_bloom_filter.hpp:296
size_t hash_funs
The number of hash functions.
Definition: interleaved_bloom_filter.hpp:156
size_t bin_words
The number of 64-bit integers needed to store bins many bits (e.g. bins = 50 -> bin_words = 1).
Definition: interleaved_bloom_filter.hpp:154
interleaved_bloom_filter(interleaved_bloom_filter< data_layout::compressed > const &ibf)
Construct an uncompressed Interleaved Bloom Filter from a compressed one.
Definition: interleaved_bloom_filter.hpp:251
size_t bin_size_
The size of each bin in bits.
Definition: interleaved_bloom_filter.hpp:150
interleaved_bloom_filter & operator=(interleaved_bloom_filter const &)=default
Defaulted.
membership_agent_type membership_agent() const
Returns a seqan3::interleaved_bloom_filter::membership_agent_type to be used for lookup.
Definition: interleaved_bloom_filter.hpp:436
size_t hash_function_count() const noexcept
Returns the number of hash functions used in the Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:465
constexpr size_t hash_and_fit(size_t h, size_t const seed) const
Perturbs a value and fits it into the vector.
Definition: interleaved_bloom_filter.hpp:173
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:548
void clear(bin_index const bin) noexcept
Clears a specific bin.
Definition: interleaved_bloom_filter.hpp:320
interleaved_bloom_filter(seqan3::bin_count bins_, seqan3::bin_size size, seqan3::hash_function_count funs=seqan3::hash_function_count{2u})
Construct an uncompressed Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:221
interleaved_bloom_filter()=default
Defaulted.
size_t hash_shift
The number of bits to shift the hash value before doing multiplicative hashing.
Definition: interleaved_bloom_filter.hpp:152
static constexpr std::array< size_t, 5 > hash_seeds
Precalculated seeds for multiplicative hashing. We use large irrational numbers for a uniform hashing...
Definition: interleaved_bloom_filter.hpp:160
counting_agent_type< value_t > counting_agent() const
Returns a seqan3::interleaved_bloom_filter::counting_agent_type to be used for counting.
Definition: interleaved_bloom_filter.hpp:453
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:542
interleaved_bloom_filter & operator=(interleaved_bloom_filter &&)=default
Defaulted.
friend bool operator!=(interleaved_bloom_filter const &lhs, interleaved_bloom_filter const &rhs) noexcept
Test for inequality.
Definition: interleaved_bloom_filter.hpp:526
interleaved_bloom_filter(interleaved_bloom_filter &&)=default
Defaulted.
void increase_bin_number_to(bin_count const new_bins_)
Increases the number of bins stored in the Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:381
size_t bin_count() const noexcept
Returns the number of bins that the Interleaved Bloom Filter manages.
Definition: interleaved_bloom_filter.hpp:473
void clear(rng_t &&bin_range) noexcept
Clears a range of bins.
Definition: interleaved_bloom_filter.hpp:343
size_t bin_size() const noexcept
Returns the size of a single bin that the Interleaved Bloom Filter manages.
Definition: interleaved_bloom_filter.hpp:481
size_t bins
The number of bins specified by the user.
Definition: interleaved_bloom_filter.hpp:146
size_t bit_size() const noexcept
Returns the size of the underlying bitvector.
Definition: interleaved_bloom_filter.hpp:489
interleaved_bloom_filter(interleaved_bloom_filter const &)=default
Defaulted.
~interleaved_bloom_filter()=default
Defaulted.
size_t technical_bins
The number of bins stored in the IBF (next multiple of 64 of bins).
Definition: interleaved_bloom_filter.hpp:148
friend bool operator==(interleaved_bloom_filter const &lhs, interleaved_bloom_filter const &rhs) noexcept
Test for equality.
Definition: interleaved_bloom_filter.hpp:503
static constexpr data_layout data_layout_mode
Indicates whether the Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:191
data_type data
The bitvector.
Definition: interleaved_bloom_filter.hpp:158
T countl_zero(T... args)
T countr_zero(T... args)
T data(T... args)
T fill(T... args)
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
data_layout
Determines if the Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:28
@ uncompressed
The Interleaved Bloom Filter is uncompressed.
Definition: interleaved_bloom_filter.hpp:29
@ compressed
The Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:30
constexpr size_t size
The size of a type pack.
Definition: type_pack/traits.hpp:146
T memcpy(T... args)
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
SeqAn specific customisations in the standard namespace.
#define CEREAL_SERIALIZE_FUNCTION_NAME
Macro for Cereal's serialize function.
Definition: platform.hpp:163
Provides basic data structure for strong types.
A strong type that represents the number of bins for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:36
A strong type that represents the bin index for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:57
A strong type that represents the number of bits for each bin in the seqan3::interleaved_bloom_filter...
Definition: interleaved_bloom_filter.hpp:43
A strong type that represents the number of hash functions for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:50
strong_type for seed.
Definition: minimiser_hash.hpp:25
T tie(T... args)
T transform(T... args)