18#include <sdsl/bit_vectors.hpp>
132template <data_layout data_layout_mode_ = data_layout::uncompressed>
137 template <data_layout data_layout_mode>
161 13043817825332782213ULL,
162 10650232656628343401ULL,
163 16499269484942379435ULL,
164 4893150838803335377ULL};
178 h *= 11400714819323198485ULL;
180#ifdef __SIZEOF_INT128__
181 h =
static_cast<uint64_t
>((
static_cast<__uint128_t
>(h) *
static_cast<__uint128_t
>(
bin_size_)) >> 64);
195 template <std::
integral value_t>
233 throw std::logic_error{
"The number of hash functions must be > 0 and <= 5."};
255 std::tie(ibf.bins, ibf.technical_bins, ibf.bin_size_, ibf.hash_shift, ibf.bin_words, ibf.hash_funs);
257 data = sdsl::bit_vector{ibf.data.begin(), ibf.data.end()};
275 std::tie(ibf.bins, ibf.technical_bins, ibf.bin_size_, ibf.hash_shift, ibf.bin_words, ibf.hash_funs);
277 data = sdsl::sd_vector<>{ibf.data};
299 assert(bin.get() <
bins);
304 assert(idx <
data.size());
323 assert(bin.get() <
bins);
341 template <
typename rng_t>
343 void clear(rng_t && bin_range)
noexcept
345 static_assert(std::ranges::forward_range<rng_t>,
"The range of bins to clear must model a forward_range.");
346 static_assert(std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<rng_t>>,
bin_index>,
347 "The reference type of the range to clear must be seqan3::bin_index.");
349 for (
auto && bin : bin_range)
350 assert(bin.get() <
bins);
354 for (
auto && bin : bin_range)
384 size_t new_bins = new_bins_.get();
390 size_t new_bin_words = (new_bins + 63) >> 6;
397 size_t new_technical_bins = new_bin_words << 6;
398 size_t new_bits =
bin_size_ * new_technical_bins;
400 size_t idx_{new_bits}, idx{
data.size()};
403 data.resize(new_bits);
405 for (
size_t i = idx_, j = idx; j > 0; i -= new_technical_bins, j -=
technical_bins)
407 size_t stop = i - new_technical_bins;
409 for (
size_t ii = i - delta, jj = j - 64; stop && ii >= stop; ii -= 64, jj -= 64)
411 uint64_t old =
data.get_int(jj);
413 data.set_int(ii, old);
452 template <
typename value_t = u
int16_t>
528 return !(lhs == rhs);
561 template <cereal_archive archive_t>
585template <data_layout data_layout_mode>
596 class binning_bitvector;
641 assert(ibf_ptr !=
nullptr);
642 assert(result_buffer.
size() == ibf_ptr->bin_count());
645 std::memcpy(&bloom_filter_indices, &ibf_ptr->hash_seeds,
sizeof(
size_t) * ibf_ptr->hash_funs);
647 for (
size_t i = 0; i < ibf_ptr->hash_funs; ++i)
648 bloom_filter_indices[i] = ibf_ptr->hash_and_fit(value, bloom_filter_indices[i]);
650 for (
size_t batch = 0; batch < ibf_ptr->bin_words; ++batch)
653 for (
size_t i = 0; i < ibf_ptr->hash_funs; ++i)
655 assert(bloom_filter_indices[i] < ibf_ptr->
data.size());
656 tmp &= ibf_ptr->data.get_int(bloom_filter_indices[i]);
657 bloom_filter_indices[i] += 64;
660 result_buffer.
data.set_int(batch << 6, tmp);
663 return result_buffer;
673template <data_layout data_layout_mode>
740 return lhs.data == rhs.data;
746 return !(lhs == rhs);
810template <std::
integral value_t>
818 template <
typename binning_bitvector_t>
820 std::same_as<binning_bitvector_t,
822 || std::same_as<binning_bitvector_t,
836 using base_t::base_t;
851 template <
typename binning_bitvector_t>
852 requires is_binning_bitvector<binning_bitvector_t>
856 [
this](
size_t const bin)
870 template <
typename binning_bitvector_t>
871 requires is_binning_bitvector<binning_bitvector_t>
875 [
this](
size_t const bin)
877 assert((*
this)[bin] > 0);
925 template <
typename binning_bitvector_t,
typename on_bin_fn_t>
928 assert(this->
size() >= binning_bitvector.size());
931 auto jump_to_next_1bit = [](
size_t & x)
939 for (
size_t bit_pos = 0; bit_pos < binning_bitvector.size(); bit_pos += 64)
942 size_t bit_sequence = binning_bitvector.raw_data().get_int(bit_pos);
945 for (
size_t bin = bit_pos; bit_sequence != 0u; ++bin, bit_sequence >>= 1)
948 bin += jump_to_next_1bit(bit_sequence);
965template <data_layout data_layout_mode>
966template <std::
integral value_t>
995 ibf_ptr(
std::addressof(ibf)),
1026 template <std::ranges::range value_range_t>
1029 assert(ibf_ptr !=
nullptr);
1030 assert(result_buffer.
size() == ibf_ptr->bin_count());
1032 static_assert(std::ranges::input_range<value_range_t>,
"The values must model input_range.");
1033 static_assert(std::unsigned_integral<std::ranges::range_value_t<value_range_t>>,
1034 "An individual value must be an unsigned integral.");
1038 for (
auto && value : values)
1041 return result_buffer;
1046 template <std::ranges::range value_range_t>
Adaptions of concepts from the Cereal library.
A data structure that behaves like a std::vector and can be used to consolidate the results of multip...
Definition: interleaved_bloom_filter.hpp:812
static constexpr bool is_binning_bitvector
Is binning_bitvector_t a seqan3::interleaved_bloom_filter::membership_agent_type::binning_bitvector?
Definition: interleaved_bloom_filter.hpp:819
counting_vector & operator+=(counting_vector const &rhs)
Bin-wise addition of two seqan3::counting_vectors.
Definition: interleaved_bloom_filter.hpp:893
void for_each_set_bin(binning_bitvector_t &&binning_bitvector, on_bin_fn_t &&on_bin_fn)
Enumerates all bins of a seqan3::interleaved_bloom_filter::membership_agent_type::binning_bitvector.
Definition: interleaved_bloom_filter.hpp:926
counting_vector & operator=(counting_vector const &)=default
Defaulted.
counting_vector & operator=(counting_vector &&)=default
Defaulted.
counting_vector & operator+=(binning_bitvector_t const &binning_bitvector)
Bin-wise adds the bits of a seqan3::interleaved_bloom_filter::membership_agent_type::binning_bitvecto...
Definition: interleaved_bloom_filter.hpp:853
counting_vector(counting_vector const &)=default
Defaulted.
~counting_vector()=default
Defaulted.
counting_vector(counting_vector &&)=default
Defaulted.
counting_vector & operator-=(binning_bitvector_t const &binning_bitvector)
Bin-wise subtracts the bits of a seqan3::interleaved_bloom_filter::membership_agent_type::binning_bit...
Definition: interleaved_bloom_filter.hpp:872
counting_vector()=default
Defaulted.
counting_vector & operator-=(counting_vector const &rhs)
Bin-wise subtraction of two seqan3::counting_vectors.
Definition: interleaved_bloom_filter.hpp:906
CRTP base class to declare a strong typedef for a regular type to avoid ambiguous parameter settings ...
Definition: strong_type.hpp:177
constexpr value_t & get() &noexcept
Returns the underlying value.
Definition: strong_type.hpp:204
constexpr strong_type() noexcept=default
Defaulted.
Manages counting ranges of values for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:968
counting_vector< value_t > const & bulk_count(value_range_t &&values) &noexcept
Counts the occurrences in each bin for all values in a range.
Definition: interleaved_bloom_filter.hpp:1027
membership_agent_type membership_agent
Store a seqan3::interleaved_bloom_filter::membership_agent to call bulk_contains.
Definition: interleaved_bloom_filter.hpp:977
~counting_agent_type()=default
Defaulted.
counting_agent_type()=default
Defaulted.
counting_agent_type(counting_agent_type &&)=default
Defaulted.
counting_agent_type(counting_agent_type const &)=default
Defaulted.
counting_vector< value_t > const & bulk_count(value_range_t &&values) &&noexcept=delete
Counts the occurrences in each bin for all values in a range.
counting_agent_type & operator=(counting_agent_type const &)=default
Defaulted.
counting_vector< value_t > result_buffer
Stores the result of bulk_count().
Definition: interleaved_bloom_filter.hpp:1002
counting_agent_type(ibf_t const &ibf)
Construct a counting_agent_type for an existing seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:994
counting_agent_type & operator=(counting_agent_type &&)=default
Defaulted.
A bitvector representing the result of a call to bulk_contains of the seqan3::interleaved_bloom_filte...
Definition: interleaved_bloom_filter.hpp:675
binning_bitvector(binning_bitvector &&)=default
Defaulted.
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:774
auto end() noexcept
Returns an iterator to the element following the last element of the container.
Definition: interleaved_bloom_filter.hpp:722
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:780
~binning_bitvector()=default
Defaulted.
binning_bitvector(binning_bitvector const &)=default
Defaulted.
auto end() const noexcept
Returns an iterator to the element following the last element of the container.
Definition: interleaved_bloom_filter.hpp:728
data_type data
The bitvector.
Definition: interleaved_bloom_filter.hpp:680
auto operator[](size_t const i) const noexcept
Return the i-th element.
Definition: interleaved_bloom_filter.hpp:761
binning_bitvector(size_t const size)
Construct with given size.
Definition: interleaved_bloom_filter.hpp:696
size_t size() const noexcept
Returns the number of elements.
Definition: interleaved_bloom_filter.hpp:701
binning_bitvector & operator=(binning_bitvector &&)=default
Defaulted.
auto begin() noexcept
Returns an iterator to the first element of the container.
Definition: interleaved_bloom_filter.hpp:710
auto begin() const noexcept
Returns an iterator to the first element of the container.
Definition: interleaved_bloom_filter.hpp:716
auto operator[](size_t const i) noexcept
Return the i-th element.
Definition: interleaved_bloom_filter.hpp:754
binning_bitvector & operator=(binning_bitvector const &)=default
Defaulted.
sdsl::bit_vector data_type
The underlying datatype to use.
Definition: interleaved_bloom_filter.hpp:678
binning_bitvector()=default
Defaulted.
friend bool operator==(binning_bitvector const &lhs, binning_bitvector const &rhs) noexcept
Test for equality.
Definition: interleaved_bloom_filter.hpp:738
friend bool operator!=(binning_bitvector const &lhs, binning_bitvector const &rhs) noexcept
Test for inequality.
Definition: interleaved_bloom_filter.hpp:744
Manages membership queries for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:587
binning_bitvector const & bulk_contains(size_t const value) &noexcept
Determines set membership of a given value.
Definition: interleaved_bloom_filter.hpp:639
~membership_agent_type()=default
Defaulted.
membership_agent_type(ibf_t const &ibf)
Construct a membership_agent_type from a seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:612
membership_agent_type & operator=(membership_agent_type const &)=default
Defaulted.
binning_bitvector const & bulk_contains(size_t const value) &&noexcept=delete
Determines set membership of a given value.
membership_agent_type(membership_agent_type &&)=default
Defaulted.
membership_agent_type(membership_agent_type const &)=default
Defaulted.
membership_agent_type & operator=(membership_agent_type &&)=default
Defaulted.
membership_agent_type()=default
Defaulted.
binning_bitvector result_buffer
Stores the result of bulk_contains().
Definition: interleaved_bloom_filter.hpp:617
The IBF binning directory. A data structure that efficiently answers set-membership queries for multi...
Definition: interleaved_bloom_filter.hpp:134
interleaved_bloom_filter(interleaved_bloom_filter< data_layout::uncompressed > const &ibf)
Construct a compressed Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:271
void emplace(size_t const value, bin_index const bin) noexcept
Inserts a value into a specific bin.
Definition: interleaved_bloom_filter.hpp:296
size_t hash_funs
The number of hash functions.
Definition: interleaved_bloom_filter.hpp:156
size_t bin_words
The number of 64-bit integers needed to store `bins` bits, one bit per bin (e.g. bins = 50 -> bin_words = 1).
Definition: interleaved_bloom_filter.hpp:154
interleaved_bloom_filter(interleaved_bloom_filter< data_layout::compressed > const &ibf)
Construct an uncompressed Interleaved Bloom Filter from a compressed one.
Definition: interleaved_bloom_filter.hpp:251
size_t bin_size_
The size of each bin in bits.
Definition: interleaved_bloom_filter.hpp:150
interleaved_bloom_filter & operator=(interleaved_bloom_filter const &)=default
Defaulted.
membership_agent_type membership_agent() const
Returns a seqan3::interleaved_bloom_filter::membership_agent_type to be used for lookup.
Definition: interleaved_bloom_filter.hpp:436
size_t hash_function_count() const noexcept
Returns the number of hash functions used in the Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:465
constexpr size_t hash_and_fit(size_t h, size_t const seed) const
Perturbs a value and fits it into the vector.
Definition: interleaved_bloom_filter.hpp:173
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:548
void clear(bin_index const bin) noexcept
Clears a specific bin.
Definition: interleaved_bloom_filter.hpp:320
interleaved_bloom_filter(seqan3::bin_count bins_, seqan3::bin_size size, seqan3::hash_function_count funs=seqan3::hash_function_count{2u})
Construct an uncompressed Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:221
interleaved_bloom_filter()=default
Defaulted.
size_t hash_shift
The number of bits to shift the hash value before doing multiplicative hashing.
Definition: interleaved_bloom_filter.hpp:152
static constexpr std::array< size_t, 5 > hash_seeds
Precalculated seeds for multiplicative hashing. We use large irrational numbers for a uniform hashing...
Definition: interleaved_bloom_filter.hpp:160
counting_agent_type< value_t > counting_agent() const
Returns a seqan3::interleaved_bloom_filter::counting_agent_type to be used for counting.
Definition: interleaved_bloom_filter.hpp:453
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to the underlying data structure.
Definition: interleaved_bloom_filter.hpp:542
interleaved_bloom_filter & operator=(interleaved_bloom_filter &&)=default
Defaulted.
friend bool operator!=(interleaved_bloom_filter const &lhs, interleaved_bloom_filter const &rhs) noexcept
Test for inequality.
Definition: interleaved_bloom_filter.hpp:526
interleaved_bloom_filter(interleaved_bloom_filter &&)=default
Defaulted.
void increase_bin_number_to(bin_count const new_bins_)
Increases the number of bins stored in the Interleaved Bloom Filter.
Definition: interleaved_bloom_filter.hpp:381
size_t bin_count() const noexcept
Returns the number of bins that the Interleaved Bloom Filter manages.
Definition: interleaved_bloom_filter.hpp:473
void clear(rng_t &&bin_range) noexcept
Clears a range of bins.
Definition: interleaved_bloom_filter.hpp:343
size_t bin_size() const noexcept
Returns the size of a single bin that the Interleaved Bloom Filter manages.
Definition: interleaved_bloom_filter.hpp:481
size_t bins
The number of bins specified by the user.
Definition: interleaved_bloom_filter.hpp:146
size_t bit_size() const noexcept
Returns the size of the underlying bitvector.
Definition: interleaved_bloom_filter.hpp:489
interleaved_bloom_filter(interleaved_bloom_filter const &)=default
Defaulted.
~interleaved_bloom_filter()=default
Defaulted.
size_t technical_bins
The number of bins stored in the IBF (`bins` rounded up to the next multiple of 64).
Definition: interleaved_bloom_filter.hpp:148
friend bool operator==(interleaved_bloom_filter const &lhs, interleaved_bloom_filter const &rhs) noexcept
Test for equality.
Definition: interleaved_bloom_filter.hpp:503
static constexpr data_layout data_layout_mode
Indicates whether the Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:191
data_type data
The bitvector.
Definition: interleaved_bloom_filter.hpp:158
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
data_layout
Determines if the Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:28
@ uncompressed
The Interleaved Bloom Filter is uncompressed.
Definition: interleaved_bloom_filter.hpp:29
@ compressed
The Interleaved Bloom Filter is compressed.
Definition: interleaved_bloom_filter.hpp:30
constexpr size_t size
The size of a type pack.
Definition: type_pack/traits.hpp:146
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
SeqAn specific customisations in the standard namespace.
Provides basic data structure for strong types.
A strong type that represents the number of bins for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:36
A strong type that represents the bin index for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:57
A strong type that represents the number of bits for each bin in the seqan3::interleaved_bloom_filter...
Definition: interleaved_bloom_filter.hpp:43
A strong type that represents the number of hash functions for the seqan3::interleaved_bloom_filter.
Definition: interleaved_bloom_filter.hpp:50
strong_type for seed.
Definition: minimiser_hash.hpp:25