SeqAn3 3.3.0-rc.1
The Modern C++ library for sequence analysis.
minimiser.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <algorithm>
16#include <deque>
17
23
24namespace seqan3::detail
25{
26// ---------------------------------------------------------------------------------------------------------------------
27// minimiser_view class
28// ---------------------------------------------------------------------------------------------------------------------
29
48template <std::ranges::view urng1_t, std::ranges::view urng2_t = std::ranges::empty_view<seqan3::detail::empty_type>>
49class minimiser_view : public std::ranges::view_interface<minimiser_view<urng1_t, urng2_t>>
50{
51private:
52 static_assert(std::ranges::forward_range<urng1_t>, "The minimiser_view only works on forward_ranges.");
53 static_assert(std::ranges::forward_range<urng2_t>, "The minimiser_view only works on forward_ranges.");
54 static_assert(std::totally_ordered<std::ranges::range_reference_t<urng1_t>>,
55 "The reference type of the underlying range must model std::totally_ordered.");
56
58 using default_urng2_t = std::ranges::empty_view<seqan3::detail::empty_type>;
59
61 static constexpr bool second_range_is_given = !std::same_as<urng2_t, default_urng2_t>;
62
63 static_assert(!second_range_is_given
64 || std::totally_ordered_with<std::ranges::range_reference_t<urng1_t>,
65 std::ranges::range_reference_t<urng2_t>>,
66 "The reference types of the underlying ranges must model std::totally_ordered_with.");
67
69 static constexpr bool const_iterable =
71
73 urng1_t urange1{};
75 urng2_t urange2{};
76
78 size_t window_size{};
79
80 template <bool const_range>
81 class basic_iterator;
82
84 using sentinel = std::default_sentinel_t;
85
86public:
91 requires std::default_initializable<urng1_t> && std::default_initializable<urng2_t>
92 = default;
93 minimiser_view(minimiser_view const & rhs) = default;
94 minimiser_view(minimiser_view && rhs) = default;
95 minimiser_view & operator=(minimiser_view const & rhs) = default;
96 minimiser_view & operator=(minimiser_view && rhs) = default;
97 ~minimiser_view() = default;
98
104 explicit minimiser_view(urng1_t urange1, size_t const window_size) :
106 {}
107
115 template <typename other_urng1_t>
116 requires (std::ranges::viewable_range<other_urng1_t>
117 && std::constructible_from<urng1_t, std::ranges::ref_view<std::remove_reference_t<other_urng1_t>>>)
118 explicit minimiser_view(other_urng1_t && urange1, size_t const window_size) :
119 urange1{std::views::all(std::forward<other_urng1_t>(urange1))},
122 {}
123
131 explicit minimiser_view(urng1_t urange1, urng2_t urange2, size_t const window_size) :
132 urange1{std::move(urange1)},
133 urange2{std::move(urange2)},
135 {
136 if constexpr (second_range_is_given)
137 {
138 if (std::ranges::distance(urange1) != std::ranges::distance(urange2))
139 throw std::invalid_argument{"The two ranges do not have the same size."};
140 }
141 }
142
154 template <typename other_urng1_t, typename other_urng2_t>
155 requires (std::ranges::viewable_range<other_urng1_t>
156 && std::constructible_from<urng1_t, std::views::all_t<other_urng1_t>>
157 && std::ranges::viewable_range<other_urng2_t>
158 && std::constructible_from<urng2_t, std::views::all_t<other_urng2_t>>)
159 explicit minimiser_view(other_urng1_t && urange1, other_urng2_t && urange2, size_t const window_size) :
160 urange1{std::views::all(std::forward<other_urng1_t>(urange1))},
161 urange2{std::views::all(std::forward<other_urng2_t>(urange2))},
163 {
164 if constexpr (second_range_is_given)
165 {
166 if (std::ranges::distance(urange1) != std::ranges::distance(urange2))
167 throw std::invalid_argument{"The two ranges do not have the same size."};
168 }
169 }
171
189 {
191 }
192
195 requires const_iterable
196 {
198 }
199
215 sentinel end() const
216 {
217 return {};
218 }
220};
221
223template <std::ranges::view urng1_t, std::ranges::view urng2_t>
224template <bool const_range>
225class minimiser_view<urng1_t, urng2_t>::basic_iterator
226{
227private:
234
235 template <bool>
236 friend class basic_iterator;
237
238public:
243 using difference_type = std::ranges::range_difference_t<urng1_t>;
245 using value_type = std::ranges::range_value_t<urng1_t>;
247 using pointer = void;
255
259 basic_iterator() = default;
260 basic_iterator(basic_iterator const &) = default;
264 ~basic_iterator() = default;
265
268 requires const_range
269 :
270 minimiser_value{std::move(it.minimiser_value)},
271 urng1_iterator{std::move(it.urng1_iterator)},
272 urng1_sentinel{std::move(it.urng1_sentinel)},
273 urng2_iterator{std::move(it.urng2_iterator)},
274 window_values{std::move(it.window_values)}
275 {}
276
291 urng1_sentinel_t urng1_sentinel,
292 urng2_iterator_t urng2_iterator,
293 size_t window_size) :
294 urng1_iterator{std::move(urng1_iterator)},
295 urng1_sentinel{std::move(urng1_sentinel)},
296 urng2_iterator{std::move(urng2_iterator)}
297 {
298 size_t size = std::ranges::distance(urng1_iterator, urng1_sentinel);
299 window_size = std::min<size_t>(window_size, size);
300
301 window_first(window_size);
302 }
304
308
310 friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs)
311 {
312 return (lhs.urng1_iterator == rhs.urng1_iterator) && (rhs.urng2_iterator == rhs.urng2_iterator)
313 && (lhs.window_values.size() == rhs.window_values.size());
314 }
315
317 friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs)
318 {
319 return !(lhs == rhs);
320 }
321
323 friend bool operator==(basic_iterator const & lhs, sentinel const &)
324 {
325 return lhs.urng1_iterator == lhs.urng1_sentinel;
326 }
327
329 friend bool operator==(sentinel const & lhs, basic_iterator const & rhs)
330 {
331 return rhs == lhs;
332 }
333
335 friend bool operator!=(sentinel const & lhs, basic_iterator const & rhs)
336 {
337 return !(lhs == rhs);
338 }
339
341 friend bool operator!=(basic_iterator const & lhs, sentinel const & rhs)
342 {
343 return !(lhs == rhs);
344 }
346
349 {
350 next_unique_minimiser();
351 return *this;
352 }
353
356 {
357 basic_iterator tmp{*this};
358 next_unique_minimiser();
359 return tmp;
360 }
361
363 value_type operator*() const noexcept
364 {
365 return minimiser_value;
366 }
367
369 constexpr urng1_iterator_t const & base() const & noexcept
370 {
371 return urng1_iterator;
372 }
373
375 constexpr urng1_iterator_t base() &&
376 {
377 return std::move(urng1_iterator);
378 }
379
380private:
382 value_type minimiser_value{};
383
385 size_t minimiser_position_offset{};
386
388 urng1_iterator_t urng1_iterator{};
390 urng1_sentinel_t urng1_sentinel{};
392 urng2_iterator_t urng2_iterator{};
393
395 std::deque<value_type> window_values{};
396
399 {
400 while (!next_minimiser())
401 {}
402 }
403
405 auto window_value() const
406 {
407 if constexpr (!second_range_is_given)
408 return *urng1_iterator;
409 else
410 return std::min(*urng1_iterator, *urng2_iterator);
411 }
412
415 {
416 ++urng1_iterator;
417 if constexpr (second_range_is_given)
418 ++urng2_iterator;
419 }
420
422 void window_first(size_t const window_size)
423 {
424 if (window_size == 0u)
425 return;
426
427 for (size_t i = 0u; i < window_size - 1u; ++i)
428 {
429 window_values.push_back(window_value());
430 advance_window();
431 }
432 window_values.push_back(window_value());
433 auto minimiser_it = std::ranges::min_element(window_values, std::less_equal<value_type>{});
434 minimiser_value = *minimiser_it;
435 minimiser_position_offset = std::distance(std::begin(window_values), minimiser_it);
436 }
437
445 {
446 advance_window();
447 if (urng1_iterator == urng1_sentinel)
448 return true;
449
450 value_type const new_value = window_value();
451
452 window_values.pop_front();
453 window_values.push_back(new_value);
454
455 if (minimiser_position_offset == 0)
456 {
457 auto minimiser_it = std::ranges::min_element(window_values, std::less_equal<value_type>{});
458 minimiser_value = *minimiser_it;
459 minimiser_position_offset = std::distance(std::begin(window_values), minimiser_it);
460 return true;
461 }
462
463 if (new_value < minimiser_value)
464 {
465 minimiser_value = new_value;
466 minimiser_position_offset = window_values.size() - 1;
467 return true;
468 }
469
470 --minimiser_position_offset;
471 return false;
472 }
473};
474
476template <std::ranges::viewable_range rng1_t>
478
480template <std::ranges::viewable_range rng1_t, std::ranges::viewable_range rng2_t>
481minimiser_view(rng1_t &&, rng2_t &&, size_t const window_size)
482 -> minimiser_view<std::views::all_t<rng1_t>, std::views::all_t<rng2_t>>;
483
484// ---------------------------------------------------------------------------------------------------------------------
485// minimiser_fn (adaptor definition)
486// ---------------------------------------------------------------------------------------------------------------------
487
492{
494 constexpr auto operator()(size_t const window_size) const
495 {
496 return adaptor_from_functor{*this, window_size};
497 }
498
507 template <std::ranges::range urng1_t>
508 constexpr auto operator()(urng1_t && urange1, size_t const window_size) const
509 {
510 static_assert(std::ranges::viewable_range<urng1_t>,
511 "The range parameter to views::minimiser cannot be a temporary of a non-view range.");
512 static_assert(std::ranges::forward_range<urng1_t>,
513 "The range parameter to views::minimiser must model std::ranges::forward_range.");
514
515 if (window_size == 1) // Would just return urange1 without any changes
516 throw std::invalid_argument{"The chosen window_size is not valid. "
517 "Please choose a value greater than 1 or use two ranges."};
518
519 return minimiser_view{urange1, window_size};
520 }
521};
523
524} // namespace seqan3::detail
525
526namespace seqan3::views
527{
586inline constexpr auto minimiser = detail::minimiser_fn{};
587
588} // namespace seqan3::views
Provides seqan3::detail::adaptor_from_functor.
T begin(T... args)
Template for range adaptor closure objects that store arguments and wrap a proto-adaptor.
Definition: adaptor_from_functor.hpp:57
Forward declaration.
Definition: single_pass_input.hpp:34
Iterator for calculating minimisers.
Definition: minimiser.hpp:226
basic_iterator operator++(int) noexcept
Post-increment.
Definition: minimiser.hpp:355
urng1_iterator_t urng1_iterator
Iterator to the rightmost value of one window.
Definition: minimiser.hpp:388
maybe_const_sentinel_t< const_range, urng1_t > urng1_sentinel_t
The sentinel type of the first underlying range.
Definition: minimiser.hpp:229
void pointer
The pointer type.
Definition: minimiser.hpp:247
basic_iterator & operator=(basic_iterator &&)=default
Defaulted.
friend bool operator==(basic_iterator const &lhs, basic_iterator const &rhs)
Compare to another basic_iterator.
Definition: minimiser.hpp:310
void next_unique_minimiser()
Increments iterator by 1.
Definition: minimiser.hpp:398
friend bool operator==(sentinel const &lhs, basic_iterator const &rhs)
Compare to the sentinel of the minimiser_view.
Definition: minimiser.hpp:329
friend bool operator!=(basic_iterator const &lhs, sentinel const &rhs)
Compare to the sentinel of the minimiser_view.
Definition: minimiser.hpp:341
urng2_iterator_t urng2_iterator
Iterator to the rightmost value of one window of the second range.
Definition: minimiser.hpp:392
basic_iterator & operator++() noexcept
Pre-increment.
Definition: minimiser.hpp:348
basic_iterator(basic_iterator<!const_range > const &it)
Allow iterator on a const range to be constructible from an iterator over a non-const range.
Definition: minimiser.hpp:267
value_type reference
Reference to value_type.
Definition: minimiser.hpp:249
basic_iterator(basic_iterator const &)=default
Defaulted.
value_type operator*() const noexcept
Return the minimiser.
Definition: minimiser.hpp:363
basic_iterator(basic_iterator &&)=default
Defaulted.
friend bool operator!=(sentinel const &lhs, basic_iterator const &rhs)
Compare to the sentinel of the minimiser_view.
Definition: minimiser.hpp:335
urng1_sentinel_t urng1_sentinel
brief Iterator to last element in range.
Definition: minimiser.hpp:390
constexpr urng1_iterator_t const & base() const &noexcept
Return the underlying iterator. It will point to the last element in the current window.
Definition: minimiser.hpp:369
void window_first(size_t const window_size)
Calculates minimisers for the first window.
Definition: minimiser.hpp:422
std::ranges::range_value_t< urng1_t > value_type
Value type of this iterator.
Definition: minimiser.hpp:245
std::deque< value_type > window_values
Stored values per window. It is necessary to store them, because a shift can remove the current minim...
Definition: minimiser.hpp:395
constexpr urng1_iterator_t base() &&
Return the underlying iterator. It will point to the last element in the current window.
Definition: minimiser.hpp:375
basic_iterator(urng1_iterator_t urng1_iterator, urng1_sentinel_t urng1_sentinel, urng2_iterator_t urng2_iterator, size_t window_size)
Construct from begin and end iterators of a given range over std::totally_ordered values,...
Definition: minimiser.hpp:290
maybe_const_iterator_t< const_range, urng1_t > urng1_iterator_t
The iterator type of the first underlying range.
Definition: minimiser.hpp:231
friend bool operator==(basic_iterator const &lhs, sentinel const &)
Compare to the sentinel of the minimiser_view.
Definition: minimiser.hpp:323
auto window_value() const
Returns new window value.
Definition: minimiser.hpp:405
bool next_minimiser()
Calculates the next minimiser value.
Definition: minimiser.hpp:444
basic_iterator & operator=(basic_iterator const &)=default
Defaulted.
friend bool operator!=(basic_iterator const &lhs, basic_iterator const &rhs)
Compare to another basic_iterator.
Definition: minimiser.hpp:317
maybe_const_iterator_t< const_range, urng2_t > urng2_iterator_t
The iterator type of the second underlying range.
Definition: minimiser.hpp:233
std::ranges::range_difference_t< urng1_t > difference_type
Type for distances between iterators.
Definition: minimiser.hpp:243
void advance_window()
Advances the window to the next position.
Definition: minimiser.hpp:414
The type returned by seqan3::views::minimiser.
Definition: minimiser.hpp:50
size_t window_size
The number of values in one window.
Definition: minimiser.hpp:78
std::default_sentinel_t sentinel
The sentinel type of the minimiser_view.
Definition: minimiser.hpp:84
minimiser_view()=default
Defaulted.
urng2_t urange2
The second underlying range.
Definition: minimiser.hpp:75
sentinel end() const
Returns an iterator to the element following the last element of the range.
Definition: minimiser.hpp:215
static constexpr bool second_range_is_given
Boolean variable, which is true, when second range is not of empty type.
Definition: minimiser.hpp:61
urng1_t urange1
The first underlying range.
Definition: minimiser.hpp:73
minimiser_view(other_urng1_t &&urange1, other_urng2_t &&urange2, size_t const window_size)
Construct from two non-views that can be view-wrapped and a given number of values in one window.
Definition: minimiser.hpp:159
std::ranges::empty_view< seqan3::detail::empty_type > default_urng2_t
The default argument of the second range.
Definition: minimiser.hpp:58
basic_iterator< true > begin() const
Returns an iterator to the first element of the range.
Definition: minimiser.hpp:194
minimiser_view(urng1_t urange1, urng2_t urange2, size_t const window_size)
Construct from two views and a given number of values in one window.
Definition: minimiser.hpp:131
minimiser_view(other_urng1_t &&urange1, size_t const window_size)
Construct from a non-view that can be view-wrapped and a given number of values in one window.
Definition: minimiser.hpp:118
basic_iterator< false > begin()
Returns an iterator to the first element of the range.
Definition: minimiser.hpp:188
static constexpr bool const_iterable
Whether the given ranges are const_iterable.
Definition: minimiser.hpp:69
Provides various transformation traits used by the range module.
T distance(T... args)
Provides seqan3::detail::empty_type.
std::ranges::sentinel_t< maybe_const_range_t< const_v, range_t > > maybe_const_sentinel_t
Returns the const sentinel of range_t if const_range is true; otherwise the non-const sentinel.
Definition: core/range/type_traits.hpp:49
std::ranges::iterator_t< maybe_const_range_t< const_range, range_t > > maybe_const_iterator_t
Returns the const iterator of range_t if const_range is true; otherwise the non-const iterator.
Definition: core/range/type_traits.hpp:44
constexpr auto all
Returns a view that includes all elements of the range argument.
Definition: all_view.hpp:204
constexpr auto minimiser
Computes minimisers for a range of comparable values. A minimiser is the smallest value in a window.
Definition: minimiser.hpp:586
constexpr size_t size
The size of a type pack.
Definition: type_pack/traits.hpp:146
Specifies requirements of an input range type for which the const version of that type satisfies the ...
Provides lazy template instantiation traits.
T min_element(T... args)
T min(T... args)
The internal SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
minimiser_view(rng1_t &&, size_t const window_size) -> minimiser_view< std::views::all_t< rng1_t > >
A deduction guide for the view class template.
The SeqAn namespace for views.
Definition: char_strictly_to.hpp:22
SeqAn specific customisations in the standard namespace.
T size(T... args)
seqan3::views::minimiser's range adaptor object type (non-closure).
Definition: minimiser.hpp:492
constexpr auto operator()(urng1_t &&urange1, size_t const window_size) const
Call the view's constructor with two arguments: the underlying view and an integer indicating how man...
Definition: minimiser.hpp:508
constexpr auto operator()(size_t const window_size) const
Store the number of values in one window and return a range adaptor closure object.
Definition: minimiser.hpp:494
strong_type for the window_size.
Definition: minimiser_hash.hpp:32
Additional non-standard concepts for ranges.