SeqAn3 3.3.0-rc.1
The Modern C++ library for sequence analysis.
debug_matrix.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <iomanip>
16
25
26namespace seqan3::detail
27{
28
60template <matrix matrix_t, typename first_sequence_t = std::nullopt_t, typename second_sequence_t = std::nullopt_t>
62{
63protected:
65 static constexpr bool has_first_sequence = !std::is_same_v<std::decay_t<first_sequence_t>, std::nullopt_t>;
67 static constexpr bool has_second_sequence = !std::is_same_v<std::decay_t<second_sequence_t>, std::nullopt_t>;
71 static constexpr bool is_traceback_matrix = std::is_same_v<std::decay_t<entry_t>, trace_directions>;
74 static constexpr bool is_optional_score = is_type_specialisation_of_v<entry_t, std::optional>;
75
76public:
89
93 debug_matrix() = default;
94 debug_matrix(debug_matrix const &) = default;
95 debug_matrix(debug_matrix &&) = default;
96 debug_matrix & operator=(debug_matrix const &) = default;
98 ~debug_matrix() = default;
99
103 debug_matrix(matrix_t matrix) : debug_matrix(std::forward<matrix_t>(matrix), std::nullopt, std::nullopt)
104 {}
105
111 debug_matrix(matrix_t matrix, first_sequence_t first_sequence, second_sequence_t second_sequence) :
112 _matrix{std::forward<matrix_t>(matrix)},
113 _first_sequence{std::forward<first_sequence_t>(first_sequence)},
114 _second_sequence{std::forward<second_sequence_t>(second_sequence)}
115 {
116 if constexpr (has_first_sequence)
117 {
118 assert(_matrix.cols() <= _first_sequence.size() + 1u);
119 }
120
121 if constexpr (has_second_sequence)
122 {
123 assert(_matrix.rows() <= _second_sequence.size() + 1u);
124 }
125 }
127
129 size_t rows() const noexcept
130 {
131 if (!_transpose)
132 return _rows.value_or(_matrix.rows());
133 else
134 return _cols.value_or(_matrix.cols());
135 }
136
138 size_t cols() const noexcept
139 {
140 if (!_transpose)
141 return _cols.value_or(_matrix.cols());
142 else
143 return _rows.value_or(_matrix.rows());
144 }
145
147 first_sequence_t const & first_sequence() const noexcept
148 {
149 if (!_transpose)
150 return _first_sequence;
151 else
152 return _second_sequence;
153 }
154
156 second_sequence_t const & second_sequence() const noexcept
157 {
158 if (!_transpose)
159 return _second_sequence;
160 else
161 return _first_sequence;
162 }
163
// Returns the entry at `coordinate`, taking the transpose flag and the optional masking matrix into account.
// NOTE(review): this listing is an extract; listing lines 181, 187, 189, 191-192 and 198 are not present here.
// The conditions guarding `return entry;` and the `reverse |= ...` statements, and the statement returned for
// masked traceback entries, are therefore not visible; consult the original header before changing this code.
165 const_reference at(matrix_coordinate const & coordinate) const noexcept
166 {
167 size_t row = coordinate.row;
168 size_t col = coordinate.col;
169
// The coordinate must lie inside the current view.
170 assert(row < rows() && col < cols());
171
// Row and column are swapped for the underlying matrix when `_transpose` is set.
172 row_index_type const _row{!_transpose ? row : col};
173 column_index_type const _col{!_transpose ? col : row};
// The mask coordinate is swapped only when `_transpose` and `_transpose_mask` differ.
174 row_index_type const _mask_row{_transpose == _transpose_mask ? row : col};
175 column_index_type const _mask_col{_transpose == _transpose_mask ? col : row};
176
// The stored entry is read when there is no masking matrix or the mask holds `true` at this position.
177 if (!_masking_matrix.has_value() || _masking_matrix.value().at({_mask_row, _mask_col}))
178 {
179 entry_t const & entry = _matrix.at({_row, _col});
180
// NOTE(review): listing line 181 (missing) presumably guards this return; as shown, the code below would be unreachable.
182 return entry;
183
184 if constexpr (is_traceback_matrix)
185 {
// Builds a separate `reverse` value that swaps `up` and `left`; the guarding conditions are on the missing lines.
186 trace_directions reverse{};
188 reverse |= trace_directions::up;
190 reverse |= trace_directions::left;
193 return reverse;
194 }
195 }
196
// Reached for masked entries; the traceback branch (listing line 198) is missing, non-traceback returns std::nullopt.
197 if constexpr (is_traceback_matrix)
199 else
200 return std::nullopt;
201 }
202
210 {
211 assert(masking_matrix.rows() == rows());
212 assert(masking_matrix.cols() == cols());
214 _masking_matrix = std::move(masking_matrix);
215 return *this;
216 }
217
222 debug_matrix & mask_matrix(std::vector<bool> masking_vector) noexcept
223 {
224 return mask_matrix(row_wise_matrix<bool>{number_rows{rows()}, number_cols{cols()}, std::move(masking_vector)});
225 }
226
232 debug_matrix & sub_matrix(size_t const new_rows, size_t const new_cols) noexcept
233 {
234 assert(new_rows <= rows());
235 assert(new_cols <= cols());
236 if (!_transpose)
237 {
238 _rows = new_rows;
239 _cols = new_cols;
240 }
241 else
242 {
243 _rows = new_cols;
244 _cols = new_rows;
245 }
246 return *this;
247 }
248
253 {
255 return *this;
256 }
257
258protected:
260 struct format_type; // forward declaration
262
263public:
273 template <typename ostream_t>
274 void stream_matrix(ostream_t & cout, fmtflags2 const flags) const noexcept
275 {
276 format_type const & symbols = (flags & fmtflags2::utf8) == fmtflags2::utf8 ? unicode : csv;
277 size_t const column_width =
278 this->column_width.has_value() ? this->column_width.value() : auto_column_width(flags);
279
280 auto char_first_sequence = [&]([[maybe_unused]] size_t const i) -> std::string
281 {
282 if constexpr (!has_first_sequence)
283 return " ";
284 else
285 return as_string(first_sequence()[i], flags);
286 };
287
288 auto char_second_sequence = [&]([[maybe_unused]] size_t const i) -> std::string
289 {
290 if constexpr (!has_second_sequence)
291 return " ";
292 else
293 return as_string(second_sequence()[i], flags);
294 };
295
296 auto print_cell = [&](std::string const & symbol)
297 {
298 // deal with unicode chars that mess up std::setw
299 size_t const length_bytes = symbol.size();
300 size_t const length = unicode_str_length(symbol);
301 size_t const offset = length_bytes - length;
302
303 cout << std::left << std::setw(column_width + offset) << symbol << symbols.col_sep;
304 };
305
306 auto print_first_cell = [&](std::string const & symbol)
307 {
308 cout << symbol << symbols.col_sep;
309 };
310
311 // |_|d|a|t|a|b|a|s|e|
312 auto print_first_row = [&]
313 {
314 print_first_cell(" ");
315 print_cell(symbols.epsilon);
316
317 for (size_t col = 0; col < cols() - 1; ++col)
318 print_cell(char_first_sequence(col));
319
320 cout << "\n";
321 };
322
323 // |-|-|-|-|-|-|-|-|-|
324 auto print_divider = [&]
325 {
326 cout << " " << symbols.row_col_sep;
327 for (size_t col = 0; col < cols(); ++col)
328 {
329 for (size_t i = 0; i < column_width; ++i)
330 cout << symbols.row_sep;
331
332 cout << symbols.row_col_sep;
333 }
334 cout << "\n";
335 };
336
337 print_first_row();
338 for (size_t row = 0; row < rows(); ++row)
339 {
340 if (symbols.row_sep[0] != '\0')
341 print_divider();
342
343 // one query letter + one row of scores / traces
344 if (row == 0)
345 print_first_cell(symbols.epsilon);
346 else
347 print_first_cell(char_second_sequence(row - 1));
348
349 for (size_t col = 0; col < cols(); ++col)
350 print_cell(entry_at({row_index_type{row}, column_index_type{col}}, flags));
351
352 cout << "\n";
353 }
354 }
355
// Determines the column width used for printing by taking a maximum over all entries of the current view.
// NOTE(review): listing line 364, the second argument of std::max, is missing from this extract, so the exact
// per-entry width expression is not visible here.
357 size_t auto_column_width(fmtflags2 const flags) const noexcept
358 {
// The width is never smaller than one character.
359 size_t col_width = 1;
360 for (size_t row = 0; row < rows(); ++row)
361 for (size_t col = 0; col < cols(); ++col)
362 col_width =
363 std::max(col_width,
365
366 return col_width;
367 }
368
369protected:
371 std::string entry_at(matrix_coordinate const coordinate, fmtflags2 flags) const noexcept
372 {
373 format_type const & symbols = (flags & fmtflags2::utf8) == fmtflags2::utf8 ? unicode : csv;
374
375 value_type const & entry = at(coordinate);
376 if (!is_traceback_matrix && entry == matrix_inf<value_type>)
377 return symbols.inf;
378
379 return as_string(entry, flags);
380 }
381
383 template <typename value_type>
384 static std::string as_string(value_type && entry, fmtflags2 const flags) noexcept
385 {
386 std::stringstream strstream;
387 debug_stream_type stream{strstream};
388 stream << flags << entry;
389 return strstream.str();
390 }
391
//!\brief The length of the str (traceback symbols are unicode aware).
//!\param str The string whose UTF-8 encoded characters are counted.
//!\returns The number of characters, where a 2-, 3- or 4-byte sequence (recognised by its lead byte) counts once.
// Every byte that is not recognised as a multi-byte lead byte counts as one character. A multi-byte sequence
// that is cut off by the end of the string is counted once and never read or stepped past the end.
static size_t unicode_str_length(std::string const & str) noexcept
{
    size_t const size = str.size();
    size_t length = 0u;

    for (size_t pos = 0u; pos < size; ++length)
    {
        uint8_t const lead = static_cast<uint8_t>(str[pos]);

        // Number of bytes the lead byte announces for this character.
        size_t step = 1u;
        if ((lead & 0b11100000) == 0b11000000)
            step = 2u;
        else if ((lead & 0b11110000) == 0b11100000)
            step = 3u;
        else if ((lead & 0b11111000) == 0b11110000)
            step = 4u;

        // Clamp to the remaining bytes so that a truncated sequence cannot move past the end.
        size_t const remaining = size - pos;
        pos += (step < remaining) ? step : remaining;
    }

    return length;
}
409
412 {
414 char const * epsilon{};
416 char const * col_sep{};
418 char const * row_sep{};
420 char const * row_col_sep{};
422 char const * inf{};
423 };
424
426 static constexpr format_type csv{" ", ";", "", "", ""};
428 static constexpr format_type unicode{"ε", "║", "═", "╬", "∞"};
429
430public:
433
434protected:
436 matrix_t _matrix;
438 first_sequence_t _first_sequence;
440 second_sequence_t _second_sequence;
451};
452
458template <matrix matrix_t>
460
463template <matrix matrix_t, typename first_sequence_t, typename second_sequence_t>
464debug_matrix(matrix_t &&, first_sequence_t &&, second_sequence_t &&)
467
468} // namespace seqan3::detail
469
470namespace seqan3
471{
482template <typename char_t, detail::matrix alignment_matrix_t>
483inline debug_stream_type<char_t> & operator<<(debug_stream_type<char_t> & s, alignment_matrix_t && matrix)
484{
485 detail::debug_matrix debug{std::forward<alignment_matrix_t>(matrix)};
486
487 std::stringstream sstream{};
488 debug.stream_matrix(sstream, s.flags2());
489 s << sstream.str();
490 return s;
491}
492
494template <typename char_t, std::ranges::input_range alignment_matrix_t>
496inline debug_stream_type<char_t> & operator<<(debug_stream_type<char_t> & s, alignment_matrix_t && matrix)
497{
498 return s << detail::debug_matrix{std::forward<alignment_matrix_t>(matrix)};
499}
500
501} // namespace seqan3
A "pretty printer" for most SeqAn data structures and related types.
Definition: debug_stream_type.hpp:78
fmtflags2 flags2() const
Retrieve the format flags from the stream.
Definition: debug_stream_type.hpp:201
A debug matrix to wrap alignment matrices and sequences and make them printable together.
Definition: debug_matrix.hpp:62
std::optional< size_t > _cols
The number of columns the debug matrix should have. Must be at most the size of the original matrix.
Definition: debug_matrix.hpp:444
debug_matrix & mask_matrix(row_wise_matrix< bool > masking_matrix) noexcept
Masks entries out of the current matrix. This operation changes the way this.at(i,...
Definition: debug_matrix.hpp:209
first_sequence_t const & first_sequence() const noexcept
The first sequence of the sequence alignment.
Definition: debug_matrix.hpp:147
debug_matrix(matrix_t &&, first_sequence_t &&, second_sequence_t &&) -> debug_matrix< matrix_t, first_sequence_t, second_sequence_t >
The type deduction guide for the constructor seqan3::detail::debug_matrix(matrix_t,...
static constexpr format_type unicode
The format when printing to a unicode stream.
Definition: debug_matrix.hpp:428
static std::string as_string(value_type &&entry, fmtflags2 const flags) noexcept
Convert a value into a std::string.
Definition: debug_matrix.hpp:384
size_t rows() const noexcept
The number of rows in the matrix.
Definition: debug_matrix.hpp:129
size_t cols() const noexcept
The number of columns in the matrix.
Definition: debug_matrix.hpp:138
bool _transpose
Whether the current matrix should be transposed.
Definition: debug_matrix.hpp:448
debug_matrix & operator=(debug_matrix &&)=default
Defaulted.
debug_matrix()=default
Defaulted.
debug_matrix(debug_matrix &&)=default
Defaulted.
bool _transpose_mask
Whether the masking matrix should be transposed.
Definition: debug_matrix.hpp:450
size_t auto_column_width(fmtflags2 const flags) const noexcept
Determines the largest width of all entries in the matrix, e.g. -152 has width 4.
Definition: debug_matrix.hpp:357
debug_matrix(debug_matrix const &)=default
Defaulted.
debug_matrix & sub_matrix(size_t const new_rows, size_t const new_cols) noexcept
Limits the view port of the current matrix.
Definition: debug_matrix.hpp:232
static constexpr bool is_traceback_matrix
Whether the value_type is trace_directions.
Definition: debug_matrix.hpp:71
debug_matrix & mask_matrix(std::vector< bool > masking_vector) noexcept
Creates the masking_matrix out of the given masking_vector and calls mask_matrix(row_wise_matrix<bool...
Definition: debug_matrix.hpp:222
std::conditional_t< is_traceback_matrix||is_optional_score, entry_t, std::optional< entry_t > > value_type
The type of an entry in the matrix.
Definition: debug_matrix.hpp:81
second_sequence_t const & second_sequence() const noexcept
The second sequence of the sequence alignment.
Definition: debug_matrix.hpp:156
static constexpr bool is_optional_score
Whether a score matrix already returns std::optional scores. (Where std::nullopt means unset/invalid/...
Definition: debug_matrix.hpp:74
~debug_matrix()=default
Defaulted.
static constexpr format_type csv
The format when printing to an ASCII stream.
Definition: debug_matrix.hpp:426
std::optional< row_wise_matrix< bool > > _masking_matrix
The masking matrix.
Definition: debug_matrix.hpp:446
value_type reference
The type of a reference to an entry in the matrix.
Definition: debug_matrix.hpp:83
static constexpr bool has_second_sequence
Whether the current debug_matrix was given a second_sequence.
Definition: debug_matrix.hpp:67
std::optional< size_t > _rows
The number of rows the debug matrix should have. Must be at most the size of the original matrix.
Definition: debug_matrix.hpp:442
matrix_t _matrix
The matrix.
Definition: debug_matrix.hpp:436
first_sequence_t _first_sequence
The first sequence of the sequence alignment.
Definition: debug_matrix.hpp:438
debug_matrix & transpose_matrix() noexcept
Transposes the current matrix.
Definition: debug_matrix.hpp:252
debug_matrix(matrix_t matrix)
Construct the matrix out of an existing matrix.
Definition: debug_matrix.hpp:103
second_sequence_t _second_sequence
The second sequence of the sequence alignment.
Definition: debug_matrix.hpp:440
typename std::remove_reference_t< matrix_t >::value_type entry_t
The entry type.
Definition: debug_matrix.hpp:69
void stream_matrix(ostream_t &cout, fmtflags2 const flags) const noexcept
Prints this matrix into the given stream.
Definition: debug_matrix.hpp:274
debug_matrix & operator=(debug_matrix const &)=default
Defaulted.
debug_matrix(matrix_t &&) -> debug_matrix< matrix_t >
The type deduction guide for the constructor seqan3::detail::debug_matrix(matrix_t)
const_reference at(matrix_coordinate const &coordinate) const noexcept
A reference to the entry of the matrix at the given coordinate.
Definition: debug_matrix.hpp:165
typename std::remove_reference_t< matrix_t >::size_type size_type
The size type.
Definition: debug_matrix.hpp:87
std::string entry_at(matrix_coordinate const coordinate, fmtflags2 flags) const noexcept
Same as at(coordinate), but as string.
Definition: debug_matrix.hpp:371
debug_matrix(matrix_t matrix, first_sequence_t first_sequence, second_sequence_t second_sequence)
Construct the matrix out of an existing matrix and two sequences.
Definition: debug_matrix.hpp:111
static size_t unicode_str_length(std::string const &str) noexcept
The length of the str (traceback symbols are unicode aware).
Definition: debug_matrix.hpp:394
std::optional< size_t > column_width
What is the width (number of chars) of an entry. Defaults to auto_column_width.
Definition: debug_matrix.hpp:432
static constexpr bool has_first_sequence
Whether the current debug_matrix was given a first_sequence.
Definition: debug_matrix.hpp:65
Defines the requirements of a matrix (e.g. score matrices, trace matrices).
Definition: matrix_concept.hpp:61
A two dimensional matrix used inside of alignment algorithms.
Definition: two_dimensional_matrix.hpp:65
A helper concept definition for ranges that can be streamed to the seqan3::debug_stream.
Definition: range.hpp:39
Provides seqan3::debug_stream and related types.
Provides seqan3::debug_stream and related types.
debug_stream_type< char_t > & operator<<(debug_stream_type< char_t > &stream, alignment_t &&alignment)
Stream operator for alignments, which are represented as tuples of aligned sequences.
Definition: debug_stream_alignment.hpp:110
trace_directions
The possible directions a trace can have. The values can be combined by the logical |-operator.
Definition: trace_directions.hpp:29
@ row
The corresponding alignment coordinate will be incrementable/decrementable in the row index.
@ up
Trace comes from the above entry.
@ left
Trace comes from the left entry.
@ diagonal
Trace comes from the diagonal entry.
fmtflags2
Flags that change the behaviour of the seqan3::debug_stream.
Definition: debug_stream_type.hpp:31
@ utf8
Enables use of non-ASCII UTF8 characters in formatted output.
Definition: debug_stream_type.hpp:33
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
T left(T... args)
Provides seqan3::detail::matrix.
T max(T... args)
The internal SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
SeqAn specific customisations in the standard namespace.
T has_value(T... args)
Provides seqan3::debug_stream and related types.
Provides seqan3::debug_stream and related types.
Provides seqan3::detail::row_wise_matrix.
T setw(T... args)
T str(T... args)
A strong type for designated initialisation of the column index of a matrix.
Definition: matrix_coordinate.hpp:32
Format used by seqan3::detail::debug_matrix.
Definition: debug_matrix.hpp:412
char const * inf
Infinity symbol (a single symbol)
Definition: debug_matrix.hpp:422
char const * row_col_sep
Row-column separator symbol, printed where a row separator meets a column separator (a single symbol)
Definition: debug_matrix.hpp:420
char const * col_sep
Column separator symbol (a single symbol)
Definition: debug_matrix.hpp:416
char const * row_sep
Row separator symbol (a single symbol)
Definition: debug_matrix.hpp:418
char const * epsilon
Epsilon symbol (a single symbol)
Definition: debug_matrix.hpp:414
Strong type for setting the column dimension of a matrix.
Definition: two_dimensional_matrix.hpp:32
Strong type for setting the row dimension of a matrix.
Definition: two_dimensional_matrix.hpp:40
A strong type for designated initialisation of the row index of a matrix.
Definition: matrix_coordinate.hpp:61
Provides type traits for working with templates.
Provides the declaration of seqan3::detail::trace_directions.
T value_or(T... args)