147 template <
typename stream_type,
148 typename seq_legal_alph_type,
bool seq_qual_combined,
156 qual_type & qualities);
158 template <
typename stream_type,
166 qual_type && qualities);
168 template <
typename stream_type,
169 typename seq_legal_alph_type,
170 typename ref_seqs_type,
171 typename ref_ids_type,
174 typename offset_type,
175 typename ref_seq_type,
176 typename ref_id_type,
177 typename ref_offset_type,
184 typename tag_dict_type,
185 typename e_value_type,
186 typename bit_score_type>
189 ref_seqs_type & ref_seqs,
194 offset_type & offset,
195 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
196 ref_id_type & ref_id,
197 ref_offset_type & ref_offset,
199 cigar_type & cigar_vector,
203 tag_dict_type & tag_dict,
204 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
205 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score));
207 template <
typename stream_type,
208 typename header_type,
211 typename ref_seq_type,
212 typename ref_id_type,
216 typename tag_dict_type,
217 typename e_value_type,
218 typename bit_score_type>
221 header_type && header,
225 int32_t
const offset,
226 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
227 ref_id_type && ref_id,
234 tag_dict_type && tag_dict,
235 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
236 bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score));
246 alignment_file_header<> default_header{};
249 bool ref_info_present_in_header{
false};
258 template <
typename t>
259 decltype(
auto) default_or(t && v) const noexcept
261 return std::forward<t>(v);
264 using format_sam_base::read_field;
266 template <
typename stream_view_type,
typename value_type>
268 stream_view_type && stream_view,
271 template <
typename stream_view_type>
272 void read_field(stream_view_type && stream_view, sam_tag_dictionary & target);
274 template <
typename stream_it_t, std::ranges::forward_range field_type>
275 void write_range(stream_it_t & stream_it, field_type && field_value);
277 template <
typename stream_it_t>
278 void write_range(stream_it_t & stream_it,
char const *
const field_value);
280 template <
typename stream_t, arithmetic field_type>
281 void write_field(stream_t & stream, field_type field_value);
283 template <
typename stream_t>
284 void write_tag_fields(stream_t & stream, sam_tag_dictionary
const & tag_dict,
char const separator);
288 template <
typename stream_type,
289 typename seq_legal_alph_type,
bool seq_qual_combined,
297 qual_type & qualities)
301 if constexpr (seq_qual_combined)
305 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
306 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
308 for (
auto sit = tmp_qual.
begin(), dit = std::ranges::begin(
sequence); sit != tmp_qual.
end(); ++sit, ++dit)
309 get<1>(*dit).assign_char(*sit);
314 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
315 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
318 if constexpr (!detail::decays_to_ignore_v<seq_type>)
319 if (std::ranges::distance(
sequence) == 0)
320 throw parse_error{
"The sequence information must not be empty."};
321 if constexpr (!detail::decays_to_ignore_v<id_type>)
322 if (std::ranges::distance(
id) == 0)
323 throw parse_error{
"The id information must not be empty."};
330 template <
typename stream_type,
338 qual_type && qualities)
349 default_or(qualities),
366 template <
typename stream_type,
367 typename seq_legal_alph_type,
368 typename ref_seqs_type,
369 typename ref_ids_type,
372 typename offset_type,
373 typename ref_seq_type,
374 typename ref_id_type,
375 typename ref_offset_type,
382 typename tag_dict_type,
383 typename e_value_type,
384 typename bit_score_type>
387 ref_seqs_type & ref_seqs,
392 offset_type & offset,
393 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
394 ref_id_type & ref_id,
395 ref_offset_type & ref_offset,
397 cigar_type & cigar_vector,
401 tag_dict_type & tag_dict,
402 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
403 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
405 static_assert(detail::decays_to_ignore_v<ref_offset_type> ||
406 detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
407 "The ref_offset must be a specialisation of std::optional.");
413 int32_t ref_offset_tmp{};
414 std::ranges::range_value_t<decltype(header.
ref_ids())> ref_id_tmp{};
415 [[maybe_unused]] int32_t offset_tmp{};
416 [[maybe_unused]] int32_t soft_clipping_end{};
418 [[maybe_unused]] int32_t ref_length{0}, seq_length{0};
422 if (is_char<'@'>(*std::ranges::begin(stream_view)))
424 read_header(stream_view, header, ref_seqs);
426 if (std::ranges::begin(stream_view) == std::ranges::end(stream_view))
432 read_field(field_view,
id);
434 uint16_t flag_integral{};
435 read_field(field_view, flag_integral);
438 read_field(field_view, ref_id_tmp);
439 check_and_assign_ref_id(
ref_id, ref_id_tmp, header, ref_seqs);
441 read_field(field_view, ref_offset_tmp);
444 if (ref_offset_tmp == -1)
446 else if (ref_offset_tmp > -1)
448 else if (ref_offset_tmp < -1)
449 throw format_error{
"No negative values are allowed for field::ref_offset."};
451 read_field(field_view,
mapq);
455 if constexpr (!detail::decays_to_ignore_v<align_type> || !detail::decays_to_ignore_v<cigar_type>)
457 if (!is_char<'*'>(*std::ranges::begin(stream_view)))
459 std::tie(tmp_cigar_vector, ref_length, seq_length) = parse_cigar(field_view);
460 transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
465 std::ranges::next(std::ranges::begin(field_view));
470 detail::consume(field_view);
477 if constexpr (!detail::decays_to_ignore_v<mate_type>)
479 std::ranges::range_value_t<decltype(header.
ref_ids())> tmp_mate_ref_id{};
480 read_field(field_view, tmp_mate_ref_id);
482 if (tmp_mate_ref_id ==
"=")
484 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
487 check_and_assign_ref_id(get<0>(
mate), ref_id_tmp, header, ref_seqs);
491 check_and_assign_ref_id(get<0>(
mate), tmp_mate_ref_id, header, ref_seqs);
495 read_field(field_view, tmp_pnext);
498 get<1>(
mate) = --tmp_pnext;
499 else if (tmp_pnext < 0)
500 throw format_error{
"No negative values are allowed at the mate mapping position."};
503 read_field(field_view, get<2>(
mate));
507 for (
size_t i = 0; i < 3u; ++i)
509 detail::consume(field_view);
515 if (!is_char<'*'>(*std::ranges::begin(stream_view)))
517 auto constexpr is_legal_alph = is_in_alphabet<seq_legal_alph_type>;
520 if (!is_legal_alph(c))
523 " evaluated to false on " +
524 detail::make_printable(c)};
528 if constexpr (detail::decays_to_ignore_v<seq_type>)
530 if constexpr (!detail::decays_to_ignore_v<align_type>)
533 "If you want to read ALIGNMENT but not SEQ, the alignment"
534 " object must store a sequence container at the second (query) position.");
536 if (!tmp_cigar_vector.empty())
539 auto tmp_iter = std::ranges::begin(seq_stream);
540 std::ranges::advance(tmp_iter, offset_tmp);
542 for (; seq_length > 0; --seq_length)
544 get<1>(align).push_back(std::ranges::range_value_t<decltype(get<1>(align))>{}.assign_char(*tmp_iter));
548 std::ranges::advance(tmp_iter, soft_clipping_end);
557 detail::consume(seq_stream);
562 read_field(seq_stream,
seq);
564 if constexpr (!detail::decays_to_ignore_v<align_type>)
566 if (!tmp_cigar_vector.empty())
568 assign_unaligned(get<1>(align),
577 std::ranges::next(std::ranges::begin(field_view));
582 auto const tab_or_end = is_char<'\t'> || is_char<'\r'> || is_char<'\n'>;
585 if constexpr (!detail::decays_to_ignore_v<seq_type> && !detail::decays_to_ignore_v<qual_type>)
587 if (std::ranges::distance(
seq) != 0 && std::ranges::distance(
qual) != 0 &&
588 std::ranges::distance(
seq) != std::ranges::distance(
qual))
590 throw format_error{detail::to_string(
"Sequence length (", std::ranges::distance(
seq),
591 ") and quality length (", std::ranges::distance(
qual),
592 ") must be the same.")};
598 while (is_char<'\t'>(*std::ranges::begin(stream_view)))
600 std::ranges::next(std::ranges::begin(stream_view));
604 detail::consume(stream_view |
views::take_until(!(is_char<'\r'> || is_char<'\n'>)));
610 if constexpr (!detail::decays_to_ignore_v<align_type>)
612 int32_t ref_idx{(ref_id_tmp.empty()) ? -1 : 0};
614 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
616 if (!ref_id_tmp.empty())
618 assert(header.
ref_dict.count(ref_id_tmp) != 0);
619 ref_idx = header.
ref_dict[ref_id_tmp];
623 construct_alignment(align, tmp_cigar_vector, ref_idx, ref_seqs, ref_offset_tmp, ref_length);
626 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
627 std::swap(cigar_vector, tmp_cigar_vector);
631 template <
typename stream_type,
632 typename header_type,
635 typename ref_seq_type,
636 typename ref_id_type,
640 typename tag_dict_type,
641 typename e_value_type,
642 typename bit_score_type>
645 header_type && header,
649 int32_t
const offset,
650 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
651 ref_id_type && ref_id,
658 tag_dict_type && tag_dict,
659 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
660 bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score))
678 static_assert((std::ranges::forward_range<seq_type> &&
679 alphabet<std::ranges::range_reference_t<seq_type>>),
680 "The seq object must be a std::ranges::forward_range over "
681 "letters that model seqan3::alphabet.");
683 static_assert((std::ranges::forward_range<id_type> &&
684 alphabet<std::ranges::range_reference_t<id_type>>),
685 "The id object must be a std::ranges::forward_range over "
686 "letters that model seqan3::alphabet.");
688 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
690 static_assert((std::ranges::forward_range<ref_id_type> ||
693 "The ref_id object must be a std::ranges::forward_range "
694 "over letters that model seqan3::alphabet.");
698 static_assert(!detail::decays_to_ignore_v<header_type>,
699 "If you give indices as reference id information the header must also be present.");
703 "The align object must be a std::pair of two ranges whose "
704 "value_type is comparable to seqan3::gap");
707 std::equality_comparable_with<
gap, std::ranges::range_reference_t<decltype(std::get<0>(align))>> &&
708 std::equality_comparable_with<
gap, std::ranges::range_reference_t<decltype(std::get<1>(align))>>),
709 "The align object must be a std::pair of two ranges whose "
710 "value_type is comparable to seqan3::gap");
712 static_assert((std::ranges::forward_range<qual_type> &&
713 alphabet<std::ranges::range_reference_t<qual_type>>),
714 "The qual object must be a std::ranges::forward_range "
715 "over letters that model seqan3::alphabet.");
718 "The mate object must be a std::tuple of size 3 with "
719 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
720 "2) a std::integral or std::optional<std::integral>, and "
721 "3) a std::integral.");
723 static_assert(((std::ranges::forward_range<decltype(std::get<0>(
mate))> ||
729 "The mate object must be a std::tuple of size 3 with "
730 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
731 "2) a std::integral or std::optional<std::integral>, and "
732 "3) a std::integral.");
736 static_assert(!detail::decays_to_ignore_v<header_type>,
737 "If you give indices as mate reference id information the header must also be present.");
740 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
745 if constexpr (!detail::decays_to_ignore_v<header_type> &&
746 !detail::decays_to_ignore_v<ref_id_type> &&
755 if constexpr (std::ranges::contiguous_range<decltype(
ref_id)> &&
756 std::ranges::sized_range<decltype(
ref_id)> &&
757 std::ranges::borrowed_range<decltype(
ref_id)>)
766 "The ref_id type is not convertible to the reference id information stored in the "
767 "reference dictionary of the header object.");
773 throw format_error{detail::to_string(
"The ref_id '",
ref_id,
"' was not in the list of references:",
779 throw format_error{
"The ref_offset object must be an std::integral >= 0."};
784 if constexpr (!detail::decays_to_ignore_v<header_type>)
788 write_header(stream, options, header);
789 header_was_written =
true;
796 std::cpp20::ostreambuf_iterator stream_it{stream};
797 char const separator{
'\t'};
799 write_range(stream_it, std::forward<id_type>(
id));
803 stream << static_cast<uint16_t>(
flag) << separator;
805 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
820 write_range(stream_it, std::forward<ref_id_type>(
ref_id));
831 stream << (
ref_offset.value_or(-1) + 1) << separator;
833 stream << static_cast<unsigned>(
mapq) << separator;
835 if (!std::ranges::empty(cigar_vector))
837 for (
auto & c : cigar_vector)
838 stream << c.to_string();
840 else if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align)))
847 for (
auto chr : get<1>(align))
852 write_range(stream_it, detail::get_cigar_string(std::forward<align_type>(align),
offset, off_end));
863 write_range(stream_it, (header.
ref_ids())[get<0>(
mate)]);
867 if (get<0>(
mate).has_value())
870 write_range(stream_it, header.
ref_ids()[get<0>(
mate).value_or(0)]);
876 write_range(stream_it, get<0>(
mate));
884 stream << (get<1>(
mate).value_or(-1) + 1) << separator;
888 stream << get<1>(
mate) << separator;
891 stream << get<2>(
mate) << separator;
893 write_range(stream_it, std::forward<seq_type>(
seq));
897 write_range(stream_it, std::forward<qual_type>(
qual));
899 write_tag_fields(stream, std::forward<tag_dict_type>(tag_dict), separator);
922 template <
typename stream_view_type,
typename value_type>
924 stream_view_type && stream_view,
928 while (std::ranges::begin(stream_view) != ranges::end(stream_view))
933 if (is_char<','>(*std::ranges::begin(stream_view)))
934 std::ranges::next(std::ranges::begin(stream_view));
956 template <
typename stream_view_type>
957 inline void format_sam::read_field(stream_view_type && stream_view, sam_tag_dictionary & target)
964 uint16_t tag =
static_cast<uint16_t
>(*std::ranges::begin(stream_view)) << 8;
965 std::ranges::next(std::ranges::begin(stream_view));
966 tag +=
static_cast<uint16_t
>(*std::ranges::begin(stream_view));
967 std::ranges::next(std::ranges::begin(stream_view));
968 std::ranges::next(std::ranges::begin(stream_view));
969 char type_id = *std::ranges::begin(stream_view);
970 std::ranges::next(std::ranges::begin(stream_view));
971 std::ranges::next(std::ranges::begin(stream_view));
977 target[tag] =
static_cast<char>(*std::ranges::begin(stream_view));
978 std::ranges::next(std::ranges::begin(stream_view));
984 read_field(stream_view, tmp);
991 read_field(stream_view, tmp);
997 target[tag] = stream_view | views::to<std::string>;
1007 char array_value_type_id = *std::ranges::begin(stream_view);
1008 std::ranges::next(std::ranges::begin(stream_view));
1009 std::ranges::next(std::ranges::begin(stream_view));
1011 switch (array_value_type_id)
1014 read_sam_dict_vector(target[tag], stream_view, int8_t{});
1017 read_sam_dict_vector(target[tag], stream_view, uint8_t{});
1020 read_sam_dict_vector(target[tag], stream_view, int16_t{});
1023 read_sam_dict_vector(target[tag], stream_view, uint16_t{});
1026 read_sam_dict_vector(target[tag], stream_view, int32_t{});
1029 read_sam_dict_vector(target[tag], stream_view, uint32_t{});
1032 read_sam_dict_vector(target[tag], stream_view,
float{});
1035 throw format_error{
std::string(
"The first character in the numerical ") +
1036 "id of a SAM tag must be one of [cCsSiIf] but '" + array_value_type_id +
1042 throw format_error{
std::string(
"The second character in the numerical id of a "
1043 "SAM tag must be one of [A,i,Z,H,B,f] but '") + type_id +
"' was given."};
1054 template <
typename stream_it_t, std::ranges::forward_range field_type>
1055 inline void format_sam::write_range(stream_it_t & stream_it, field_type && field_value)
1057 if (std::ranges::empty(field_value))
1069 template <
typename stream_it_t>
1070 inline void format_sam::write_range(stream_it_t & stream_it,
char const *
const field_value)
1080 template <
typename stream_t, arithmetic field_type>
1081 inline void format_sam::write_field(stream_t & stream, field_type field_value)
1084 if constexpr (std::same_as<field_type, int8_t> || std::same_as<field_type, uint8_t>)
1085 stream << static_cast<int16_t>(field_value);
1087 stream << field_value;
1097 template <
typename stream_t>
1098 inline void format_sam::write_tag_fields(stream_t & stream, sam_tag_dictionary
const & tag_dict,
char const separator)
1100 auto stream_variant_fn = [
this, &stream] (
auto && arg)
1104 if constexpr (!
container<T> || std::same_as<T, std::string>)
1110 if (arg.begin() != arg.end())
1112 for (
auto it = arg.begin(); it != (arg.end() - 1); ++it)
1114 write_field(stream, *it);
1118 write_field(stream, *(arg.end() - 1));
1123 for (
auto & [tag, variant] : tag_dict)
1125 stream << separator;
1127 char char0 = tag / 256;
1128 char char1 = tag % 256;
1130 stream << char0 << char1 <<
':' << detail::sam_tag_type_char[variant.
index()] <<
':';
1132 if (detail::sam_tag_type_char_extra[variant.
index()] !=
'\0')
1133 stream << detail::sam_tag_type_char_extra[variant.
index()] <<
',';
Adaptations of algorithms from the Ranges TS.
Provides seqan3::alignment_file_output_options.
Provides seqan3::views::char_to.
The alphabet of a gap character '-'.
Definition: gap.hpp:37
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:325
Provides concepts for core language types and relations that don't have concepts in C++20 (yet).
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: misc.hpp:71
@ none
None of the flags below are set.
constexpr auto is_space
Checks whether c is a space character.
Definition: predicate.hpp:146
constexpr sequenced_policy seq
Global execution policy object for sequenced execution policy.
Definition: execution.hpp:54
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (REF_SEQ) relative start position (0-based), unsigned value.
@ mapq
The mapping quality of the SEQ alignment, usually a ohred-scaled score.
@ offset
Sequence (SEQ) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ ref_id
The identifier of the (reference) sequence that SEQ was aligned to.
@ qual
The qualities, usually in phred-score notation.
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:116
seqan3::type_list< trait_t< pack_t >... > transform
Apply a transformation trait to every type in the pack and return a seqan3::type_list of the results.
Definition: traits.hpp:307
auto const to_char
A view that calls seqan3::to_char() on each element in the input range.
Definition: to_char.hpp:65
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:141
constexpr auto take_until_or_throw
A view adaptor that returns elements from the underlying range until the functor evaluates to true (t...
Definition: take_until.hpp:624
constexpr auto istreambuf
A view factory that returns a view over the stream buffer of an input stream.
Definition: istreambuf.hpp:113
constexpr auto take_until
A view adaptor that returns elements from the underlying range until the functor evaluates to true (o...
Definition: take_until.hpp:610
constexpr auto take_until_or_throw_and_consume
A view adaptor that returns elements from the underlying range until the functor evaluates to true (t...
Definition: take_until.hpp:652
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
constexpr auto take_until_and_consume
A view adaptor that returns elements from the underlying range until the functor evaluates to true (o...
Definition: take_until.hpp:638
Provides seqan3::detail::ignore_output_iterator for writing to null stream.
The generic alphabet concept that covers most data types used in ranges.
The (most general) container concept as defined by the standard library.
Resolves to std::ranges::implicitly_convertible_to<type1, type2>().
A more refined container concept than seqan3::container.
The generic concept for a sequence.
Whether a type behaves like a tuple.
Provides helper data structures for the seqan3::alignment_file_output.
Provides various utility functions.
Provides seqan3::fast_istreambuf_iterator and seqan3::fast_ostreambuf_iterator, as well as,...
Provides seqan3::views::istreambuf.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
typename remove_cvref< t >::type remove_cvref_t
Return the input type with const, volatile and references removed (transformation_trait shortcut).
Definition: type_traits:56
Provides character predicates for tokenisation.
Provides various utility functions.
Provides various transformation traits used by the range module.
Adaptations of concepts from the Ranges TS.
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
Provides seqan3::sequence_file_output_options.
Provides seqan3::views::slice.
The options type defines various option members that influence the behavior of all or some formats.
Definition: output_options.hpp:23
bool sam_require_header
Whether to require a header for SAM files.
Definition: output_options.hpp:41
bool add_carriage_return
The default plain text line-ending is "\n", but on Windows an additional carriage return is recommend...
Definition: output_options.hpp:27
Thrown if there is a parse error, such as reading an unexpected character from an input stream.
Definition: exception.hpp:48
The options type defines various option members that influence the behaviour of all or some formats.
Definition: output_options.hpp:22
Exposes the value_type of another type.
Definition: pre.hpp:58
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
Provides seqan3::type_list and auxiliary type traits.
Provides seqan3::views::to.
Provides seqan3::views::to_char.
Auxiliary for pretty printing of exception messages.
Provides seqan3::tuple_like.