SeqAn3  3.0.2
The Modern C++ library for sequence analysis.
simd_algorithm.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <array>
16 #include <utility>
17 
24 #include <seqan3/std/concepts>
25 
26 namespace seqan3::detail
27 {
28 
31 template <simd::simd_concept simd_t, size_t... I>
32 constexpr simd_t fill_impl(typename simd_traits<simd_t>::scalar_type const scalar, std::index_sequence<I...>) noexcept
33 {
34  return simd_t{((void)I, scalar)...};
35 }
36 
39 template <simd::simd_concept simd_t, typename scalar_t, scalar_t... I>
40 constexpr simd_t iota_impl(scalar_t const offset, std::integer_sequence<scalar_t, I...>)
41 {
42  return simd_t{static_cast<scalar_t>(offset + I)...};
43 }
44 
59 template <size_t divisor, simd_concept simd_t>
60 constexpr simd_t extract_impl(simd_t const & src, uint8_t const mask)
61 {
62  simd_t dst{};
63  constexpr size_t chunk = simd_traits<simd_t>::length / divisor;
64  size_t offset = chunk * mask;
65  for (size_t i = 0; i < chunk; ++i)
66  dst[i] = src[i + offset];
67 
68  return dst;
69 }
70 
79 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
80 constexpr target_simd_t upcast_signed(source_simd_t const & src)
81 {
82  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
83  "Target vector has different byte size.");
84 
85  if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
86  return upcast_signed_sse4<target_simd_t>(src);
87  else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
88  return upcast_signed_avx2<target_simd_t>(src);
89  else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
90  return upcast_signed_avx512<target_simd_t>(src);
91  else
92  static_assert(simd_traits<source_simd_t>::max_length <= 32, "simd type is not supported.");
93 }
94 
103 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
104 constexpr target_simd_t upcast_unsigned(source_simd_t const & src)
105 {
106  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
107  "Target vector has different byte size.");
108 
109  if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
110  return upcast_unsigned_sse4<target_simd_t>(src);
111  else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
112  return upcast_unsigned_avx2<target_simd_t>(src);
113  else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
114  return upcast_unsigned_avx512<target_simd_t>(src);
115  else
116  static_assert(simd_traits<source_simd_t>::max_length <= 32, "simd type is not supported.");
117 }
118 
141 template <uint8_t index, simd::simd_concept simd_t>
142 constexpr simd_t extract_half(simd_t const & src)
143 {
144  static_assert(index < 2, "The index must be in the range of [0, 1]");
145 
146  return detail::extract_impl<2>(src, index);
147 }
148 
150 template <uint8_t index, simd::simd_concept simd_t>
151  requires detail::is_builtin_simd_v<simd_t> &&
152  detail::is_native_builtin_simd_v<simd_t>
153 constexpr simd_t extract_half(simd_t const & src)
154 {
155  static_assert(index < 2, "The index must be in the range of [0, 1]");
156 
157  if constexpr (simd_traits<simd_t>::length < 2) // In case there are less elements available return unchanged value.
158  return src;
159  else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
160  return detail::extract_half_sse4<index>(src);
161  else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
162  return detail::extract_half_avx2<index>(src);
163  else // Anything else
164  return detail::extract_impl<2>(src, index);
165 }
167 
190 template <uint8_t index, simd::simd_concept simd_t>
191 constexpr simd_t extract_quarter(simd_t const & src)
192 {
193  static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");
194 
195  return detail::extract_impl<4>(src, index);
196 }
197 
199 template <uint8_t index, simd::simd_concept simd_t>
200  requires detail::is_builtin_simd_v<simd_t> &&
201  detail::is_native_builtin_simd_v<simd_t>
202 constexpr simd_t extract_quarter(simd_t const & src)
203 {
204  static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");
205 
206  if constexpr (simd_traits<simd_t>::length < 4) // In case there are less elements available return unchanged value.
207  return src;
208  else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
209  return detail::extract_quarter_sse4<index>(src);
210  else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
211  return detail::extract_quarter_avx2<index>(src);
212  else // Anything else
213  return detail::extract_impl<4>(src, index);
214 }
216 
239 template <uint8_t index, simd::simd_concept simd_t>
240 constexpr simd_t extract_eighth(simd_t const & src)
241 {
242  return detail::extract_impl<8>(src, index);
243 }
244 
246 template <uint8_t index, simd::simd_concept simd_t>
247  requires detail::is_builtin_simd_v<simd_t> &&
248  detail::is_native_builtin_simd_v<simd_t>
249 constexpr simd_t extract_eighth(simd_t const & src)
250 {
251  static_assert(index < 8, "The index must be in the range of [0, 1, 2, 3, 4, 5, 6, 7]");
252 
253  if constexpr (simd_traits<simd_t>::length < 8) // In case there are less elements available return unchanged value.
254  return src;
255  else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
256  return detail::extract_eighth_sse4<index>(src);
257  else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
258  return detail::extract_eighth_avx2<index>(src);
259  else // Anything else
260  return detail::extract_impl<8>(src, index);
261 }
263 
264 } // namespace seqan3::detail
265 
266 namespace seqan3
267 {
268 
269 inline namespace simd
270 {
271 
281 template <simd::simd_concept simd_t>
282 constexpr simd_t fill(typename simd_traits<simd_t>::scalar_type const scalar) noexcept
283 {
284  constexpr size_t length = simd_traits<simd_t>::length;
285  return detail::fill_impl<simd_t>(scalar, std::make_index_sequence<length>{});
286 }
287 
297 template <simd::simd_concept simd_t>
298 constexpr simd_t iota(typename simd_traits<simd_t>::scalar_type const offset)
299 {
300  constexpr size_t length = simd_traits<simd_t>::length;
301  using scalar_type = typename simd_traits<simd_t>::scalar_type;
302  return detail::iota_impl<simd_t>(offset, std::make_integer_sequence<scalar_type, length>{});
303 }
304 
314 template <simd::simd_concept simd_t>
315 constexpr simd_t load(void const * mem_addr)
316 {
317  assert(mem_addr != nullptr);
318  simd_t tmp{};
319 
320  for (size_t i = 0; i < simd_traits<simd_t>::length; ++i)
321  tmp[i] = *(static_cast<typename simd_traits<simd_t>::scalar_type const *>(mem_addr) + i);
322 
323  return tmp;
324 }
325 
327 template <simd::simd_concept simd_t>
328  requires detail::is_builtin_simd_v<simd_t> &&
329  detail::is_native_builtin_simd_v<simd_t>
330 constexpr simd_t load(void const * mem_addr)
331 {
332  assert(mem_addr != nullptr);
333 
334  if constexpr (simd_traits<simd_t>::max_length == 16)
335  return detail::load_sse4<simd_t>(mem_addr);
336  else if constexpr (simd_traits<simd_t>::max_length == 32)
337  return detail::load_avx2<simd_t>(mem_addr);
338  else if constexpr (simd_traits<simd_t>::max_length == 64)
339  return detail::load_avx512<simd_t>(mem_addr);
340  else
341  static_assert(simd_traits<simd_t>::max_length >= 16 && simd_traits<simd_t>::max_length <= 64,
342  "Unsupported simd type.");
343 }
345 
363 template <simd::simd_concept simd_t>
364 constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
365 {
367 
368  for (size_t i = 0; i < matrix.size(); ++i)
369  for (size_t j = 0; j < matrix.size(); ++j)
370  tmp[j][i] = matrix[i][j];
371 
372  std::swap(tmp, matrix);
373 }
374 
376 // Implementation for seqan builtin simd.
377 template <simd::simd_concept simd_t>
378  requires detail::is_builtin_simd_v<simd_t> &&
379  detail::is_native_builtin_simd_v<simd_t> &&
380  (simd_traits<simd_t>::max_length == simd_traits<simd_t>::length)
381 constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
382 {
383  if constexpr (simd_traits<simd_t>::length == 16) // SSE4 implementation
384  detail::transpose_matrix_sse4(matrix);
385  else if constexpr (simd_traits<simd_t>::length == 32) // AVX2 implementation
386  detail::transpose_matrix_avx2(matrix);
387  else
388  transpose(matrix);
389 }
391 
400 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
401 constexpr target_simd_t upcast(source_simd_t const & src)
402 {
403  static_assert(simd_traits<target_simd_t>::length <= simd_traits<source_simd_t>::length,
404  "The length of the target simd type must be greater or equal than the length of the source simd type.");
405 
406  target_simd_t tmp{};
407  for (unsigned i = 0; i < simd_traits<target_simd_t>::length; ++i)
408  tmp[i] = static_cast<typename simd_traits<target_simd_t>::scalar_type>(src[i]);
409 
410  return tmp;
411 }
412 
414 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
415  requires detail::is_builtin_simd_v<target_simd_t> &&
416  detail::is_builtin_simd_v<source_simd_t> &&
417  detail::is_native_builtin_simd_v<source_simd_t>
418 constexpr target_simd_t upcast(source_simd_t const & src)
419 {
420  static_assert(simd_traits<target_simd_t>::length <= simd_traits<source_simd_t>::length,
421  "The length of the target simd type must be greater or equal than the length of the source simd type.");
422 
423  if constexpr (simd_traits<source_simd_t>::length == simd_traits<target_simd_t>::length)
424  {
425  static_assert(simd_traits<target_simd_t>::max_length == simd_traits<source_simd_t>::max_length,
426  "Target vector has a different byte size.");
427  return reinterpret_cast<target_simd_t>(src); // Same packing so we do not cast.
428  }
429  else if constexpr (std::signed_integral<typename simd_traits<source_simd_t>::scalar_type>)
430  {
431  return detail::upcast_signed<target_simd_t>(src);
432  }
433  else
434  {
435  static_assert(std::unsigned_integral<typename simd_traits<source_simd_t>::scalar_type>,
436  "Expected unsigned scalar type.");
437  return detail::upcast_unsigned<target_simd_t>(src);
438  }
439 }
441 
442 } // inline namespace simd
443 
444 } // namespace seqan3
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<builtin_simd_type>.
The Concepts library.
Provides seqan3::simd::simd_concept.
T fill(T... args)
@ offset
Sequence (SEQ) relative start position (0-based), unsigned value.
T iota(T... args)
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
SeqAn specific customisations in the standard namespace.
Provides specific algorithm implementations for AVX2 instruction set.
Provides specific algorithm implementations for AVX512 instruction set.
Provides specific algorithm implementations for SSE4 instruction set.
Provides seqan3::simd::simd_traits.
T swap(T... args)