17 #if defined(HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE) == \
18 defined(HWY_TARGET_TOGGLE)
19 #ifdef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
20 #undef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
22 #define HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
33 #if HWY_TARGET == HWY_SCALAR
40 return (a[1] == b[1]) ? a[0] < b[0] : a[1] < b[1];
49 return (a[1] == b[1]) ? b[0] < a[0] : b[1] < a[1];
53 template <
class Order>
// Compile-time predicate that unconditionally yields true.
// NOTE(review): presumably tells generic sort code that each key occupies
// 128 bits - confirm against the callers, which are not visible here.
55 constexpr
bool Is128()
const {
return true; }
// Compile-time constant: always returns 2.
// Loops elsewhere in this file use it both as the starting index and as the
// step when walking a buffer (`i += LanesPerKey()`) between SetKey() calls.
65 constexpr
size_t LanesPerKey()
const {
return 2; }
69 const FixedTag<T, 2>
d;
70 const auto temp =
LoadU(
d, a);
76 HWY_INLINE Vec<D> SetKey(D
d,
const TFromD<D>* key)
const {
86 HWY_INLINE Vec<D> ReverseKeys2(D ,
const Vec<D>
v)
const {
92 HWY_INLINE Vec<D> ReverseKeys4(D
d,
const Vec<D>
v)
const {
94 return ReverseKeys(
d,
v);
99 HWY_INLINE Vec<D> OddEvenPairs(D
d,
const Vec<D> odd,
100 const Vec<D> even)
const {
106 HWY_INLINE V OddEvenKeys(
const V odd,
const V even)
const {
111 HWY_INLINE Vec<D> ReverseKeys8(D, Vec<D>)
const {
116 HWY_INLINE Vec<D> ReverseKeys16(D, Vec<D>)
const {
122 HWY_INLINE Vec<D> SwapAdjacentPairs(D, Vec<D>)
const {
128 HWY_INLINE Vec<D> SwapAdjacentQuads(D, Vec<D>)
const {
134 HWY_INLINE Vec<D> OddEvenQuads(D, Vec<D>, Vec<D>)
const {
146 struct OrderAscending128 :
public Key128 {
147 using Order = SortAscending;
149 template <
typename T>
151 return (a[1] == b[1]) ? a[0] < b[0] : a[1] < b[1];
155 HWY_INLINE Mask<D> Compare(D
d, Vec<D> a, Vec<D> b)
const {
161 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b)
const {
166 HWY_INLINE Vec<D> First(D
d,
const Vec<D> a,
const Vec<D> b)
const {
171 HWY_INLINE Vec<D> Last(D
d,
const Vec<D> a,
const Vec<D> b)
const {
180 v = SetKey(
d, buf + 0);
181 for (
size_t i = LanesPerKey(); i <
N; i += LanesPerKey()) {
182 v = First(
d,
v, SetKey(
d, buf + i));
192 v = SetKey(
d, buf + 0);
193 for (
size_t i = LanesPerKey(); i <
N; i += LanesPerKey()) {
194 v = Last(
d,
v, SetKey(
d, buf + i));
211 struct OrderDescending128 :
public Key128 {
212 using Order = SortDescending;
214 template <
typename T>
216 return (a[1] == b[1]) ? b[0] < a[0] : b[1] < a[1];
220 HWY_INLINE Mask<D> Compare(D
d, Vec<D> a, Vec<D> b)
const {
226 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b)
const {
231 HWY_INLINE Vec<D> First(D
d,
const Vec<D> a,
const Vec<D> b)
const {
236 HWY_INLINE Vec<D> Last(D
d,
const Vec<D> a,
const Vec<D> b)
const {
245 v = SetKey(
d, buf + 0);
246 for (
size_t i = LanesPerKey(); i <
N; i += LanesPerKey()) {
247 v = First(
d,
v, SetKey(
d, buf + i));
257 v = SetKey(
d, buf + 0);
258 for (
size_t i = LanesPerKey(); i <
N; i += LanesPerKey()) {
259 v = Last(
d,
v, SetKey(
d, buf + i));
277 template <
class Base>
278 class Traits128 :
public Base {
279 #if HWY_TARGET <= HWY_AVX2
284 const Base* base =
static_cast<const Base*
>(
this);
286 const Vec<D> ltHL =
VecFromMask(
d, base->CompareLanes(a, b));
287 const Vec<D> ltLX = ShiftLeftLanes<1>(ltHL);
288 return OrAnd(ltHL, eqHL, ltLX);
296 #if HWY_TARGET <= HWY_AVX3
297 return V{_mm512_permutex_epi64(
v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
299 return V{_mm256_permute4x64_epi64(
v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
// Compile-time predicate that unconditionally yields true.
// NOTE(review): presumably marks these traits as operating on 128-bit keys -
// confirm against the callers, which are not visible here.
305 constexpr
bool Is128()
const {
return true; }
308 HWY_INLINE void Sort2(D
d, Vec<D>& a, Vec<D>& b)
const {
309 const Base* base =
static_cast<const Base*
>(
this);
311 const Vec<D> a_copy = a;
312 const auto lt = base->Compare(
d, a, b);
319 HWY_INLINE Vec<D> SortPairsDistance1(D
d, Vec<D>
v)
const {
320 const Base* base =
static_cast<const Base*
>(
this);
321 Vec<D> swapped = base->ReverseKeys2(
d,
v);
323 #if HWY_TARGET <= HWY_AVX2
324 const Vec<D> select = ReplicateTop4x(CompareTop(
d,
v, swapped));
327 Sort2(
d,
v, swapped);
328 return base->OddEvenKeys(swapped,
v);
334 HWY_INLINE Vec<D> SortPairsReverse4(D
d, Vec<D>
v)
const {
335 const Base* base =
static_cast<const Base*
>(
this);
336 Vec<D> swapped = base->ReverseKeys4(
d,
v);
339 #if HWY_TARGET <= HWY_AVX3
340 const Vec512<uint64_t> outHx = CompareTop(
d,
v, swapped);
343 alignas(64) uint64_t kIndices[8] = {7, 7, 5, 5, 5, 5, 7, 7};
344 const Vec512<uint64_t> select =
348 Sort2(
d,
v, swapped);
349 return base->OddEvenPairs(
d, swapped,
v);
355 HWY_INLINE Vec<D> SortPairsDistance4(D, Vec<D>)
const {
#define HWY_RESTRICT
Definition: base.h:63
#define HWY_INLINE
Definition: base.h:64
#define HWY_DASSERT(condition)
Definition: base.h:193
#define HWY_MAYBE_UNUSED
Definition: base.h:75
#define HWY_ASSERT(condition)
Definition: base.h:147
void Swap(T *a, T *b)
Definition: vqsort-inl.h:63
HWY_INLINE Vec128< T, N > IfThenElse(hwy::SizeTag< 1 >, Mask128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition: x86_128-inl.h:680
d
Definition: rvv-inl.h:1656
HWY_API Vec128< T, N > OddEvenBlocks(Vec128< T, N >, Vec128< T, N > even)
Definition: arm_neon-inl.h:4038
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5252
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5244
HWY_API size_t Lanes(Simd< T, N, kPow2 > d)
Definition: arm_sve-inl.h:218
HWY_API Vec128< T, N > IfVecThenElse(Vec128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition: arm_neon-inl.h:1505
HWY_API Vec128< T, N > VecFromMask(Simd< T, N, 0 > d, const Mask128< T, N > v)
Definition: arm_neon-inl.h:1681
HWY_API Vec128< T, N > TableLookupLanes(Vec128< T, N > v, Indices128< T, N > idx)
Definition: arm_neon-inl.h:3419
HWY_API void StoreU(const Vec128< uint8_t > v, Full128< uint8_t >, uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2224
HWY_API Vec128< T, N > SwapAdjacentBlocks(Vec128< T, N > v)
Definition: arm_neon-inl.h:4045
HWY_INLINE VFromD< D > Min128(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:5203
svuint16_t Set(Simd< bfloat16_t, N, kPow2 > d, bfloat16_t arg)
Definition: arm_sve-inl.h:282
HWY_API Vec128< uint8_t > LoadU(Full128< uint8_t >, const uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2031
HWY_INLINE Mask128< T, N > Lt128(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:5172
HWY_API Vec128< T, N > OrAnd(Vec128< T, N > o, Vec128< T, N > a1, Vec128< T, N > a2)
Definition: arm_neon-inl.h:1498
HWY_API Vec128< T, N > ConcatUpperLower(Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:3895
HWY_INLINE VFromD< D > Max128(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:5208
HWY_API Indices128< T, N > SetTableIndices(Simd< T, N, 0 > d, const TI *idx)
Definition: arm_neon-inl.h:3413
HWY_API Vec128< T, N > LoadDup128(Simd< T, N, 0 > d, const T *const HWY_RESTRICT p)
Definition: arm_neon-inl.h:2217
N
Definition: rvv-inl.h:1656
HWY_API Vec128< T > ReverseBlocks(Full128< T >, const Vec128< T > v)
Definition: arm_neon-inl.h:4053
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2397
const vfloat64m1_t v
Definition: rvv-inl.h:1656
Definition: aligned_allocator.h:27
constexpr HWY_API T LowestValue()
Definition: base.h:512
constexpr HWY_API T HighestValue()
Definition: base.h:525
#define HWY_NAMESPACE
Definition: set_macros-inl.h:80
Definition: traits128-inl.h:35
SortAscending Order
Definition: traits128-inl.h:36
HWY_INLINE bool Compare1(const T *a, const T *b)
Definition: traits128-inl.h:39
Definition: traits128-inl.h:44
SortDescending Order
Definition: traits128-inl.h:45
HWY_INLINE bool Compare1(const T *a, const T *b)
Definition: traits128-inl.h:48
Definition: traits128-inl.h:54
constexpr bool Is128() const
Definition: traits128-inl.h:55
constexpr size_t LanesPerKey() const
Definition: traits128-inl.h:56