Grok  9.7.5
test_util-inl.h
Go to the documentation of this file.
1 // Copyright 2019 Google LLC
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 // Target-specific helper functions for use by *_test.cc.
17 
18 #include <inttypes.h>
19 #include <stdint.h>
20 
21 #include "hwy/base.h"
22 #include "hwy/print-inl.h"
23 #include "hwy/tests/hwy_gtest.h"
24 #include "hwy/tests/test_util.h"
25 
26 // Per-target include guard
27 #if defined(HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_) == \
28  defined(HWY_TARGET_TOGGLE)
29 #ifdef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
30 #undef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
31 #else
32 #define HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
33 #endif
34 
36 namespace hwy {
37 namespace HWY_NAMESPACE {
38 
39 // Compare expected vector to vector.
40 // HWY_INLINE works around a Clang SVE compiler bug where all but the first
41 // 128 bits (the NEON register) of actual are zero.
42 template <class D, typename T = TFromD<D>, class V = Vec<D>>
43 HWY_INLINE void AssertVecEqual(D d, const T* expected, VecArg<V> actual,
44  const char* filename, const int line) {
45  const size_t N = Lanes(d);
46  auto actual_lanes = AllocateAligned<T>(N);
47  Store(actual, d, actual_lanes.get());
48 
49  const auto info = hwy::detail::MakeTypeInfo<T>();
50  const char* target_name = hwy::TargetName(HWY_TARGET);
51  hwy::detail::AssertArrayEqual(info, expected, actual_lanes.get(), N,
52  target_name, filename, line);
53 }
54 
55 // Compare expected lanes to vector.
56 // HWY_INLINE works around a Clang SVE compiler bug where all but the first
57 // 128 bits (the NEON register) of actual are zero.
58 template <class D, typename T = TFromD<D>, class V = Vec<D>>
59 HWY_INLINE void AssertVecEqual(D d, VecArg<V> expected, VecArg<V> actual,
60  const char* filename, int line) {
61  auto expected_lanes = AllocateAligned<T>(Lanes(d));
62  Store(expected, d, expected_lanes.get());
63  AssertVecEqual(d, expected_lanes.get(), actual, filename, line);
64 }
65 
66 // Only checks the valid mask elements (those whose index < Lanes(d)).
67 template <class D>
69  const char* filename, int line) {
70  // lvalues prevented MSAN failure in farm_sve.
71  const Vec<D> va = VecFromMask(d, a);
72  const Vec<D> vb = VecFromMask(d, b);
73  AssertVecEqual(d, va, vb, filename, line);
74 
75  const char* target_name = hwy::TargetName(HWY_TARGET);
76  AssertEqual(CountTrue(d, a), CountTrue(d, b), target_name, filename, line);
77  AssertEqual(AllTrue(d, a), AllTrue(d, b), target_name, filename, line);
78  AssertEqual(AllFalse(d, a), AllFalse(d, b), target_name, filename, line);
79 
80  const size_t N = Lanes(d);
81  const Repartition<uint8_t, D> d8;
82  const size_t N8 = Lanes(d8);
83  auto bits_a = AllocateAligned<uint8_t>(HWY_MAX(8, N8));
84  auto bits_b = AllocateAligned<uint8_t>(HWY_MAX(8, N8));
85  memset(bits_a.get(), 0, N8);
86  memset(bits_b.get(), 0, N8);
87  const size_t num_bytes_a = StoreMaskBits(d, a, bits_a.get());
88  const size_t num_bytes_b = StoreMaskBits(d, b, bits_b.get());
89  AssertEqual(num_bytes_a, num_bytes_b, target_name, filename, line);
90  size_t i = 0;
91  // First check whole bytes (if that many elements are still valid)
92  for (; i < N / 8; ++i) {
93  if (bits_a[i] != bits_b[i]) {
94  fprintf(stderr, "Mismatch in byte %" PRIu64 ": %d != %d\n",
95  static_cast<uint64_t>(i), bits_a[i], bits_b[i]);
96  Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
97  Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
98  hwy::Abort(filename, line, "Masks not equal");
99  }
100  }
101  // Then the valid bit(s) in the last byte.
102  const size_t remainder = N % 8;
103  if (remainder != 0) {
104  const int mask = (1 << remainder) - 1;
105  const int valid_a = bits_a[i] & mask;
106  const int valid_b = bits_b[i] & mask;
107  if (valid_a != valid_b) {
108  fprintf(stderr, "Mismatch in last byte %" PRIu64 ": %d != %d\n",
109  static_cast<uint64_t>(i), valid_a, valid_b);
110  Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
111  Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
112  hwy::Abort(filename, line, "Masks not equal");
113  }
114  }
115 }
116 
117 // Only sets valid elements (those whose index < Lanes(d)). This helps catch
118 // tests that are not masking off the (undefined) upper mask elements.
119 //
120 // TODO(janwas): with HWY_NOINLINE GCC zeros the upper half of AVX2 masks.
121 template <class D>
123  return FirstN(d, Lanes(d));
124 }
125 
126 template <class D>
128  const auto zero = Zero(RebindToSigned<D>());
129  return RebindMask(d, Lt(zero, zero));
130 }
131 
#ifndef HWY_ASSERT_EQ

// Convenience wrappers that append the call site (__FILE__, __LINE__) to the
// corresponding Assert* helper. The scalar/string variants also pass the name
// of the current target.

#define HWY_ASSERT_EQ(expected, actual)                            \
  hwy::AssertEqual(expected, actual, hwy::TargetName(HWY_TARGET),  \
                   __FILE__, __LINE__)

#define HWY_ASSERT_STRING_EQ(expected, actual)                           \
  hwy::AssertStringEqual(expected, actual, hwy::TargetName(HWY_TARGET),  \
                         __FILE__, __LINE__)

#define HWY_ASSERT_VEC_EQ(d, expected, actual) \
  AssertVecEqual(d, expected, actual, __FILE__, __LINE__)

#define HWY_ASSERT_MASK_EQ(d, expected, actual) \
  AssertMaskEqual(d, expected, actual, __FILE__, __LINE__)

#endif  // HWY_ASSERT_EQ
149 
150 namespace detail {
151 
152 // Helpers for instantiating tests with combinations of lane types / counts.
153 
154 // Calls Test for each CappedTag<T, N> where N is in [kMinLanes, kMul * kMinArg]
155 // and the resulting Lanes() is in [min_lanes, max_lanes]. The upper bound
156 // is required to ensure capped vectors remain extendable. Implemented by
157 // recursively halving kMul until it is zero.
158 template <typename T, size_t kMul, size_t kMinArg, class Test>
160  static void Do(size_t min_lanes, size_t max_lanes) {
162 
163  // If we already don't have enough lanes, stop.
164  const size_t lanes = Lanes(d);
165  if (lanes < min_lanes) return;
166 
167  if (lanes <= max_lanes) {
168  Test()(T(), d);
169  }
170  ForeachCappedR<T, kMul / 2, kMinArg, Test>::Do(min_lanes, max_lanes);
171  }
172 };
173 
174 // Base case to stop the recursion.
175 template <typename T, size_t kMinArg, class Test>
176 struct ForeachCappedR<T, 0, kMinArg, Test> {
177  static void Do(size_t, size_t) {}
178 };
179 
180 #if HWY_HAVE_SCALABLE
181 
182 template <typename T>
183 constexpr int MinPow2() {
184  // Highway follows RVV LMUL in that the smallest fraction is 1/8th (encoded
185  // as kPow2 == -3). The fraction also must not result in zero lanes for the
186  // smallest possible vector size, which is 128 bits even on RISC-V (with the
187  // application processor profile).
188  return HWY_MAX(-3, -static_cast<int>(CeilLog2(16 / sizeof(T))));
189 }
190 
191 // Iterates kPow2 upward through +3.
192 template <typename T, int kPow2, int kAddPow2, class Test>
193 struct ForeachShiftR {
194  static void Do(size_t min_lanes) {
195  const ScalableTag<T, kPow2 + kAddPow2> d;
196 
197  // Precondition: [kPow2, 3] + kAddPow2 is a valid fraction of the minimum
198  // vector size, so we always have enough lanes, except ForGEVectors.
199  if (Lanes(d) >= min_lanes) {
200  Test()(T(), d);
201  } else {
202  fprintf(stderr, "%d lanes < %d: T=%d pow=%d\n",
203  static_cast<int>(Lanes(d)), static_cast<int>(min_lanes),
204  static_cast<int>(sizeof(T)), kPow2 + kAddPow2);
205  HWY_ASSERT(min_lanes != 1);
206  }
207 
208  ForeachShiftR<T, kPow2 + 1, kAddPow2, Test>::Do(min_lanes);
209  }
210 };
211 
212 // Base case to stop the recursion.
213 template <typename T, int kAddPow2, class Test>
214 struct ForeachShiftR<T, 4, kAddPow2, Test> {
215  static void Do(size_t) {}
216 };
217 #else
218 // ForeachCappedR already handled all possible sizes.
219 #endif // HWY_HAVE_SCALABLE
220 
221 } // namespace detail
222 
223 // These 'adapters' call a test for all possible N or kPow2 subject to
224 // constraints such as "vectors must be extendable" or "vectors >= 128 bits".
225 // They may be called directly, or via For*Types. Note that for an adapter C,
226 // `C<Test>(T())` does not call the test - the correct invocation is
227 // `C<Test>()(T())`, or preferably `ForAllTypes(C<Test>())`. We check at runtime
228 // that operator() is called to prevent such bugs. Note that this is not
229 // thread-safe, but that is fine because C are typically local variables.
230 
231 // Calls Test for all power of two N in [1, Lanes(d) >> kPow2]. This is for
232 // ops that widen their input, e.g. Combine (not supported by HWY_SCALAR).
233 template <class Test, int kPow2 = 1>
235  mutable bool called_ = false;
236 
237  public:
239  if (!called_) {
240  HWY_ABORT("Test is incorrect, ensure operator() is called");
241  }
242  }
243 
244  template <typename T>
245  void operator()(T /*unused*/) const {
246  called_ = true;
247  constexpr size_t kMaxCapped = HWY_LANES(T);
248  // Skip CappedTag that are already full vectors.
249  const size_t max_lanes = Lanes(ScalableTag<T>()) >> kPow2;
250  (void)kMaxCapped;
251  (void)max_lanes;
252 #if HWY_TARGET == HWY_SCALAR
253  // not supported
254 #else
255  detail::ForeachCappedR<T, (kMaxCapped >> kPow2), 1, Test>::Do(1, max_lanes);
256 #if HWY_TARGET == HWY_RVV
257  // For each [MinPow2, 3 - kPow2]; counter is [MinPow2 + kPow2, 3].
258  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, -kPow2, Test>::Do(1);
259 #elif HWY_HAVE_SCALABLE
260  // For each [MinPow2, 0 - kPow2]; counter is [MinPow2 + kPow2 + 3, 3].
261  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -kPow2 - 3,
262  Test>::Do(1);
263 #endif
264 #endif // HWY_SCALAR
265  }
266 };
267 
268 // Calls Test for all power of two N in [1 << kPow2, Lanes(d)]. This is for ops
269 // that narrow their input, e.g. UpperHalf.
270 template <class Test, int kPow2 = 1>
272  mutable bool called_ = false;
273 
274  public:
276  if (!called_) {
277  HWY_ABORT("Test is incorrect, ensure operator() is called");
278  }
279  }
280 
281  template <typename T>
282  void operator()(T /*unused*/) const {
283  called_ = true;
284  constexpr size_t kMinLanes = size_t{1} << kPow2;
285  constexpr size_t kMaxCapped = HWY_LANES(T);
286  // For shrinking, an upper limit is unnecessary.
287  constexpr size_t max_lanes = kMaxCapped;
288 
289  (void)kMinLanes;
290  (void)max_lanes;
291  (void)max_lanes;
292 #if HWY_TARGET == HWY_SCALAR
293  // not supported
294 #else
295  detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
296  kMinLanes, max_lanes);
297 #if HWY_TARGET == HWY_RVV
298  // For each [MinPow2 + kPow2, 3]; counter is [MinPow2 + kPow2, 3].
299  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, 0, Test>::Do(
300  kMinLanes);
301 #elif HWY_HAVE_SCALABLE
302  // For each [MinPow2 + kPow2, 0]; counter is [MinPow2 + kPow2 + 3, 3].
303  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -3, Test>::Do(
304  kMinLanes);
305 #endif
306 #endif // HWY_TARGET == HWY_SCALAR
307  }
308 };
309 
310 // Calls Test for all supported power of two vectors of at least kMinBits.
311 // Examples: AES or 64x64 require 128 bits, casts may require 64 bits.
312 template <size_t kMinBits, class Test>
314  mutable bool called_ = false;
315 
316  public:
318  if (!called_) {
319  HWY_ABORT("Test is incorrect, ensure operator() is called");
320  }
321  }
322 
323  template <typename T>
324  void operator()(T /*unused*/) const {
325  called_ = true;
326  constexpr size_t kMaxCapped = HWY_LANES(T);
327  constexpr size_t kMinLanes = kMinBits / 8 / sizeof(T);
328  // An upper limit is unnecessary.
329  constexpr size_t max_lanes = kMaxCapped;
330  (void)max_lanes;
331 #if HWY_TARGET == HWY_SCALAR
332  (void)kMinLanes; // not supported
333 #else
334  detail::ForeachCappedR<T, HWY_LANES(T) / kMinLanes, kMinLanes, Test>::Do(
335  kMinLanes, max_lanes);
336 #if HWY_TARGET == HWY_RVV
337  // Can be 0 (handled below) if kMinBits > 64.
338  constexpr size_t kRatio = 128 / kMinBits;
339  constexpr int kMinPow2 =
340  kRatio == 0 ? 0 : -static_cast<int>(CeilLog2(kRatio));
341  // For each [kMinPow2, 3]; counter is [kMinPow2, 3].
342  detail::ForeachShiftR<T, kMinPow2, 0, Test>::Do(kMinLanes);
343 #elif HWY_HAVE_SCALABLE
344  // Can be 0 (handled below) if kMinBits > 128.
345  constexpr size_t kRatio = 128 / kMinBits;
346  constexpr int kMinPow2 =
347  kRatio == 0 ? 0 : -static_cast<int>(CeilLog2(kRatio));
348  // For each [kMinPow2, 0]; counter is [kMinPow2 + 3, 3].
349  detail::ForeachShiftR<T, kMinPow2 + 3, -3, Test>::Do(kMinLanes);
350 #endif
351 #endif // HWY_TARGET == HWY_SCALAR
352  }
353 };
354 
355 template <class Test>
357 
358 // Calls Test for all N that can be promoted (not the same as Extendable because
359 // HWY_SCALAR has one lane). Also used for ZipLower, but not ZipUpper.
// kPow2 is the log2 of the promotion factor (see kFactor below).
360 template <class Test, int kPow2 = 1>
// NOTE(review): listing line 361 (the class declaration; the symbol index
// of this page names the destructor ~ForPromoteVectors) is absent from this
// extraction.
// Set by operator() so that the check below can detect a missing invocation.
362  mutable bool called_ = false;
363 
364  public:
// NOTE(review): listing line 365 (the ~ForPromoteVectors() header per the
// symbol index) is absent; the lines below are its body.
366  if (!called_) {
367  HWY_ABORT("Test is incorrect, ensure operator() is called");
368  }
369  }
370 
// Runs Test for lane type T; the argument value itself is unused.
371  template <typename T>
372  void operator()(T /*unused*/) const {
373  called_ = true;
374  constexpr size_t kFactor = size_t{1} << kPow2;
// The factor must be at least 2 and T scaled by it must fit in 64 bits.
375  static_assert(kFactor >= 2 && kFactor * sizeof(T) <= sizeof(uint64_t), "");
376  constexpr size_t kMaxCapped = HWY_LANES(T);
377  constexpr size_t kMinLanes = kFactor;
378  // Skip CappedTag that are already full vectors.
379  const size_t max_lanes = Lanes(ScalableTag<T>()) >> kPow2;
// Silence unused-variable warnings for the branches that do not use them.
380  (void)kMaxCapped;
381  (void)kMinLanes;
382  (void)max_lanes;
383 #if HWY_TARGET == HWY_SCALAR
// NOTE(review): listing line 384 (the HWY_SCALAR branch body) is absent from
// this extraction - restore it from the upstream file.
385 #else
386  // TODO(janwas): call Extendable if kMinLanes check not required?
387  detail::ForeachCappedR<T, (kMaxCapped >> kPow2), 1, Test>::Do(kMinLanes,
388  max_lanes);
389 #if HWY_TARGET == HWY_RVV
390  // For each [MinPow2, 3 - kPow2]; counter is [MinPow2 + kPow2, 3].
391  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, -kPow2, Test>::Do(
392  kMinLanes);
393 #elif HWY_HAVE_SCALABLE
394  // For each [MinPow2, 0 - kPow2]; counter is [MinPow2 + kPow2 + 3, 3].
395  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -kPow2 - 3,
396  Test>::Do(kMinLanes);
397 #endif
398 #endif // HWY_SCALAR
399  }
400 };
401 
402 // Calls Test for all N that can be demoted (not the same as Shrinkable because
403 // HWY_SCALAR has one lane).
404 template <class Test, int kPow2 = 1>
// NOTE(review): listing line 405 (the class declaration; the symbol index
// of this page names the destructor ~ForDemoteVectors) is absent from this
// extraction.
// Set by operator() so that the check below can detect a missing invocation.
406  mutable bool called_ = false;
407 
408  public:
// NOTE(review): listing line 409 (the ~ForDemoteVectors() header per the
// symbol index) is absent; the lines below are its body.
410  if (!called_) {
411  HWY_ABORT("Test is incorrect, ensure operator() is called");
412  }
413  }
414 
// Runs Test for lane type T; the argument value itself is unused.
415  template <typename T>
416  void operator()(T /*unused*/) const {
417  called_ = true;
418  constexpr size_t kMinLanes = size_t{1} << kPow2;
419  constexpr size_t kMaxCapped = HWY_LANES(T);
420  // For shrinking, an upper limit is unnecessary.
421  constexpr size_t max_lanes = kMaxCapped;
422 
423  (void)kMinLanes;
424  (void)max_lanes;
// NOTE(review): the next statement repeats the previous one and is redundant.
425  (void)max_lanes;
426 #if HWY_TARGET == HWY_SCALAR
// NOTE(review): listing line 427 (the HWY_SCALAR branch body) is absent from
// this extraction - restore it from the upstream file.
428 #else
429  detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
430  kMinLanes, max_lanes);
431 
432 // TODO(janwas): call Extendable if kMinLanes check not required?
433 #if HWY_TARGET == HWY_RVV
434  // For each [MinPow2 + kPow2, 3]; counter is [MinPow2 + kPow2, 3].
435  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, 0, Test>::Do(
436  kMinLanes);
437 #elif HWY_HAVE_SCALABLE
438  // For each [MinPow2 + kPow2, 0]; counter is [MinPow2 + kPow2 + 3, 3].
439  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -3, Test>::Do(
440  kMinLanes);
441 #endif
442 #endif // HWY_TARGET == HWY_SCALAR
443  }
444 };
445 
446 // For LowerHalf/Quarter.
// On non-scalar targets, calls Test for capped and scalable tags with at
// least (1 << kPow2) lanes.
447 template <class Test, int kPow2 = 1>
// NOTE(review): listing line 448 (the class declaration; the symbol index
// of this page names the destructor ~ForHalfVectors) is absent from this
// extraction.
// Set by operator() so that the check below can detect a missing invocation.
449  mutable bool called_ = false;
450 
451  public:
// NOTE(review): listing line 452 (the ~ForHalfVectors() header per the
// symbol index) is absent; the lines below are its body.
453  if (!called_) {
454  HWY_ABORT("Test is incorrect, ensure operator() is called");
455  }
456  }
457 
// Runs Test for lane type T; the argument value itself is unused.
458  template <typename T>
459  void operator()(T /*unused*/) const {
460  called_ = true;
461 #if HWY_TARGET == HWY_SCALAR
// NOTE(review): listing line 462 (the HWY_SCALAR branch body) is absent from
// this extraction - restore it from the upstream file.
463 #else
464  constexpr size_t kMinLanes = size_t{1} << kPow2;
465  // For shrinking, an upper limit is unnecessary.
466  constexpr size_t kMaxCapped = HWY_LANES(T);
467  detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
468  kMinLanes, kMaxCapped);
469 
470 // TODO(janwas): call Extendable if kMinLanes check not required?
471 #if HWY_TARGET == HWY_RVV
472  // For each [MinPow2 + kPow2, 3]; counter is [MinPow2 + kPow2, 3].
473  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, 0, Test>::Do(
474  kMinLanes);
475 #elif HWY_HAVE_SCALABLE
476  // For each [MinPow2 + kPow2, 0]; counter is [MinPow2 + kPow2 + 3, 3].
477  detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -3, Test>::Do(
478  kMinLanes);
479 #endif
480 #endif // HWY_TARGET == HWY_SCALAR
481  }
482 };
483 
484 // Calls Test for all power of two N in [1, Lanes(d)]. This is the default
485 // for ops that do not narrow nor widen their input, nor require 128 bits.
486 template <class Test>
// NOTE(review): listing line 487 (the class declaration; the symbol index
// of this page names the destructor ~ForPartialVectors) is absent from this
// extraction.
// Set by operator() so that the check below can detect a missing invocation.
488  mutable bool called_ = false;
489 
490  public:
// NOTE(review): listing line 491 (the ~ForPartialVectors() header per the
// symbol index) is absent; the lines below are its body.
492  if (!called_) {
493  HWY_ABORT("Test is incorrect, ensure operator() is called");
494  }
495  }
496 
497  template <typename T>
498  void operator()(T t) const {
499  called_ = true;
// NOTE(review): listing line 500, the statement that actually forwards `t`
// to the test, is absent from this extraction - restore it from the upstream
// file before using this listing as code.
501  }
502 };
503 
504 // Type lists to shorten call sites:
505 
// Invokes func with a value-initialized instance of every signed integer lane
// type; int64_t only if HWY_HAVE_INTEGER64.
template <class Func>
void ForSignedTypes(const Func& func) {
  func(int8_t{});
  func(int16_t{});
  func(int32_t{});
#if HWY_HAVE_INTEGER64
  func(int64_t{});
#endif
}

// Invokes func with a value-initialized instance of every unsigned integer
// lane type; uint64_t only if HWY_HAVE_INTEGER64.
template <class Func>
void ForUnsignedTypes(const Func& func) {
  func(uint8_t{});
  func(uint16_t{});
  func(uint32_t{});
#if HWY_HAVE_INTEGER64
  func(uint64_t{});
#endif
}

// Signed types first, then unsigned.
template <class Func>
void ForIntegerTypes(const Func& func) {
  ForSignedTypes(func);
  ForUnsignedTypes(func);
}

// float, plus double if HWY_HAVE_FLOAT64.
template <class Func>
void ForFloatTypes(const Func& func) {
  func(float{});
#if HWY_HAVE_FLOAT64
  func(double{});
#endif
}

// All integer types followed by all floating-point types.
template <class Func>
void ForAllTypes(const Func& func) {
  ForIntegerTypes(func);
  ForFloatTypes(func);
}
545 
// The helpers below invoke func with value-initialized instances of the lane
// types encoded in their name: U = unsigned, I = signed, F = float, followed
// by the lane size(s) in bits. Unsigned is always visited before signed.

template <class Func>
void ForUI8(const Func& func) {
  func(uint8_t{});
  func(int8_t{});
}

template <class Func>
void ForUI16(const Func& func) {
  func(uint16_t{});
  func(int16_t{});
}

// float16_t is only visited if HWY_HAVE_FLOAT16.
template <class Func>
void ForUIF16(const Func& func) {
  ForUI16(func);
#if HWY_HAVE_FLOAT16
  func(float16_t{});
#endif
}

template <class Func>
void ForUI32(const Func& func) {
  func(uint32_t{});
  func(int32_t{});
}

template <class Func>
void ForUIF32(const Func& func) {
  ForUI32(func);
  func(float{});
}

// Does nothing unless HWY_HAVE_INTEGER64.
template <class Func>
void ForUI64(const Func& func) {
#if HWY_HAVE_INTEGER64
  func(uint64_t{});
  func(int64_t{});
#endif
}

// double is only visited if HWY_HAVE_FLOAT64.
template <class Func>
void ForUIF64(const Func& func) {
  ForUI64(func);
#if HWY_HAVE_FLOAT64
  func(double{});
#endif
}

template <class Func>
void ForUI3264(const Func& func) {
  ForUI32(func);
  ForUI64(func);
}

template <class Func>
void ForUIF3264(const Func& func) {
  ForUIF32(func);
  ForUIF64(func);
}

template <class Func>
void ForUI163264(const Func& func) {
  ForUI16(func);
  ForUI3264(func);
}

template <class Func>
void ForUIF163264(const Func& func) {
  ForUIF16(func);
  ForUIF3264(func);
}
617 
618 // For tests that involve loops, adjust the trip count so that emulated tests
619 // finish quickly (but always at least 2 iterations to ensure some diversity).
620 constexpr size_t AdjustedReps(size_t max_reps) {
621 #if HWY_ARCH_RVV
622  return HWY_MAX(max_reps / 32, 2);
623 #elif HWY_IS_DEBUG_BUILD
624  return HWY_MAX(max_reps / 8, 2);
625 #elif HWY_ARCH_ARM
626  return HWY_MAX(max_reps / 4, 2);
627 #else
628  return HWY_MAX(max_reps, 2);
629 #endif
630 }
631 
// Like AdjustedReps, but for loops whose trip count is 1 << max_pow2: lowers
// the exponent by 4 on RVV and by 1 in debug builds or on Arm.
constexpr size_t AdjustedLog2Reps(size_t max_pow2) {
  // The subtraction is unsigned: if it would go "negative" it wraps around to
  // a huge value, and HWY_MIN then selects the original exponent instead.
#if HWY_ARCH_RVV
  return HWY_MIN(max_pow2 - 4, max_pow2);
#elif HWY_IS_DEBUG_BUILD || HWY_ARCH_ARM
  return HWY_MIN(max_pow2 - 1, max_pow2);
#else
  return max_pow2;
#endif
}
645 
646 // NOLINTNEXTLINE(google-readability-namespace-comments)
647 } // namespace HWY_NAMESPACE
648 } // namespace hwy
650 
651 #endif // per-target include guard
#define HWY_MAX(a, b)
Definition: base.h:128
#define HWY_NOINLINE
Definition: base.h:65
#define HWY_MIN(a, b)
Definition: base.h:127
#define HWY_ABORT(format,...)
Definition: base.h:143
#define HWY_INLINE
Definition: base.h:64
#define HWY_ASSERT(condition)
Definition: base.h:147
Definition: test_util-inl.h:405
~ForDemoteVectors()
Definition: test_util-inl.h:409
void operator()(T) const
Definition: test_util-inl.h:416
bool called_
Definition: test_util-inl.h:406
Definition: test_util-inl.h:234
void operator()(T) const
Definition: test_util-inl.h:245
bool called_
Definition: test_util-inl.h:235
~ForExtendableVectors()
Definition: test_util-inl.h:238
Definition: test_util-inl.h:313
bool called_
Definition: test_util-inl.h:314
~ForGEVectors()
Definition: test_util-inl.h:317
void operator()(T) const
Definition: test_util-inl.h:324
Definition: test_util-inl.h:448
~ForHalfVectors()
Definition: test_util-inl.h:452
bool called_
Definition: test_util-inl.h:449
void operator()(T) const
Definition: test_util-inl.h:459
Definition: test_util-inl.h:487
bool called_
Definition: test_util-inl.h:488
void operator()(T t) const
Definition: test_util-inl.h:498
~ForPartialVectors()
Definition: test_util-inl.h:491
Definition: test_util-inl.h:361
~ForPromoteVectors()
Definition: test_util-inl.h:365
bool called_
Definition: test_util-inl.h:362
void operator()(T) const
Definition: test_util-inl.h:372
Definition: test_util-inl.h:271
void operator()(T) const
Definition: test_util-inl.h:282
bool called_
Definition: test_util-inl.h:272
~ForShrinkableVectors()
Definition: test_util-inl.h:275
#define HWY_TARGET
Definition: detect_targets.h:328
d
Definition: rvv-inl.h:1656
V VecArg
Definition: ops/shared-inl.h:301
HWY_API Mask128< TTo, N > RebindMask(Simd< TTo, N, 0 > dto, Mask128< TFrom, N > m)
Definition: arm_neon-inl.h:1688
constexpr size_t AdjustedReps(size_t max_reps)
Definition: test_util-inl.h:620
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5252
void ForUIF32(const Func &func)
Definition: test_util-inl.h:573
void ForUI163264(const Func &func)
Definition: test_util-inl.h:607
HWY_API Mask128< T, N > FirstN(const Simd< T, N, 0 > d, size_t num)
Definition: arm_neon-inl.h:1896
HWY_API size_t StoreMaskBits(Simd< T, N, 0 >, const Mask128< T, N > mask, uint8_t *bits)
Definition: arm_neon-inl.h:4761
HWY_API bool AllTrue(const Simd< T, N, 0 > d, const Mask128< T, N > m)
Definition: arm_neon-inl.h:4790
void ForUIF3264(const Func &func)
Definition: test_util-inl.h:601
void ForUIF163264(const Func &func)
Definition: test_util-inl.h:613
constexpr size_t AdjustedLog2Reps(size_t max_pow2)
Definition: test_util-inl.h:633
typename detail::CappedTagChecker< T, kLimit >::type CappedTag
Definition: ops/shared-inl.h:173
HWY_API Vec128< T, N > Load(Simd< T, N, 0 > d, const T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2205
HWY_API Vec128< T, N > Zero(Simd< T, N, 0 > d)
Definition: arm_neon-inl.h:733
HWY_API size_t Lanes(Simd< T, N, kPow2 > d)
Definition: arm_sve-inl.h:218
void ForUI32(const Func &func)
Definition: test_util-inl.h:567
void ForAllTypes(const Func &func)
Definition: test_util-inl.h:541
Rebind< MakeSigned< TFromD< D > >, D > RebindToSigned
Definition: ops/shared-inl.h:199
void ForFloatTypes(const Func &func)
Definition: test_util-inl.h:533
void Print(const D d, const char *caption, VecArg< V > v, size_t lane_u=0, size_t max_lanes=7)
Definition: print-inl.h:59
HWY_API size_t CountTrue(Full128< T >, const Mask128< T > mask)
Definition: arm_neon-inl.h:4742
HWY_API Vec128< T, N > VecFromMask(Simd< T, N, 0 > d, const Mask128< T, N > v)
Definition: arm_neon-inl.h:1681
HWY_INLINE void AssertVecEqual(D d, const T *expected, VecArg< V > actual, const char *filename, const int line)
Definition: test_util-inl.h:43
void ForIntegerTypes(const Func &func)
Definition: test_util-inl.h:527
HWY_API bool AllFalse(const Full128< T > d, const Mask128< T > m)
Definition: arm_neon-inl.h:4771
void ForUI8(const Func &func)
Definition: test_util-inl.h:547
void ForUI3264(const Func &func)
Definition: test_util-inl.h:595
typename detail::ScalableTagChecker< T, kPow2 >::type ScalableTag
Definition: ops/shared-inl.h:162
void ForUIF64(const Func &func)
Definition: test_util-inl.h:587
void ForUI16(const Func &func)
Definition: test_util-inl.h:553
void ForUI64(const Func &func)
Definition: test_util-inl.h:579
void ForSignedTypes(const Func &func)
Definition: test_util-inl.h:507
HWY_NOINLINE void AssertMaskEqual(D d, VecArg< Mask< D >> a, VecArg< Mask< D >> b, const char *filename, int line)
Definition: test_util-inl.h:68
void ForUIF16(const Func &func)
Definition: test_util-inl.h:559
typename D::template Repartition< T > Repartition
Definition: ops/shared-inl.h:207
decltype(MaskFromVec(Zero(D()))) Mask
Definition: generic_ops-inl.h:38
N
Definition: rvv-inl.h:1656
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2397
HWY_INLINE Mask< D > MaskFalse(const D d)
Definition: test_util-inl.h:127
HWY_INLINE Mask< D > MaskTrue(const D d)
Definition: test_util-inl.h:122
void ForUnsignedTypes(const Func &func)
Definition: test_util-inl.h:517
decltype(Zero(D())) Vec
Definition: generic_ops-inl.h:32
HWY_TEST_DLLEXPORT void AssertArrayEqual(const TypeInfo &info, const void *expected_void, const void *actual_void, size_t N, const char *target_name, const char *filename, int line)
Definition: aligned_allocator.h:27
FuncOutput(*)(const void *, FuncInput) Func
Definition: nanobenchmark.h:105
static HWY_MAYBE_UNUSED const char * TargetName(uint32_t target)
Definition: targets.h:80
constexpr size_t CeilLog2(TI x)
Definition: base.h:700
HWY_INLINE void AssertEqual(const T expected, const T actual, const char *target_name, const char *filename, int line, size_t lane=0)
Definition: test_util.h:151
HWY_DLLEXPORT HWY_NORETURN void int line
Definition: base.h:763
#define HWY_LANES(T)
Definition: set_macros-inl.h:83
#define HWY_NAMESPACE
Definition: set_macros-inl.h:80
static void Do(size_t, size_t)
Definition: test_util-inl.h:177
Definition: test_util-inl.h:159
static void Do(size_t min_lanes, size_t max_lanes)
Definition: test_util-inl.h:160
Definition: base.h:248
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()