Grok  9.7.5
result-inl.h
Go to the documentation of this file.
1 // Copyright 2021 Google LLC
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
17 
18 // Normal include guard for non-SIMD parts
19 #ifndef HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
20 #define HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
21 
22 #include <time.h>
23 
24 #include <algorithm> // std::sort
25 #include <string>
26 
27 #include "hwy/base.h"
28 #include "hwy/nanobenchmark.h"
29 
30 namespace hwy {
31 
32 struct Timestamp {
34  double t;
35 };
36 
37 double SecondsSince(const Timestamp& t0) {
38  const Timestamp t1;
39  return t1.t - t0.t;
40 }
41 
// Repetition count; the trimming window in SummarizeMeasurements is sized for
// this many measurements.
constexpr size_t kReps = 30;

// Returns trimmed mean (we don't want to run an out-of-L3-cache sort often
// enough for the mode to be reliable).
//
// Sorts `seconds` in place (ascending), then averages the elements with index
// in [kReps / 4, size - kReps / 2). If that window is empty because fewer
// measurements were supplied than the window assumes, the bounds are scaled to
// the actual size instead: [size / 4, size - size / 2), which is the same
// window when size == kReps and is non-empty for any size >= 1.
// Returns 0.0 for an empty input.
//
// Declared `inline` because the definition lives in a header.
inline double SummarizeMeasurements(std::vector<double>& seconds) {
  const size_t num = seconds.size();
  if (num == 0) return 0.0;  // nothing to average; avoid 0/0

  std::sort(seconds.begin(), seconds.end());

  // Window tuned for kReps samples. Guard the subtraction: `num - kReps / 2`
  // would wrap around for small inputs and send the loop out of bounds.
  size_t begin = kReps / 4;
  size_t end = (num > kReps / 2) ? num - kReps / 2 : 0;
  if (begin >= end) {
    // Too few samples for the fixed window: trim proportionally instead.
    begin = num / 4;
    end = num - num / 2;
  }

  double sum = 0.0;
  for (size_t i = begin; i < end; ++i) {
    sum += seconds[i];
  }
  return sum / static_cast<double>(end - begin);
}
56 
57 } // namespace hwy
58 #endif // HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
59 
60 // Per-target
61 #if defined(HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE) == \
62  defined(HWY_TARGET_TOGGLE)
63 #ifdef HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
64 #undef HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
65 #else
66 #define HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
67 #endif
68 
70 namespace hwy {
71 namespace HWY_NAMESPACE {
72 
73 struct Result {
74  Result() {}
75  Result(const uint32_t target, const Algo algo, Dist dist, bool is128,
76  size_t num, size_t num_threads, double sec, size_t sizeof_t,
77  const char* type_name)
78  : target(target),
79  algo(algo),
80  dist(dist),
81  is128(is128),
82  num(num),
84  sec(sec),
87 
88  void Print() const {
89  const double bytes = static_cast<double>(num) *
90  static_cast<double>(num_threads) *
91  static_cast<double>(sizeof_t);
92  printf("%10s: %12s: %7s: %9s: %.2E %4.0f MB/s (%2zu threads)\n",
94  is128 ? "u128" : type_name.c_str(), DistName(dist),
95  static_cast<double>(num), bytes * 1E-6 / sec, num_threads);
96  }
97 
98  uint32_t target;
101  bool is128;
102  size_t num = 0;
103  size_t num_threads = 0;
104  double sec = 0.0;
105  size_t sizeof_t = 0;
106  std::string type_name;
107 };
108 
109 template <typename T, class Traits>
110 Result MakeResult(const Algo algo, Dist dist, Traits st, size_t num,
111  size_t num_threads, double sec) {
112  char string100[100];
113  hwy::detail::TypeName(hwy::detail::MakeTypeInfo<T>(), 1, string100);
114  return Result(HWY_TARGET, algo, dist, st.Is128(), num, num_threads, sec,
115  sizeof(T), string100);
116 }
117 
118 template <class Traits, typename T>
119 bool VerifySort(Traits st, const InputStats<T>& input_stats, const T* out,
120  size_t num, const char* caller) {
121  constexpr size_t N1 = st.Is128() ? 2 : 1;
122  HWY_ASSERT(num >= N1);
123 
124  InputStats<T> output_stats;
125  // Ensure it matches the sort order
126  for (size_t i = 0; i < num - N1; i += N1) {
127  output_stats.Notify(out[i]);
128  if (N1 == 2) output_stats.Notify(out[i + 1]);
129  // Reverse order instead of checking !Compare1 so we accept equal keys.
130  if (st.Compare1(out + i + N1, out + i)) {
131  printf("%s: i=%d of %d: N1=%d %5.0f %5.0f vs. %5.0f %5.0f\n\n", caller,
132  static_cast<int>(i), static_cast<int>(num), static_cast<int>(N1),
133  double(out[i + 1]), double(out[i + 0]), double(out[i + N1 + 1]),
134  double(out[i + N1]));
135  HWY_ABORT("%d-bit sort is incorrect\n",
136  static_cast<int>(sizeof(T) * 8 * N1));
137  }
138  }
139  output_stats.Notify(out[num - N1]);
140  if (N1 == 2) output_stats.Notify(out[num - N1 + 1]);
141 
142  return input_stats == output_stats;
143 }
144 
145 // NOLINTNEXTLINE(google-readability-namespace-comments)
146 } // namespace HWY_NAMESPACE
147 } // namespace hwy
149 
150 #endif // HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
#define HWY_ABORT(format,...)
Definition: base.h:143
#define HWY_ASSERT(condition)
Definition: base.h:147
Definition: algo-inl.h:74
void Notify(T value)
Definition: algo-inl.h:76
#define HWY_TARGET
Definition: detect_targets.h:328
bool VerifySort(Traits st, const InputStats< T > &input_stats, const T *out, size_t num, const char *caller)
Definition: result-inl.h:119
Result MakeResult(const Algo algo, Dist dist, Traits st, size_t num, size_t num_threads, double sec)
Definition: result-inl.h:110
HWY_DLLEXPORT void TypeName(const TypeInfo &info, size_t N, char *string100)
HWY_DLLEXPORT double Now()
Definition: aligned_allocator.h:27
const char * AlgoName(Algo algo)
Definition: algo-inl.h:137
double SecondsSince(const Timestamp &t0)
Definition: result-inl.h:37
Dist
Definition: algo-inl.h:55
const char * DistName(Dist dist)
Definition: algo-inl.h:61
static HWY_MAYBE_UNUSED const char * TargetName(uint32_t target)
Definition: targets.h:80
constexpr size_t kReps
Definition: result-inl.h:42
Algo
Definition: algo-inl.h:116
double SummarizeMeasurements(std::vector< double > &seconds)
Definition: result-inl.h:46
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()
#define HWY_NAMESPACE
Definition: set_macros-inl.h:80
Definition: result-inl.h:73
double sec
Definition: result-inl.h:104
uint32_t target
Definition: result-inl.h:98
size_t sizeof_t
Definition: result-inl.h:105
Algo algo
Definition: result-inl.h:99
bool is128
Definition: result-inl.h:101
Dist dist
Definition: result-inl.h:100
std::string type_name
Definition: result-inl.h:106
size_t num
Definition: result-inl.h:102
void Print() const
Definition: result-inl.h:88
size_t num_threads
Definition: result-inl.h:103
Result()
Definition: result-inl.h:74
Result(const uint32_t target, const Algo algo, Dist dist, bool is128, size_t num, size_t num_threads, double sec, size_t sizeof_t, const char *type_name)
Definition: result-inl.h:75
Definition: result-inl.h:32
double t
Definition: result-inl.h:34
Timestamp()
Definition: result-inl.h:33