Grok  9.7.5
targets.h
Go to the documentation of this file.
1 // Copyright 2020 Google LLC
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #ifndef HIGHWAY_HWY_TARGETS_H_
17 #define HIGHWAY_HWY_TARGETS_H_
18 
19 #include <vector>
20 
21 // For SIMD module implementations and their callers. Defines which targets to
22 // generate and call.
23 
24 #include "hwy/base.h"
25 #include "hwy/detect_targets.h"
26 #include "hwy/highway_export.h"
27 
28 #if !HWY_ARCH_RVV
29 #include <atomic>
30 #endif
31 
32 namespace hwy {
33 
34 // Returns (cached) bitfield of enabled targets that are supported on this CPU.
35 // Implemented in targets.cc; unconditionally compiled to support the use case
36 // of binary-only distributions. The HWY_SUPPORTED_TARGETS wrapper may allow
37 // eliding calls to this function.
38 HWY_DLLEXPORT uint32_t SupportedTargets();
39 
40 // Evaluates to a call to hwy::SupportedTargets(), or to the literal HWY_TARGETS if HWY_TARGETS has at most one bit set (x & (x - 1) clears the lowest set bit, so the test below is true exactly when no second bit remains).
41 #if (HWY_TARGETS & (HWY_TARGETS - 1)) == 0
42 #define HWY_SUPPORTED_TARGETS HWY_TARGETS
43 #else
44 #define HWY_SUPPORTED_TARGETS hwy::SupportedTargets()
45 #endif
46 
47 // Removes the targets in the `disabled_targets` bitmask from runtime dispatch.
48 // Targets that were not enabled at compile time are ignored. This is useful to
49 // disable a target supported by the CPU that is known to have bugs, or when a
50 // lower target is desired. Attempts to disable targets that are in
51 // HWY_ENABLED_BASELINE have no effect, so SupportedTargets() always returns at
52 // least the baseline target.
53 HWY_DLLEXPORT void DisableTargets(uint32_t disabled_targets);
54 
55 // Set the mock mask of CPU supported targets instead of the actual CPU
56 // supported targets computed in SupportedTargets(). The return value of
57 // SupportedTargets() will still be affected by the DisableTargets() mask
58 // regardless of this mock, to prevent accidentally adding targets that are
59 // known to be buggy in the current CPU. Call with a mask of 0 to disable the
60 // mock and use the actual CPU supported targets instead.
61 HWY_DLLEXPORT void SetSupportedTargetsForTest(uint32_t targets);
62 
63 // Returns whether the SupportedTargets() function was called since the last
64 // SetSupportedTargetsForTest() call.
65 HWY_DLLEXPORT bool SupportedTargetsCalledForTest();
66 
67 // Return the list of targets in HWY_TARGETS supported by the CPU as a list of
68 // individual HWY_* target macros such as HWY_SCALAR or HWY_NEON. This list
69 // is affected by the current SetSupportedTargetsForTest() mock if any.
70 HWY_INLINE std::vector<uint32_t> SupportedAndGeneratedTargets() {
71  std::vector<uint32_t> ret;
72  for (uint32_t targets = SupportedTargets() & HWY_TARGETS; targets != 0;
73  targets = targets & (targets - 1)) {
74  uint32_t current_target = targets & ~(targets - 1);
75  ret.push_back(current_target);
76  }
77  return ret;
78 }
79 
80 static inline HWY_MAYBE_UNUSED const char* TargetName(uint32_t target) {
81  switch (target) {
82 #if HWY_ARCH_X86
83  case HWY_SSSE3:
84  return "SSSE3";
85  case HWY_SSE4:
86  return "SSE4";
87  case HWY_AVX2:
88  return "AVX2";
89  case HWY_AVX3:
90  return "AVX3";
91  case HWY_AVX3_DL:
92  return "AVX3_DL";
93 #endif
94 
95 #if HWY_ARCH_ARM
96  case HWY_SVE2:
97  return "SVE2";
98  case HWY_SVE:
99  return "SVE";
100  case HWY_NEON:
101  return "Neon";
102 #endif
103 
104 #if HWY_ARCH_PPC
105  case HWY_PPC8:
106  return "Power8";
107 #endif
108 
109 #if HWY_ARCH_WASM
110  case HWY_WASM:
111  return "Wasm";
112 #endif
113 
114 #if HWY_ARCH_RVV
115  case HWY_RVV:
116  return "RVV";
117 #endif
118 
119  case HWY_SCALAR:
120  return "Scalar";
121 
122  default:
123  return "Unknown"; // must satisfy gtest IsValidParamName()
124  }
125 }
126 
127 // The maximum number of dynamic targets on any architecture is defined by
128 // HWY_MAX_DYNAMIC_TARGETS and depends on the arch.
129 
130 // For the ChosenTarget mask and index we use a different bit arrangement than
131 // in the HWY_TARGETS mask. Only the targets involved in the current
132 // architecture are used in this mask, and therefore only the least significant
133 // (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the uint32_t mask are used. The least
134 // significant bit is set when the mask is not initialized, the next
135 // HWY_MAX_DYNAMIC_TARGETS more significant bits are a range of bits from the
136 // HWY_TARGETS or SupportedTargets() mask for the given architecture shifted to
137 // that position and the next more significant bit is used for the scalar
138 // target. Because of this we need to define equivalent values for HWY_TARGETS
139 // in this representation.
140 // This mask representation allows using ctz() on the mask to obtain a small
141 // number that is used as an index into the table for dynamic dispatch. In this
142 // way the first entry is used when the mask is uninitialized, the following
143 // HWY_MAX_DYNAMIC_TARGETS are for dynamic dispatch and the last one is for
144 // scalar.
145 
146 // The HWY_SCALAR bit in the ChosenTarget mask format: bit number HWY_MAX_DYNAMIC_TARGETS + 1, i.e. one position above the dynamic-target bits.
147 #define HWY_CHOSEN_TARGET_MASK_SCALAR (1u << (HWY_MAX_DYNAMIC_TARGETS + 1))
148 
149 // Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the
150 // current architecture: shifts X right by (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS), keeps the low HWY_MAX_DYNAMIC_TARGETS bits, then shifts left by one so that bit 0 stays clear. The scalar bit is not produced here.
151 #define HWY_CHOSEN_TARGET_SHIFT(X) \
152  ((((X) >> (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS)) & \
153  ((1u << HWY_MAX_DYNAMIC_TARGETS) - 1)) \
154  << 1)
155 
156 // The HWY_TARGETS mask in the ChosenTarget mask format, with the scalar bit and bit 0 (the "not initialized" bit) always set.
157 #define HWY_CHOSEN_TARGET_MASK_TARGETS \
158  (HWY_CHOSEN_TARGET_SHIFT(HWY_TARGETS) | HWY_CHOSEN_TARGET_MASK_SCALAR | 1u)
159 
160 #if HWY_ARCH_X86
161 // Maximum number of dynamic targets; changing this value is an ABI-incompatible
162 // change.
163 #define HWY_MAX_DYNAMIC_TARGETS 10
164 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_X86
165 // These must match the order in which the HWY_TARGETS are defined,
166 // starting at the least significant (HWY_HIGHEST_TARGET_BIT + 1 -
167 // HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly
168 // HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first entry
169 // corresponds to the best target. Don't include a "," at the end of the list.
170 #define HWY_CHOOSE_TARGET_LIST(func_name) \
171  nullptr, /* reserved */ \
172  nullptr, /* reserved */ \
173  HWY_CHOOSE_AVX3_DL(func_name), /* AVX3_DL */ \
174  HWY_CHOOSE_AVX3(func_name), /* AVX3 */ \
175  HWY_CHOOSE_AVX2(func_name), /* AVX2 */ \
176  nullptr, /* AVX */ \
177  HWY_CHOOSE_SSE4(func_name), /* SSE4 */ \
178  HWY_CHOOSE_SSSE3(func_name), /* SSSE3 */ \
179  nullptr, /* SSE3 */ \
180  nullptr /* SSE2 */
181 
182 #elif HWY_ARCH_ARM
183 // See HWY_ARCH_X86 above for details. Four slots: SVE2, SVE, one reserved, NEON.
184 #define HWY_MAX_DYNAMIC_TARGETS 4
185 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_ARM
186 #define HWY_CHOOSE_TARGET_LIST(func_name) \
187  HWY_CHOOSE_SVE2(func_name), /* SVE2 */ \
188  HWY_CHOOSE_SVE(func_name), /* SVE */ \
189  nullptr, /* reserved */ \
190  HWY_CHOOSE_NEON(func_name) /* NEON */
191 
192 #elif HWY_ARCH_PPC
193 // See HWY_ARCH_X86 above for details. Five slots, of which only PPC8 is populated.
194 #define HWY_MAX_DYNAMIC_TARGETS 5
195 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_PPC
196 #define HWY_CHOOSE_TARGET_LIST(func_name) \
197  nullptr, /* reserved */ \
198  nullptr, /* reserved */ \
199  HWY_CHOOSE_PPC8(func_name), /* PPC8 */ \
200  nullptr, /* VSX */ \
201  nullptr /* AltiVec */
202 
203 #elif HWY_ARCH_WASM
204 // See HWY_ARCH_X86 above for details. Four slots: two reserved, then WASM2 and WASM.
205 #define HWY_MAX_DYNAMIC_TARGETS 4
206 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_WASM
207 #define HWY_CHOOSE_TARGET_LIST(func_name) \
208  nullptr, /* reserved */ \
209  nullptr, /* reserved */ \
210  HWY_CHOOSE_WASM2(func_name), /* WASM2 */ \
211  HWY_CHOOSE_WASM(func_name) /* WASM */
212 
213 #elif HWY_ARCH_RVV
214 // See HWY_ARCH_X86 above for details. Four slots: three reserved, then RVV.
215 #define HWY_MAX_DYNAMIC_TARGETS 4
216 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV
217 #define HWY_CHOOSE_TARGET_LIST(func_name) \
218  nullptr, /* reserved */ \
219  nullptr, /* reserved */ \
220  nullptr, /* reserved */ \
221  HWY_CHOOSE_RVV(func_name) /* RVV */
222 
223 #else
224 // Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
225 // still creating single-entry tables in HWY_EXPORT to ensure portability. NOTE(review): HWY_CHOOSE_TARGET_LIST is not defined in this branch; presumably HWY_EXPORT does not expand it in this configuration - confirm.
226 #define HWY_MAX_DYNAMIC_TARGETS 1
227 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR
228 #endif
229 
230 struct ChosenTarget {
231  public:
232  // Update the ChosenTarget mask based on the current CPU supported
233  // targets.
235 
236  // Reset the ChosenTarget to the uninitialized state.
237  void DeInit() { StoreMask(1); }
238 
239  // Whether the ChosenTarget was initialized. This is useful to know whether
240  // any HWY_DYNAMIC_DISPATCH function was called.
241  bool IsInitialized() const { return LoadMask() != 1; }
242 
243  // Return the index in the dynamic dispatch table to be used by the current
244  // CPU. Note that this method must be in the header file so it uses the value
245  // of HWY_CHOSEN_TARGET_MASK_TARGETS defined in the translation unit that
246  // calls it, which may be different from others. This allows to only consider
247  // those targets that were actually compiled in this module.
248  size_t HWY_INLINE GetIndex() const {
251  }
252 
253  private:
254  // TODO(janwas): remove #if once <atomic> is available
255 #if HWY_ARCH_RVV
256  uint32_t LoadMask() const { return mask_; }
257  void StoreMask(uint32_t mask) { mask_ = mask; }
258 
259  uint32_t mask_{1}; // Initialized to 1 so GetIndex() returns 0.
260 #else
261  uint32_t LoadMask() const { return mask_.load(); }
262  void StoreMask(uint32_t mask) { mask_.store(mask); }
263 
264  std::atomic<uint32_t> mask_{1}; // Initialized to 1 so GetIndex() returns 0.
265 #endif // HWY_ARCH_RVV
266 };
267 
268 // For internal use (e.g. by FunctionCache and DisableTargets).
269 HWY_DLLEXPORT ChosenTarget& GetChosenTarget();
270 
271 } // namespace hwy
272 
273 #endif // HIGHWAY_HWY_TARGETS_H_
#define HWY_INLINE
Definition: base.h:64
#define HWY_MAYBE_UNUSED
Definition: base.h:75
#define HWY_AVX3_DL
Definition: detect_targets.h:58
#define HWY_NEON
Definition: detect_targets.h:75
#define HWY_PPC8
Definition: detect_targets.h:80
#define HWY_SVE2
Definition: detect_targets.h:72
#define HWY_AVX3
Definition: detect_targets.h:59
#define HWY_AVX2
Definition: detect_targets.h:60
#define HWY_SCALAR
Definition: detect_targets.h:98
#define HWY_WASM
Definition: detect_targets.h:86
#define HWY_SVE
Definition: detect_targets.h:73
#define HWY_RVV
Definition: detect_targets.h:92
#define HWY_TARGETS
Definition: detect_targets.h:375
#define HWY_SSE4
Definition: detect_targets.h:62
#define HWY_SSSE3
Definition: detect_targets.h:63
#define HWY_DLLEXPORT
Definition: highway_export.h:18
Definition: aligned_allocator.h:27
HWY_DLLEXPORT void SetSupportedTargetsForTest(uint32_t targets)
HWY_DLLEXPORT ChosenTarget & GetChosenTarget()
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x)
Definition: base.h:598
static HWY_MAYBE_UNUSED const char * TargetName(uint32_t target)
Definition: targets.h:80
HWY_DLLEXPORT uint32_t SupportedTargets()
HWY_INLINE std::vector< uint32_t > SupportedAndGeneratedTargets()
Definition: targets.h:70
HWY_DLLEXPORT void DisableTargets(uint32_t disabled_targets)
HWY_DLLEXPORT bool SupportedTargetsCalledForTest()
Definition: targets.h:230
uint32_t LoadMask() const
Definition: targets.h:261
void StoreMask(uint32_t mask)
Definition: targets.h:262
bool IsInitialized() const
Definition: targets.h:241
HWY_DLLEXPORT void Update()
size_t HWY_INLINE GetIndex() const
Definition: targets.h:248
void DeInit()
Definition: targets.h:237
std::atomic< uint32_t > mask_
Definition: targets.h:264
#define HWY_CHOSEN_TARGET_MASK_TARGETS
Definition: targets.h:157