// Grok 9.7.5 - src/lib/jp2/highway/hwy/ops/set_macros-inl.h
// Go to the documentation of this file.
1
// Copyright 2020 Google LLC
2
// SPDX-License-Identifier: Apache-2.0
3
//
4
// Licensed under the Apache License, Version 2.0 (the "License");
5
// you may not use this file except in compliance with the License.
6
// You may obtain a copy of the License at
7
//
8
// http://www.apache.org/licenses/LICENSE-2.0
9
//
10
// Unless required by applicable law or agreed to in writing, software
11
// distributed under the License is distributed on an "AS IS" BASIS,
12
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
// See the License for the specific language governing permissions and
14
// limitations under the License.
15
16
// Sets macros based on HWY_TARGET.
17
18
// This include guard is toggled by foreach_target, so avoid the usual _H_
// suffix to prevent copybara from renaming it.
//
// Whenever HWY_SET_MACROS_PER_TARGET and HWY_TARGET_TOGGLE are in the same
// state (both defined or both undefined), HWY_SET_MACROS_PER_TARGET is flipped.
// The conditional closes immediately, so the definitions that follow are
// processed on every inclusion of this header.
#if defined(HWY_SET_MACROS_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
#ifdef HWY_SET_MACROS_PER_TARGET
#undef HWY_SET_MACROS_PER_TARGET
#else
#define HWY_SET_MACROS_PER_TARGET
#endif
#endif  // HWY_SET_MACROS_PER_TARGET
28
29
#include "hwy/detect_targets.h"
30
31
// Reset every per-target macro so that this header can be included again for
// a different HWY_TARGET without redefinition conflicts.
#undef HWY_NAMESPACE
#undef HWY_ALIGN
#undef HWY_MAX_BYTES
#undef HWY_LANES

#undef HWY_HAVE_SCALABLE
#undef HWY_HAVE_INTEGER64
#undef HWY_HAVE_FLOAT16
#undef HWY_HAVE_FLOAT64
#undef HWY_MEM_OPS_MIGHT_FAULT
// Only the SSSE3 branch defines HWY_CAP_AES; clear it too so its value does not
// carry over into whichever target is processed next.
#undef HWY_CAP_AES
#undef HWY_CAP_GE256
#undef HWY_CAP_GE512

#undef HWY_TARGET_STR
45
46
// Building blocks for HWY_TARGET_STR on x86. Each optional group is either
// empty (when the matching HWY_DISABLE_* macro is defined) or a feature list
// with a leading comma, so it can be appended to a preceding string literal by
// adjacent-literal concatenation.
#if defined(HWY_DISABLE_PCLMUL_AES)
#define HWY_TARGET_STR_PCLMUL_AES ""
#else
#define HWY_TARGET_STR_PCLMUL_AES ",pclmul,aes"
#endif

#if defined(HWY_DISABLE_BMI2_FMA)
#define HWY_TARGET_STR_BMI2_FMA ""
#else
#define HWY_TARGET_STR_BMI2_FMA ",bmi,bmi2,fma"
#endif

#if defined(HWY_DISABLE_F16C)
#define HWY_TARGET_STR_F16C ""
#else
#define HWY_TARGET_STR_F16C ",f16c"
#endif

#define HWY_TARGET_STR_SSSE3 "sse2,ssse3"

#define HWY_TARGET_STR_SSE4 \
  HWY_TARGET_STR_SSSE3 ",sse4.1,sse4.2" HWY_TARGET_STR_PCLMUL_AES
// Include previous targets, which are the half-vectors of the next target.
#define HWY_TARGET_STR_AVX2 \
  HWY_TARGET_STR_SSE4 ",avx,avx2" HWY_TARGET_STR_BMI2_FMA HWY_TARGET_STR_F16C
#define HWY_TARGET_STR_AVX3 \
  HWY_TARGET_STR_AVX2 ",avx512f,avx512vl,avx512dq,avx512bw"
73
74
// Before include guard so we redefine HWY_TARGET_STR on each include,
// governed by the current HWY_TARGET.
//
// Exactly one branch of the chain below is selected by HWY_TARGET. Every
// branch defines HWY_NAMESPACE, HWY_ALIGN, HWY_MAX_BYTES, HWY_LANES(T),
// HWY_HAVE_{SCALABLE,INTEGER64,FLOAT16,FLOAT64}, HWY_MEM_OPS_MIGHT_FAULT and
// HWY_CAP_GE{256,512}. Only the x86, PPC8 and WASM branches define
// HWY_TARGET_STR; the others leave it undefined.
//-----------------------------------------------------------------------------
// SSSE3
#if HWY_TARGET == HWY_SSSE3

#define HWY_NAMESPACE N_SSSE3
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
// NOTE(review): HWY_CAP_AES is defined only in this branch; no other target
// in this chain sets it.
#define HWY_CAP_AES 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_SSSE3

//-----------------------------------------------------------------------------
// SSE4
#elif HWY_TARGET == HWY_SSE4

#define HWY_NAMESPACE N_SSE4
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_SSE4

//-----------------------------------------------------------------------------
// AVX2
#elif HWY_TARGET == HWY_AVX2

#define HWY_NAMESPACE N_AVX2
#define HWY_ALIGN alignas(32)
#define HWY_MAX_BYTES 32
#define HWY_LANES(T) (32 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_AVX2

//-----------------------------------------------------------------------------
// AVX3[_DL]
#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL

// Settings shared by both AVX3 variants; namespace and target string differ.
#define HWY_ALIGN alignas(64)
#define HWY_MAX_BYTES 64
#define HWY_LANES(T) (64 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 1

#if HWY_TARGET == HWY_AVX3

#define HWY_NAMESPACE N_AVX3
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3

#elif HWY_TARGET == HWY_AVX3_DL

#define HWY_NAMESPACE N_AVX3_DL
// NOTE(review): "avxvnni" is the compiler's name for the VEX-encoded AVX-VNNI
// extension; the AVX-512 flavour is spelled "avx512vnni" - confirm which one
// is intended for this target.
#define HWY_TARGET_STR \
  HWY_TARGET_STR_AVX3  \
  ",vpclmulqdq,avx512vbmi2,vaes,avxvnni,avx512bitalg,avx512vpopcntdq"

#else
#error "Logic error"
#endif  // HWY_TARGET == HWY_AVX3_DL

//-----------------------------------------------------------------------------
// PPC8
#elif HWY_TARGET == HWY_PPC8

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_PPC8

#define HWY_TARGET_STR "altivec,vsx"

//-----------------------------------------------------------------------------
// NEON
#elif HWY_TARGET == HWY_NEON

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

// FLOAT64 is enabled only when HWY_ARCH_ARM_A64 is nonzero.
#if HWY_ARCH_ARM_A64
#define HWY_HAVE_FLOAT64 1
#else
#define HWY_HAVE_FLOAT64 0
#endif
#define HWY_MEM_OPS_MIGHT_FAULT 1

#define HWY_NAMESPACE N_NEON

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

//-----------------------------------------------------------------------------
// SVE[2]
#elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE

// SVE only requires lane alignment, not natural alignment of the entire vector.
#define HWY_ALIGN alignas(8)

#define HWY_MAX_BYTES 256

// Value ensures MaxLanes() is the tightest possible upper bound to reduce
// overallocation.
#define HWY_LANES(T) ((HWY_MAX_BYTES) / sizeof(T))

#define HWY_HAVE_SCALABLE 1
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#if HWY_TARGET == HWY_SVE2
#define HWY_NAMESPACE N_SVE2
#else
#define HWY_NAMESPACE N_SVE
#endif

// HWY_TARGET_STR remains undefined

//-----------------------------------------------------------------------------
// WASM
#elif HWY_TARGET == HWY_WASM

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 0
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_WASM

#define HWY_TARGET_STR "simd128"

//-----------------------------------------------------------------------------
// WASM2
#elif HWY_TARGET == HWY_WASM2

#define HWY_ALIGN alignas(32)
#define HWY_MAX_BYTES 32
#define HWY_LANES(T) (32 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 0
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_WASM2

#define HWY_TARGET_STR "simd128"

//-----------------------------------------------------------------------------
// RVV
#elif HWY_TARGET == HWY_RVV

// RVV only requires lane alignment, not natural alignment of the entire vector,
// and the compiler already aligns builtin types, so nothing to do here.
#define HWY_ALIGN

// The spec requires VLEN <= 2^16 bits, so the limit is 2^16 bytes (LMUL=8).
#define HWY_MAX_BYTES 65536

// = HWY_MAX_BYTES divided by max LMUL=8 because MaxLanes includes the actual
// LMUL. This is the tightest possible upper bound.
#define HWY_LANES(T) (8192 / sizeof(T))

#define HWY_HAVE_SCALABLE 1
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

// FLOAT16 follows the compiler-provided __riscv_zfh macro.
#if defined(__riscv_zfh)
#define HWY_HAVE_FLOAT16 1
#else
#define HWY_HAVE_FLOAT16 0
#endif

#define HWY_NAMESPACE N_RVV

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
// (rv64gcv is not a valid target)

//-----------------------------------------------------------------------------
// SCALAR
#elif HWY_TARGET == HWY_SCALAR

#define HWY_ALIGN
#define HWY_MAX_BYTES 8
#define HWY_LANES(T) 1

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_SCALAR

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

#else
// Unknown target: emit a diagnostic message; none of the macros are defined.
#pragma message("HWY_TARGET does not match any known target")
#endif  // HWY_TARGET
335
336
// Override this to 1 in asan/msan builds, which will still fault.
// Applied after the per-target value above, so it wins regardless of target.
#if HWY_IS_ASAN || HWY_IS_MSAN
#undef HWY_MEM_OPS_MIGHT_FAULT
#define HWY_MEM_OPS_MIGHT_FAULT 1
#endif
341
342
// Clang <9 requires this be invoked at file scope, before any namespace.
// When HWY_TARGET_STR is defined, expands to HWY_PUSH_ATTRIBUTES for that
// string; in both cases it ends in a static_assert without a semicolon, so the
// invocation must be written as `HWY_BEFORE_NAMESPACE();`.
#undef HWY_BEFORE_NAMESPACE
#if defined(HWY_TARGET_STR)
#define HWY_BEFORE_NAMESPACE()        \
  HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_BEFORE_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif
353
354
// Clang <9 requires any namespaces be closed before this macro.
// When HWY_TARGET_STR is defined, expands to HWY_POP_ATTRIBUTES; in both cases
// it ends in a static_assert without a semicolon, so the invocation must be
// written as `HWY_AFTER_NAMESPACE();`.
#undef HWY_AFTER_NAMESPACE
#if defined(HWY_TARGET_STR)
#define HWY_AFTER_NAMESPACE() \
  HWY_POP_ATTRIBUTES          \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_AFTER_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif
365
366
// Per-function target attribute. Expands to __attribute__((target(...))) with
// the current HWY_TARGET_STR when that string is defined and
// HWY_HAS_ATTRIBUTE(target) is nonzero; otherwise expands to nothing.
#undef HWY_ATTR
#if defined(HWY_TARGET_STR) && HWY_HAS_ATTRIBUTE(target)
#define HWY_ATTR __attribute__((target(HWY_TARGET_STR)))
#else
#define HWY_ATTR
#endif
// detect_targets.h
// Generated by 1.9.1