Grok  9.7.5
skeleton-inl.h
Go to the documentation of this file.
1 // Copyright 2020 Google LLC
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 // Demo of functions that might be called from multiple SIMD modules (either
17 // other -inl.h files, or a .cc file between begin/end_target-inl). This is
18 // optional - all SIMD code can reside in .cc files. However, this allows
19 // splitting code into different files while still inlining instead of requiring
20 // calling through function pointers.
21 
22 // Include guard (still compiled once per target)
23 #if defined(HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_) == defined(HWY_TARGET_TOGGLE)
24 #ifdef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
25 #undef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
26 #else
27 #define HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
28 #endif
29 
30 // It is fine to #include normal or *-inl headers.
31 #include <stddef.h>
32 
33 #include "hwy/highway.h"
34 
36 namespace skeleton {
37 namespace HWY_NAMESPACE {
38 
39 using namespace hwy::HWY_NAMESPACE;
40 
41 // Example of a type-agnostic (caller-specified lane type) and width-agnostic
42 // (uses best available instruction set) function in a header.
43 //
44 // Computes x[i] = mul_array[i] * x_array[i] + add_array[i] for i < size.
45 template <class D, typename T>
46 HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T* HWY_RESTRICT mul_array,
47  const T* HWY_RESTRICT add_array,
48  const size_t size, T* HWY_RESTRICT x_array) {
49  for (size_t i = 0; i < size; i += Lanes(d)) {
50  const auto mul = Load(d, mul_array + i);
51  const auto add = Load(d, add_array + i);
52  auto x = Load(d, x_array + i);
53  x = MulAdd(mul, x, add);
54  Store(x, d, x_array + i);
55  }
56 }
57 
58 // NOLINTNEXTLINE(google-readability-namespace-comments)
59 } // namespace HWY_NAMESPACE
60 } // namespace skeleton
62 
63 #endif // include guard
#define HWY_RESTRICT
Definition: base.h:63
#define HWY_MAYBE_UNUSED
Definition: base.h:75
Definition: copy-inl.h:31
d
Definition: rvv-inl.h:1656
HWY_API Vec128< float, N > MulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1290
HWY_API Vec128< T, N > Load(Simd< T, N, 0 > d, const T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2205
HWY_API size_t Lanes(Simd< T, N, kPow2 > d)
Definition: arm_sve-inl.h:218
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2397
HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T *HWY_RESTRICT mul_array, const T *HWY_RESTRICT add_array, const size_t size, T *HWY_RESTRICT x_array)
Definition: skeleton-inl.h:46
Definition: skeleton-inl.h:36
#define HWY_NAMESPACE
Definition: set_macros-inl.h:80
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()