Vector Optimized Library of Kernels  2.4
Architecture-tuned implementations of math kernels
volk_32i_s32f_convert_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
64 #ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
65 #define INCLUDED_volk_32i_s32f_convert_32f_u_H
66 
67 #include <inttypes.h>
68 #include <stdio.h>
69 
70 #ifdef LV_HAVE_AVX512F
71 #include <immintrin.h>
72 
/*
 * Convert num_points 32-bit signed integers to floats and multiply each
 * result by 1/scalar, using AVX-512F with unaligned loads and stores.
 *
 * outputVector: destination for num_points floats (no alignment required)
 * inputVector:  source of num_points int32_t values (no alignment required)
 * scalar:       divisor; each output is (float)input * (1.0f / scalar)
 * num_points:   number of elements to convert
 */
static inline void volk_32i_s32f_convert_32f_u_avx512f(float* outputVector,
                                                       const int32_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;
    const float iScalar = 1.0f / scalar;
    const __m512 invScalar = _mm512_set1_ps(iScalar);

    /* The input is only read, so its const qualifier is preserved. */
    const int32_t* inputPtr = inputVector;
    float* outputVectorPtr = outputVector;

    for (unsigned int number = 0; number < sixteenthPoints; number++) {
        /* Load 16 integers, convert them to float, scale, store 16 floats. */
        const __m512i inputVal = _mm512_loadu_si512((const __m512i*)inputPtr);
        const __m512 ret = _mm512_mul_ps(_mm512_cvtepi32_ps(inputVal), invScalar);
        _mm512_storeu_ps(outputVectorPtr, ret);

        outputVectorPtr += 16;
        inputPtr += 16;
    }

    /* Scalar tail: the remaining (num_points % 16) elements. */
    for (unsigned int number = sixteenthPoints * 16; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
106 #endif /* LV_HAVE_AVX512F */
107 
108 
109 #ifdef LV_HAVE_AVX2
110 #include <immintrin.h>
111 
/*
 * Convert num_points 32-bit signed integers to floats and multiply each
 * result by 1/scalar, using AVX2 with unaligned loads and stores.
 *
 * outputVector: destination for num_points floats (no alignment required)
 * inputVector:  source of num_points int32_t values (no alignment required)
 * scalar:       divisor; each output is (float)input * (1.0f / scalar)
 * num_points:   number of elements to convert
 */
static inline void volk_32i_s32f_convert_32f_u_avx2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    const float iScalar = 1.0f / scalar;
    const __m256 invScalar = _mm256_set1_ps(iScalar);

    /* The input is only read, so its const qualifier is preserved. */
    const int32_t* inputPtr = inputVector;
    float* outputVectorPtr = outputVector;

    for (unsigned int number = 0; number < eighthPoints; number++) {
        /* Load 8 integers, convert them to float, scale, store 8 floats. */
        const __m256i inputVal = _mm256_loadu_si256((const __m256i*)inputPtr);
        const __m256 ret = _mm256_mul_ps(_mm256_cvtepi32_ps(inputVal), invScalar);
        _mm256_storeu_ps(outputVectorPtr, ret);

        outputVectorPtr += 8;
        inputPtr += 8;
    }

    /* Scalar tail: the remaining (num_points % 8) elements. */
    for (unsigned int number = eighthPoints * 8; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
145 #endif /* LV_HAVE_AVX2 */
146 
147 
148 #ifdef LV_HAVE_SSE2
149 #include <emmintrin.h>
150 
/*
 * Convert num_points 32-bit signed integers to floats and multiply each
 * result by 1/scalar, using SSE2 with unaligned loads and stores.
 *
 * outputVector: destination for num_points floats (no alignment required)
 * inputVector:  source of num_points int32_t values (no alignment required)
 * scalar:       divisor; each output is (float)input * (1.0f / scalar)
 * num_points:   number of elements to convert
 */
static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    const float iScalar = 1.0f / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);

    /* The input is only read, so its const qualifier is preserved. */
    const int32_t* inputPtr = inputVector;
    float* outputVectorPtr = outputVector;

    for (unsigned int number = 0; number < quarterPoints; number++) {
        /* Load 4 integers, convert them to float, scale, store 4 floats. */
        const __m128i inputVal = _mm_loadu_si128((const __m128i*)inputPtr);
        const __m128 ret = _mm_mul_ps(_mm_cvtepi32_ps(inputVal), invScalar);
        _mm_storeu_ps(outputVectorPtr, ret);

        outputVectorPtr += 4;
        inputPtr += 4;
    }

    /* Scalar tail: the remaining (num_points % 4) elements. */
    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
184 #endif /* LV_HAVE_SSE2 */
185 
186 
187 #ifdef LV_HAVE_GENERIC
188 
/*
 * Portable reference implementation: convert num_points 32-bit signed
 * integers to floats and multiply each result by 1/scalar.
 *
 * outputVector: destination for num_points floats
 * inputVector:  source of num_points int32_t values
 * scalar:       divisor; the reciprocal is computed once and reused
 * num_points:   number of elements to convert
 */
static inline void volk_32i_s32f_convert_32f_generic(float* outputVector,
                                                     const int32_t* inputVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    const float reciprocal = 1.0 / scalar;
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        outputVector[idx] = ((float)inputVector[idx]) * reciprocal;
    }
}
203 #endif /* LV_HAVE_GENERIC */
204 
205 #endif /* INCLUDED_volk_32i_s32f_convert_32f_u_H */
206 
207 
208 #ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
209 #define INCLUDED_volk_32i_s32f_convert_32f_a_H
210 
211 #include <inttypes.h>
212 #include <stdio.h>
213 
214 #ifdef LV_HAVE_AVX512F
215 #include <immintrin.h>
216 
/*
 * Convert num_points 32-bit signed integers to floats and multiply each
 * result by 1/scalar, using AVX-512F with aligned loads and stores.
 *
 * outputVector: destination for num_points floats; must be 64-byte aligned
 *               because the aligned store intrinsic is used
 * inputVector:  source of num_points int32_t values; must be 64-byte aligned
 *               because the aligned load intrinsic is used
 * scalar:       divisor; each output is (float)input * (1.0f / scalar)
 * num_points:   number of elements to convert
 */
static inline void volk_32i_s32f_convert_32f_a_avx512f(float* outputVector,
                                                       const int32_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;
    const float iScalar = 1.0f / scalar;
    const __m512 invScalar = _mm512_set1_ps(iScalar);

    /* The input is only read, so its const qualifier is preserved. */
    const int32_t* inputPtr = inputVector;
    float* outputVectorPtr = outputVector;

    for (unsigned int number = 0; number < sixteenthPoints; number++) {
        /* Load 16 integers, convert them to float, scale, store 16 floats. */
        const __m512i inputVal = _mm512_load_si512((const __m512i*)inputPtr);
        const __m512 ret = _mm512_mul_ps(_mm512_cvtepi32_ps(inputVal), invScalar);
        _mm512_store_ps(outputVectorPtr, ret);

        outputVectorPtr += 16;
        inputPtr += 16;
    }

    /* Scalar tail: the remaining (num_points % 16) elements. */
    for (unsigned int number = sixteenthPoints * 16; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
250 #endif /* LV_HAVE_AVX512F */
251 
252 #ifdef LV_HAVE_AVX2
253 #include <immintrin.h>
254 
/*
 * Convert num_points 32-bit signed integers to floats and multiply each
 * result by 1/scalar, using AVX2 with aligned loads and stores.
 *
 * outputVector: destination for num_points floats; must be 32-byte aligned
 *               because the aligned store intrinsic is used
 * inputVector:  source of num_points int32_t values; must be 32-byte aligned
 *               because the aligned load intrinsic is used
 * scalar:       divisor; each output is (float)input * (1.0f / scalar)
 * num_points:   number of elements to convert
 */
static inline void volk_32i_s32f_convert_32f_a_avx2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    const float iScalar = 1.0f / scalar;
    const __m256 invScalar = _mm256_set1_ps(iScalar);

    /* The input is only read, so its const qualifier is preserved. */
    const int32_t* inputPtr = inputVector;
    float* outputVectorPtr = outputVector;

    for (unsigned int number = 0; number < eighthPoints; number++) {
        /* Load 8 integers, convert them to float, scale, store 8 floats. */
        const __m256i inputVal = _mm256_load_si256((const __m256i*)inputPtr);
        const __m256 ret = _mm256_mul_ps(_mm256_cvtepi32_ps(inputVal), invScalar);
        _mm256_store_ps(outputVectorPtr, ret);

        outputVectorPtr += 8;
        inputPtr += 8;
    }

    /* Scalar tail: the remaining (num_points % 8) elements. */
    for (unsigned int number = eighthPoints * 8; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
288 #endif /* LV_HAVE_AVX2 */
289 
290 
291 #ifdef LV_HAVE_SSE2
292 #include <emmintrin.h>
293 
/*
 * Convert num_points 32-bit signed integers to floats and multiply each
 * result by 1/scalar, using SSE2 with aligned loads and stores.
 *
 * outputVector: destination for num_points floats; must be 16-byte aligned
 *               because the aligned store intrinsic is used
 * inputVector:  source of num_points int32_t values; must be 16-byte aligned
 *               because the aligned load intrinsic is used
 * scalar:       divisor; each output is (float)input * (1.0f / scalar)
 * num_points:   number of elements to convert
 */
static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    const float iScalar = 1.0f / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);

    /* The input is only read, so its const qualifier is preserved. */
    const int32_t* inputPtr = inputVector;
    float* outputVectorPtr = outputVector;

    for (unsigned int number = 0; number < quarterPoints; number++) {
        /* Load 4 integers, convert them to float, scale, store 4 floats. */
        const __m128i inputVal = _mm_load_si128((const __m128i*)inputPtr);
        const __m128 ret = _mm_mul_ps(_mm_cvtepi32_ps(inputVal), invScalar);
        _mm_store_ps(outputVectorPtr, ret);

        outputVectorPtr += 4;
        inputPtr += 4;
    }

    /* Scalar tail: the remaining (num_points % 4) elements. */
    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
327 #endif /* LV_HAVE_SSE2 */
328 
329 
330 #ifdef LV_HAVE_GENERIC
331 
/*
 * Portable implementation exposed under the aligned-kernel name: convert
 * num_points 32-bit signed integers to floats and multiply each result by
 * 1/scalar. Only plain scalar loads and stores are used.
 *
 * outputVector: destination for num_points floats
 * inputVector:  source of num_points int32_t values
 * scalar:       divisor; the reciprocal is computed once and reused
 * num_points:   number of elements to convert
 */
static inline void volk_32i_s32f_convert_32f_a_generic(float* outputVector,
                                                       const int32_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    const float reciprocal = 1.0 / scalar;
    const int32_t* src = inputVector;
    float* dst = outputVector;
    unsigned int remaining;

    /* Walk both arrays in lockstep until every element has been converted. */
    for (remaining = num_points; remaining != 0; remaining--) {
        const float converted = (float)(*src);
        *dst = converted * reciprocal;
        src++;
        dst++;
    }
}
346 #endif /* LV_HAVE_GENERIC */
347 
348 
349 #endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */
volk_32i_s32f_convert_32f_a_generic
static void volk_32i_s32f_convert_32f_a_generic(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:332
volk_32i_s32f_convert_32f_a_sse2
static void volk_32i_s32f_convert_32f_a_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:294
volk_32i_s32f_convert_32f_generic
static void volk_32i_s32f_convert_32f_generic(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:189
volk_32i_s32f_convert_32f_u_sse2
static void volk_32i_s32f_convert_32f_u_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:151