54 #ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
55 #define INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
61 #include <immintrin.h>
/*
 * AVX2 kernel (aligned load/store variant): reads interleaved signed 8-bit
 * samples through complexVector and writes them as two separate int16_t
 * streams (iBuffer / qBuffer), each value shifted left by 8 bits.
 *
 * NOTE(review): this listing is lossy. The embedded listing numbers jump
 * (63 -> 66 -> 68, 72 -> 104, 126 -> 132, ...), so the remaining parameters,
 * the braces, most of the MoveMask arguments and the per-iteration output
 * pointer advances are not visible here. Confirm against the real header
 * before relying on any detail below.
 */
63 static inline void volk_8ic_deinterleave_16i_x2_a_avx2(int16_t* iBuffer,
66 unsigned int num_points)
68 unsigned int number = 0;
/* Walk the input as a flat stream of signed bytes. */
69 const int8_t* complexVectorPtr = (int8_t*)complexVector;
70 int16_t* iBufferPtr = iBuffer;
71 int16_t* qBufferPtr = qBuffer;
/*
 * Control vector for the byte shuffle in the loop below.
 * NOTE(review): only the first argument (15) is visible; listing lines
 * 73-103 are missing.
 */
72 __m256i MoveMask = _mm256_set_epi8(15,
104 __m256i complexVal, iOutputVal, qOutputVal;
105 __m128i iOutputVal0, qOutputVal0;
/* One vector iteration consumes 32 input bytes, i.e. 16 points. */
107 unsigned int sixteenthPoints = num_points / 16;
109 for (number = 0; number < sixteenthPoints; number++) {
/* Aligned 256-bit load; the input pointer then advances by 32 bytes. */
110 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
111 complexVectorPtr += 32;
/*
 * Byte shuffle within each 128-bit lane, then reorder the four 64-bit
 * quadwords with control 0xd8 (picks quadwords 0, 2, 1, 3).
 */
113 complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
114 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
/* Low 128 bits feed the I output, high 128 bits feed the Q output. */
116 iOutputVal0 = _mm256_extracti128_si256(complexVal, 0);
117 qOutputVal0 = _mm256_extracti128_si256(complexVal, 1);
/* Sign-extend 16 bytes to 16 int16 lanes, then shift each lane left by 8. */
119 iOutputVal = _mm256_cvtepi8_epi16(iOutputVal0);
120 iOutputVal = _mm256_slli_epi16(iOutputVal, 8);
122 qOutputVal = _mm256_cvtepi8_epi16(qOutputVal0);
123 qOutputVal = _mm256_slli_epi16(qOutputVal, 8);
/*
 * Aligned 256-bit stores of the 16 results per output stream.
 * NOTE(review): the advances of iBufferPtr / qBufferPtr and the closing
 * brace of the loop are not visible (listing lines 127-131 are missing).
 */
125 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
126 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail for the num_points % 16 points the vector loop did not cover. */
132 number = sixteenthPoints * 16;
133 for (; number < num_points; number++) {
/*
 * NOTE(review): the assignment target and the right-hand operand of this
 * multiplication sit on dropped listing lines (134, 136); presumably this
 * mirrors the Q statement below but writes through iBufferPtr - confirm.
 */
135 ((int16_t)*complexVectorPtr++) *
/* Next input byte, widened to int16_t and scaled by 256, goes to the Q stream. */
137 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
142 #ifdef LV_HAVE_SSE4_1
143 #include <smmintrin.h>
/*
 * SSE4.1 kernel (aligned load/store variant): reads interleaved signed 8-bit
 * samples through complexVector and writes them as two separate int16_t
 * streams (iBuffer / qBuffer), each value shifted left by 8 bits.
 *
 * NOTE(review): this listing is lossy. The embedded listing numbers jump
 * (145 -> 148 -> 150, 154 -> 170, 194 -> 200, ...), so the remaining
 * parameters, the braces, most of the iMoveMask arguments and the
 * per-iteration output pointer advances are not visible here. Confirm
 * against the real header before relying on any detail below.
 */
145 static inline void volk_8ic_deinterleave_16i_x2_a_sse4_1(int16_t* iBuffer,
148 unsigned int num_points)
150 unsigned int number = 0;
/* Walk the input as a flat stream of signed bytes. */
151 const int8_t* complexVectorPtr = (int8_t*)complexVector;
152 int16_t* iBufferPtr = iBuffer;
153 int16_t* qBufferPtr = qBuffer;
/*
 * Shuffle control for the I bytes.
 * NOTE(review): only the first argument (0x80) is visible; listing lines
 * 155-169 are missing.
 */
154 __m128i iMoveMask = _mm_set_epi8(0x80,
/*
 * Shuffle control for the Q bytes. _mm_set_epi8 takes its arguments from the
 * highest byte down, so the low 8 result bytes select source bytes
 * 1, 3, ..., 15 and the high 8 result bytes are zeroed (0x80 has the top bit
 * set).
 */
170 __m128i qMoveMask = _mm_set_epi8(
171 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
172 __m128i complexVal, iOutputVal, qOutputVal;
/* One vector iteration consumes 16 input bytes, i.e. 8 points. */
174 unsigned int eighthPoints = num_points / 8;
176 for (number = 0; number < eighthPoints; number++) {
/* Aligned 128-bit load; the input pointer then advances by 16 bytes. */
177 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
178 complexVectorPtr += 16;
/*
 * Gather the bytes for each output into the low half of a register.
 * NOTE(review): the second argument of the I shuffle is on a dropped
 * listing line (181); presumably iMoveMask - confirm.
 */
180 iOutputVal = _mm_shuffle_epi8(complexVal,
182 qOutputVal = _mm_shuffle_epi8(complexVal, qMoveMask);
/* Sign-extend the low 8 bytes to 8 int16 lanes. */
184 iOutputVal = _mm_cvtepi8_epi16(iOutputVal);
/*
 * NOTE(review): as shown, the result of this shift is not assigned. Listing
 * lines 185-186 are missing and presumably hold the `iOutputVal =`
 * left-hand side (compare the Q statements below) - confirm.
 */
187 _mm_slli_epi16(iOutputVal, 8);
/* Same widen-and-shift for the Q values. */
190 qOutputVal = _mm_cvtepi8_epi16(qOutputVal);
191 qOutputVal = _mm_slli_epi16(qOutputVal, 8);
/*
 * Aligned 128-bit stores of the 8 results per output stream.
 * NOTE(review): the advances of iBufferPtr / qBufferPtr and the closing
 * brace of the loop are not visible (listing lines 195-199 are missing).
 */
193 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
194 _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
/* Scalar tail for the num_points % 8 points the vector loop did not cover. */
200 number = eighthPoints * 8;
201 for (; number < num_points; number++) {
/*
 * NOTE(review): the assignment target and the right-hand operand of this
 * multiplication sit on dropped listing lines (202, 204); presumably this
 * mirrors the Q statement below but writes through iBufferPtr - confirm.
 */
203 ((int16_t)*complexVectorPtr++) *
/* Next input byte, widened to int16_t and scaled by 256, goes to the Q stream. */
205 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
212 #include <immintrin.h>
/*
 * AVX kernel (aligned load/store variant): loads 256 bits at a time, but does
 * the byte shuffling, widening and shifting on the two 128-bit halves with
 * SSE-width intrinsics, then reassembles 256-bit results for the stores.
 * Output values are the input bytes widened to int16_t and shifted left by 8.
 *
 * NOTE(review): this listing is lossy. The line carrying the function name is
 * missing (the listing starts at 217; the cross-reference text at the end of
 * the listing places volk_8ic_deinterleave_16i_x2_a_avx at line 214), as are
 * the braces, most of the iMoveMask arguments and the per-iteration output
 * pointer advances. Confirm against the real header.
 */
217 unsigned int num_points)
219 unsigned int number = 0;
/* Walk the input as a flat stream of signed bytes. */
220 const int8_t* complexVectorPtr = (int8_t*)complexVector;
221 int16_t* iBufferPtr = iBuffer;
222 int16_t* qBufferPtr = qBuffer;
/*
 * Shuffle control for the I bytes.
 * NOTE(review): only the first argument (0x80) is visible; listing lines
 * 224-238 are missing.
 */
223 __m128i iMoveMask = _mm_set_epi8(0x80,
/*
 * Shuffle control for the Q bytes: the low 8 result bytes select source
 * bytes 1, 3, ..., 15; the high 8 result bytes are zeroed (0x80 has the top
 * bit set).
 */
239 __m128i qMoveMask = _mm_set_epi8(
240 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
241 __m256i complexVal, iOutputVal, qOutputVal;
242 __m128i complexVal1, complexVal0;
243 __m128i iOutputVal1, iOutputVal0, qOutputVal1, qOutputVal0;
/* One vector iteration consumes 32 input bytes, i.e. 16 points. */
245 unsigned int sixteenthPoints = num_points / 16;
247 for (number = 0; number < sixteenthPoints; number++) {
/* Aligned 256-bit load; the input pointer then advances by 32 bytes. */
248 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
249 complexVectorPtr += 32;
/* Split into high (index 1) and low (index 0) 128-bit halves. */
252 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
253 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
/* Gather the I and Q bytes of each half into the low 8 bytes of a register. */
255 iOutputVal1 = _mm_shuffle_epi8(
256 complexVal1, iMoveMask);
257 iOutputVal0 = _mm_shuffle_epi8(complexVal0, iMoveMask);
258 qOutputVal1 = _mm_shuffle_epi8(complexVal1, qMoveMask);
259 qOutputVal0 = _mm_shuffle_epi8(complexVal0, qMoveMask);
/*
 * NOTE(review): as shown, the next two results are not assigned. Listing
 * lines 260-261 and 263-264 are missing and presumably hold the
 * `iOutputVal1 =` left-hand sides (compare the three assigned pairs that
 * follow) - confirm.
 */
262 _mm_cvtepi8_epi16(iOutputVal1);
265 _mm_slli_epi16(iOutputVal1, 8);
/* Sign-extend the low 8 bytes to 8 int16 lanes, then shift each left by 8. */
267 iOutputVal0 = _mm_cvtepi8_epi16(iOutputVal0);
268 iOutputVal0 = _mm_slli_epi16(iOutputVal0, 8);
270 qOutputVal1 = _mm_cvtepi8_epi16(qOutputVal1);
271 qOutputVal1 = _mm_slli_epi16(qOutputVal1, 8);
272 qOutputVal0 = _mm_cvtepi8_epi16(qOutputVal0);
273 qOutputVal0 = _mm_slli_epi16(qOutputVal0, 8);
/*
 * Reassemble 256-bit results: start from an all-zero register, insert the
 * "0" half at position 0 and the "1" half at position 1.
 */
276 __m256i dummy = _mm256_setzero_si256();
277 iOutputVal = _mm256_insertf128_si256(dummy, iOutputVal0, 0);
278 iOutputVal = _mm256_insertf128_si256(iOutputVal, iOutputVal1, 1);
279 qOutputVal = _mm256_insertf128_si256(dummy, qOutputVal0, 0);
280 qOutputVal = _mm256_insertf128_si256(qOutputVal, qOutputVal1, 1);
/*
 * Aligned 256-bit stores of the 16 results per output stream.
 * NOTE(review): the advances of iBufferPtr / qBufferPtr and the closing
 * brace of the loop are not visible (listing lines 284-288 are missing).
 */
282 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
283 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail for the num_points % 16 points the vector loop did not cover. */
289 number = sixteenthPoints * 16;
290 for (; number < num_points; number++) {
/*
 * NOTE(review): the assignment target and the right-hand operand of this
 * multiplication sit on dropped listing lines (291, 293); presumably this
 * mirrors the Q statement below but writes through iBufferPtr - confirm.
 */
292 ((int16_t)*complexVectorPtr++) *
/* Next input byte, widened to int16_t and scaled by 256, goes to the Q stream. */
294 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
300 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel: for each of num_points points, takes two
 * consecutive signed bytes from the input, widens each to int16_t, multiplies
 * by 256, and appends the first to iBuffer and the second to qBuffer.
 *
 * NOTE(review): this listing is lossy. The line carrying the function name
 * is missing (the listing starts at 305; the cross-reference text at the end
 * of the listing places volk_8ic_deinterleave_16i_x2_generic at line 302), as
 * are the braces and the declaration of `number` (listing line 306 is not
 * shown). Confirm against the real header.
 */
305 unsigned int num_points)
/* Walk the input as a flat stream of signed bytes. */
307 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
308 int16_t* iBufferPtr = iBuffer;
309 int16_t* qBufferPtr = qBuffer;
311 for (number = 0; number < num_points; number++) {
/* The first byte of the pair goes to the I stream, the second to the Q stream. */
312 *iBufferPtr++ = (int16_t)(*complexVectorPtr++) * 256;
313 *qBufferPtr++ = (int16_t)(*complexVectorPtr++) * 256;
321 #ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_u_H
322 #define INCLUDED_volk_8ic_deinterleave_16i_x2_u_H
324 #include <inttypes.h>
328 #include <immintrin.h>
/*
 * AVX2 kernel (unaligned load/store variant): reads interleaved signed 8-bit
 * samples through complexVector and writes them as two separate int16_t
 * streams (iBuffer / qBuffer), each value shifted left by 8 bits. Uses
 * _mm256_loadu_si256 / _mm256_storeu_si256, which carry no alignment
 * requirement.
 *
 * NOTE(review): this listing is lossy. The embedded listing numbers jump
 * (330 -> 333 -> 335, 339 -> 371, 393 -> 399, ...), so the remaining
 * parameters, the braces, most of the MoveMask arguments and the
 * per-iteration output pointer advances are not visible here. Confirm
 * against the real header before relying on any detail below.
 */
330 static inline void volk_8ic_deinterleave_16i_x2_u_avx2(int16_t* iBuffer,
333 unsigned int num_points)
335 unsigned int number = 0;
/* Walk the input as a flat stream of signed bytes. */
336 const int8_t* complexVectorPtr = (int8_t*)complexVector;
337 int16_t* iBufferPtr = iBuffer;
338 int16_t* qBufferPtr = qBuffer;
/*
 * Control vector for the byte shuffle in the loop below.
 * NOTE(review): only the first argument (15) is visible; listing lines
 * 340-370 are missing.
 */
339 __m256i MoveMask = _mm256_set_epi8(15,
371 __m256i complexVal, iOutputVal, qOutputVal;
372 __m128i iOutputVal0, qOutputVal0;
/* One vector iteration consumes 32 input bytes, i.e. 16 points. */
374 unsigned int sixteenthPoints = num_points / 16;
376 for (number = 0; number < sixteenthPoints; number++) {
/* Unaligned 256-bit load; the input pointer then advances by 32 bytes. */
377 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
378 complexVectorPtr += 32;
/*
 * Byte shuffle within each 128-bit lane, then reorder the four 64-bit
 * quadwords with control 0xd8 (picks quadwords 0, 2, 1, 3).
 */
380 complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
381 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
/* Low 128 bits feed the I output, high 128 bits feed the Q output. */
383 iOutputVal0 = _mm256_extracti128_si256(complexVal, 0);
384 qOutputVal0 = _mm256_extracti128_si256(complexVal, 1);
/* Sign-extend 16 bytes to 16 int16 lanes, then shift each lane left by 8. */
386 iOutputVal = _mm256_cvtepi8_epi16(iOutputVal0);
387 iOutputVal = _mm256_slli_epi16(iOutputVal, 8);
389 qOutputVal = _mm256_cvtepi8_epi16(qOutputVal0);
390 qOutputVal = _mm256_slli_epi16(qOutputVal, 8);
/*
 * Unaligned 256-bit stores of the 16 results per output stream.
 * NOTE(review): the advances of iBufferPtr / qBufferPtr and the closing
 * brace of the loop are not visible (listing lines 394-398 are missing).
 */
392 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
393 _mm256_storeu_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail for the num_points % 16 points the vector loop did not cover. */
399 number = sixteenthPoints * 16;
400 for (; number < num_points; number++) {
/*
 * NOTE(review): the assignment target and the right-hand operand of this
 * multiplication sit on dropped listing lines (401, 403); presumably this
 * mirrors the Q statement below but writes through iBufferPtr - confirm.
 */
402 ((int16_t)*complexVectorPtr++) *
/* Next input byte, widened to int16_t and scaled by 256, goes to the Q stream. */
404 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
static void volk_8ic_deinterleave_16i_x2_generic(int16_t *iBuffer, int16_t *qBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_16i_x2.h:302
static void volk_8ic_deinterleave_16i_x2_a_avx(int16_t *iBuffer, int16_t *qBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_16i_x2.h:214
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:61