Vector Optimized Library of Kernels  2.5.1
Architecture-tuned implementations of math kernels
volk_32fc_s32f_power_spectrum_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
53 #ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
54 #define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
55 
56 #include <inttypes.h>
57 #include <math.h>
58 #include <stdio.h>
59 
60 #ifdef LV_HAVE_GENERIC
61 
62 static inline void
64  const lv_32fc_t* complexFFTInput,
65  const float normalizationFactor,
66  unsigned int num_points)
67 {
68  // Calculate the Power of the complex point
69  const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
70 
71  // Calculate dBm
72  // 50 ohm load assumption
73  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
74  // 75 ohm load assumption
75  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
76 
77  /*
78  * For generic reference, the code below is a volk-optimized
79  * approach that also leverages a faster log2 calculation
80  * to calculate the log10:
81  * n*log10(x) = n*log2(x)/log2(10) = (n/log2(10)) * log2(x)
82  *
83  * Generic code:
84  *
85  * const float real = *inputPtr++ * iNormalizationFactor;
86  * const float imag = *inputPtr++ * iNormalizationFactor;
87  * realFFTDataPointsPtr = 10.0*log10f(((real * real) + (imag * imag)) + 1e-20);
88  * realFFTDataPointsPtr++;
89  *
90  */
91 
92  // Calc mag^2
93  volk_32fc_magnitude_squared_32f(logPowerOutput, complexFFTInput, num_points);
94 
95  // Finish ((real * real) + (imag * imag)) calculation:
96  volk_32f_s32f_multiply_32f(logPowerOutput, logPowerOutput, normFactSq, num_points);
97 
98  // The following calculates 10*log10(x) = 10*log2(x)/log2(10) = (10/log2(10))
99  // * log2(x)
100  volk_32f_log2_32f(logPowerOutput, logPowerOutput, num_points);
101  volk_32f_s32f_multiply_32f(
102  logPowerOutput, logPowerOutput, volk_log2to10factor, num_points);
103 }
104 #endif /* LV_HAVE_GENERIC */
105 
106 #ifdef LV_HAVE_SSE3
107 #include <pmmintrin.h>
108 
109 #ifdef LV_HAVE_LIB_SIMDMATH
110 #include <simdmath.h>
111 #endif /* LV_HAVE_LIB_SIMDMATH */
112 
113 static inline void
115  const lv_32fc_t* complexFFTInput,
116  const float normalizationFactor,
117  unsigned int num_points)
118 {
119  const float* inputPtr = (const float*)complexFFTInput;
120  float* destPtr = logPowerOutput;
121  uint64_t number = 0;
122  const float iNormalizationFactor = 1.0 / normalizationFactor;
123 #ifdef LV_HAVE_LIB_SIMDMATH
124  __m128 magScalar = _mm_set_ps1(10.0);
125  magScalar = _mm_div_ps(magScalar, logf4(magScalar));
126 
127  __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
128 
129  __m128 power;
130  __m128 input1, input2;
131  const uint64_t quarterPoints = num_points / 4;
132  for (; number < quarterPoints; number++) {
133  // Load the complex values
134  input1 = _mm_load_ps(inputPtr);
135  inputPtr += 4;
136  input2 = _mm_load_ps(inputPtr);
137  inputPtr += 4;
138 
139  // Apply the normalization factor
140  input1 = _mm_mul_ps(input1, invNormalizationFactor);
141  input2 = _mm_mul_ps(input2, invNormalizationFactor);
142 
143  // Multiply each value by itself
144  // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
145  input1 = _mm_mul_ps(input1, input1);
146  // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
147  input2 = _mm_mul_ps(input2, input2);
148 
149  // Horizontal add, to add (r*r) + (i*i) for each complex value
150  // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
151  power = _mm_hadd_ps(input1, input2);
152 
153  // Calculate the natural log power
154  power = logf4(power);
155 
156  // Convert to log10 and multiply by 10.0
157  power = _mm_mul_ps(power, magScalar);
158 
159  // Store the floating point results
160  _mm_store_ps(destPtr, power);
161 
162  destPtr += 4;
163  }
164 
165  number = quarterPoints * 4;
166 #endif /* LV_HAVE_LIB_SIMDMATH */
167  // Calculate the FFT for any remaining points
168 
169  for (; number < num_points; number++) {
170  // Calculate dBm
171  // 50 ohm load assumption
172  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
173  // 75 ohm load assumption
174  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
175 
176  const float real = *inputPtr++ * iNormalizationFactor;
177  const float imag = *inputPtr++ * iNormalizationFactor;
178 
179  *destPtr = volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
180 
181  destPtr++;
182  }
183 }
184 #endif /* LV_HAVE_SSE3 */
185 
186 #ifdef LV_HAVE_NEON
187 #include <arm_neon.h>
189 
190 static inline void
192  const lv_32fc_t* complexFFTInput,
193  const float normalizationFactor,
194  unsigned int num_points)
195 {
196  float* logPowerOutputPtr = logPowerOutput;
197  const lv_32fc_t* complexFFTInputPtr = complexFFTInput;
198  const float iNormalizationFactor = 1.0 / normalizationFactor;
199  unsigned int number;
200  unsigned int quarter_points = num_points / 4;
201  float32x4x2_t fft_vec;
202  float32x4_t log_pwr_vec;
203  float32x4_t mag_squared_vec;
204 
205  const float inv_ln10_10 = 4.34294481903f; // 10.0/ln(10.)
206 
207  for (number = 0; number < quarter_points; number++) {
208  // Load
209  fft_vec = vld2q_f32((float*)complexFFTInputPtr);
210  // Prefetch next 4
211  __VOLK_PREFETCH(complexFFTInputPtr + 4);
212  // Normalize
213  fft_vec.val[0] = vmulq_n_f32(fft_vec.val[0], iNormalizationFactor);
214  fft_vec.val[1] = vmulq_n_f32(fft_vec.val[1], iNormalizationFactor);
215  mag_squared_vec = _vmagnitudesquaredq_f32(fft_vec);
216  log_pwr_vec = vmulq_n_f32(_vlogq_f32(mag_squared_vec), inv_ln10_10);
217  // Store
218  vst1q_f32(logPowerOutputPtr, log_pwr_vec);
219  // Move pointers ahead
220  complexFFTInputPtr += 4;
221  logPowerOutputPtr += 4;
222  }
223 
224  // deal with the rest
225  for (number = quarter_points * 4; number < num_points; number++) {
226  const float real = lv_creal(*complexFFTInputPtr) * iNormalizationFactor;
227  const float imag = lv_cimag(*complexFFTInputPtr) * iNormalizationFactor;
228 
229  *logPowerOutputPtr =
230  volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
231  complexFFTInputPtr++;
232  logPowerOutputPtr++;
233  }
234 }
235 
236 #endif /* LV_HAVE_NEON */
237 
238 #endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */
static void volk_32fc_s32f_power_spectrum_32f_a_sse3(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:114
static void volk_32fc_s32f_power_spectrum_32f_generic(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:63
static void volk_32fc_s32f_power_spectrum_32f_neon(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:191
#define volk_log2to10factor
Definition: volk_common.h:160
static float log2f_non_ieee(float f)
Definition: volk_common.h:150
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:62
#define lv_cimag(x)
Definition: volk_complex.h:89
#define lv_creal(x)
Definition: volk_complex.h:87
float complex lv_32fc_t
Definition: volk_complex.h:65
static float32x4_t _vlogq_f32(float32x4_t x)
Definition: volk_neon_intrinsics.h:157
static float32x4_t _vmagnitudesquaredq_f32(float32x4x2_t cmplxValue)
Definition: volk_neon_intrinsics.h:87