Vector Optimized Library of Kernels  2.2
Architecture-tuned implementations of math kernels
volk_32fc_s32f_power_spectrum_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
53 #ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
54 #define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
55 
56 #include <inttypes.h>
57 #include <math.h>
58 #include <stdio.h>
59 
60 #ifdef LV_HAVE_SSE3
61 #include <pmmintrin.h>
62 
63 #ifdef LV_HAVE_LIB_SIMDMATH
64 #include <simdmath.h>
65 #endif /* LV_HAVE_LIB_SIMDMATH */
66 
67 static inline void
69  const lv_32fc_t* complexFFTInput,
70  const float normalizationFactor,
71  unsigned int num_points)
72 {
73  const float* inputPtr = (const float*)complexFFTInput;
74  float* destPtr = logPowerOutput;
75  uint64_t number = 0;
76  const float iNormalizationFactor = 1.0 / normalizationFactor;
77 #ifdef LV_HAVE_LIB_SIMDMATH
78  __m128 magScalar = _mm_set_ps1(10.0);
79  magScalar = _mm_div_ps(magScalar, logf4(magScalar));
80 
81  __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
82 
83  __m128 power;
84  __m128 input1, input2;
85  const uint64_t quarterPoints = num_points / 4;
86  for (; number < quarterPoints; number++) {
87  // Load the complex values
88  input1 = _mm_load_ps(inputPtr);
89  inputPtr += 4;
90  input2 = _mm_load_ps(inputPtr);
91  inputPtr += 4;
92 
93  // Apply the normalization factor
94  input1 = _mm_mul_ps(input1, invNormalizationFactor);
95  input2 = _mm_mul_ps(input2, invNormalizationFactor);
96 
97  // Multiply each value by itself
98  // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
99  input1 = _mm_mul_ps(input1, input1);
100  // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
101  input2 = _mm_mul_ps(input2, input2);
102 
103  // Horizontal add, to add (r*r) + (i*i) for each complex value
104  // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
105  power = _mm_hadd_ps(input1, input2);
106 
107  // Calculate the natural log power
108  power = logf4(power);
109 
110  // Convert to log10 and multiply by 10.0
111  power = _mm_mul_ps(power, magScalar);
112 
113  // Store the floating point results
114  _mm_store_ps(destPtr, power);
115 
116  destPtr += 4;
117  }
118 
119  number = quarterPoints * 4;
120 #endif /* LV_HAVE_LIB_SIMDMATH */
121  // Calculate the FFT for any remaining points
122 
123  for (; number < num_points; number++) {
124  // Calculate dBm
125  // 50 ohm load assumption
126  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
127  // 75 ohm load assumption
128  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
129 
130  const float real = *inputPtr++ * iNormalizationFactor;
131  const float imag = *inputPtr++ * iNormalizationFactor;
132 
133  *destPtr = 10.0 * log10f(((real * real) + (imag * imag)) + 1e-20);
134 
135  destPtr++;
136  }
137 }
138 #endif /* LV_HAVE_SSE3 */
139 
140 #ifdef LV_HAVE_NEON
141 #include <arm_neon.h>
143 
144 static inline void
146  const lv_32fc_t* complexFFTInput,
147  const float normalizationFactor,
148  unsigned int num_points)
149 {
150  float* logPowerOutputPtr = logPowerOutput;
151  const lv_32fc_t* complexFFTInputPtr = complexFFTInput;
152  const float iNormalizationFactor = 1.0 / normalizationFactor;
153  unsigned int number;
154  unsigned int quarter_points = num_points / 4;
155  float32x4x2_t fft_vec;
156  float32x4_t log_pwr_vec;
157  float32x4_t mag_squared_vec;
158 
159  const float inv_ln10_10 = 4.34294481903f; // 10.0/ln(10.)
160 
161  for (number = 0; number < quarter_points; number++) {
162  // Load
163  fft_vec = vld2q_f32((float*)complexFFTInputPtr);
164  // Prefetch next 4
165  __VOLK_PREFETCH(complexFFTInputPtr + 4);
166  // Normalize
167  fft_vec.val[0] = vmulq_n_f32(fft_vec.val[0], iNormalizationFactor);
168  fft_vec.val[1] = vmulq_n_f32(fft_vec.val[1], iNormalizationFactor);
169  mag_squared_vec = _vmagnitudesquaredq_f32(fft_vec);
170  log_pwr_vec = vmulq_n_f32(_vlogq_f32(mag_squared_vec), inv_ln10_10);
171  // Store
172  vst1q_f32(logPowerOutputPtr, log_pwr_vec);
173  // Move pointers ahead
174  complexFFTInputPtr += 4;
175  logPowerOutputPtr += 4;
176  }
177 
178  // deal with the rest
179  for (number = quarter_points * 4; number < num_points; number++) {
180  const float real = lv_creal(*complexFFTInputPtr) * iNormalizationFactor;
181  const float imag = lv_cimag(*complexFFTInputPtr) * iNormalizationFactor;
182  *logPowerOutputPtr = 10.0 * log10f(((real * real) + (imag * imag)) + 1e-20);
183  complexFFTInputPtr++;
184  logPowerOutputPtr++;
185  }
186 }
187 
188 #endif /* LV_HAVE_NEON */
189 
190 #ifdef LV_HAVE_GENERIC
191 
192 static inline void
194  const lv_32fc_t* complexFFTInput,
195  const float normalizationFactor,
196  unsigned int num_points)
197 {
198  // Calculate the Power of the complex point
199  const float* inputPtr = (float*)complexFFTInput;
200  float* realFFTDataPointsPtr = logPowerOutput;
201  const float iNormalizationFactor = 1.0 / normalizationFactor;
202  unsigned int point;
203  for (point = 0; point < num_points; point++) {
204  // Calculate dBm
205  // 50 ohm load assumption
206  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
207  // 75 ohm load assumption
208  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
209 
210  const float real = *inputPtr++ * iNormalizationFactor;
211  const float imag = *inputPtr++ * iNormalizationFactor;
212 
213  *realFFTDataPointsPtr = 10.0 * log10f(((real * real) + (imag * imag)) + 1e-20);
214  realFFTDataPointsPtr++;
215  }
216 }
217 #endif /* LV_HAVE_GENERIC */
218 
219 #endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */
static void volk_32fc_s32f_power_spectrum_32f_neon(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:145
static void volk_32fc_s32f_power_spectrum_32f_a_sse3(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:68
static void volk_32fc_s32f_power_spectrum_32f_generic(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:193
static float32x4_t _vmagnitudesquaredq_f32(float32x4x2_t cmplxValue)
Definition: volk_neon_intrinsics.h:87
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:62
float complex lv_32fc_t
Definition: volk_complex.h:70
static float32x4_t _vlogq_f32(float32x4_t x)
Definition: volk_neon_intrinsics.h:157
#define lv_creal(x)
Definition: volk_complex.h:92
#define lv_cimag(x)
Definition: volk_complex.h:94