#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
#define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H

/*
 * Kernels that split an interleaved complex vector (I0 Q0 I1 Q1 ...) into two
 * separate float vectors: iBuffer receives the even-indexed floats (real
 * parts) and qBuffer the odd-indexed floats (imaginary parts).
 *
 * NOTE(review): only the LV_HAVE_GENERIC guard is visible in this file. The
 * LV_HAVE_AVX / LV_HAVE_SSE / LV_HAVE_NEON guards below follow the same naming
 * pattern and are required so that each ISA header is only included where it
 * exists -- confirm the macro names against the build configuration.
 */

#ifdef LV_HAVE_AVX
#include <immintrin.h>

/**
 * De-interleave complex samples with AVX using aligned loads and stores.
 *
 * _mm256_load_ps / _mm256_store_ps are the aligned variants, so all three
 * buffers must satisfy their 32-byte alignment requirement.
 *
 * @param iBuffer        Output: real parts, num_points floats.
 * @param qBuffer        Output: imaginary parts, num_points floats.
 * @param complexVector  Input: num_points interleaved complex samples.
 * @param num_points     Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_32f_x2_a_avx(float* iBuffer,
                                                       float* qBuffer,
                                                       const lv_32fc_t* complexVector,
                                                       unsigned int num_points)
{
    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;
    float* qBufferPtr = qBuffer;

    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;
    __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;

    /* Each iteration consumes 16 floats (8 complex samples). */
    for (; number < eighthPoints; number++) {
        cplxValue1 = _mm256_load_ps(complexVectorPtr);
        complexVectorPtr += 8;

        cplxValue2 = _mm256_load_ps(complexVectorPtr);
        complexVectorPtr += 8;

        /*
         * _mm256_shuffle_ps works per 128-bit lane, so first regroup lanes:
         * complex1 = samples 0,1 | 4,5 and complex2 = samples 2,3 | 6,7.
         */
        complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
        complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);

        /* 0x88 picks elements 0 and 2 of each lane: I0..I7 in order. */
        iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
        /* 0xdd picks elements 1 and 3 of each lane: Q0..Q7 in order. */
        qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);

        _mm256_store_ps(iBufferPtr, iValue);
        _mm256_store_ps(qBufferPtr, qValue);

        /* Advance the outputs; without this every pass overwrites slot 0. */
        iBufferPtr += 8;
        qBufferPtr += 8;
    }

    /* Scalar tail for the num_points % 8 remaining samples. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        *qBufferPtr++ = *complexVectorPtr++;
    }
}
#endif /* LV_HAVE_AVX */

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/**
 * De-interleave complex samples with SSE using aligned loads and stores.
 *
 * _mm_load_ps / _mm_store_ps are the aligned variants, so all three buffers
 * must satisfy their 16-byte alignment requirement.
 *
 * @param iBuffer        Output: real parts, num_points floats.
 * @param qBuffer        Output: imaginary parts, num_points floats.
 * @param complexVector  Input: num_points interleaved complex samples.
 * @param num_points     Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer,
                                                       float* qBuffer,
                                                       const lv_32fc_t* complexVector,
                                                       unsigned int num_points)
{
    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;
    float* qBufferPtr = qBuffer;

    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;
    __m128 cplxValue1, cplxValue2, iValue, qValue;

    /* Each iteration consumes 8 floats (4 complex samples). */
    for (; number < quarterPoints; number++) {
        cplxValue1 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        cplxValue2 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        /* Elements 0 and 2 of each input: I0 I1 I2 I3. */
        iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
        /* Elements 1 and 3 of each input: Q0 Q1 Q2 Q3. */
        qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));

        _mm_store_ps(iBufferPtr, iValue);
        _mm_store_ps(qBufferPtr, qValue);

        /* Advance the outputs; without this every pass overwrites slot 0. */
        iBufferPtr += 4;
        qBufferPtr += 4;
    }

    /* Scalar tail for the num_points % 4 remaining samples. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        *qBufferPtr++ = *complexVectorPtr++;
    }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/**
 * De-interleave complex samples with NEON.
 *
 * vld2q_f32 loads 8 consecutive floats and de-interleaves them into two
 * 4-lane vectors (val[0] = even elements, val[1] = odd elements), so no
 * explicit shuffle is needed.
 *
 * @param iBuffer        Output: real parts, num_points floats.
 * @param qBuffer        Output: imaginary parts, num_points floats.
 * @param complexVector  Input: num_points interleaved complex samples.
 * @param num_points     Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_32f_x2_neon(float* iBuffer,
                                                      float* qBuffer,
                                                      const lv_32fc_t* complexVector,
                                                      unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarter_points = num_points / 4;
    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;
    float* qBufferPtr = qBuffer;
    float32x4x2_t complexInput;

    /* Each iteration consumes 8 floats (4 complex samples). */
    for (number = 0; number < quarter_points; number++) {
        complexInput = vld2q_f32(complexVectorPtr);
        vst1q_f32(iBufferPtr, complexInput.val[0]);
        vst1q_f32(qBufferPtr, complexInput.val[1]);
        complexVectorPtr += 8;

        /* Advance the outputs; without this every pass overwrites slot 0. */
        iBufferPtr += 4;
        qBufferPtr += 4;
    }

    /* Scalar tail for the num_points % 4 remaining samples. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        *qBufferPtr++ = *complexVectorPtr++;
    }
}
#endif /* LV_HAVE_NEON */

#ifdef LV_HAVE_GENERIC

/**
 * Portable scalar de-interleave: copies one real and one imaginary float per
 * complex sample.
 *
 * @param iBuffer        Output: real parts, num_points floats.
 * @param qBuffer        Output: imaginary parts, num_points floats.
 * @param complexVector  Input: num_points interleaved complex samples.
 * @param num_points     Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_32f_x2_generic(float* iBuffer,
                                                         float* qBuffer,
                                                         const lv_32fc_t* complexVector,
                                                         unsigned int num_points)
{
    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;
    float* qBufferPtr = qBuffer;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        *qBufferPtr++ = *complexVectorPtr++;
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a_H */
#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
#define INCLUDED_volk_32fc_deinterleave_32f_x2_u_H

#include <inttypes.h>

/*
 * NOTE(review): the LV_HAVE_AVX guard name follows the LV_HAVE_GENERIC pattern
 * used elsewhere in this file; it keeps <immintrin.h> out of non-AVX builds.
 * Confirm the macro name against the build configuration.
 */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/**
 * De-interleave complex samples with AVX using unaligned loads and stores.
 *
 * Splits the interleaved input (I0 Q0 I1 Q1 ...) into iBuffer (even-indexed
 * floats, real parts) and qBuffer (odd-indexed floats, imaginary parts).
 * _mm256_loadu_ps / _mm256_storeu_ps are the unaligned variants, so the
 * buffers need no particular alignment.
 *
 * @param iBuffer        Output: real parts, num_points floats.
 * @param qBuffer        Output: imaginary parts, num_points floats.
 * @param complexVector  Input: num_points interleaved complex samples.
 * @param num_points     Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_32f_x2_u_avx(float* iBuffer,
                                                       float* qBuffer,
                                                       const lv_32fc_t* complexVector,
                                                       unsigned int num_points)
{
    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;
    float* qBufferPtr = qBuffer;

    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;
    __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;

    /* Each iteration consumes 16 floats (8 complex samples). */
    for (; number < eighthPoints; number++) {
        cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
        complexVectorPtr += 8;

        cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
        complexVectorPtr += 8;

        /*
         * _mm256_shuffle_ps works per 128-bit lane, so first regroup lanes:
         * complex1 = samples 0,1 | 4,5 and complex2 = samples 2,3 | 6,7.
         */
        complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
        complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);

        /* 0x88 picks elements 0 and 2 of each lane: I0..I7 in order. */
        iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
        /* 0xdd picks elements 1 and 3 of each lane: Q0..Q7 in order. */
        qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);

        _mm256_storeu_ps(iBufferPtr, iValue);
        _mm256_storeu_ps(qBufferPtr, qValue);

        /* Advance the outputs; without this every pass overwrites slot 0. */
        iBufferPtr += 8;
        qBufferPtr += 8;
    }

    /* Scalar tail for the num_points % 8 remaining samples. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        *qBufferPtr++ = *complexVectorPtr++;
    }
}
#endif /* LV_HAVE_AVX */

#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_u_H */
/*
 * Cross-reference index (residue from the generated source listing):
 *
 * static void volk_32fc_deinterleave_32f_x2_neon(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
 *   Definition: volk_32fc_deinterleave_32f_x2.h:170
 * static void volk_32fc_deinterleave_32f_x2_a_sse(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
 *   Definition: volk_32fc_deinterleave_32f_x2.h:127
 * static void volk_32fc_deinterleave_32f_x2_u_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
 *   Definition: volk_32fc_deinterleave_32f_x2.h:228
 * static void volk_32fc_deinterleave_32f_x2_generic(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
 *   Definition: volk_32fc_deinterleave_32f_x2.h:201
 * static void volk_32fc_deinterleave_32f_x2_a_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
 *   Definition: volk_32fc_deinterleave_32f_x2.h:81
 * float complex lv_32fc_t
 *   Definition: volk_complex.h:70
 */