#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H

#include <inttypes.h>

/*
 * Aligned kernels: each one takes the real part of every complex sample in
 * complexVector, multiplies it by scalar and stores the result as int16_t
 * in iBuffer.  The imaginary parts are read past but never used.
 *
 * NOTE(review): lv_32fc_t and __VOLK_ATTR_ALIGNED are not declared in this
 * section; presumably they come from the VOLK common headers - confirm that
 * those are included before this point.
 */

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief AVX2 kernel (aligned): scaled real parts of complexVector as int16_t.
 *
 * \param iBuffer       Output, num_points values. Written with
 *                      _mm_store_si128, so it must be 16-byte aligned.
 * \param complexVector Input, num_points interleaved (real, imag) float
 *                      pairs. Read with _mm256_load_ps, so it must be
 *                      32-byte aligned.
 * \param scalar        Factor applied to each real part before conversion.
 * \param num_points    Number of complex samples to process.
 *
 * The vector path truncates toward zero and saturates to the int16_t range
 * (signed pack).  The scalar tail uses a plain cast, which also truncates
 * but does not saturate.
 */
static inline void
volk_32fc_s32f_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
                                            const lv_32fc_t* complexVector,
                                            const float scalar,
                                            unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    const float* complexVectorPtr = (const float*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    unsigned int number;

    const __m256 vScalar = _mm256_set1_ps(scalar);
    /* Gathers dwords 0, 4, 1, 5 of the packed result into the low 128 bits;
     * the upper four indices are don't-care because only the low half is
     * stored. */
    const __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);

    for (number = 0; number < eighthPoints; number++) {
        /* 16 floats = 8 complex samples per iteration. */
        const __m256 cplxValue1 = _mm256_load_ps(complexVectorPtr);
        const __m256 cplxValue2 = _mm256_load_ps(complexVectorPtr + 8);
        __m256 iValue;
        __m256i a;
        __m128i b;

        complexVectorPtr += 16;

        /* Keep elements 0 and 2 of each 128-bit lane (the real parts):
         * low lane -> r0 r1 r4 r5, high lane -> r2 r3 r6 r7. */
        iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
        iValue = _mm256_mul_ps(iValue, vScalar);

        /* Truncate toward zero so the result matches the C cast used in the
         * scalar tail, then convert and narrow with signed saturation. */
        iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
        a = _mm256_cvtps_epi32(iValue);
        a = _mm256_packs_epi32(a, a);

        /* Undo the per-lane ordering so the low half holds r0..r7 in order. */
        a = _mm256_permutevar8x32_epi32(a, idx);
        b = _mm256_extracti128_si256(a, 0);

        _mm_store_si128((__m128i*)iBufferPtr, b);
        iBufferPtr += 8; /* eight outputs were just written */
    }

    /* Scalar tail for the remaining (num_points % 8) samples. */
    number = eighthPoints * 8;
    iBufferPtr = &iBuffer[number];
    for (; number < num_points; number++) {
        *iBufferPtr++ = (int16_t)(*complexVectorPtr * scalar);
        complexVectorPtr += 2; /* step over the imaginary part */
    }
}
#endif /* LV_HAVE_AVX2 */

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
 * \brief SSE kernel (aligned): scaled real parts of complexVector as int16_t.
 *
 * \param iBuffer       Output, num_points values (written with scalar
 *                      stores).
 * \param complexVector Input, num_points interleaved (real, imag) float
 *                      pairs. Read with _mm_load_ps, so it must be 16-byte
 *                      aligned.
 * \param scalar        Factor applied to each real part before conversion.
 * \param num_points    Number of complex samples to process.
 *
 * Conversion to int16_t is a plain C cast: it truncates toward zero and does
 * not saturate.
 */
static inline void
volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t* iBuffer,
                                           const lv_32fc_t* complexVector,
                                           const float scalar,
                                           unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    const float* complexVectorPtr = (const float*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    unsigned int number;
    unsigned int lane;

    const __m128 vScalar = _mm_set_ps1(scalar);

    /* Staging area for the four scaled values; _mm_store_ps needs a
     * 16-byte aligned destination. */
    __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];

    for (number = 0; number < quarterPoints; number++) {
        /* 8 floats = 4 complex samples per iteration. */
        const __m128 cplxValue1 = _mm_load_ps(complexVectorPtr);
        const __m128 cplxValue2 = _mm_load_ps(complexVectorPtr + 4);
        __m128 iValue;

        complexVectorPtr += 8;

        /* Elements 0 and 2 of each input are the real parts: r0 r1 r2 r3. */
        iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
        iValue = _mm_mul_ps(iValue, vScalar);

        _mm_store_ps(floatBuffer, iValue);
        for (lane = 0; lane < 4; lane++) {
            *iBufferPtr++ = (int16_t)(floatBuffer[lane]);
        }
    }

    /* Scalar tail for the remaining (num_points % 4) samples. */
    number = quarterPoints * 4;
    iBufferPtr = &iBuffer[number];
    for (; number < num_points; number++) {
        *iBufferPtr++ = (int16_t)(*complexVectorPtr * scalar);
        complexVectorPtr += 2; /* step over the imaginary part */
    }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_GENERIC

/*!
 * \brief Portable kernel: scaled real parts of complexVector as int16_t.
 *
 * \param iBuffer       Output, num_points values.
 * \param complexVector Input, num_points interleaved (real, imag) float
 *                      pairs.
 * \param scalar        Factor applied to each real part before conversion.
 * \param num_points    Number of complex samples to process.
 *
 * Conversion to int16_t is a plain C cast: it truncates toward zero and does
 * not saturate, so a scaled value outside the int16_t range is undefined
 * behaviour in C.
 */
static inline void
volk_32fc_s32f_deinterleave_real_16i_generic(int16_t* iBuffer,
                                             const lv_32fc_t* complexVector,
                                             const float scalar,
                                             unsigned int num_points)
{
    const float* complexVectorPtr = (const float*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        *iBufferPtr++ = (int16_t)(*complexVectorPtr * scalar);
        complexVectorPtr += 2; /* step over the imaginary part */
    }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H */
#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H

#include <inttypes.h>

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief AVX2 kernel (unaligned): scaled real parts of complexVector as
 *        int16_t.
 *
 * \param iBuffer       Output, num_points values. Written with
 *                      _mm_storeu_si128, so no alignment is required.
 * \param complexVector Input, num_points interleaved (real, imag) float
 *                      pairs. Read with _mm256_loadu_ps, so no alignment is
 *                      required.
 * \param scalar        Factor applied to each real part before conversion.
 * \param num_points    Number of complex samples to process.
 *
 * The vector path truncates toward zero and saturates to the int16_t range
 * (signed pack).  The scalar tail uses a plain cast, which also truncates
 * but does not saturate.
 *
 * NOTE(review): lv_32fc_t is not declared in this section; presumably it
 * comes from the VOLK common headers - confirm they are included earlier.
 */
static inline void
volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
                                            const lv_32fc_t* complexVector,
                                            const float scalar,
                                            unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    const float* complexVectorPtr = (const float*)complexVector;
    int16_t* iBufferPtr = iBuffer;
    unsigned int number;

    const __m256 vScalar = _mm256_set1_ps(scalar);
    /* Gathers dwords 0, 4, 1, 5 of the packed result into the low 128 bits;
     * the upper four indices are don't-care because only the low half is
     * stored. */
    const __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);

    for (number = 0; number < eighthPoints; number++) {
        /* 16 floats = 8 complex samples per iteration. */
        const __m256 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
        const __m256 cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
        __m256 iValue;
        __m256i a;
        __m128i b;

        complexVectorPtr += 16;

        /* Keep elements 0 and 2 of each 128-bit lane (the real parts):
         * low lane -> r0 r1 r4 r5, high lane -> r2 r3 r6 r7. */
        iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
        iValue = _mm256_mul_ps(iValue, vScalar);

        /* Truncate toward zero so the result matches the C cast used in the
         * scalar tail, then convert and narrow with signed saturation. */
        iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
        a = _mm256_cvtps_epi32(iValue);
        a = _mm256_packs_epi32(a, a);

        /* Undo the per-lane ordering so the low half holds r0..r7 in order. */
        a = _mm256_permutevar8x32_epi32(a, idx);
        b = _mm256_extracti128_si256(a, 0);

        _mm_storeu_si128((__m128i*)iBufferPtr, b);
        iBufferPtr += 8; /* eight outputs were just written */
    }

    /* Scalar tail for the remaining (num_points % 8) samples. */
    number = eighthPoints * 8;
    iBufferPtr = &iBuffer[number];
    for (; number < num_points; number++) {
        *iBufferPtr++ = (int16_t)(*complexVectorPtr * scalar);
        complexVectorPtr += 2; /* step over the imaginary part */
    }
}
#endif /* LV_HAVE_AVX2 */

#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H */
/*
 * Cross-references carried over from the generated documentation:
 *
 * static void volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
 *     Definition: volk_32fc_s32f_deinterleave_real_16i.h:141
 * static void volk_32fc_s32f_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
 *     Definition: volk_32fc_s32f_deinterleave_real_16i.h:191
 * #define __VOLK_ATTR_ALIGNED(x)
 *     Definition: volk_common.h:56
 * float complex lv_32fc_t
 *     Definition: volk_complex.h:70
 */