#ifndef INCLUDED_volk_64f_convert_32f_u_H
#define INCLUDED_volk_64f_convert_32f_u_H

/*
 * Unaligned kernels: convert num_points doubles from inputVector into
 * single-precision floats in outputVector.  The SIMD variants use unaligned
 * load/store intrinsics and finish any leftover points with a scalar loop.
 */

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief Converts doubles to floats, 16 points per iteration (AVX-512F).
 *
 * \param outputVector destination for num_points floats
 * \param inputVector  source of num_points doubles
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_u_avx512f(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneSixteenthPoints = num_points / 16;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m256 ret1, ret2;
    __m512d inputVal1, inputVal2;

    for (; number < oneSixteenthPoints; number++) {
        /* Each 512-bit load consumes 8 doubles. */
        inputVal1 = _mm512_loadu_pd(inputVectorPtr);
        inputVectorPtr += 8;
        inputVal2 = _mm512_loadu_pd(inputVectorPtr);
        inputVectorPtr += 8;

        /* 8 doubles narrow to 8 floats (one 256-bit register). */
        ret1 = _mm512_cvtpd_ps(inputVal1);
        ret2 = _mm512_cvtpd_ps(inputVal2);

        _mm256_storeu_ps(outputVectorPtr, ret1);
        outputVectorPtr += 8;

        _mm256_storeu_ps(outputVectorPtr, ret2);
        outputVectorPtr += 8;
    }

    /* Scalar tail for the points that did not fill a 16-wide iteration. */
    number = oneSixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX512F */


#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Converts doubles to floats, 8 points per iteration (AVX).
 *
 * \param outputVector destination for num_points floats
 * \param inputVector  source of num_points doubles
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_u_avx(float* outputVector,
                                              const double* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneEightPoints = num_points / 8;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret1, ret2;
    __m256d inputVal1, inputVal2;

    for (; number < oneEightPoints; number++) {
        /* Each 256-bit load consumes 4 doubles. */
        inputVal1 = _mm256_loadu_pd(inputVectorPtr);
        inputVectorPtr += 4;
        inputVal2 = _mm256_loadu_pd(inputVectorPtr);
        inputVectorPtr += 4;

        /* 4 doubles narrow to 4 floats (one 128-bit register). */
        ret1 = _mm256_cvtpd_ps(inputVal1);
        ret2 = _mm256_cvtpd_ps(inputVal2);

        _mm_storeu_ps(outputVectorPtr, ret1);
        outputVectorPtr += 4;

        _mm_storeu_ps(outputVectorPtr, ret2);
        outputVectorPtr += 4;
    }

    /* Scalar tail for the points that did not fill an 8-wide iteration. */
    number = oneEightPoints * 8;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX */


#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*!
 * \brief Converts doubles to floats, 4 points per iteration (SSE2).
 *
 * \param outputVector destination for num_points floats
 * \param inputVector  source of num_points doubles
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_u_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int quarterPoints = num_points / 4;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret, ret2;
    __m128d inputVal1, inputVal2;

    for (; number < quarterPoints; number++) {
        /* Each 128-bit load consumes 2 doubles. */
        inputVal1 = _mm_loadu_pd(inputVectorPtr);
        inputVectorPtr += 2;
        inputVal2 = _mm_loadu_pd(inputVectorPtr);
        inputVectorPtr += 2;

        /* Each conversion yields 2 floats in the low half of the result. */
        ret = _mm_cvtpd_ps(inputVal1);
        ret2 = _mm_cvtpd_ps(inputVal2);

        /* Pack both low halves into one register: [ret.lo, ret2.lo]. */
        ret = _mm_movelh_ps(ret, ret2);

        _mm_storeu_ps(outputVectorPtr, ret);
        outputVectorPtr += 4;
    }

    /* Scalar tail for the points that did not fill a 4-wide iteration. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_SSE2 */


#ifdef LV_HAVE_GENERIC

/*!
 * \brief Converts doubles to floats one point at a time (portable C).
 *
 * \param outputVector destination for num_points floats
 * \param inputVector  source of num_points doubles
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_generic(float* outputVector,
                                                const double* inputVector,
                                                unsigned int num_points)
{
    float* outputVectorPtr = outputVector;
    const double* inputVectorPtr = inputVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = ((float)(*inputVectorPtr++));
    }
}
#endif /* LV_HAVE_GENERIC */


#endif /* INCLUDED_volk_64f_convert_32f_u_H */
#ifndef INCLUDED_volk_64f_convert_32f_a_H
#define INCLUDED_volk_64f_convert_32f_a_H

#include <inttypes.h>

/*
 * Aligned kernels: convert num_points doubles from inputVector into
 * single-precision floats in outputVector.  The SIMD variants use aligned
 * load/store intrinsics, so both buffers must satisfy the alignment those
 * intrinsics require; leftover points are finished with a scalar loop.
 */

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief Converts doubles to floats, 16 points per iteration (AVX-512F).
 *
 * \param outputVector destination for num_points floats; written with
 *                     _mm256_store_ps (32-byte alignment required)
 * \param inputVector  source of num_points doubles; read with
 *                     _mm512_load_pd (64-byte alignment required)
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_a_avx512f(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneSixteenthPoints = num_points / 16;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m256 ret1, ret2;
    __m512d inputVal1, inputVal2;

    for (; number < oneSixteenthPoints; number++) {
        /* Each 512-bit load consumes 8 doubles (64 bytes, keeps alignment). */
        inputVal1 = _mm512_load_pd(inputVectorPtr);
        inputVectorPtr += 8;
        inputVal2 = _mm512_load_pd(inputVectorPtr);
        inputVectorPtr += 8;

        /* 8 doubles narrow to 8 floats (one 256-bit register). */
        ret1 = _mm512_cvtpd_ps(inputVal1);
        ret2 = _mm512_cvtpd_ps(inputVal2);

        _mm256_store_ps(outputVectorPtr, ret1);
        outputVectorPtr += 8;

        _mm256_store_ps(outputVectorPtr, ret2);
        outputVectorPtr += 8;
    }

    /* Scalar tail for the points that did not fill a 16-wide iteration. */
    number = oneSixteenthPoints * 16;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX512F */


#ifdef LV_HAVE_AVX
#include <immintrin.h>

/*!
 * \brief Converts doubles to floats, 8 points per iteration (AVX).
 *
 * \param outputVector destination for num_points floats; written with
 *                     _mm_store_ps (16-byte alignment required)
 * \param inputVector  source of num_points doubles; read with
 *                     _mm256_load_pd (32-byte alignment required)
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_a_avx(float* outputVector,
                                              const double* inputVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int oneEightPoints = num_points / 8;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret1, ret2;
    __m256d inputVal1, inputVal2;

    for (; number < oneEightPoints; number++) {
        /* Each 256-bit load consumes 4 doubles (32 bytes, keeps alignment). */
        inputVal1 = _mm256_load_pd(inputVectorPtr);
        inputVectorPtr += 4;
        inputVal2 = _mm256_load_pd(inputVectorPtr);
        inputVectorPtr += 4;

        /* 4 doubles narrow to 4 floats (one 128-bit register). */
        ret1 = _mm256_cvtpd_ps(inputVal1);
        ret2 = _mm256_cvtpd_ps(inputVal2);

        _mm_store_ps(outputVectorPtr, ret1);
        outputVectorPtr += 4;

        _mm_store_ps(outputVectorPtr, ret2);
        outputVectorPtr += 4;
    }

    /* Scalar tail for the points that did not fill an 8-wide iteration. */
    number = oneEightPoints * 8;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_AVX */


#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*!
 * \brief Converts doubles to floats, 4 points per iteration (SSE2).
 *
 * \param outputVector destination for num_points floats; written with
 *                     _mm_store_ps (16-byte alignment required)
 * \param inputVector  source of num_points doubles; read with
 *                     _mm_load_pd (16-byte alignment required)
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_a_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    unsigned int number = 0;

    const unsigned int quarterPoints = num_points / 4;

    const double* inputVectorPtr = inputVector;
    float* outputVectorPtr = outputVector;
    __m128 ret, ret2;
    __m128d inputVal1, inputVal2;

    for (; number < quarterPoints; number++) {
        /* Each 128-bit load consumes 2 doubles (16 bytes, keeps alignment). */
        inputVal1 = _mm_load_pd(inputVectorPtr);
        inputVectorPtr += 2;
        inputVal2 = _mm_load_pd(inputVectorPtr);
        inputVectorPtr += 2;

        /* Each conversion yields 2 floats in the low half of the result. */
        ret = _mm_cvtpd_ps(inputVal1);
        ret2 = _mm_cvtpd_ps(inputVal2);

        /* Pack both low halves into one register: [ret.lo, ret2.lo]. */
        ret = _mm_movelh_ps(ret, ret2);

        _mm_store_ps(outputVectorPtr, ret);
        outputVectorPtr += 4;
    }

    /* Scalar tail for the points that did not fill a 4-wide iteration. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        outputVector[number] = (float)(inputVector[number]);
    }
}
#endif /* LV_HAVE_SSE2 */


#ifdef LV_HAVE_GENERIC

/*!
 * \brief Converts doubles to floats one point at a time (portable C).
 *
 * \param outputVector destination for num_points floats
 * \param inputVector  source of num_points doubles
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_a_generic(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    float* outputVectorPtr = outputVector;
    const double* inputVectorPtr = inputVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = ((float)(*inputVectorPtr++));
    }
}
#endif /* LV_HAVE_GENERIC */


#endif /* INCLUDED_volk_64f_convert_32f_a_H */
/*
 * Symbol index carried over from the generated documentation listing.
 * The "Definition" locations are those given by that listing.
 *
 * static void volk_64f_convert_32f_a_generic(float *outputVector, const double *inputVector, unsigned int num_points)
 *   Definition: volk_64f_convert_32f.h:334
 * static void volk_64f_convert_32f_generic(float *outputVector, const double *inputVector, unsigned int num_points)
 *   Definition: volk_64f_convert_32f.h:191
 * static void volk_64f_convert_32f_a_avx(float *outputVector, const double *inputVector, unsigned int num_points)
 *   Definition: volk_64f_convert_32f.h:256
 * static void volk_64f_convert_32f_u_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
 *   Definition: volk_64f_convert_32f.h:153
 * static void volk_64f_convert_32f_a_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
 *   Definition: volk_64f_convert_32f.h:296
 * static void volk_64f_convert_32f_u_avx(float *outputVector, const double *inputVector, unsigned int num_points)
 *   Definition: volk_64f_convert_32f.h:113
 */