26namespace FloatVectorHelpers
// Pointer-advance snippets passed as the `increment` argument of the vector loops.
// Each one moves the named pointers forward by 16 bytes' worth of elements
// (16 / sizeof (*dest)), i.e. 4 elements for a 4-byte type and 2 for an 8-byte type.
// The element size is always taken from `dest`, including for the source pointers.
28 #define JUCE_INCREMENT_SRC_DEST dest += (16 / sizeof (*dest)); src += (16 / sizeof (*dest));
29 #define JUCE_INCREMENT_SRC1_SRC2_DEST dest += (16 / sizeof (*dest)); src1 += (16 / sizeof (*dest)); src2 += (16 / sizeof (*dest));
30 #define JUCE_INCREMENT_DEST dest += (16 / sizeof (*dest));
32 #if JUCE_USE_SSE_INTRINSICS
33 static bool isAligned (
const void* p)
noexcept
35 return (((pointer_sized_int) p) & 15) == 0;
// SSE single-precision operations: one __m128 register holds numParallel = 4 values.
// IntegerType is also __m128 here, so toint/toflt simply return their argument.
41 using ParallelType = __m128;
42 using IntegerType = __m128;
43 enum { numParallel = 4 };
46 static forcedinline IntegerType toint (ParallelType v)
noexcept {
return v; }
47 static forcedinline ParallelType toflt (IntegerType v)
noexcept {
return v; }
// load1 fills every lane with the same value. loadA/storeA use the aligned
// intrinsics (_mm_load_ps / _mm_store_ps), loadU/storeU the unaligned ones.
49 static forcedinline ParallelType load1 (Type v)
noexcept {
return _mm_load1_ps (&v); }
50 static forcedinline ParallelType loadA (
const Type* v)
noexcept {
return _mm_load_ps (v); }
51 static forcedinline ParallelType loadU (
const Type* v)
noexcept {
return _mm_loadu_ps (v); }
52 static forcedinline
void storeA (Type* dest, ParallelType a)
noexcept { _mm_store_ps (dest, a); }
53 static forcedinline
void storeU (Type* dest, ParallelType a)
noexcept { _mm_storeu_ps (dest, a); }
// Lane-wise arithmetic and comparisons.
55 static forcedinline ParallelType add (ParallelType a, ParallelType b)
noexcept {
return _mm_add_ps (a, b); }
56 static forcedinline ParallelType sub (ParallelType a, ParallelType b)
noexcept {
return _mm_sub_ps (a, b); }
57 static forcedinline ParallelType mul (ParallelType a, ParallelType b)
noexcept {
return _mm_mul_ps (a, b); }
58 static forcedinline ParallelType max (ParallelType a, ParallelType b)
noexcept {
return _mm_max_ps (a, b); }
59 static forcedinline ParallelType min (ParallelType a, ParallelType b)
noexcept {
return _mm_min_ps (a, b); }
// Bitwise operations on the raw register contents.
// Note that bit_not is an and-not: _mm_andnot_ps (a, b) yields (~a) & b,
// so it is the FIRST argument that gets inverted.
61 static forcedinline ParallelType bit_and (ParallelType a, ParallelType b)
noexcept {
return _mm_and_ps (a, b); }
62 static forcedinline ParallelType bit_not (ParallelType a, ParallelType b)
noexcept {
return _mm_andnot_ps (a, b); }
63 static forcedinline ParallelType bit_or (ParallelType a, ParallelType b)
noexcept {
return _mm_or_ps (a, b); }
64 static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b)
noexcept {
return _mm_xor_ps (a, b); }
// Horizontal reductions: the register is stored to a 4-element array with an
// unaligned store and the four lanes are then reduced with jmax / jmin.
66 static forcedinline Type max (ParallelType a)
noexcept { Type v[numParallel]; storeU (v, a);
return jmax (v[0], v[1], v[2], v[3]); }
67 static forcedinline Type min (ParallelType a)
noexcept { Type v[numParallel]; storeU (v, a);
return jmin (v[0], v[1], v[2], v[3]); }
// SSE double-precision operations: one __m128d register holds numParallel = 2 values.
// IntegerType is also __m128d here, so toint/toflt simply return their argument.
73 using ParallelType = __m128d;
74 using IntegerType = __m128d;
75 enum { numParallel = 2 };
78 static forcedinline IntegerType toint (ParallelType v)
noexcept {
return v; }
79 static forcedinline ParallelType toflt (IntegerType v)
noexcept {
return v; }
// load1 fills both lanes with the same value. loadA/storeA use the aligned
// intrinsics (_mm_load_pd / _mm_store_pd), loadU/storeU the unaligned ones.
81 static forcedinline ParallelType load1 (Type v)
noexcept {
return _mm_load1_pd (&v); }
82 static forcedinline ParallelType loadA (
const Type* v)
noexcept {
return _mm_load_pd (v); }
83 static forcedinline ParallelType loadU (
const Type* v)
noexcept {
return _mm_loadu_pd (v); }
84 static forcedinline
void storeA (Type* dest, ParallelType a)
noexcept { _mm_store_pd (dest, a); }
85 static forcedinline
void storeU (Type* dest, ParallelType a)
noexcept { _mm_storeu_pd (dest, a); }
// Lane-wise arithmetic and comparisons.
87 static forcedinline ParallelType add (ParallelType a, ParallelType b)
noexcept {
return _mm_add_pd (a, b); }
88 static forcedinline ParallelType sub (ParallelType a, ParallelType b)
noexcept {
return _mm_sub_pd (a, b); }
89 static forcedinline ParallelType mul (ParallelType a, ParallelType b)
noexcept {
return _mm_mul_pd (a, b); }
90 static forcedinline ParallelType max (ParallelType a, ParallelType b)
noexcept {
return _mm_max_pd (a, b); }
91 static forcedinline ParallelType min (ParallelType a, ParallelType b)
noexcept {
return _mm_min_pd (a, b); }
// Bitwise operations on the raw register contents.
// Note that bit_not is an and-not: _mm_andnot_pd (a, b) yields (~a) & b,
// so it is the FIRST argument that gets inverted.
93 static forcedinline ParallelType bit_and (ParallelType a, ParallelType b)
noexcept {
return _mm_and_pd (a, b); }
94 static forcedinline ParallelType bit_not (ParallelType a, ParallelType b)
noexcept {
return _mm_andnot_pd (a, b); }
95 static forcedinline ParallelType bit_or (ParallelType a, ParallelType b)
noexcept {
return _mm_or_pd (a, b); }
96 static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b)
noexcept {
return _mm_xor_pd (a, b); }
// Horizontal reductions: the register is stored to a 2-element array with an
// unaligned store and the two lanes are then reduced with jmax / jmin.
98 static forcedinline Type max (ParallelType a)
noexcept { Type v[numParallel]; storeU (v, a);
return jmax (v[0], v[1]); }
99 static forcedinline Type min (ParallelType a)
noexcept { Type v[numParallel]; storeU (v, a);
return jmin (v[0], v[1]); }
// Prologue/epilogue shared by the JUCE_PERFORM_VEC_OP_* macros (SSE build).
//
// JUCE_BEGIN_VEC_OP selects the ops struct from the element size of `dest`
// (via ModeType<sizeof (*dest)>) and computes numLongOps, the number of whole
// vectors that fit in `num` elements.
//
// JUCE_FINISH_VEC_OP reduces `num` to the element count left over after the
// whole vectors (the mask works because numParallel is 4 or 2), returns from the
// enclosing function if nothing is left, and otherwise evaluates `normalOp` once
// per leftover index `i`. Because it uses a bare `return;`, it can only be
// expanded inside a function returning void. It expects `dest`, `num` and any
// source pointers to be in scope under exactly those names.
104 #define JUCE_BEGIN_VEC_OP \
105 using Mode = FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode; \
107 const auto numLongOps = num / Mode::numParallel;
109 #define JUCE_FINISH_VEC_OP(normalOp) \
110 num &= (Mode::numParallel - 1); \
111 if (num == 0) return; \
113 for (auto i = (decltype (num)) 0; i < num; ++i) normalOp;
// In-place operation that only touches `dest` (SSE build).
// Uses the aligned load/store pair when `dest` is 16-byte aligned and the
// unaligned pair otherwise; JUCE_FINISH_VEC_OP then applies `normalOp` to the
// leftover elements. `dummy` occupies the source-load slot of JUCE_VEC_LOOP,
// which has no source pointer to read from in this variant.
115 #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
118 if (FloatVectorHelpers::isAligned (dest)) JUCE_VEC_LOOP (vecOp, dummy, Mode::loadA, Mode::storeA, locals, JUCE_INCREMENT_DEST) \
119 else JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
120 JUCE_FINISH_VEC_OP (normalOp)
// One-source operation (SSE build). The alignment of `dest` and `src` is tested
// independently, giving four JUCE_VEC_LOOP expansions: the first load argument
// follows the alignment of `src`, the second load and the store follow `dest`.
// JUCE_FINISH_VEC_OP then applies `normalOp` to the leftover elements.
122 #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
125 if (FloatVectorHelpers::isAligned (dest)) \
127 if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
128 else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
132 if (FloatVectorHelpers::isAligned (src)) JUCE_VEC_LOOP (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
133 else JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
135 JUCE_FINISH_VEC_OP (normalOp)
// Two-source operation writing to `dest` (SSE build). `dest`, `src1` and `src2`
// are tested for 16-byte alignment independently, giving eight
// JUCE_VEC_LOOP_TWO_SOURCES expansions: the first load follows `src1`, the
// second follows `src2`, and the store follows `dest`.
// JUCE_FINISH_VEC_OP then applies `normalOp` to the leftover elements.
137 #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
140 if (FloatVectorHelpers::isAligned (dest)) \
142 if (FloatVectorHelpers::isAligned (src1)) \
144 if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
145 else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeA, locals, increment) \
149 if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
150 else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeA, locals, increment) \
155 if (FloatVectorHelpers::isAligned (src1)) \
157 if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadA, Mode::storeU, locals, increment) \
158 else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
162 if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadA, Mode::storeU, locals, increment) \
163 else JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
166 JUCE_FINISH_VEC_OP (normalOp)
// Two-source operation that also reads the current contents of `dest`
// (SSE build). Same eight-way alignment dispatch as the plain two-source macro,
// with one extra load argument for `dest`: the `dest` load and the store are
// both aligned when `dest` is 16-byte aligned, and both unaligned otherwise.
// JUCE_FINISH_VEC_OP then applies `normalOp` to the leftover elements.
168 #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
171 if (FloatVectorHelpers::isAligned (dest)) \
173 if (FloatVectorHelpers::isAligned (src1)) \
175 if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
176 else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
180 if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadA, Mode::storeA, locals, increment) \
181 else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadA, Mode::storeA, locals, increment) \
186 if (FloatVectorHelpers::isAligned (src1)) \
188 if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
189 else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadA, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
193 if (FloatVectorHelpers::isAligned (src2)) JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadA, Mode::loadU, Mode::storeU, locals, increment) \
194 else JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
197 JUCE_FINISH_VEC_OP (normalOp)
201 #elif JUCE_USE_ARM_NEON
// NEON single-precision operations: one float32x4_t holds numParallel = 4 values.
// The bitwise intrinsics used below work on uint32x4_t, so toint/toflt
// reinterpret a register between the two types through signMaskUnion.
206 using ParallelType = float32x4_t;
207 using IntegerType = uint32x4_t;
208 union signMaskUnion { ParallelType f; IntegerType i; };
209 enum { numParallel = 4 };
211 static forcedinline IntegerType toint (ParallelType v)
noexcept { signMaskUnion u; u.f = v;
return u.i; }
212 static forcedinline ParallelType toflt (IntegerType v)
noexcept { signMaskUnion u; u.i = v;
return u.f; }
// load1 fills every lane with the same value. The "aligned" and "unaligned"
// loads and stores are identical here: both use vld1q_f32 / vst1q_f32.
214 static forcedinline ParallelType load1 (Type v)
noexcept {
return vld1q_dup_f32 (&v); }
215 static forcedinline ParallelType loadA (
const Type* v)
noexcept {
return vld1q_f32 (v); }
216 static forcedinline ParallelType loadU (
const Type* v)
noexcept {
return vld1q_f32 (v); }
217 static forcedinline
void storeA (Type* dest, ParallelType a)
noexcept { vst1q_f32 (dest, a); }
218 static forcedinline
void storeU (Type* dest, ParallelType a)
noexcept { vst1q_f32 (dest, a); }
// Lane-wise arithmetic and comparisons.
220 static forcedinline ParallelType add (ParallelType a, ParallelType b)
noexcept {
return vaddq_f32 (a, b); }
221 static forcedinline ParallelType sub (ParallelType a, ParallelType b)
noexcept {
return vsubq_f32 (a, b); }
222 static forcedinline ParallelType mul (ParallelType a, ParallelType b)
noexcept {
return vmulq_f32 (a, b); }
223 static forcedinline ParallelType max (ParallelType a, ParallelType b)
noexcept {
return vmaxq_f32 (a, b); }
224 static forcedinline ParallelType min (ParallelType a, ParallelType b)
noexcept {
return vminq_f32 (a, b); }
// Bitwise AND of the raw lane bits, done on the integer view of the registers.
226 static forcedinline ParallelType bit_and (ParallelType a, ParallelType b)
noexcept {
return toflt (vandq_u32 (toint (a), toint (b))); }
227 static forcedinline ParallelType bit_not (ParallelType a, ParallelType b)
noexcept {
return toflt (vbicq_u32 (toint (a), toint (b))); }
// Bitwise OR / XOR of the raw lane bits, done on the integer view of the registers.
228 static forcedinline ParallelType bit_or (ParallelType a, ParallelType b)
noexcept {
return toflt (vorrq_u32 (toint (a), toint (b))); }
229 static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b)
noexcept {
return toflt (veorq_u32 (toint (a), toint (b))); }
// Horizontal reductions: the register is stored to a 4-element array and the
// four lanes are then reduced with jmax / jmin.
231 static forcedinline Type max (ParallelType a)
noexcept { Type v[numParallel]; storeU (v, a);
return jmax (v[0], v[1], v[2], v[3]); }
232 static forcedinline Type min (ParallelType a)
noexcept { Type v[numParallel]; storeU (v, a);
return jmin (v[0], v[1], v[2], v[3]); }
// Double-precision operations for the NEON build. No vector type is used here:
// ParallelType is a plain double and numParallel is 1, so every "vector"
// operation below is ordinary scalar code on a single value.
238 using ParallelType = double;
239 using IntegerType = uint64;
240 union signMaskUnion { ParallelType f; IntegerType i; };
241 enum { numParallel = 1 };
// Reinterpret the bits of a double as a uint64 (and back) through the union.
243 static forcedinline IntegerType toint (ParallelType v)
noexcept { signMaskUnion u; u.f = v;
return u.i; }
244 static forcedinline ParallelType toflt (IntegerType v)
noexcept { signMaskUnion u; u.i = v;
return u.f; }
// Loads and stores are plain reads and writes of one double.
246 static forcedinline ParallelType load1 (Type v)
noexcept {
return v; }
247 static forcedinline ParallelType loadA (
const Type* v)
noexcept {
return *v; }
248 static forcedinline ParallelType loadU (
const Type* v)
noexcept {
return *v; }
249 static forcedinline
void storeA (Type* dest, ParallelType a)
noexcept { *dest = a; }
250 static forcedinline
void storeU (Type* dest, ParallelType a)
noexcept { *dest = a; }
252 static forcedinline ParallelType add (ParallelType a, ParallelType b)
noexcept {
return a + b; }
253 static forcedinline ParallelType sub (ParallelType a, ParallelType b)
noexcept {
return a - b; }
254 static forcedinline ParallelType mul (ParallelType a, ParallelType b)
noexcept {
return a * b; }
255 static forcedinline ParallelType max (ParallelType a, ParallelType b)
noexcept {
return jmax (a, b); }
256 static forcedinline ParallelType min (ParallelType a, ParallelType b)
noexcept {
return jmin (a, b); }
// Bitwise operations on the 64-bit pattern of the values.
// bit_not is an and-not: it returns (~a) & b, inverting the FIRST argument.
258 static forcedinline ParallelType bit_and (ParallelType a, ParallelType b)
noexcept {
return toflt (toint (a) & toint (b)); }
259 static forcedinline ParallelType bit_not (ParallelType a, ParallelType b)
noexcept {
return toflt ((~toint (a)) & toint (b)); }
260 static forcedinline ParallelType bit_or (ParallelType a, ParallelType b)
noexcept {
return toflt (toint (a) | toint (b)); }
261 static forcedinline ParallelType bit_xor (ParallelType a, ParallelType b)
noexcept {
return toflt (toint (a) ^ toint (b)); }
// With a single lane the horizontal max/min is the value itself.
263 static forcedinline Type max (ParallelType a)
noexcept {
return a; }
264 static forcedinline Type min (ParallelType a)
noexcept {
return a; }
// Prologue/epilogue shared by the JUCE_PERFORM_VEC_OP_* macros (NEON build).
//
// JUCE_BEGIN_VEC_OP selects the ops struct from the element size of `dest` and
// only enters the vector path when that struct really processes more than one
// element at a time (the double ops in this build have numParallel == 1).
//
// JUCE_FINISH_VEC_OP reduces `num` to the element count left over after the
// whole vectors, returns from the enclosing function if nothing is left, and
// otherwise evaluates `normalOp` once per leftover index `i`. Because it uses a
// bare `return;`, it can only be expanded inside a function returning void.
267 #define JUCE_BEGIN_VEC_OP \
268 using Mode = FloatVectorHelpers::ModeType<sizeof(*dest)>::Mode; \
269 if (Mode::numParallel > 1) \
271 const auto numLongOps = num / Mode::numParallel;
273 #define JUCE_FINISH_VEC_OP(normalOp) \
274 num &= (Mode::numParallel - 1); \
275 if (num == 0) return; \
277 for (auto i = (decltype (num)) 0; i < num; ++i) normalOp;
// NEON versions of the four JUCE_PERFORM_VEC_OP_* macros. Unlike the SSE
// versions there is no alignment test: each expands to a single vector loop
// using Mode::loadU / Mode::storeU, followed by JUCE_FINISH_VEC_OP, which
// applies `normalOp` to the leftover elements.
279 #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
282 JUCE_VEC_LOOP (vecOp, dummy, Mode::loadU, Mode::storeU, locals, JUCE_INCREMENT_DEST) \
283 JUCE_FINISH_VEC_OP (normalOp)
285 #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
288 JUCE_VEC_LOOP (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
289 JUCE_FINISH_VEC_OP (normalOp)
291 #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
294 JUCE_VEC_LOOP_TWO_SOURCES (vecOp, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
295 JUCE_FINISH_VEC_OP (normalOp)
297 #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
300 JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD (vecOp, Mode::loadU, Mode::loadU, Mode::loadU, Mode::storeU, locals, increment) \
301 JUCE_FINISH_VEC_OP (normalOp)
// Scalar-only versions of the four JUCE_PERFORM_VEC_OP_* macros. They keep the
// same parameter lists as the vectorised versions so call sites need no
// changes, but ignore every argument except `normalOp`, which is evaluated
// once for each index `i` in [0, num).
306 #define JUCE_PERFORM_VEC_OP_DEST(normalOp, vecOp, locals, setupOp) \
307 for (auto i = (decltype (num)) 0; i < num; ++i) normalOp;
309 #define JUCE_PERFORM_VEC_OP_SRC_DEST(normalOp, vecOp, locals, increment, setupOp) \
310 for (auto i = (decltype (num)) 0; i < num; ++i) normalOp;
312 #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST(normalOp, vecOp, locals, increment, setupOp) \
313 for (auto i = (decltype (num)) 0; i < num; ++i) normalOp;
315 #define JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST(normalOp, vecOp, locals, increment, setupOp) \
316 for (auto i = (decltype (num)) 0; i < num; ++i) normalOp;
// Building blocks expanded by the JUCE_PERFORM_VEC_OP_* macros.
//
// JUCE_VEC_LOOP* run numLongOps iterations. Each iteration expands `locals`
// with the chosen load functions (declaring the per-iteration vector values),
// then stores the result of `vecOp` to `dest` through `dstStore`. The variants
// differ only in how many load functions they forward to `locals`: source and
// dest, two sources, or two sources plus dest.
//
// The JUCE_LOAD_* macros are the values passed as `locals`. They declare const
// vectors under the fixed names that the `vecOp` expressions refer to:
//   d      - loaded from dest
//   s      - loaded from src
//   s1, s2 - loaded from src1 and src2
// JUCE_LOAD_NONE declares nothing, for operations that need no loaded input.
321 #define JUCE_VEC_LOOP(vecOp, srcLoad, dstLoad, dstStore, locals, increment) \
322 for (auto i = (decltype (numLongOps)) 0; i < numLongOps; ++i) \
324 locals (srcLoad, dstLoad); \
325 dstStore (dest, vecOp); \
329 #define JUCE_VEC_LOOP_TWO_SOURCES(vecOp, src1Load, src2Load, dstStore, locals, increment) \
330 for (auto i = (decltype (numLongOps)) 0; i < numLongOps; ++i) \
332 locals (src1Load, src2Load); \
333 dstStore (dest, vecOp); \
337 #define JUCE_VEC_LOOP_TWO_SOURCES_WITH_DEST_LOAD(vecOp, src1Load, src2Load, dstLoad, dstStore, locals, increment) \
338 for (auto i = (decltype (numLongOps)) 0; i < numLongOps; ++i) \
340 locals (src1Load, src2Load, dstLoad); \
341 dstStore (dest, vecOp); \
345 #define JUCE_LOAD_NONE(srcLoad, dstLoad)
346 #define JUCE_LOAD_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest);
347 #define JUCE_LOAD_SRC(srcLoad, dstLoad) const Mode::ParallelType s = srcLoad (src);
348 #define JUCE_LOAD_SRC1_SRC2(src1Load, src2Load) const Mode::ParallelType s1 = src1Load (src1), s2 = src2Load (src2);
349 #define JUCE_LOAD_SRC1_SRC2_DEST(src1Load, src2Load, dstLoad) const Mode::ParallelType d = dstLoad (dest), s1 = src1Load (src1), s2 = src2Load (src2);
350 #define JUCE_LOAD_SRC_DEST(srcLoad, dstLoad) const Mode::ParallelType d = dstLoad (dest), s = srcLoad (src);
// Unions that overlay a floating-point value with an unsigned integer of the
// same width. The abs() functions write a bit pattern through `i` and read it
// back as a floating-point value to build their mask.
352 union signMask32 {
float f; uint32 i; };
353 union signMask64 {
double d; uint64 i; };
355 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
// Maps an element size in bytes to the matching ops struct. The primary
// template yields BasicOps32; the specialisation for 8 yields BasicOps64.
// It is instantiated with sizeof (*dest) by JUCE_BEGIN_VEC_OP.
356 template <
int typeSize>
struct ModeType {
using Mode = BasicOps32; };
357 template <>
struct ModeType<8> {
using Mode = BasicOps64; };
// Helper parameterised on an ops struct (`Mode`). It re-exports the scalar
// element type and the vector type of that struct under shorter names for the
// min/max search functions that follow.
359 template <
typename Mode>
362 using Type =
typename Mode::Type;
363 using ParallelType =
typename Mode::ParallelType;
// Finds either the smallest (isMinimum == true) or the largest value among the
// `num` elements starting at `src`.
//
// The vector path loads the first whole vector, folds each following whole
// vector into it with Mode::min or Mode::max, reduces the lanes to a single
// value and finally folds in the elements left over after the last whole
// vector. The last return statement is a plain std::min_element /
// std::max_element search over the whole range.
// NOTE(review): the conditions that choose between these paths are not visible
// here - confirm against the full file.
365 template <
typename Size>
366 static Type findMinOrMax (
const Type* src, Size num,
const bool isMinimum)
noexcept
// Number of whole vectors available in the input.
368 auto numLongOps = num / Mode::numParallel;
// Aligned-load variant; compiled out for NEON builds.
374 #if ! JUCE_USE_ARM_NEON
377 val = Mode::loadA (src);
// The pre-decrement accounts for the vector already loaded above, so each
// loop visits the remaining numLongOps - 1 vectors.
381 while (--numLongOps > 0)
383 src += Mode::numParallel;
384 val = Mode::min (val, Mode::loadA (src));
389 while (--numLongOps > 0)
391 src += Mode::numParallel;
392 val = Mode::max (val, Mode::loadA (src));
// Unaligned-load variant of the same two loops.
399 val = Mode::loadU (src);
403 while (--numLongOps > 0)
405 src += Mode::numParallel;
406 val = Mode::min (val, Mode::loadU (src));
411 while (--numLongOps > 0)
413 src += Mode::numParallel;
414 val = Mode::max (val, Mode::loadU (src));
419 Type result = isMinimum ? Mode::min (val)
// `src` still points at the last vector that was processed: step past it and
// fold in the num % numParallel elements that did not fill a whole vector.
422 num &= (Mode::numParallel - 1);
423 src += Mode::numParallel;
425 for (
auto i = (
decltype (num)) 0; i < num; ++i)
426 result = isMinimum ? jmin (result, src[i])
427 : jmax (result, src[i]);
// Scalar search over the whole range.
435 return isMinimum ? *std::min_element (src, src + num)
436 : *std::max_element (src, src + num);
// Finds the smallest and the largest value among the `num` elements starting at
// `src` in a single pass and returns them as a Range<Type>.
//
// Two running vectors are kept, `mn` and `mx`. Every whole vector after the
// first is folded into both; the lanes are then reduced to scalars to build the
// result, which is finally widened by the elements left over after the last
// whole vector.
// NOTE(review): the initialisation of `mx` and the handling of short inputs are
// not visible here - confirm against the full file.
439 template <
typename Size>
440 static Range<Type> findMinAndMax (
const Type* src, Size num)
noexcept
// Number of whole vectors available in the input.
442 auto numLongOps = num / Mode::numParallel;
// Aligned-load variant; compiled out for NEON builds.
448 #if ! JUCE_USE_ARM_NEON
451 mn = Mode::loadA (src);
// The pre-decrement accounts for the vector already loaded above.
454 while (--numLongOps > 0)
456 src += Mode::numParallel;
457 const ParallelType v = Mode::loadA (src);
458 mn = Mode::min (mn, v);
459 mx = Mode::max (mx, v);
// Unaligned-load variant of the same loop.
465 mn = Mode::loadU (src);
468 while (--numLongOps > 0)
470 src += Mode::numParallel;
471 const ParallelType v = Mode::loadU (src);
472 mn = Mode::min (mn, v);
473 mx = Mode::max (mx, v);
477 Range<Type> result (Mode::min (mn),
// `src` still points at the last vector that was processed: step past it and
// extend the range with the num % numParallel leftover elements.
480 num &= (Mode::numParallel - 1);
481 src += Mode::numParallel;
483 for (
auto i = (
decltype (num)) 0; i < num; ++i)
484 result = result.getUnionWith (src[i]);
// Clears `num` floats starting at `dest`: vDSP_vclr when
// JUCE_USE_VDSP_FRAMEWORK is enabled, and a zeromem call covering
// num * sizeof (float) bytes on the other path.
497 template <
typename Size>
498 void clear (
float* dest, Size num)
noexcept
500 #if JUCE_USE_VDSP_FRAMEWORK
501 vDSP_vclr (dest, 1, (vDSP_Length) num);
503 zeromem (dest, (
size_t) num *
sizeof (
float));
// Clears `num` doubles starting at `dest`: vDSP_vclrD when
// JUCE_USE_VDSP_FRAMEWORK is enabled, and a zeromem call covering
// num * sizeof (double) bytes on the other path.
507 template <
typename Size>
508 void clear (
double* dest, Size num)
noexcept
510 #if JUCE_USE_VDSP_FRAMEWORK
511 vDSP_vclrD (dest, 1, (vDSP_Length) num);
513 zeromem (dest, (
size_t) num *
sizeof (
double));
// Sets each of the `num` floats at `dest` to `valueToFill`.
// Uses vDSP_vfill when JUCE_USE_VDSP_FRAMEWORK is enabled. The generic path
// loads the value into a vector once (`val`) as its setup step and uses
// dest[i] = valueToFill as the per-element operation.
517 template <
typename Size>
518 void fill (
float* dest,
float valueToFill, Size num)
noexcept
520 #if JUCE_USE_VDSP_FRAMEWORK
521 vDSP_vfill (&valueToFill, dest, 1, (vDSP_Length) num);
523 JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill,
526 const Mode::ParallelType val = Mode::load1 (valueToFill);)
// Sets each of the `num` doubles at `dest` to `valueToFill`.
// Uses vDSP_vfillD when JUCE_USE_VDSP_FRAMEWORK is enabled. The generic path
// loads the value into a vector once (`val`) as its setup step and uses
// dest[i] = valueToFill as the per-element operation.
530 template <
typename Size>
531 void fill (
double* dest,
double valueToFill, Size num)
noexcept
533 #if JUCE_USE_VDSP_FRAMEWORK
534 vDSP_vfillD (&valueToFill, dest, 1, (vDSP_Length) num);
536 JUCE_PERFORM_VEC_OP_DEST (dest[i] = valueToFill,
539 const Mode::ParallelType val = Mode::load1 (valueToFill);)
// Writes src[i] * multiplier to dest[i] for `num` floats.
// Uses vDSP_vsmul when JUCE_USE_VDSP_FRAMEWORK is enabled; the generic path
// loads the multiplier into a vector once (`mult`) as its setup step.
543 template <
typename Size>
544 void copyWithMultiply (
float* dest,
const float* src,
float multiplier, Size num)
noexcept
546 #if JUCE_USE_VDSP_FRAMEWORK
547 vDSP_vsmul (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
549 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
552 JUCE_INCREMENT_SRC_DEST,
553 const Mode::ParallelType mult = Mode::load1 (multiplier);)
// Writes src[i] * multiplier to dest[i] for `num` doubles.
// Uses vDSP_vsmulD when JUCE_USE_VDSP_FRAMEWORK is enabled; the generic path
// loads the multiplier into a vector once (`mult`) as its setup step.
557 template <
typename Size>
558 void copyWithMultiply (
double* dest,
const double* src,
double multiplier, Size num)
noexcept
560 #if JUCE_USE_VDSP_FRAMEWORK
561 vDSP_vsmulD (src, 1, &multiplier, dest, 1, (vDSP_Length) num);
563 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
566 JUCE_INCREMENT_SRC_DEST,
567 const Mode::ParallelType mult = Mode::load1 (multiplier);)
// Adds `amount` in place to each of the `num` floats at `dest`.
// Uses vDSP_vsadd (with dest as both input and output) when
// JUCE_USE_VDSP_FRAMEWORK is enabled; the generic vector step is
// Mode::add (d, amountToAdd) with the amount loaded into a vector once.
571 template <
typename Size>
572 void add (
float* dest,
float amount, Size num)
noexcept
574 #if JUCE_USE_VDSP_FRAMEWORK
575 vDSP_vsadd (dest, 1, &amount, dest, 1, (vDSP_Length) num);
577 JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount,
578 Mode::add (d, amountToAdd),
580 const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
// Adds `amount` in place to each of the `num` doubles at `dest`.
// Only the generic path appears in this overload: the vector step is
// Mode::add (d, amountToAdd) with the amount loaded into a vector once, and
// the per-element operation is dest[i] += amount.
584 template <
typename Size>
585 void add (
double* dest,
double amount, Size num)
noexcept
587 JUCE_PERFORM_VEC_OP_DEST (dest[i] += amount,
588 Mode::add (d, amountToAdd),
590 const Mode::ParallelType amountToAdd = Mode::load1 (amount);)
// Writes src[i] + amount to dest[i] for `num` floats.
// Uses vDSP_vsadd when JUCE_USE_VDSP_FRAMEWORK is enabled; the generic path
// loads the amount into a vector once (`am`) as its setup step.
593 template <
typename Size>
594 void add (
float* dest,
const float* src,
float amount, Size num)
noexcept
596 #if JUCE_USE_VDSP_FRAMEWORK
597 vDSP_vsadd (src, 1, &amount, dest, 1, (vDSP_Length) num);
599 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount,
602 JUCE_INCREMENT_SRC_DEST,
603 const Mode::ParallelType am = Mode::load1 (amount);)
// Writes src[i] + amount to dest[i] for `num` doubles.
// Uses vDSP_vsaddD when JUCE_USE_VDSP_FRAMEWORK is enabled; the generic path
// loads the amount into a vector once (`am`) as its setup step.
607 template <
typename Size>
608 void add (
double* dest,
const double* src,
double amount, Size num)
noexcept
610 #if JUCE_USE_VDSP_FRAMEWORK
611 vDSP_vsaddD (src, 1, &amount, dest, 1, (vDSP_Length) num);
613 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] + amount,
616 JUCE_INCREMENT_SRC_DEST,
617 const Mode::ParallelType am = Mode::load1 (amount);)
// Adds src[i] to dest[i] in place for `num` floats.
// Uses vDSP_vadd (with dest as one input and as the output) when
// JUCE_USE_VDSP_FRAMEWORK is enabled. No setup step is needed, hence the
// empty last macro argument.
621 template <
typename Size>
622 void add (
float* dest,
const float* src, Size num)
noexcept
624 #if JUCE_USE_VDSP_FRAMEWORK
625 vDSP_vadd (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
627 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i],
630 JUCE_INCREMENT_SRC_DEST, )
// Adds src[i] to dest[i] in place for `num` doubles.
// Uses vDSP_vaddD (with dest as one input and as the output) when
// JUCE_USE_VDSP_FRAMEWORK is enabled. No setup step is needed, hence the
// empty last macro argument.
634 template <
typename Size>
635 void add (
double* dest,
const double* src, Size num)
noexcept
637 #if JUCE_USE_VDSP_FRAMEWORK
638 vDSP_vaddD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
640 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i],
643 JUCE_INCREMENT_SRC_DEST, )
// Writes src1[i] + src2[i] to dest[i] for `num` floats.
// Uses vDSP_vadd when JUCE_USE_VDSP_FRAMEWORK is enabled. No setup step is
// needed, hence the empty last macro argument.
647 template <
typename Size>
648 void add (
float* dest,
const float* src1,
const float* src2, Size num)
noexcept
650 #if JUCE_USE_VDSP_FRAMEWORK
651 vDSP_vadd (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
653 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i],
656 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Writes src1[i] + src2[i] to dest[i] for `num` doubles.
// Uses vDSP_vaddD when JUCE_USE_VDSP_FRAMEWORK is enabled. No setup step is
// needed, hence the empty last macro argument.
660 template <
typename Size>
661 void add (
double* dest,
const double* src1,
const double* src2, Size num)
noexcept
663 #if JUCE_USE_VDSP_FRAMEWORK
664 vDSP_vaddD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
666 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] + src2[i],
669 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Subtracts src[i] from dest[i] in place for `num` floats.
// Uses vDSP_vsub when JUCE_USE_VDSP_FRAMEWORK is enabled; `src` is passed
// first because vDSP_vsub takes the value to subtract as its first vector
// (see Apple's vDSP reference).
673 template <
typename Size>
674 void subtract (
float* dest,
const float* src, Size num)
noexcept
676 #if JUCE_USE_VDSP_FRAMEWORK
677 vDSP_vsub (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
679 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i],
682 JUCE_INCREMENT_SRC_DEST, )
// Subtracts src[i] from dest[i] in place for `num` doubles.
// Uses vDSP_vsubD when JUCE_USE_VDSP_FRAMEWORK is enabled; `src` is passed
// first because vDSP_vsubD takes the value to subtract as its first vector
// (see Apple's vDSP reference).
686 template <
typename Size>
687 void subtract (
double* dest,
const double* src, Size num)
noexcept
689 #if JUCE_USE_VDSP_FRAMEWORK
690 vDSP_vsubD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
692 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i],
695 JUCE_INCREMENT_SRC_DEST, )
// Writes src1[i] - src2[i] to dest[i] for `num` floats.
// In the vDSP call the operands appear as (src2, src1): vDSP_vsub takes the
// value to subtract as its first vector (see Apple's vDSP reference), which
// matches the src1[i] - src2[i] of the generic path.
699 template <
typename Size>
700 void subtract (
float* dest,
const float* src1,
const float* src2, Size num)
noexcept
702 #if JUCE_USE_VDSP_FRAMEWORK
703 vDSP_vsub (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
705 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i],
708 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Writes src1[i] - src2[i] to dest[i] for `num` doubles.
// In the vDSP call the operands appear as (src2, src1): vDSP_vsubD takes the
// value to subtract as its first vector (see Apple's vDSP reference), which
// matches the src1[i] - src2[i] of the generic path.
712 template <
typename Size>
713 void subtract (
double* dest,
const double* src1,
const double* src2, Size num)
noexcept
715 #if JUCE_USE_VDSP_FRAMEWORK
716 vDSP_vsubD (src2, 1, src1, 1, dest, 1, (vDSP_Length) num);
718 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] - src2[i],
721 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Adds src[i] * multiplier to dest[i] in place for `num` floats.
// Uses vDSP_vsma when JUCE_USE_VDSP_FRAMEWORK is enabled. The generic vector
// step is Mode::add (d, Mode::mul (mult, s)), with the multiplier loaded into
// a vector once (`mult`) as the setup step.
725 template <
typename Size>
726 void addWithMultiply (
float* dest,
const float* src,
float multiplier, Size num)
noexcept
728 #if JUCE_USE_VDSP_FRAMEWORK
729 vDSP_vsma (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
731 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier,
732 Mode::add (d, Mode::mul (mult, s)),
734 JUCE_INCREMENT_SRC_DEST,
735 const Mode::ParallelType mult = Mode::load1 (multiplier);)
// Adds src[i] * multiplier to dest[i] in place for `num` doubles.
// Uses vDSP_vsmaD when JUCE_USE_VDSP_FRAMEWORK is enabled. The generic vector
// step is Mode::add (d, Mode::mul (mult, s)), with the multiplier loaded into
// a vector once (`mult`) as the setup step.
739 template <
typename Size>
740 void addWithMultiply (
double* dest,
const double* src,
double multiplier, Size num)
noexcept
742 #if JUCE_USE_VDSP_FRAMEWORK
743 vDSP_vsmaD (src, 1, &multiplier, dest, 1, dest, 1, (vDSP_Length) num);
745 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] += src[i] * multiplier,
746 Mode::add (d, Mode::mul (mult, s)),
748 JUCE_INCREMENT_SRC_DEST,
749 const Mode::ParallelType mult = Mode::load1 (multiplier);)
// Adds src1[i] * src2[i] to dest[i] in place for `num` floats.
// Uses vDSP_vma when JUCE_USE_VDSP_FRAMEWORK is enabled; the source pointers
// are cast to non-const for that call. The generic vector step is
// Mode::add (d, Mode::mul (s1, s2)), with d, s1 and s2 loaded by
// JUCE_LOAD_SRC1_SRC2_DEST.
753 template <
typename Size>
754 void addWithMultiply (
float* dest,
const float* src1,
const float* src2, Size num)
noexcept
756 #if JUCE_USE_VDSP_FRAMEWORK
757 vDSP_vma ((
float*) src1, 1, (
float*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
759 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i],
760 Mode::add (d, Mode::mul (s1, s2)),
761 JUCE_LOAD_SRC1_SRC2_DEST,
762 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Adds src1[i] * src2[i] to dest[i] in place for `num` doubles.
// Uses vDSP_vmaD when JUCE_USE_VDSP_FRAMEWORK is enabled; the source pointers
// are cast to non-const for that call. The generic vector step is
// Mode::add (d, Mode::mul (s1, s2)), with d, s1 and s2 loaded by
// JUCE_LOAD_SRC1_SRC2_DEST.
766 template <
typename Size>
767 void addWithMultiply (
double* dest,
const double* src1,
const double* src2, Size num)
noexcept
769 #if JUCE_USE_VDSP_FRAMEWORK
770 vDSP_vmaD ((
double*) src1, 1, (
double*) src2, 1, dest, 1, dest, 1, (vDSP_Length) num);
772 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] += src1[i] * src2[i],
773 Mode::add (d, Mode::mul (s1, s2)),
774 JUCE_LOAD_SRC1_SRC2_DEST,
775 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Subtracts src[i] * multiplier from dest[i] in place for `num` floats.
// The vector step is Mode::sub (d, Mode::mul (mult, s)), with the multiplier
// loaded into a vector once (`mult`) as the setup step.
779 template <
typename Size>
780 void subtractWithMultiply (
float* dest,
const float* src,
float multiplier, Size num)
noexcept
782 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i] * multiplier,
783 Mode::sub (d, Mode::mul (mult, s)),
785 JUCE_INCREMENT_SRC_DEST,
786 const Mode::ParallelType mult = Mode::load1 (multiplier);)
// Subtracts src[i] * multiplier from dest[i] in place for `num` doubles.
// The vector step is Mode::sub (d, Mode::mul (mult, s)), with the multiplier
// loaded into a vector once (`mult`) as the setup step.
789 template <
typename Size>
790 void subtractWithMultiply (
double* dest,
const double* src,
double multiplier, Size num)
noexcept
792 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] -= src[i] * multiplier,
793 Mode::sub (d, Mode::mul (mult, s)),
795 JUCE_INCREMENT_SRC_DEST,
796 const Mode::ParallelType mult = Mode::load1 (multiplier);)
// Subtracts src1[i] * src2[i] from dest[i] in place for `num` floats.
// The vector step is Mode::sub (d, Mode::mul (s1, s2)), with d, s1 and s2
// loaded by JUCE_LOAD_SRC1_SRC2_DEST.
799 template <
typename Size>
800 void subtractWithMultiply (
float* dest,
const float* src1,
const float* src2, Size num)
noexcept
802 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] -= src1[i] * src2[i],
803 Mode::sub (d, Mode::mul (s1, s2)),
804 JUCE_LOAD_SRC1_SRC2_DEST,
805 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Subtracts src1[i] * src2[i] from dest[i] in place for `num` doubles.
// The vector step is Mode::sub (d, Mode::mul (s1, s2)), with d, s1 and s2
// loaded by JUCE_LOAD_SRC1_SRC2_DEST.
808 template <
typename Size>
809 void subtractWithMultiply (
double* dest,
const double* src1,
const double* src2, Size num)
noexcept
811 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST_DEST (dest[i] -= src1[i] * src2[i],
812 Mode::sub (d, Mode::mul (s1, s2)),
813 JUCE_LOAD_SRC1_SRC2_DEST,
814 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Multiplies dest[i] by src[i] in place for `num` floats.
// Uses vDSP_vmul (with dest as one input and as the output) when
// JUCE_USE_VDSP_FRAMEWORK is enabled. No setup step is needed, hence the
// empty last macro argument.
817 template <
typename Size>
818 void multiply (
float* dest,
const float* src, Size num)
noexcept
820 #if JUCE_USE_VDSP_FRAMEWORK
821 vDSP_vmul (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
823 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i],
826 JUCE_INCREMENT_SRC_DEST, )
// Multiplies dest[i] by src[i] in place for `num` doubles.
// Uses vDSP_vmulD (with dest as one input and as the output) when
// JUCE_USE_VDSP_FRAMEWORK is enabled. No setup step is needed, hence the
// empty last macro argument.
830 template <
typename Size>
831 void multiply (
double* dest,
const double* src, Size num)
noexcept
833 #if JUCE_USE_VDSP_FRAMEWORK
834 vDSP_vmulD (src, 1, dest, 1, dest, 1, (vDSP_Length) num);
836 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] *= src[i],
839 JUCE_INCREMENT_SRC_DEST, )
// Writes src1[i] * src2[i] to dest[i] for `num` floats.
// Uses vDSP_vmul when JUCE_USE_VDSP_FRAMEWORK is enabled. No setup step is
// needed, hence the empty last macro argument.
843 template <
typename Size>
844 void multiply (
float* dest,
const float* src1,
const float* src2, Size num)
noexcept
846 #if JUCE_USE_VDSP_FRAMEWORK
847 vDSP_vmul (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
849 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i],
852 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Writes src1[i] * src2[i] to dest[i] for `num` doubles.
// Uses vDSP_vmulD when JUCE_USE_VDSP_FRAMEWORK is enabled. No setup step is
// needed, hence the empty last macro argument.
856 template <
typename Size>
857 void multiply (
double* dest,
const double* src1,
const double* src2, Size num)
noexcept
859 #if JUCE_USE_VDSP_FRAMEWORK
860 vDSP_vmulD (src1, 1, src2, 1, dest, 1, (vDSP_Length) num);
862 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = src1[i] * src2[i],
865 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Multiplies each of the `num` floats at `dest` by `multiplier` in place.
// Uses vDSP_vsmul (with dest as both input and output) when
// JUCE_USE_VDSP_FRAMEWORK is enabled; the generic path loads the multiplier
// into a vector once (`mult`) as its setup step.
869 template <
typename Size>
870 void multiply (
float* dest,
float multiplier, Size num)
noexcept
872 #if JUCE_USE_VDSP_FRAMEWORK
873 vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
875 JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier,
878 const Mode::ParallelType mult = Mode::load1 (multiplier);)
// Multiplies each of the `num` doubles at `dest` by `multiplier` in place.
// Uses vDSP_vsmulD (with dest as both input and output) when
// JUCE_USE_VDSP_FRAMEWORK is enabled; the generic path loads the multiplier
// into a vector once (`mult`) as its setup step.
882 template <
typename Size>
883 void multiply (
double* dest,
double multiplier, Size num)
noexcept
885 #if JUCE_USE_VDSP_FRAMEWORK
886 vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
888 JUCE_PERFORM_VEC_OP_DEST (dest[i] *= multiplier,
891 const Mode::ParallelType mult = Mode::load1 (multiplier);)
// Writes src[i] * multiplier to dest[i] for `num` floats.
// Only the generic path appears in this overload; the multiplier is loaded
// into a vector once (`mult`) as the setup step.
895 template <
typename Size>
896 void multiply (
float* dest,
const float* src,
float multiplier, Size num)
noexcept
898 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
901 JUCE_INCREMENT_SRC_DEST,
902 const Mode::ParallelType mult = Mode::load1 (multiplier);)
// Writes src[i] * multiplier to dest[i] for `num` doubles.
// Only the generic path appears in this overload; the multiplier is loaded
// into a vector once (`mult`) as the setup step.
905 template <
typename Size>
906 void multiply (
double* dest,
const double* src,
double multiplier, Size num)
noexcept
908 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = src[i] * multiplier,
911 JUCE_INCREMENT_SRC_DEST,
912 const Mode::ParallelType mult = Mode::load1 (multiplier);)
// Writes the negated value of each of the `num` floats in `src` to `dest`.
// Uses vDSP_vneg when JUCE_USE_VDSP_FRAMEWORK is enabled (the source pointer
// is cast to non-const for that call); the other path forwards to
// copyWithMultiply with a multiplier of -1.
915 template <
typename Size>
916 void negate (
float* dest,
const float* src, Size num)
noexcept
918 #if JUCE_USE_VDSP_FRAMEWORK
919 vDSP_vneg ((
float*) src, 1, dest, 1, (vDSP_Length) num);
921 copyWithMultiply (dest, src, -1.0f, num);
/** Writes the negated value of each of the num doubles in src to dest.

    Uses vDSP_vnegD when the vDSP framework is enabled, and otherwise forwards
    to the double overload of copyWithMultiply with a multiplier of -1.
*/
template <typename Size>
void negate (double* dest, const double* src, Size num) noexcept
{
   #if JUCE_USE_VDSP_FRAMEWORK
    vDSP_vnegD ((double*) src, 1, dest, 1, (vDSP_Length) num);
   #else
    // A double literal, so that the multiplier already has the element type of
    // this overload instead of relying on an implicit float -> double conversion.
    copyWithMultiply (dest, src, -1.0, num);
   #endif
}
// Writes the absolute value of each of the `num` floats in `src` to `dest`.
// Uses vDSP_vabs when JUCE_USE_VDSP_FRAMEWORK is enabled. The generic vector
// step ANDs each value with a mask whose bit pattern is 0x7fffffff - every bit
// set except the top one - which clears the sign bit. The mask is built once
// through the signMask32 union and loaded into a vector as the setup step.
935 template <
typename Size>
936 void abs (
float* dest,
const float* src, Size num)
noexcept
938 #if JUCE_USE_VDSP_FRAMEWORK
939 vDSP_vabs ((
float*) src, 1, dest, 1, (vDSP_Length) num);
941 [[maybe_unused]] FloatVectorHelpers::signMask32 signMask;
942 signMask.i = 0x7fffffffUL;
943 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = std::abs (src[i]),
944 Mode::bit_and (s, mask),
946 JUCE_INCREMENT_SRC_DEST,
947 const Mode::ParallelType mask = Mode::load1 (signMask.f);)
// Writes the absolute value of each of the `num` doubles in `src` to `dest`.
// Uses vDSP_vabsD when JUCE_USE_VDSP_FRAMEWORK is enabled. The generic vector
// step ANDs each value with a mask whose bit pattern is 0x7fffffffffffffff -
// every bit set except the top one - which clears the sign bit. The mask is
// built once through the signMask64 union and loaded into a vector as the
// setup step.
951 template <
typename Size>
952 void abs (
double* dest,
const double* src, Size num)
noexcept
954 #if JUCE_USE_VDSP_FRAMEWORK
955 vDSP_vabsD ((
double*) src, 1, dest, 1, (vDSP_Length) num);
957 [[maybe_unused]] FloatVectorHelpers::signMask64 signMask;
958 signMask.i = 0x7fffffffffffffffULL;
960 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = std::abs (src[i]),
961 Mode::bit_and (s, mask),
963 JUCE_INCREMENT_SRC_DEST,
964 const Mode::ParallelType mask = Mode::load1 (signMask.d);)
// Writes jmin (src[i], comp) to dest[i] for `num` floats.
// Only the generic path appears in this overload; `comp` is loaded into a
// vector once (`cmp`) as the setup step.
968 template <
typename Size>
969 void min (
float* dest,
const float* src,
float comp, Size num)
noexcept
971 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp),
974 JUCE_INCREMENT_SRC_DEST,
975 const Mode::ParallelType cmp = Mode::load1 (comp);)
// Writes jmin (src[i], comp) to dest[i] for `num` doubles.
// Only the generic path appears in this overload; `comp` is loaded into a
// vector once (`cmp`) as the setup step.
978 template <
typename Size>
979 void min (
double* dest,
const double* src,
double comp, Size num)
noexcept
981 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmin (src[i], comp),
984 JUCE_INCREMENT_SRC_DEST,
985 const Mode::ParallelType cmp = Mode::load1 (comp);)
// Writes jmin (src1[i], src2[i]) to dest[i] for `num` floats.
// Uses vDSP_vmin when JUCE_USE_VDSP_FRAMEWORK is enabled; the source pointers
// are cast to non-const for that call. No setup step is needed, hence the
// empty last macro argument.
988 template <
typename Size>
989 void min (
float* dest,
const float* src1,
const float* src2, Size num)
noexcept
991 #if JUCE_USE_VDSP_FRAMEWORK
992 vDSP_vmin ((
float*) src1, 1, (
float*) src2, 1, dest, 1, (vDSP_Length) num);
994 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]),
997 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Writes jmin (src1[i], src2[i]) to dest[i] for `num` doubles.
// Uses vDSP_vminD when JUCE_USE_VDSP_FRAMEWORK is enabled; the source pointers
// are cast to non-const for that call. The generic path loads s1 and s2 with
// JUCE_LOAD_SRC1_SRC2 and needs no setup step.
1001 template <
typename Size>
1002 void min (
double* dest,
const double* src1,
const double* src2, Size num)
noexcept
1004 #if JUCE_USE_VDSP_FRAMEWORK
1005 vDSP_vminD ((
double*) src1, 1, (
double*) src2, 1, dest, 1, (vDSP_Length) num);
1007 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmin (src1[i], src2[i]),
1009 JUCE_LOAD_SRC1_SRC2,
1010 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Writes jmax (src[i], comp) to dest[i] for `num` floats.
// Only the generic path appears in this overload; `comp` is loaded into a
// vector once (`cmp`) as the setup step.
1014 template <
typename Size>
1015 void max (
float* dest,
const float* src,
float comp, Size num)
noexcept
1017 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp),
1020 JUCE_INCREMENT_SRC_DEST,
1021 const Mode::ParallelType cmp = Mode::load1 (comp);)
// Writes jmax (src[i], comp) to dest[i] for `num` doubles.
// Only the generic path appears in this overload; `comp` is loaded into a
// vector once (`cmp`) as the setup step.
1024 template <
typename Size>
1025 void max (
double* dest,
const double* src,
double comp, Size num)
noexcept
1027 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (src[i], comp),
1030 JUCE_INCREMENT_SRC_DEST,
1031 const Mode::ParallelType cmp = Mode::load1 (comp);)
// Element-wise maximum of two float buffers written to dest.
// Under JUCE_USE_VDSP_FRAMEWORK the visible call is vDSP_vmax (the const is
// cast away from src1/src2 for that call); the scalar expression shown is
// dest[i] = jmax (src1[i], src2[i]).
// NOTE(review): the branch separator and the vector-op macro argument
// (embedded number 1041) are absent from this listing.
1034 template <
typename Size>
1035 void max (
float* dest,
const float* src1,
const float* src2, Size num)
noexcept
1037 #if JUCE_USE_VDSP_FRAMEWORK
1038 vDSP_vmax ((
float*) src1, 1, (
float*) src2, 1, dest, 1, (vDSP_Length) num);
1040 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]),
1042 JUCE_LOAD_SRC1_SRC2,
1043 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Element-wise maximum of two double buffers written to dest.
// Under JUCE_USE_VDSP_FRAMEWORK the visible call is vDSP_vmaxD (the const is
// cast away from src1/src2 for that call); the scalar expression shown is
// dest[i] = jmax (src1[i], src2[i]).
// NOTE(review): the branch separator and the vector-op macro argument
// (embedded number 1054) are absent from this listing.
1047 template <
typename Size>
1048 void max (
double* dest,
const double* src1,
const double* src2, Size num)
noexcept
1050 #if JUCE_USE_VDSP_FRAMEWORK
1051 vDSP_vmaxD ((
double*) src1, 1, (
double*) src2, 1, dest, 1, (vDSP_Length) num);
1053 JUCE_PERFORM_VEC_OP_SRC1_SRC2_DEST (dest[i] = jmax (src1[i], src2[i]),
1055 JUCE_LOAD_SRC1_SRC2,
1056 JUCE_INCREMENT_SRC1_SRC2_DEST, )
// Clamps each float in src to the range [low, high] and writes it to dest.
// Asserts that high >= low. Under JUCE_USE_VDSP_FRAMEWORK the visible call is
// vDSP_vclip; the scalar expression is jmax (jmin (src[i], high), low) and the
// vector expression is Mode::max (Mode::min (s, hi), lo).
// NOTE(review): the branch separator and the macro argument with embedded
// number 1070 are absent from this listing.
1060 template <
typename Size>
1061 void clip (
float* dest,
const float* src,
float low,
float high, Size num)
noexcept
1063 jassert (high >= low);
1065 #if JUCE_USE_VDSP_FRAMEWORK
1066 vDSP_vclip ((
float*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
1068 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low),
1069 Mode::max (Mode::min (s, hi), lo),
1071 JUCE_INCREMENT_SRC_DEST,
// lo / hi hold the two bounds loaded through Mode::load1 for the vector path.
1072 const Mode::ParallelType lo = Mode::load1 (low);
1073 const Mode::ParallelType hi = Mode::load1 (high);)
// Clamps each double in src to the range [low, high] and writes it to dest.
// Asserts that high >= low. Under JUCE_USE_VDSP_FRAMEWORK the visible call is
// vDSP_vclipD; the scalar expression is jmax (jmin (src[i], high), low) and the
// vector expression is Mode::max (Mode::min (s, hi), lo).
// NOTE(review): the branch separator and the macro argument with embedded
// number 1087 are absent from this listing.
1077 template <
typename Size>
1078 void clip (
double* dest,
const double* src,
double low,
double high, Size num)
noexcept
1080 jassert (high >= low);
1082 #if JUCE_USE_VDSP_FRAMEWORK
1083 vDSP_vclipD ((
double*) src, 1, &low, &high, dest, 1, (vDSP_Length) num);
1085 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = jmax (jmin (src[i], high), low),
1086 Mode::max (Mode::min (s, hi), lo),
1088 JUCE_INCREMENT_SRC_DEST,
// lo / hi hold the two bounds loaded through Mode::load1 for the vector path.
1089 const Mode::ParallelType lo = Mode::load1 (low);
1090 const Mode::ParallelType hi = Mode::load1 (high);)
// Returns the range spanned by the first num floats at src.
// When SSE or NEON intrinsics are enabled this delegates to
// FloatVectorHelpers::MinMax<BasicOps32>::findMinAndMax.
// NOTE(review): the branch used when neither is enabled is not visible in
// this listing.
1094 template <
typename Size>
1095 Range<float> findMinAndMax (
const float* src, Size num)
noexcept
1097 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
1098 return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinAndMax (src, num);
// Returns the range spanned by the first num doubles at src.
// When SSE or NEON intrinsics are enabled this delegates to
// FloatVectorHelpers::MinMax<BasicOps64>::findMinAndMax.
// NOTE(review): the branch used when neither is enabled is not visible in
// this listing.
1104 template <
typename Size>
1105 Range<double> findMinAndMax (
const double* src, Size num)
noexcept
1107 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
1108 return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinAndMax (src, num);
1114 template <
typename Size>
1115 float findMinimum (
const float* src, Size num)
noexcept
1117 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
1118 return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num,
true);
1120 return juce::findMinimum (src, num);
1124 template <
typename Size>
1125 double findMinimum (
const double* src, Size num)
noexcept
1127 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
1128 return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num,
true);
1130 return juce::findMinimum (src, num);
1134 template <
typename Size>
1135 float findMaximum (
const float* src, Size num)
noexcept
1137 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
1138 return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps32>::findMinOrMax (src, num,
false);
1140 return juce::findMaximum (src, num);
1144 template <
typename Size>
1145 double findMaximum (
const double* src, Size num)
noexcept
1147 #if JUCE_USE_SSE_INTRINSICS || JUCE_USE_ARM_NEON
1148 return FloatVectorHelpers::MinMax<FloatVectorHelpers::BasicOps64>::findMinOrMax (src, num,
false);
1150 return juce::findMaximum (src, num);
// Converts num ints from src to floats scaled by multiplier, writing to dest.
// The scalar expression is dest[i] = (float) src[i] * multiplier.
// The NEON variant loads four ints (vld1q_s32), converts them (vcvtq_f32_s32)
// and scales them (vmulq_n_f32); the other variant uses an unaligned integer
// load (_mm_loadu_si128), _mm_cvtepi32_ps and Mode::mul with a broadcast
// multiplier.
// NOTE(review): the directive separating the two variants and one macro
// argument of each (embedded numbers 1160 and 1165) are absent from this
// listing.
1154 template <
typename Size>
1155 void convertFixedToFloat (
float* dest,
const int* src,
float multiplier, Size num)
noexcept
1157 #if JUCE_USE_ARM_NEON
1158 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = (
float) src[i] * multiplier,
1159 vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
1161 JUCE_INCREMENT_SRC_DEST, )
1163 JUCE_PERFORM_VEC_OP_SRC_DEST (dest[i] = (
float) src[i] * multiplier,
1164 Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 (
reinterpret_cast<const __m128i*
> (src)))),
1166 JUCE_INCREMENT_SRC_DEST,
1167 const Mode::ParallelType mult = Mode::load1 (multiplier);)
// Forwards to FloatVectorHelpers::clear (dest, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1175template <
typename FloatType,
typename CountType>
1177 CountType numValues)
noexcept
1179 FloatVectorHelpers::clear (dest, numValues);
// Forwards to FloatVectorHelpers::fill (dest, valueToFill, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1182template <
typename FloatType,
typename CountType>
1184 FloatType valueToFill,
1185 CountType numValues)
noexcept
1187 FloatVectorHelpers::fill (dest, valueToFill, numValues);
// Copies numValues elements of FloatType from src to dest with memcpy; the
// byte count is numValues (cast to size_t) times sizeof (FloatType).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1190template <
typename FloatType,
typename CountType>
1192 const FloatType* src,
1193 CountType numValues)
noexcept
1195 memcpy (dest, src, (
size_t) numValues *
sizeof (FloatType));
// Forwards to FloatVectorHelpers::copyWithMultiply (dest, src, multiplier, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1198template <
typename FloatType,
typename CountType>
1200 const FloatType* src,
1201 FloatType multiplier,
1202 CountType numValues)
noexcept
1204 FloatVectorHelpers::copyWithMultiply (dest, src, multiplier, numValues);
// Scalar-add overload: forwards to FloatVectorHelpers::add (dest, amountToAdd, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1207template <
typename FloatType,
typename CountType>
1209 FloatType amountToAdd,
1210 CountType numValues)
noexcept
1212 FloatVectorHelpers::add (dest, amountToAdd, numValues);
// Source-plus-amount overload: forwards to FloatVectorHelpers::add (dest, src, amount, numValues).
// NOTE(review): the line(s) naming this definition and declaring `amount` are
// missing from this listing.
1215template <
typename FloatType,
typename CountType>
1217 const FloatType* src,
1219 CountType numValues)
noexcept
1221 FloatVectorHelpers::add (dest, src, amount, numValues);
// Buffer-add overload: forwards to FloatVectorHelpers::add (dest, src, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1224template <
typename FloatType,
typename CountType>
1226 const FloatType* src,
1227 CountType numValues)
noexcept
1229 FloatVectorHelpers::add (dest, src, numValues);
// Two-source overload: forwards to FloatVectorHelpers::add (dest, src1, src2, num).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1232template <
typename FloatType,
typename CountType>
1234 const FloatType* src1,
1235 const FloatType* src2,
1236 CountType num)
noexcept
1238 FloatVectorHelpers::add (dest, src1, src2, num);
// Forwards to FloatVectorHelpers::subtract (dest, src, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1241template <
typename FloatType,
typename CountType>
1243 const FloatType* src,
1244 CountType numValues)
noexcept
1246 FloatVectorHelpers::subtract (dest, src, numValues);
// Two-source overload: forwards to FloatVectorHelpers::subtract (dest, src1, src2, num).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1249template <
typename FloatType,
typename CountType>
1251 const FloatType* src1,
1252 const FloatType* src2,
1253 CountType num)
noexcept
1255 FloatVectorHelpers::subtract (dest, src1, src2, num);
// Forwards to FloatVectorHelpers::addWithMultiply (dest, src, multiplier, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1258template <
typename FloatType,
typename CountType>
1260 const FloatType* src,
1261 FloatType multiplier,
1262 CountType numValues)
noexcept
1264 FloatVectorHelpers::addWithMultiply (dest, src, multiplier, numValues);
// Two-source overload: forwards to FloatVectorHelpers::addWithMultiply (dest, src1, src2, num).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1267template <
typename FloatType,
typename CountType>
1269 const FloatType* src1,
1270 const FloatType* src2,
1271 CountType num)
noexcept
1273 FloatVectorHelpers::addWithMultiply (dest, src1, src2, num);
// Forwards to FloatVectorHelpers::subtractWithMultiply (dest, src, multiplier, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1276template <
typename FloatType,
typename CountType>
1278 const FloatType* src,
1279 FloatType multiplier,
1280 CountType numValues)
noexcept
1282 FloatVectorHelpers::subtractWithMultiply (dest, src, multiplier, numValues);
// Two-source overload: forwards to FloatVectorHelpers::subtractWithMultiply (dest, src1, src2, num).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1285template <
typename FloatType,
typename CountType>
1287 const FloatType* src1,
1288 const FloatType* src2,
1289 CountType num)
noexcept
1291 FloatVectorHelpers::subtractWithMultiply (dest, src1, src2, num);
// Buffer-multiply overload: forwards to FloatVectorHelpers::multiply (dest, src, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1294template <
typename FloatType,
typename CountType>
1296 const FloatType* src,
1297 CountType numValues)
noexcept
1299 FloatVectorHelpers::multiply (dest, src, numValues);
// Two-source overload: forwards to FloatVectorHelpers::multiply (dest, src1, src2, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1302template <
typename FloatType,
typename CountType>
1304 const FloatType* src1,
1305 const FloatType* src2,
1306 CountType numValues)
noexcept
1308 FloatVectorHelpers::multiply (dest, src1, src2, numValues);
// Scalar-multiply overload: forwards to FloatVectorHelpers::multiply (dest, multiplier, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1311template <
typename FloatType,
typename CountType>
1313 FloatType multiplier,
1314 CountType numValues)
noexcept
1316 FloatVectorHelpers::multiply (dest, multiplier, numValues);
// Source-times-scalar overload: forwards to FloatVectorHelpers::multiply (dest, src, multiplier, num).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1319template <
typename FloatType,
typename CountType>
1321 const FloatType* src,
1322 FloatType multiplier,
1323 CountType num)
noexcept
1325 FloatVectorHelpers::multiply (dest, src, multiplier, num);
// Forwards to FloatVectorHelpers::negate (dest, src, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1328template <
typename FloatType,
typename CountType>
1330 const FloatType* src,
1331 CountType numValues)
noexcept
1333 FloatVectorHelpers::negate (dest, src, numValues);
// Forwards to FloatVectorHelpers::abs (dest, src, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1336template <
typename FloatType,
typename CountType>
1338 const FloatType* src,
1339 CountType numValues)
noexcept
1341 FloatVectorHelpers::abs (dest, src, numValues);
// Scalar-comparison overload: forwards to FloatVectorHelpers::min (dest, src, comp, num).
// NOTE(review): the line(s) naming this definition and declaring `comp` are
// missing from this listing.
1344template <
typename FloatType,
typename CountType>
1346 const FloatType* src,
1348 CountType num)
noexcept
1350 FloatVectorHelpers::min (dest, src, comp, num);
// Two-source overload: forwards to FloatVectorHelpers::min (dest, src1, src2, num).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1353template <
typename FloatType,
typename CountType>
1355 const FloatType* src1,
1356 const FloatType* src2,
1357 CountType num)
noexcept
1359 FloatVectorHelpers::min (dest, src1, src2, num);
// Scalar-comparison overload: forwards to FloatVectorHelpers::max (dest, src, comp, num).
// NOTE(review): the line(s) naming this definition and declaring `comp` are
// missing from this listing.
1362template <
typename FloatType,
typename CountType>
1364 const FloatType* src,
1366 CountType num)
noexcept
1368 FloatVectorHelpers::max (dest, src, comp, num);
// Two-source overload: forwards to FloatVectorHelpers::max (dest, src1, src2, num).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1371template <
typename FloatType,
typename CountType>
1373 const FloatType* src1,
1374 const FloatType* src2,
1375 CountType num)
noexcept
1377 FloatVectorHelpers::max (dest, src1, src2, num);
// Forwards to FloatVectorHelpers::clip (dest, src, low, high, num).
// NOTE(review): the line(s) naming this definition and declaring `low` and
// `high` are missing from this listing.
1380template <
typename FloatType,
typename CountType>
1382 const FloatType* src,
1385 CountType num)
noexcept
1387 FloatVectorHelpers::clip (dest, src, low, high, num);
// Returns the result of FloatVectorHelpers::findMinAndMax (src, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1390template <
typename FloatType,
typename CountType>
1392 CountType numValues)
noexcept
1394 return FloatVectorHelpers::findMinAndMax (src, numValues);
// Returns the result of FloatVectorHelpers::findMinimum (src, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1397template <
typename FloatType,
typename CountType>
1399 CountType numValues)
noexcept
1401 return FloatVectorHelpers::findMinimum (src, numValues);
// Returns the result of FloatVectorHelpers::findMaximum (src, numValues).
// NOTE(review): the line(s) naming this definition are missing from this listing.
1404template <
typename FloatType,
typename CountType>
1406 CountType numValues)
noexcept
1408 return FloatVectorHelpers::findMaximum (src, numValues);
1416void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (
float* dest,
const int* src,
float multiplier,
size_t num)
noexcept
1418 FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, num);
1421void JUCE_CALLTYPE FloatVectorOperations::convertFixedToFloat (
float* dest,
const int* src,
float multiplier,
int num)
noexcept
1423 FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, num);
// Reads the floating-point control/status register into fpsr.
// Visible variants: _mm_getcsr() on Intel with SSE; on ARM either
// _control87 (0, 0) masked with _MCW_DN, or inline asm reading fpcr (mrs) or
// fpscr (vmrs).
// NOTE(review): the declaration of fpsr, the conditions choosing between the
// ARM variants, the asm operand lists, the fallback for other platforms and
// the return statement are all absent from this listing.
1426intptr_t JUCE_CALLTYPE FloatVectorOperations::getFpStatusRegister() noexcept
1429 #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
1430 fpsr =
static_cast<intptr_t
> (_mm_getcsr());
1431 #elif (JUCE_64BIT && JUCE_ARM) || JUCE_USE_ARM_NEON
1436 fpsr = (intptr_t) (_control87 (0, 0) & _MCW_DN);
1439 asm volatile(
"mrs %0, fpcr"
1441 #elif JUCE_USE_ARM_NEON
1442 asm volatile(
"vmrs %0, fpscr"
1447 #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
// Writes fpsr to the floating-point control/status register.
// Visible variants: _mm_setcsr on Intel with SSE (the value is first narrowed
// to uint32_t through a volatile local); on ARM either _control87 with the
// _MCW_DN mask, or inline asm writing fpcr (msr) or fpscr (vmsr).
// NOTE(review): the conditions choosing between the ARM variants, the asm
// operand lists and the fallback for other platforms are absent from this
// listing.
1455void JUCE_CALLTYPE FloatVectorOperations::setFpStatusRegister ([[maybe_unused]] intptr_t fpsr)
noexcept
1457 #if JUCE_INTEL && JUCE_USE_SSE_INTRINSICS
1460 volatile auto fpsr_w =
static_cast<uint32_t
> (fpsr);
1461 _mm_setcsr (fpsr_w);
1462 #elif (JUCE_64BIT && JUCE_ARM) || JUCE_USE_ARM_NEON
1464 _control87 ((
unsigned int) fpsr, _MCW_DN);
1467 asm volatile(
"msr fpcr, %0"
1470 #elif JUCE_USE_ARM_NEON
1471 asm volatile(
"vmsr fpscr, %0"
1477 #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
// Body fragment that sets or clears one mask in the FP status register
// according to shouldEnable: the register is read, the mask bits are cleared,
// and they are OR-ed back in only when shouldEnable is true.
// The mask is _MM_FLUSH_ZERO_MASK on SSE builds and bit 24 otherwise.
// NOTE(review): the signature line and the #else / #endif lines are absent
// from this listing.
1485 #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || (JUCE_64BIT && JUCE_ARM))
1486 #if JUCE_USE_SSE_INTRINSICS
1487 intptr_t mask = _MM_FLUSH_ZERO_MASK;
1489 intptr_t mask = (1 << 24 );
1491 setFpStatusRegister ((getFpStatusRegister() & (~mask)) | (shouldEnable ? mask : 0));
1493 #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
// Body fragment that sets or clears one mask in the FP status register
// according to shouldDisable: the register is read, the mask bits are cleared,
// and they are OR-ed back in only when shouldDisable is true.
// The mask is 0x8040 on SSE builds and bit 24 otherwise.
// NOTE(review): the signature line and the #else / #endif lines are absent
// from this listing.
1501 #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || (JUCE_64BIT && JUCE_ARM))
1502 #if JUCE_USE_SSE_INTRINSICS
1503 intptr_t mask = 0x8040;
1505 intptr_t mask = (1 << 24 );
1508 setFpStatusRegister ((getFpStatusRegister() & (~mask)) | (shouldDisable ? mask : 0));
1511 #if ! (defined (JUCE_INTEL) || defined (JUCE_ARM))
// Body fragment that reports whether every bit of the mask is currently set
// in the FP status register (0x8040 on SSE builds, bit 24 otherwise).
// NOTE(review): the signature line, the #else / #endif lines and whatever is
// returned on unsupported platforms are absent from this listing.
1519 #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || (JUCE_64BIT && JUCE_ARM))
1520 #if JUCE_USE_SSE_INTRINSICS
1521 intptr_t mask = 0x8040;
1523 intptr_t mask = (1 << 24 );
1526 return ((getFpStatusRegister() & mask) == mask);
1532ScopedNoDenormals::ScopedNoDenormals() noexcept
1534 #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || (JUCE_64BIT && JUCE_ARM))
1535 #if JUCE_USE_SSE_INTRINSICS
1536 intptr_t mask = 0x8040;
1538 intptr_t mask = (1 << 24 );
1541 fpsr = FloatVectorOperations::getFpStatusRegister();
1542 FloatVectorOperations::setFpStatusRegister (fpsr | mask);
1546ScopedNoDenormals::~ScopedNoDenormals() noexcept
1548 #if JUCE_USE_SSE_INTRINSICS || (JUCE_USE_ARM_NEON || (JUCE_64BIT && JUCE_ARM))
1549 FloatVectorOperations::setFpStatusRegister (fpsr);
// Unit test that checks FloatVectorOperations against scalar reference
// results, for both float and double buffers.
// NOTE(review): this listing omits brace-only lines, several loop headers,
// preprocessor lines and the definition of minMax2; see the notes below.
1558class FloatVectorOperationsTests final :
public UnitTest
1561 FloatVectorOperationsTests()
1562 : UnitTest (
"FloatVectorOperations", UnitTestCategories::audio)
// Per-value-type test body; the enclosing struct's own declaration line is
// not visible here.
1565 template <
typename ValueType>
// Runs one randomised pass: picks a length of 1..range (range is 500 or 10),
// allocates buffers with 16 spare elements, then checks each operation.
1568 static void runTest (UnitTest& u, Random random)
1570 const int range = random.nextBool() ? 500 : 10;
1571 const int num = random.nextInt (range) + 1;
1573 HeapBlock<ValueType> buffer1 (num + 16), buffer2 (num + 16);
1574 HeapBlock<int> buffer3 (num + 16,
true);
// Two alternative sets of working pointers follow: the first uses the buffers
// as allocated, the second offsets each by a random byte count below 16
// (presumably to exercise unaligned paths). The preprocessor lines choosing
// between them are not visible in this listing.
1577 ValueType*
const data1 = buffer1;
1578 ValueType*
const data2 = buffer2;
1579 int*
const int1 = buffer3;
1583 ValueType*
const data1 = addBytesToPointer (buffer1.get(), random.nextInt (16));
1584 ValueType*
const data2 = addBytesToPointer (buffer2.get(), random.nextInt (16));
1585 int*
const int1 = addBytesToPointer (buffer3.get(), random.nextInt (16));
1588 fillRandomly (random, data1, num);
1589 fillRandomly (random, data2, num);
// Min/max searches are compared with the scalar juce:: equivalents.
// NOTE(review): the definition of minMax2 is not visible in this listing.
1591 Range<ValueType> minMax1 (FloatVectorOperations::findMinAndMax (data1, num));
1593 u.expect (minMax1 == minMax2);
1595 u.expect (valuesMatch (FloatVectorOperations::findMinimum (data1, num), juce::findMinimum (data1, num)));
1596 u.expect (valuesMatch (FloatVectorOperations::findMaximum (data1, num), juce::findMaximum (data1, num)));
1598 u.expect (valuesMatch (FloatVectorOperations::findMinimum (data2, num), juce::findMinimum (data2, num)));
1599 u.expect (valuesMatch (FloatVectorOperations::findMaximum (data2, num), juce::findMaximum (data2, num)));
// From here each operation is applied to constant-filled buffers, so every
// element is expected to equal one exact value after each step.
1601 FloatVectorOperations::clear (data1, num);
1602 u.expect (areAllValuesEqual (data1, num, 0));
1604 FloatVectorOperations::fill (data1, (ValueType) 2, num);
1605 u.expect (areAllValuesEqual (data1, num, (ValueType) 2));
1607 FloatVectorOperations::add (data1, (ValueType) 2, num);
1608 u.expect (areAllValuesEqual (data1, num, (ValueType) 4));
1610 FloatVectorOperations::copy (data2, data1, num);
1611 u.expect (areAllValuesEqual (data2, num, (ValueType) 4));
1613 FloatVectorOperations::add (data2, data1, num);
1614 u.expect (areAllValuesEqual (data2, num, (ValueType) 8));
1616 FloatVectorOperations::copyWithMultiply (data2, data1, (ValueType) 4, num);
1617 u.expect (areAllValuesEqual (data2, num, (ValueType) 16));
1619 FloatVectorOperations::addWithMultiply (data2, data1, (ValueType) 4, num);
1620 u.expect (areAllValuesEqual (data2, num, (ValueType) 32));
1622 FloatVectorOperations::multiply (data1, (ValueType) 2, num);
1623 u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
1625 FloatVectorOperations::multiply (data1, data2, num);
1626 u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
1628 FloatVectorOperations::negate (data2, data1, num);
1629 u.expect (areAllValuesEqual (data2, num, (ValueType) -256));
1631 FloatVectorOperations::subtract (data1, data2, num);
1632 u.expect (areAllValuesEqual (data1, num, (ValueType) 512));
1634 FloatVectorOperations::abs (data1, data2, num);
1635 u.expect (areAllValuesEqual (data1, num, (ValueType) 256));
1637 FloatVectorOperations::abs (data2, data1, num);
1638 u.expect (areAllValuesEqual (data2, num, (ValueType) 256));
1640 fillRandomly (random, int1, num);
1641 doConversionTest (u, data1, data2, int1, num);
// In-place use: data1 is both destination and first source (2 + 2 * 3 = 8).
1643 FloatVectorOperations::fill (data1, (ValueType) 2, num);
1644 FloatVectorOperations::fill (data2, (ValueType) 3, num);
1645 FloatVectorOperations::addWithMultiply (data1, data1, data2, num);
1646 u.expect (areAllValuesEqual (data1, num, (ValueType) 8));
// Float overload: compares convertFixedToFloat with the scalar convertFixed
// reference, using a multiplier of 2.0f.
1649 static void doConversionTest (UnitTest& u,
float* data1,
float* data2,
int*
const int1,
int num)
1651 FloatVectorOperations::convertFixedToFloat (data1, int1, 2.0f, num);
1652 convertFixed (data2, int1, 2.0f, num);
1653 u.expect (buffersMatch (data1, data2, num));
// Double overload: intentionally empty, so no conversion check runs for double.
1656 static void doConversionTest (UnitTest&,
double*,
double*,
int*,
int) {}
// Writes values of random.nextDouble() * 1000.0, converted to ValueType.
// NOTE(review): the loop header is not visible in this listing.
1658 static void fillRandomly (Random& random, ValueType* d,
int num)
1661 *d++ = (ValueType) (random.nextDouble() * 1000.0);
// Writes random.nextInt() values.
// NOTE(review): the loop header is not visible in this listing.
1664 static void fillRandomly (Random& random,
int* d,
int num)
1667 *d++ = random.nextInt();
// Scalar reference for the fixed-to-float conversion.
// NOTE(review): the loop header is not visible in this listing.
1670 static void convertFixed (
float* d,
const int* s, ValueType multiplier,
int num)
1673 *d++ = (float) *s++ * multiplier;
// Compares each element with target using exactlyEqual.
// NOTE(review): the loop header and return statements are not visible here.
1676 static bool areAllValuesEqual (
const ValueType* d,
int num, ValueType target)
1679 if (! exactlyEqual (*d++, target))
// Compares two buffers element by element using valuesMatch.
// NOTE(review): the loop header and return statements are not visible here.
1685 static bool buffersMatch (
const ValueType* d1,
const ValueType* d2,
int num)
1688 if (! valuesMatch (*d1++, *d2++))
// True when the absolute difference is below numeric_limits::epsilon().
// NOTE(review): the tolerance is absolute, not scaled to the operands, so for
// magnitudes of 1 or more this is in effect an exact-equality check.
1694 static bool valuesMatch (ValueType v1, ValueType v2)
1696 return std::abs (v1 - v2) < std::numeric_limits<ValueType>::epsilon();
// UnitTest entry point: runs 1000 iterations, each exercising the float and
// then the double test with a Random obtained from getRandom().
1700 void runTest()
override
1702 beginTest (
"FloatVectorOperations");
1704 for (
int i = 1000; --i >= 0;)
1706 TestRunner<float>::runTest (*
this, getRandom());
1707 TestRunner<double>::runTest (*
this, getRandom());
// File-scope instance of the test class.
1712static FloatVectorOperationsTests vectorOpTests;
static void JUCE_CALLTYPE disableDenormalisedNumberSupport(bool shouldDisable=true) noexcept
static void JUCE_CALLTYPE enableFlushToZeroMode(bool shouldEnable) noexcept
static bool JUCE_CALLTYPE areDenormalsDisabled() noexcept
static Range findMinAndMax(const ValueType *values, Integral numValues) noexcept
static void JUCE_CALLTYPE add(FloatType *dest, FloatType amountToAdd, CountType numValues) noexcept
static void JUCE_CALLTYPE max(FloatType *dest, const FloatType *src, FloatType comp, CountType num) noexcept
static FloatType JUCE_CALLTYPE findMinimum(const FloatType *src, CountType numValues) noexcept
static void JUCE_CALLTYPE multiply(FloatType *dest, const FloatType *src, CountType numValues) noexcept
static void JUCE_CALLTYPE clear(FloatType *dest, CountType numValues) noexcept
static void JUCE_CALLTYPE addWithMultiply(FloatType *dest, const FloatType *src, FloatType multiplier, CountType numValues) noexcept
static Range< FloatType > JUCE_CALLTYPE findMinAndMax(const FloatType *src, CountType numValues) noexcept
static void JUCE_CALLTYPE copy(FloatType *dest, const FloatType *src, CountType numValues) noexcept
static void JUCE_CALLTYPE clip(FloatType *dest, const FloatType *src, FloatType low, FloatType high, CountType num) noexcept
static FloatType JUCE_CALLTYPE findMaximum(const FloatType *src, CountType numValues) noexcept
static void JUCE_CALLTYPE subtractWithMultiply(FloatType *dest, const FloatType *src, FloatType multiplier, CountType numValues) noexcept
static void JUCE_CALLTYPE copyWithMultiply(FloatType *dest, const FloatType *src, FloatType multiplier, CountType numValues) noexcept
static void JUCE_CALLTYPE fill(FloatType *dest, FloatType valueToFill, CountType numValues) noexcept
static void JUCE_CALLTYPE subtract(FloatType *dest, const FloatType *src, CountType numValues) noexcept
static void JUCE_CALLTYPE abs(FloatType *dest, const FloatType *src, CountType numValues) noexcept
static void JUCE_CALLTYPE negate(FloatType *dest, const FloatType *src, CountType numValues) noexcept
static void JUCE_CALLTYPE min(FloatType *dest, const FloatType *src, FloatType comp, CountType num) noexcept