diff options
Diffstat (limited to 'src/3rdparty/pffft/pffft.c')
-rw-r--r-- | src/3rdparty/pffft/pffft.c | 50 |
1 files changed, 21 insertions, 29 deletions
diff --git a/src/3rdparty/pffft/pffft.c b/src/3rdparty/pffft/pffft.c index e6018e162..9271a9ad9 100644 --- a/src/3rdparty/pffft/pffft.c +++ b/src/3rdparty/pffft/pffft.c @@ -274,7 +274,7 @@ void pffft_aligned_free(void *p) { if (p) free(*((void **) p - 1)); } -int pffft_simd_size() { return SIMD_SZ; } +int pffft_simd_size(void) { return SIMD_SZ; } /* passf2 and passb2 has been merged here, fsign = -1 for passf2, +1 for passb2 @@ -1233,27 +1233,19 @@ PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform) { s->e = (float*)s->data; s->twiddle = (float*)(s->data + (2*s->Ncvec*(SIMD_SZ-1))/SIMD_SZ); - if (transform == PFFFT_REAL) { - for (k=0; k < s->Ncvec; ++k) { - int i = k/SIMD_SZ; - int j = k%SIMD_SZ; - for (m=0; m < SIMD_SZ-1; ++m) { - float A = -2*M_PI*(m+1)*k / N; - s->e[(2*(i*3 + m) + 0) * SIMD_SZ + j] = cos(A); - s->e[(2*(i*3 + m) + 1) * SIMD_SZ + j] = sin(A); - } + for (k=0; k < s->Ncvec; ++k) { + int i = k/SIMD_SZ; + int j = k%SIMD_SZ; + for (m=0; m < SIMD_SZ-1; ++m) { + float A = -2*M_PI*(m+1)*k / N; + s->e[(2*(i*3 + m) + 0) * SIMD_SZ + j] = cos(A); + s->e[(2*(i*3 + m) + 1) * SIMD_SZ + j] = sin(A); } + } + + if (transform == PFFFT_REAL) { rffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac); } else { - for (k=0; k < s->Ncvec; ++k) { - int i = k/SIMD_SZ; - int j = k%SIMD_SZ; - for (m=0; m < SIMD_SZ-1; ++m) { - float A = -2*M_PI*(m+1)*k / N; - s->e[(2*(i*3 + m) + 0)*SIMD_SZ + j] = cos(A); - s->e[(2*(i*3 + m) + 1)*SIMD_SZ + j] = sin(A); - } - } cffti1_ps(N/SIMD_SZ, s->twiddle, s->ifac); } @@ -1314,7 +1306,7 @@ void pffft_zreorder(PFFFT_Setup *setup, const float *in, float *out, pffft_direc v4sf *vout = (v4sf*)out; assert(in != out); if (setup->transform == PFFFT_REAL) { - int k, dk = N/32; + int dk = N/32; if (direction == PFFFT_FORWARD) { for (k=0; k < dk; ++k) { INTERLEAVE2(vin[k*8 + 0], vin[k*8 + 1], vout[2*(0*dk + k) + 0], vout[2*(0*dk + k) + 1]); @@ -1708,19 +1700,19 @@ void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b, # endif #endif - float ar, ai, br, bi, abr, abi; + float ar0, ai0, br0, bi0, abr0, abi0; #ifndef ZCONVOLVE_USING_INLINE_ASM v4sf vscal = LD_PS1(scaling); int i; #endif assert(VALIGNED(a) && VALIGNED(b) && VALIGNED(ab)); - ar = ((v4sf_union*)va)[0].f[0]; - ai = ((v4sf_union*)va)[1].f[0]; - br = ((v4sf_union*)vb)[0].f[0]; - bi = ((v4sf_union*)vb)[1].f[0]; - abr = ((v4sf_union*)vab)[0].f[0]; - abi = ((v4sf_union*)vab)[1].f[0]; + ar0 = ((v4sf_union*)va)[0].f[0]; + ai0 = ((v4sf_union*)va)[1].f[0]; + br0 = ((v4sf_union*)vb)[0].f[0]; + bi0 = ((v4sf_union*)vb)[1].f[0]; + abr0 = ((v4sf_union*)vab)[0].f[0]; + abi0 = ((v4sf_union*)vab)[1].f[0]; #ifdef ZCONVOLVE_USING_INLINE_ASM // inline asm version, unfortunately miscompiled by clang 3.2, at least on ubuntu.. so this will be restricted to gcc const float *a_ = a, *b_ = b; float *ab_ = ab; @@ -1774,8 +1766,8 @@ void pffft_zconvolve_accumulate(PFFFT_Setup *s, const float *a, const float *b, } #endif if (s->transform == PFFFT_REAL) { - ((v4sf_union*)vab)[0].f[0] = abr + ar*br*scaling; - ((v4sf_union*)vab)[1].f[0] = abi + ai*bi*scaling; + ((v4sf_union*)vab)[0].f[0] = abr0 + ar0*br0*scaling; + ((v4sf_union*)vab)[1].f[0] = abi0 + ai0*bi0*scaling; } } |