summary refs log tree commit diff stats
path: root/chromium/third_party/openmax_dl/dl/sp/src/mips/mips_FFTFwd_RToCCS_F32_real.c
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/third_party/openmax_dl/dl/sp/src/mips/mips_FFTFwd_RToCCS_F32_real.c')
-rw-r--r-- chromium/third_party/openmax_dl/dl/sp/src/mips/mips_FFTFwd_RToCCS_F32_real.c 191
1 files changed, 191 insertions, 0 deletions
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/mips/mips_FFTFwd_RToCCS_F32_real.c b/chromium/third_party/openmax_dl/dl/sp/src/mips/mips_FFTFwd_RToCCS_F32_real.c
new file mode 100644
index 00000000000..9a17567277f
--- /dev/null
+++ b/chromium/third_party/openmax_dl/dl/sp/src/mips/mips_FFTFwd_RToCCS_F32_real.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ *
+ */
+
+#include <stdint.h>
+
+#include "dl/api/omxtypes.h"
+#include "dl/sp/api/mipsSP.h"
+
+OMXResult mips_FFTFwd_RToCCS_F32_real(const OMX_F32* pSrc,
+ OMX_F32* pDst,
+ const MIPSFFTSpec_R_FC32* pFFTSpec) {
+ OMX_U32 num_transforms, step;
+ OMX_FC32* p_dst = (OMX_FC32*)pDst;
+ OMX_FC32* p_buf = (OMX_FC32*)pFFTSpec->pBuf;
+ OMX_F32 tmp1, tmp2, tmp3, tmp4;
+ OMX_F32* w_re_ptr;
+ OMX_F32* w_im_ptr;
+ /*
+ * Forward FFT of a real input signal, for the small transform sizes that
+ * are handled directly in this function.
+ *
+ * pSrc     - real input samples; only read through the index table
+ *            pFFTSpec->pBitRev.
+ * pDst     - output, written as OMX_FC32 (Re, Im) pairs. Entries 0..2 are
+ *            written for order 2, 0..4 on the size-8 path and 0..8 on the
+ *            size-16 path.
+ * pFFTSpec - supplies order, pBitRev, pOffset, pTwiddle and the buffer pBuf,
+ *            which receives the intermediate stage results.
+ *
+ * Always returns OMX_Sts_NoErr; no argument is validated in this function.
+ *
+ * NOTE(review): the last stage is hard-wired for 16 points and orders below
+ * 2 have no dedicated path, so this presumably expects 2 <= order <= 4 --
+ * confirm against the caller. SUBTRANSFORM_CONST, SQRT1_2 and
+ * TWIDDLE_TABLE_ORDER are defined outside this file.
+ */
+ /* Transform for order = 2. */
+ /* TODO: hard-code the offsets for pSrc. */
+ if (pFFTSpec->order == 2) {
+ OMX_U16* p_bitrev = pFFTSpec->pBitRev;
+ /* Sums and differences of the two input pairs selected by pBitRev. */
+ tmp1 = pSrc[p_bitrev[0]] + pSrc[p_bitrev[1]];
+ tmp2 = pSrc[p_bitrev[2]] + pSrc[p_bitrev[3]];
+ tmp3 = pSrc[p_bitrev[0]] - pSrc[p_bitrev[1]];
+ tmp4 = pSrc[p_bitrev[2]] - pSrc[p_bitrev[3]];
+ /* Entries 0 and 2 are purely real; entry 1 is tmp3 - i * tmp4. */
+ p_dst[0].Re = tmp1 + tmp2;
+ p_dst[2].Re = tmp1 - tmp2;
+ p_dst[0].Im = 0.0f;
+ p_dst[2].Im = 0.0f;
+ p_dst[1].Re = tmp3;
+ p_dst[1].Im = -tmp4;
+
+ return OMX_Sts_NoErr;
+ }
+
+ /*
+ * Loop performing sub-transforms of size 4, which contain two butterfly
+ * operations. Reading the input signal from split-radix bitreverse offsets.
+ * The sub-transform count comes from SUBTRANSFORM_CONST shifted right by
+ * (16 - order); the "| 1" guarantees at least one iteration.
+ */
+ num_transforms = (SUBTRANSFORM_CONST >> (16 - pFFTSpec->order)) | 1;
+ for (uint32_t n = 0; n < num_transforms; ++n) {
+ OMX_U32 offset = pFFTSpec->pOffset[n] << 2;
+ OMX_FC32* p_tmp = p_buf + offset;
+ OMX_U16* p_bitrev = pFFTSpec->pBitRev + offset;
+ /* The same offset (pOffset[n] * 4) selects both the buffer slot and the
+ * group of four input indices. */
+ tmp1 = pSrc[p_bitrev[0]] + pSrc[p_bitrev[1]];
+ tmp2 = pSrc[p_bitrev[2]] + pSrc[p_bitrev[3]];
+ tmp3 = pSrc[p_bitrev[0]] - pSrc[p_bitrev[1]];
+ tmp4 = pSrc[p_bitrev[2]] - pSrc[p_bitrev[3]];
+ /*
+ * Outputs 0 and 2 get a zero imaginary part, and output 3 is stored as
+ * the complex conjugate of output 1 (same Re, negated Im).
+ */
+ p_tmp[0].Re = tmp1 + tmp2;
+ p_tmp[2].Re = tmp1 - tmp2;
+ p_tmp[0].Im = 0.0f;
+ p_tmp[2].Im = 0.0f;
+ p_tmp[1].Re = tmp3;
+ p_tmp[3].Re = tmp3;
+ p_tmp[1].Im = -tmp4;
+ p_tmp[3].Im = tmp4;
+ }
+
+ /*
+ * Loop performing sub-transforms of size 8,
+ * which contain four butterfly operations.
+ */
+ num_transforms >>= 1;
+ if (!num_transforms) {
+ /*
+ * Means the FFT size is equal to 8, so this is the last stage. Place the
+ * output to the destination buffer and avoid unnecessary computations.
+ * Only p_dst[0..4] are written. The first half of the butterfly inputs is
+ * read from p_buf[0..3]; the second half is computed here straight from
+ * pSrc via p_bitrev[4..7].
+ */
+ OMX_FC32* p_tmp = p_buf;
+ OMX_U16* p_bitrev = pFFTSpec->pBitRev;
+ OMX_F32 tmp5;
+ /* Sums of the inputs indexed by p_bitrev[4..7], then their sum and
+ * difference. */
+ tmp1 = pSrc[p_bitrev[4]] + pSrc[p_bitrev[5]];
+ tmp2 = pSrc[p_bitrev[6]] + pSrc[p_bitrev[7]];
+ tmp3 = tmp1 + tmp2;
+ tmp4 = tmp1 - tmp2;
+ /* Differences of the same inputs, combined and scaled by SQRT1_2. */
+ tmp1 = pSrc[p_bitrev[4]] - pSrc[p_bitrev[5]];
+ tmp2 = pSrc[p_bitrev[6]] - pSrc[p_bitrev[7]];
+ tmp5 = SQRT1_2 * (tmp1 + tmp2);
+ tmp1 = SQRT1_2 * (tmp1 - tmp2);
+ /* Final butterflies, written directly to p_dst instead of p_buf. */
+ p_dst[4].Re = p_tmp[0].Re - tmp3;
+ p_dst[0].Re = p_tmp[0].Re + tmp3;
+ p_dst[0].Im = p_tmp[0].Im;
+ p_dst[4].Im = p_tmp[0].Im;
+ p_dst[2].Re = p_tmp[2].Re;
+ p_dst[2].Im = p_tmp[2].Im - tmp4;
+ p_dst[1].Re = p_tmp[1].Re + tmp5;
+ p_dst[1].Im = p_tmp[1].Im - tmp1;
+ p_dst[3].Re = p_tmp[3].Re - tmp5;
+ p_dst[3].Im = p_tmp[3].Im - tmp1;
+
+ return OMX_Sts_NoErr;
+ }
+
+ num_transforms |= 1;
+ /* Same computation as the size-8 branch above, but the results stay in
+ * p_buf and all eight outputs of each sub-transform are produced. */
+ for (uint32_t n = 0; n < num_transforms; ++n) {
+ OMX_U32 offset = pFFTSpec->pOffset[n] << 3;
+ OMX_FC32* p_tmp = p_buf + offset;
+ OMX_U16* p_bitrev = pFFTSpec->pBitRev + offset;
+ OMX_F32 tmp5;
+ /* Sums of the inputs indexed by p_bitrev[4..7], then their sum and
+ * difference. */
+ tmp1 = pSrc[p_bitrev[4]] + pSrc[p_bitrev[5]];
+ tmp2 = pSrc[p_bitrev[6]] + pSrc[p_bitrev[7]];
+ tmp3 = tmp1 + tmp2;
+ tmp4 = tmp1 - tmp2;
+ /* Differences of the same inputs, combined and scaled by SQRT1_2. */
+ tmp1 = pSrc[p_bitrev[4]] - pSrc[p_bitrev[5]];
+ tmp2 = pSrc[p_bitrev[6]] - pSrc[p_bitrev[7]];
+ tmp5 = SQRT1_2 * (tmp1 + tmp2);
+ tmp1 = SQRT1_2 * (tmp1 - tmp2);
+ /*
+ * In-place butterflies: each entry 4..7 is written before the entry 0..3
+ * it is computed from gets overwritten, so the statement order matters.
+ */
+ p_tmp[4].Re = p_tmp[0].Re - tmp3;
+ p_tmp[0].Re = p_tmp[0].Re + tmp3;
+ p_tmp[4].Im = p_tmp[0].Im;
+ p_tmp[6].Re = p_tmp[2].Re;
+ p_tmp[6].Im = p_tmp[2].Im + tmp4;
+ p_tmp[2].Im = p_tmp[2].Im - tmp4;
+
+ p_tmp[5].Re = p_tmp[1].Re - tmp5;
+ p_tmp[1].Re = p_tmp[1].Re + tmp5;
+ p_tmp[5].Im = p_tmp[1].Im + tmp1;
+ p_tmp[1].Im = p_tmp[1].Im - tmp1;
+ p_tmp[7].Re = p_tmp[3].Re + tmp5;
+ p_tmp[3].Re = p_tmp[3].Re - tmp5;
+ p_tmp[7].Im = p_tmp[3].Im + tmp1;
+ p_tmp[3].Im = p_tmp[3].Im - tmp1;
+ }
+
+ step = 1 << (TWIDDLE_TABLE_ORDER - 4);
+ /*
+ * Last FFT stage, performing sub-transforms of size 16. Place the output
+ * into the destination buffer and avoid unnecessary computations.
+ * Entries 0, 4 and 8 are produced first, without any twiddle multiplication;
+ * step is the stride used to walk pTwiddle for the remaining entries.
+ */
+ tmp1 = p_buf[8].Re + p_buf[12].Re;
+ tmp2 = p_buf[8].Re - p_buf[12].Re;
+ tmp3 = p_buf[8].Im + p_buf[12].Im;
+ tmp4 = p_buf[8].Im - p_buf[12].Im;
+ /* Combine with p_buf[0] and p_buf[4] to form entries 8, 0 and 4. */
+ p_dst[8].Re = p_buf[0].Re - tmp1;
+ p_dst[0].Re = p_buf[0].Re + tmp1;
+ p_dst[8].Im = p_buf[0].Im - tmp3;
+ p_dst[0].Im = p_buf[0].Im + tmp3;
+ p_dst[4].Re = p_buf[4].Re + tmp4;
+ p_dst[4].Im = p_buf[4].Im - tmp2;
+ /*
+ * w_re_ptr walks forward from pTwiddle + step, while w_im_ptr walks
+ * backward from pTwiddle + (1 << (TWIDDLE_TABLE_ORDER - 2)) - step. The
+ * unparenthesized "1 << TWIDDLE_TABLE_ORDER - 2" below has exactly that
+ * meaning, because "-" binds tighter than "<<" in C.
+ */
+ w_re_ptr = pFFTSpec->pTwiddle + step;
+ w_im_ptr =
+ pFFTSpec->pTwiddle + (OMX_U32)(1 << TWIDDLE_TABLE_ORDER - 2) - step;
+
+ /* Loop performing split-radix butterfly operations. */
+ for (uint32_t n = 1; n < 4; ++n) {
+ OMX_F32 tmp5, tmp6;
+ OMX_F32 w_re = *w_re_ptr;
+ OMX_F32 w_im = *w_im_ptr;
+ /*
+ * (tmp1, tmp2) = p_buf[8 + n] * (w_re - i * w_im) and
+ * (tmp3, tmp4) = p_buf[12 + n] * (w_re + i * w_im), as (Re, Im) pairs.
+ */
+ tmp1 = w_re * p_buf[8 + n].Re + w_im * p_buf[8 + n].Im;
+ tmp2 = w_re * p_buf[8 + n].Im - w_im * p_buf[8 + n].Re;
+ tmp3 = w_re * p_buf[12 + n].Re - w_im * p_buf[12 + n].Im;
+ tmp4 = w_re * p_buf[12 + n].Im + w_im * p_buf[12 + n].Re;
+ /* Sum goes to (tmp5, tmp6); the difference replaces (tmp1, tmp2). */
+ tmp5 = tmp1 + tmp3;
+ tmp1 = tmp1 - tmp3;
+ tmp6 = tmp2 + tmp4;
+ tmp2 = tmp2 - tmp4;
+ /* Only entries n and 4 + n are written; the other butterfly outputs are
+ * never computed. */
+ p_dst[n].Re = p_buf[n].Re + tmp5;
+ p_dst[n].Im = p_buf[n].Im + tmp6;
+ p_dst[4 + n].Re = p_buf[4 + n].Re + tmp2;
+ p_dst[4 + n].Im = p_buf[4 + n].Im - tmp1;
+
+ w_re_ptr += step;
+ w_im_ptr -= step;
+ }
+ return OMX_Sts_NoErr;
+}