diff options
Diffstat (limited to 'chromium/third_party/openmax_dl/dl/sp/src/mips/mips_FFTFwd_RToCCS_F32_real.c')
-rw-r--r-- | chromium/third_party/openmax_dl/dl/sp/src/mips/mips_FFTFwd_RToCCS_F32_real.c | 191 |
1 files changed, 191 insertions, 0 deletions
diff --git a/chromium/third_party/openmax_dl/dl/sp/src/mips/mips_FFTFwd_RToCCS_F32_real.c b/chromium/third_party/openmax_dl/dl/sp/src/mips/mips_FFTFwd_RToCCS_F32_real.c new file mode 100644 index 00000000000..9a17567277f --- /dev/null +++ b/chromium/third_party/openmax_dl/dl/sp/src/mips/mips_FFTFwd_RToCCS_F32_real.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#include <stdint.h> + +#include "dl/api/omxtypes.h" +#include "dl/sp/api/mipsSP.h" + +OMXResult mips_FFTFwd_RToCCS_F32_real(const OMX_F32* pSrc, + OMX_F32* pDst, + const MIPSFFTSpec_R_FC32* pFFTSpec) { + OMX_U32 num_transforms, step; + OMX_FC32* p_dst = (OMX_FC32*)pDst; + OMX_FC32* p_buf = (OMX_FC32*)pFFTSpec->pBuf; + OMX_F32 tmp1, tmp2, tmp3, tmp4; + OMX_F32* w_re_ptr; + OMX_F32* w_im_ptr; + + /* Transform for order = 2. */ + /* TODO: hard-code the offsets for p_src. */ + if (pFFTSpec->order == 2) { + OMX_U16* p_bitrev = pFFTSpec->pBitRev; + + tmp1 = pSrc[p_bitrev[0]] + pSrc[p_bitrev[1]]; + tmp2 = pSrc[p_bitrev[2]] + pSrc[p_bitrev[3]]; + tmp3 = pSrc[p_bitrev[0]] - pSrc[p_bitrev[1]]; + tmp4 = pSrc[p_bitrev[2]] - pSrc[p_bitrev[3]]; + + p_dst[0].Re = tmp1 + tmp2; + p_dst[2].Re = tmp1 - tmp2; + p_dst[0].Im = 0.0f; + p_dst[2].Im = 0.0f; + p_dst[1].Re = tmp3; + p_dst[1].Im = -tmp4; + + return OMX_Sts_NoErr; + } + + /* + * Loop performing sub-transforms of size 4, which contain two butterfly + * operations. Reading the input signal from split-radix bitreverse offsets. + */ + num_transforms = (SUBTRANSFORM_CONST >> (16 - pFFTSpec->order)) | 1; + for (uint32_t n = 0; n < num_transforms; ++n) { + OMX_U32 offset = pFFTSpec->pOffset[n] << 2; + OMX_FC32* p_tmp = p_buf + offset; + OMX_U16* p_bitrev = pFFTSpec->pBitRev + offset; + + tmp1 = pSrc[p_bitrev[0]] + pSrc[p_bitrev[1]]; + tmp2 = pSrc[p_bitrev[2]] + pSrc[p_bitrev[3]]; + tmp3 = pSrc[p_bitrev[0]] - pSrc[p_bitrev[1]]; + tmp4 = pSrc[p_bitrev[2]] - pSrc[p_bitrev[3]]; + + p_tmp[0].Re = tmp1 + tmp2; + p_tmp[2].Re = tmp1 - tmp2; + p_tmp[0].Im = 0.0f; + p_tmp[2].Im = 0.0f; + p_tmp[1].Re = tmp3; + p_tmp[3].Re = tmp3; + p_tmp[1].Im = -tmp4; + p_tmp[3].Im = tmp4; + } + + /* + * Loop performing sub-transforms of size 8, + * which contain four butterfly operations. + */ + num_transforms >>= 1; + if (!num_transforms) { + /* + * Means the FFT size is equal to 8, so this is the last stage. Place the + * output to the destination buffer and avoid unnecessary computations. + */ + OMX_FC32* p_tmp = p_buf; + OMX_U16* p_bitrev = pFFTSpec->pBitRev; + OMX_F32 tmp5; + + tmp1 = pSrc[p_bitrev[4]] + pSrc[p_bitrev[5]]; + tmp2 = pSrc[p_bitrev[6]] + pSrc[p_bitrev[7]]; + tmp3 = tmp1 + tmp2; + tmp4 = tmp1 - tmp2; + + tmp1 = pSrc[p_bitrev[4]] - pSrc[p_bitrev[5]]; + tmp2 = pSrc[p_bitrev[6]] - pSrc[p_bitrev[7]]; + tmp5 = SQRT1_2 * (tmp1 + tmp2); + tmp1 = SQRT1_2 * (tmp1 - tmp2); + + p_dst[4].Re = p_tmp[0].Re - tmp3; + p_dst[0].Re = p_tmp[0].Re + tmp3; + p_dst[0].Im = p_tmp[0].Im; + p_dst[4].Im = p_tmp[0].Im; + p_dst[2].Re = p_tmp[2].Re; + p_dst[2].Im = p_tmp[2].Im - tmp4; + p_dst[1].Re = p_tmp[1].Re + tmp5; + p_dst[1].Im = p_tmp[1].Im - tmp1; + p_dst[3].Re = p_tmp[3].Re - tmp5; + p_dst[3].Im = p_tmp[3].Im - tmp1; + + return OMX_Sts_NoErr; + } + + num_transforms |= 1; + + for (uint32_t n = 0; n < num_transforms; ++n) { + OMX_U32 offset = pFFTSpec->pOffset[n] << 3; + OMX_FC32* p_tmp = p_buf + offset; + OMX_U16* p_bitrev = pFFTSpec->pBitRev + offset; + OMX_F32 tmp5; + + tmp1 = pSrc[p_bitrev[4]] + pSrc[p_bitrev[5]]; + tmp2 = pSrc[p_bitrev[6]] + pSrc[p_bitrev[7]]; + tmp3 = tmp1 + tmp2; + tmp4 = tmp1 - tmp2; + + tmp1 = pSrc[p_bitrev[4]] - pSrc[p_bitrev[5]]; + tmp2 = pSrc[p_bitrev[6]] - pSrc[p_bitrev[7]]; + tmp5 = SQRT1_2 * (tmp1 + tmp2); + tmp1 = SQRT1_2 * (tmp1 - tmp2); + + p_tmp[4].Re = p_tmp[0].Re - tmp3; + p_tmp[0].Re = p_tmp[0].Re + tmp3; + p_tmp[4].Im = p_tmp[0].Im; + p_tmp[6].Re = p_tmp[2].Re; + p_tmp[6].Im = p_tmp[2].Im + tmp4; + p_tmp[2].Im = p_tmp[2].Im - tmp4; + + p_tmp[5].Re = p_tmp[1].Re - tmp5; + p_tmp[1].Re = p_tmp[1].Re + tmp5; + p_tmp[5].Im = p_tmp[1].Im + tmp1; + p_tmp[1].Im = p_tmp[1].Im - tmp1; + p_tmp[7].Re = p_tmp[3].Re + tmp5; + p_tmp[3].Re = p_tmp[3].Re - tmp5; + p_tmp[7].Im = p_tmp[3].Im + tmp1; + p_tmp[3].Im = p_tmp[3].Im - tmp1; + } + + step = 1 << (TWIDDLE_TABLE_ORDER - 4); + /* + * Last FFT stage, performing sub-transforms of size 16. Place the output + * into the destination buffer and avoid unnecessary computations. + */ + tmp1 = p_buf[8].Re + p_buf[12].Re; + tmp2 = p_buf[8].Re - p_buf[12].Re; + tmp3 = p_buf[8].Im + p_buf[12].Im; + tmp4 = p_buf[8].Im - p_buf[12].Im; + + p_dst[8].Re = p_buf[0].Re - tmp1; + p_dst[0].Re = p_buf[0].Re + tmp1; + p_dst[8].Im = p_buf[0].Im - tmp3; + p_dst[0].Im = p_buf[0].Im + tmp3; + p_dst[4].Re = p_buf[4].Re + tmp4; + p_dst[4].Im = p_buf[4].Im - tmp2; + + w_re_ptr = pFFTSpec->pTwiddle + step; + w_im_ptr = + pFFTSpec->pTwiddle + (OMX_U32)(1 << TWIDDLE_TABLE_ORDER - 2) - step; + + /* Loop performing split-radix butterfly operations. */ + for (uint32_t n = 1; n < 4; ++n) { + OMX_F32 tmp5, tmp6; + OMX_F32 w_re = *w_re_ptr; + OMX_F32 w_im = *w_im_ptr; + + tmp1 = w_re * p_buf[8 + n].Re + w_im * p_buf[8 + n].Im; + tmp2 = w_re * p_buf[8 + n].Im - w_im * p_buf[8 + n].Re; + tmp3 = w_re * p_buf[12 + n].Re - w_im * p_buf[12 + n].Im; + tmp4 = w_re * p_buf[12 + n].Im + w_im * p_buf[12 + n].Re; + + tmp5 = tmp1 + tmp3; + tmp1 = tmp1 - tmp3; + tmp6 = tmp2 + tmp4; + tmp2 = tmp2 - tmp4; + + p_dst[n].Re = p_buf[n].Re + tmp5; + p_dst[n].Im = p_buf[n].Im + tmp6; + p_dst[4 + n].Re = p_buf[4 + n].Re + tmp2; + p_dst[4 + n].Im = p_buf[4 + n].Im - tmp1; + + w_re_ptr += step; + w_im_ptr -= step; + } + return OMX_Sts_NoErr; +} |