51 radix2_bfly(
ar, ai,
br, bi);
52 radix2_bfly(
cr, ci,
dr, di);
55 radix2_bfly(
ar, ai,
cr, ci);
85template<
typename T,
typename TBitReverse=DefaultBitReverse>
86static size_t bitrev_initial_radix4_core_vec4(
float *
out,
float const *in,
size_t N,
FftSign sign)
88 static_assert(kBurstSize >= 4,
"This code requires a burst size of at least 4.");
90 size_t Nbits = rrCtz64(N);
99 float const *
inA = in;
100 float const *
inB = in + burst_swizzle(2 *
step);
101 float const *
inC = in + burst_swizzle(1 *
step);
102 float const *
inD = in + burst_swizzle(3 *
step);
111 for (
size_t i = 0; i <
step; i += 4)
113 size_t is = burst_swizzle(i);
115 size_t js = burst_swizzle(j);
117 T
ar = T::load(&
inA[
js + 0*kBurstSize]);
118 T ai = T::load(&
inA[
js + 1*kBurstSize]);
119 T
br = T::load(&
inB[
js + 0*kBurstSize]);
120 T bi = T::load(&
inB[
js + 1*kBurstSize]);
121 T
cr = T::load(&
inC[
js + 0*kBurstSize]);
122 T ci = T::load(&
inC[
js + 1*kBurstSize]);
123 T
dr = T::load(&
inD[
js + 0*kBurstSize]);
124 T di = T::load(&
inD[
js + 1*kBurstSize]);
127 dft4_bfly_permuted(
ar, ai,
br, bi,
cr, ci,
dr, di);
131 T::transpose4x4(ai, bi, ci, di);
134 ar.store(&
outA[is + 0*kBurstSize]);
135 ai.store(&
outA[is + 1*kBurstSize]);
136 br.store(&
outB[is + 0*kBurstSize]);
137 bi.store(&
outB[is + 1*kBurstSize]);
138 cr.store(&
outC[is + 0*kBurstSize]);
139 ci.store(&
outC[is + 1*kBurstSize]);
140 dr.store(&
outD[is + 0*kBurstSize]);
141 di.store(&
outD[is + 1*kBurstSize]);
148static void initial_radix2_core_vec4(
float *
out,
size_t N,
FftSign sign)
150 size_t const swiz_N = burst_swizzle(N);
155 float *
outB =
out + burst_swizzle(4);
156 size_t swiz_dec = burst_swizzle(~
size_t(7));
160 T
ar = T::load(&
outA[j + 0*kBurstSize]);
161 T ai = T::load(&
outA[j + 1*kBurstSize]);
162 T
br = T::load(&
outB[j + 0*kBurstSize]);
163 T bi = T::load(&
outB[j + 1*kBurstSize]);
165 T::radix2_twiddle(
ar, ai,
br, bi,
wr,
wi);
167 ar.store(&
outA[j + 0*kBurstSize]);
168 ai.store(&
outA[j + 1*kBurstSize]);
169 br.store(&
outB[j + 0*kBurstSize]);
170 bi.store(&
outB[j + 1*kBurstSize]);
176template<
typename T,
typename TBitReverse=DefaultBitReverse>
177static size_t bitrev_initial_radix8_core_vec4(
float *
out,
float const *in,
size_t N,
FftSign sign)
179 static_assert(kBurstSize >= 8,
"This code requires a burst size of at least 8.");
181 size_t Nbits = rrCtz64(N);
196 float const *
inA = in;
197 float const *
inB = in + burst_swizzle(2 *
step);
198 float const *
inC = in + burst_swizzle(1 *
step);
199 float const *
inD = in + burst_swizzle(3 *
step);
208 for (
size_t i = 0; i <
step; i += 8)
212 T
a0r = T::load(&
inA[
j0s + 0*kBurstSize]);
213 T
a0i = T::load(&
inA[
j0s + 1*kBurstSize]);
214 T
b0r = T::load(&
inB[
j0s + 0*kBurstSize]);
215 T
b0i = T::load(&
inB[
j0s + 1*kBurstSize]);
216 T
c0r = T::load(&
inC[
j0s + 0*kBurstSize]);
217 T
c0i = T::load(&
inC[
j0s + 1*kBurstSize]);
218 T
d0r = T::load(&
inD[
j0s + 0*kBurstSize]);
219 T
d0i = T::load(&
inD[
j0s + 1*kBurstSize]);
230 T
a1r = T::load(&
inA[
j1s + 0*kBurstSize]);
231 T
a1i = T::load(&
inA[
j1s + 1*kBurstSize]);
232 T
b1r = T::load(&
inB[
j1s + 0*kBurstSize]);
233 T
b1i = T::load(&
inB[
j1s + 1*kBurstSize]);
234 T
c1r = T::load(&
inC[
j1s + 0*kBurstSize]);
235 T
c1i = T::load(&
inC[
j1s + 1*kBurstSize]);
236 T
d1r = T::load(&
inD[
j1s + 0*kBurstSize]);
237 T
d1i = T::load(&
inD[
j1s + 1*kBurstSize]);
247 size_t is = burst_swizzle(i);
250 a0r.store(&
outA[is + 0*kBurstSize]);
251 a1r.store(&
outA[is + 0*kBurstSize + 4]);
252 a0i.store(&
outA[is + 1*kBurstSize]);
253 a1i.store(&
outA[is + 1*kBurstSize + 4]);
256 b0r.store(&
outB[is + 0*kBurstSize]);
257 b1r.store(&
outB[is + 0*kBurstSize + 4]);
258 b0i.store(&
outB[is + 1*kBurstSize]);
259 b1i.store(&
outB[is + 1*kBurstSize + 4]);
262 c0r.store(&
outC[is + 0*kBurstSize]);
263 c1r.store(&
outC[is + 0*kBurstSize + 4]);
264 c0i.store(&
outC[is + 1*kBurstSize]);
265 c1i.store(&
outC[is + 1*kBurstSize + 4]);
268 d0r.store(&
outD[is + 0*kBurstSize]);
269 d1r.store(&
outD[is + 0*kBurstSize + 4]);
270 d0i.store(&
outD[is + 1*kBurstSize]);
271 d1i.store(&
outD[is + 1*kBurstSize + 4]);
290 size_t swiz_dec = burst_swizzle(~((3 *
step) | (T::kCount - 1)));
310 for (
size_t j = 0, k = 0; j <
swiz_N; )
312 T
ar = T::load(&
outA[j + 0*kBurstSize]);
313 T ai = T::load(&
outA[j + 1*kBurstSize]);
314 T
br = T::load(&
outB[j + 0*kBurstSize]);
315 T bi = T::load(&
outB[j + 1*kBurstSize]);
316 T
cr = T::load(&
outC[j + 0*kBurstSize]);
317 T ci = T::load(&
outC[j + 1*kBurstSize]);
318 T
dr = T::load(&
outD[j + 0*kBurstSize]);
319 T di = T::load(&
outD[j + 1*kBurstSize]);
335 ar.store(&
outA[j + 0*kBurstSize]);
336 ai.store(&
outA[j + 1*kBurstSize]);
337 cr.store(&
outC[j + 0*kBurstSize]);
338 ci.store(&
outC[j + 1*kBurstSize]);
360 br.store(&
outWrB[j + 0*kBurstSize]);
361 bi.store(&
outWrB[j + 1*kBurstSize]);
362 dr.store(&
outWrD[j + 0*kBurstSize]);
363 di.store(&
outWrD[j + 1*kBurstSize]);
371static void burst_imdct_prefft(
float *
dest,
float const *
coeffs,
float const *
tw_re,
float const *
tw_im,
size_t N)
423 for (
size_t i = 0; i < M1; i += T::kCount)
425 size_t j = M2 - T::kCount - i;
426 size_t is = burst_swizzle(i);
427 size_t js = burst_swizzle(j);
468 oir.store(&
dest[is + 0*kBurstSize]);
469 oii.store(&
dest[is + 1*kBurstSize]);
476static void burst_imdct_postfft(
float *
signal0,
float *
signal1,
float const *
dft,
float const *
tw_re,
float const *
tw_im,
size_t N)
481 for (
size_t i = 0; i < M1; i += T::kCount)
483 size_t j = M2 - T::kCount - i;
484 size_t is = burst_swizzle(i);
485 size_t js = burst_swizzle(j);
494 T
re0 = T::load(&
dft[is + 0*kBurstSize]);
495 T
im0 = T::load(&
dft[is + 1*kBurstSize]);
496 T
re1 = T::load(&
dft[
js + 0*kBurstSize]);
497 T
im1 = T::load(&
dft[
js + 1*kBurstSize]);
#define RADFORCEINLINE
Definition rrCore.h:159
UE_FORCEINLINE_HINT TSharedRef< CastToType, Mode > StaticCastSharedRef(TSharedRef< CastFromType, Mode > const &InSharedRef)
Definition SharedPointer.h:127
char * dest
Definition lz4.h:709
Definition radaudio_mdct.cpp:49
FFTIndex s_bit_reverse[kMaxFFTN]
FftSign
Definition radaudio_mdct_internal.h:10
@ FftSign_Positive
Definition radaudio_mdct_internal.h:12
@ FftSign_Negative
Definition radaudio_mdct_internal.h:11
Definition radaudio_mdct_internal.inl:71
size_t shift_amt
Definition radaudio_mdct_internal.inl:72
DefaultBitReverse(size_t fft_nbits)
Definition radaudio_mdct_internal.inl:74
size_t operator()(size_t i) const
Definition radaudio_mdct_internal.inl:79