00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #ifdef HAVE_CONFIG_H
00038 # include <config.h>
00039 #endif
00040
00041 #include "lame.h"
00042 #include "machine.h"
00043 #include "encoder.h"
00044 #include "util.h"
00045 #include "fft.h"
00046
00047
00048
00049
00050
00051 #define TRI_SIZE (5-1)
00052
00053
00054 static FLOAT window[BLKSIZE], window_s[BLKSIZE_s / 2];
00055
00056 static const FLOAT costab[TRI_SIZE * 2] = {
00057 9.238795325112867e-01, 3.826834323650898e-01,
00058 9.951847266721969e-01, 9.801714032956060e-02,
00059 9.996988186962042e-01, 2.454122852291229e-02,
00060 9.999811752826011e-01, 6.135884649154475e-03
00061 };
00062
00063 static void
00064 fht(FLOAT * fz, int n)
00065 {
00066 const FLOAT *tri = costab;
00067 int k4;
00068 FLOAT *fi, *gi;
00069 FLOAT const *fn;
00070
00071 n <<= 1;
00072 fn = fz + n;
00073 k4 = 4;
00074 do {
00075 FLOAT s1, c1;
00076 int i, k1, k2, k3, kx;
00077 kx = k4 >> 1;
00078 k1 = k4;
00079 k2 = k4 << 1;
00080 k3 = k2 + k1;
00081 k4 = k2 << 1;
00082 fi = fz;
00083 gi = fi + kx;
00084 do {
00085 FLOAT f0, f1, f2, f3;
00086 f1 = fi[0] - fi[k1];
00087 f0 = fi[0] + fi[k1];
00088 f3 = fi[k2] - fi[k3];
00089 f2 = fi[k2] + fi[k3];
00090 fi[k2] = f0 - f2;
00091 fi[0] = f0 + f2;
00092 fi[k3] = f1 - f3;
00093 fi[k1] = f1 + f3;
00094 f1 = gi[0] - gi[k1];
00095 f0 = gi[0] + gi[k1];
00096 f3 = SQRT2 * gi[k3];
00097 f2 = SQRT2 * gi[k2];
00098 gi[k2] = f0 - f2;
00099 gi[0] = f0 + f2;
00100 gi[k3] = f1 - f3;
00101 gi[k1] = f1 + f3;
00102 gi += k4;
00103 fi += k4;
00104 } while (fi < fn);
00105 c1 = tri[0];
00106 s1 = tri[1];
00107 for (i = 1; i < kx; i++) {
00108 FLOAT c2, s2;
00109 c2 = 1 - (2 * s1) * s1;
00110 s2 = (2 * s1) * c1;
00111 fi = fz + i;
00112 gi = fz + k1 - i;
00113 do {
00114 FLOAT a, b, g0, f0, f1, g1, f2, g2, f3, g3;
00115 b = s2 * fi[k1] - c2 * gi[k1];
00116 a = c2 * fi[k1] + s2 * gi[k1];
00117 f1 = fi[0] - a;
00118 f0 = fi[0] + a;
00119 g1 = gi[0] - b;
00120 g0 = gi[0] + b;
00121 b = s2 * fi[k3] - c2 * gi[k3];
00122 a = c2 * fi[k3] + s2 * gi[k3];
00123 f3 = fi[k2] - a;
00124 f2 = fi[k2] + a;
00125 g3 = gi[k2] - b;
00126 g2 = gi[k2] + b;
00127 b = s1 * f2 - c1 * g3;
00128 a = c1 * f2 + s1 * g3;
00129 fi[k2] = f0 - a;
00130 fi[0] = f0 + a;
00131 gi[k3] = g1 - b;
00132 gi[k1] = g1 + b;
00133 b = c1 * g2 - s1 * f3;
00134 a = s1 * g2 + c1 * f3;
00135 gi[k2] = g0 - a;
00136 gi[0] = g0 + a;
00137 fi[k3] = f1 - b;
00138 fi[k1] = f1 + b;
00139 gi += k4;
00140 fi += k4;
00141 } while (fi < fn);
00142 c2 = c1;
00143 c1 = c2 * tri[0] - s1 * tri[1];
00144 s1 = c2 * tri[1] + s1 * tri[0];
00145 }
00146 tri += 2;
00147 } while (k4 < n);
00148 }
00149
00150 static const unsigned char rv_tbl[] = {
00151 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
00152 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
00153 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
00154 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
00155 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
00156 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
00157 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
00158 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
00159 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
00160 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
00161 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
00162 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
00163 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
00164 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
00165 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
00166 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe
00167 };
00168
00169 #define ch01(index) (buffer[chn][index])
00170
00171 #define ml00(f) (window[i ] * f(i))
00172 #define ml10(f) (window[i + 0x200] * f(i + 0x200))
00173 #define ml20(f) (window[i + 0x100] * f(i + 0x100))
00174 #define ml30(f) (window[i + 0x300] * f(i + 0x300))
00175
00176 #define ml01(f) (window[i + 0x001] * f(i + 0x001))
00177 #define ml11(f) (window[i + 0x201] * f(i + 0x201))
00178 #define ml21(f) (window[i + 0x101] * f(i + 0x101))
00179 #define ml31(f) (window[i + 0x301] * f(i + 0x301))
00180
00181 #define ms00(f) (window_s[i ] * f(i + k))
00182 #define ms10(f) (window_s[0x7f - i] * f(i + k + 0x80))
00183 #define ms20(f) (window_s[i + 0x40] * f(i + k + 0x40))
00184 #define ms30(f) (window_s[0x3f - i] * f(i + k + 0xc0))
00185
00186 #define ms01(f) (window_s[i + 0x01] * f(i + k + 0x01))
00187 #define ms11(f) (window_s[0x7e - i] * f(i + k + 0x81))
00188 #define ms21(f) (window_s[i + 0x41] * f(i + k + 0x41))
00189 #define ms31(f) (window_s[0x3e - i] * f(i + k + 0xc1))
00190
00191
00192 void
00193 fft_short(lame_internal_flags const *const gfc,
00194 FLOAT x_real[3][BLKSIZE_s], int chn, const sample_t * buffer[2])
00195 {
00196 int i;
00197 int j;
00198 int b;
00199
00200 for (b = 0; b < 3; b++) {
00201 FLOAT *x = &x_real[b][BLKSIZE_s / 2];
00202 short const k = (576 / 3) * (b + 1);
00203 j = BLKSIZE_s / 8 - 1;
00204 do {
00205 FLOAT f0, f1, f2, f3, w;
00206
00207 i = rv_tbl[j << 2];
00208
00209 f0 = ms00(ch01);
00210 w = ms10(ch01);
00211 f1 = f0 - w;
00212 f0 = f0 + w;
00213 f2 = ms20(ch01);
00214 w = ms30(ch01);
00215 f3 = f2 - w;
00216 f2 = f2 + w;
00217
00218 x -= 4;
00219 x[0] = f0 + f2;
00220 x[2] = f0 - f2;
00221 x[1] = f1 + f3;
00222 x[3] = f1 - f3;
00223
00224 f0 = ms01(ch01);
00225 w = ms11(ch01);
00226 f1 = f0 - w;
00227 f0 = f0 + w;
00228 f2 = ms21(ch01);
00229 w = ms31(ch01);
00230 f3 = f2 - w;
00231 f2 = f2 + w;
00232
00233 x[BLKSIZE_s / 2 + 0] = f0 + f2;
00234 x[BLKSIZE_s / 2 + 2] = f0 - f2;
00235 x[BLKSIZE_s / 2 + 1] = f1 + f3;
00236 x[BLKSIZE_s / 2 + 3] = f1 - f3;
00237 } while (--j >= 0);
00238
00239 gfc->fft_fht(x, BLKSIZE_s / 2);
00240
00241 }
00242 }
00243
00244 void
00245 fft_long(lame_internal_flags const *const gfc,
00246 FLOAT x[BLKSIZE], int chn, const sample_t * buffer[2])
00247 {
00248 int i;
00249 int jj = BLKSIZE / 8 - 1;
00250 x += BLKSIZE / 2;
00251
00252 do {
00253 FLOAT f0, f1, f2, f3, w;
00254
00255 i = rv_tbl[jj];
00256 f0 = ml00(ch01);
00257 w = ml10(ch01);
00258 f1 = f0 - w;
00259 f0 = f0 + w;
00260 f2 = ml20(ch01);
00261 w = ml30(ch01);
00262 f3 = f2 - w;
00263 f2 = f2 + w;
00264
00265 x -= 4;
00266 x[0] = f0 + f2;
00267 x[2] = f0 - f2;
00268 x[1] = f1 + f3;
00269 x[3] = f1 - f3;
00270
00271 f0 = ml01(ch01);
00272 w = ml11(ch01);
00273 f1 = f0 - w;
00274 f0 = f0 + w;
00275 f2 = ml21(ch01);
00276 w = ml31(ch01);
00277 f3 = f2 - w;
00278 f2 = f2 + w;
00279
00280 x[BLKSIZE / 2 + 0] = f0 + f2;
00281 x[BLKSIZE / 2 + 2] = f0 - f2;
00282 x[BLKSIZE / 2 + 1] = f1 + f3;
00283 x[BLKSIZE / 2 + 3] = f1 - f3;
00284 } while (--jj >= 0);
00285
00286 gfc->fft_fht(x, BLKSIZE / 2);
00287
00288 }
00289
00290
00291 void
00292 init_fft(lame_internal_flags * const gfc)
00293 {
00294 int i;
00295
00296
00297
00298 for (i = 0; i < BLKSIZE; i++)
00299
00300 window[i] = 0.42 - 0.5 * cos(2 * PI * (i + .5) / BLKSIZE) +
00301 0.08 * cos(4 * PI * (i + .5) / BLKSIZE);
00302
00303 for (i = 0; i < BLKSIZE_s / 2; i++)
00304 window_s[i] = 0.5 * (1.0 - cos(2.0 * PI * (i + 0.5) / BLKSIZE_s));
00305
00306 #ifdef HAVE_NASM
00307 if (gfc->CPU_features.AMD_3DNow) {
00308 extern void fht_3DN(FLOAT * fz, int n);
00309 gfc->fft_fht = fht_3DN;
00310 }
00311 else if (gfc->CPU_features.SSE) {
00312 extern void fht_SSE(FLOAT * fz, int n);
00313 gfc->fft_fht = fht_SSE;
00314 }
00315 else
00316 #endif
00317 {
00318 gfc->fft_fht = fht;
00319 }
00320 }