fft.c

Go to the documentation of this file.
00001 /*
00002 ** FFT and FHT routines
00003 **  Copyright 1988, 1993; Ron Mayer
00004 **      Copyright (c) 1999-2000 Takehiro Tominaga
00005 **
00006 **  fht(fz,n);
00007 **      Does a hartley transform of "n" points in the array "fz".
00008 **
00009 ** NOTE: This routine uses at least 2 patented algorithms, and may be
00010 **       under the restrictions of a bunch of different organizations.
00011 **       Although I wrote it completely myself; it is kind of a derivative
00012 **       of a routine I once authored and released under the GPL, so it
00013 **       may fall under the free software foundation's restrictions;
00014 **       it was worked on as a Stanford Univ project, so they claim
00015 **       some rights to it; it was further optimized at work here, so
00016 **       I think this company claims parts of it.  The patents are
00017 **       held by R. Bracewell (the FHT algorithm) and O. Buneman (the
00018 **       trig generator), both at Stanford Univ.
00019 **       If it were up to me, I'd say go do whatever you want with it;
00020 **       but it would be polite to give credit to the following people
00021 **       if you use this anywhere:
00022 **           Euler     - probable inventor of the fourier transform.
00023 **           Gauss     - probable inventor of the FFT.
00024 **           Hartley   - probable inventor of the hartley transform.
00025 **           Buneman   - for a really cool trig generator
00026 **           Mayer(me) - for authoring this particular version and
00027 **                       including all the optimizations in one package.
00028 **       Thanks,
00029 **       Ron Mayer; mayer@acuson.com
00030 ** and added some optimization by
00031 **           Mather    - idea of using lookup table
00032 **           Takehiro  - some dirty hack for speed up
00033 */
00034 
00035 /* $Id: fft.c,v 1.31 2007/01/07 14:30:32 robert Exp $ */
00036 
00037 #ifdef HAVE_CONFIG_H
00038 # include <config.h>
00039 #endif
00040 
00041 #include "lame.h"
00042 #include "machine.h"
00043 #include "encoder.h"
00044 #include "util.h"
00045 #include "fft.h"
00046 
00047 
00048 
00049 
00050 
00051 #define TRI_SIZE (5-1)  /* 1024 =  4**5 */
00052 
00053 /* fft.c    */
00054 static FLOAT window[BLKSIZE], window_s[BLKSIZE_s / 2];
00055 
00056 static const FLOAT costab[TRI_SIZE * 2] = {
00057     9.238795325112867e-01, 3.826834323650898e-01,
00058     9.951847266721969e-01, 9.801714032956060e-02,
00059     9.996988186962042e-01, 2.454122852291229e-02,
00060     9.999811752826011e-01, 6.135884649154475e-03
00061 };
00062 
00063 static void
00064 fht(FLOAT * fz, int n)
00065 {
00066     const FLOAT *tri = costab;
00067     int     k4;
00068     FLOAT  *fi, *gi;
00069     FLOAT const *fn;
00070 
00071     n <<= 1;            /* to get BLKSIZE, because of 3DNow! ASM routine */
00072     fn = fz + n;
00073     k4 = 4;
00074     do {
00075         FLOAT   s1, c1;
00076         int     i, k1, k2, k3, kx;
00077         kx = k4 >> 1;
00078         k1 = k4;
00079         k2 = k4 << 1;
00080         k3 = k2 + k1;
00081         k4 = k2 << 1;
00082         fi = fz;
00083         gi = fi + kx;
00084         do {
00085             FLOAT   f0, f1, f2, f3;
00086             f1 = fi[0] - fi[k1];
00087             f0 = fi[0] + fi[k1];
00088             f3 = fi[k2] - fi[k3];
00089             f2 = fi[k2] + fi[k3];
00090             fi[k2] = f0 - f2;
00091             fi[0] = f0 + f2;
00092             fi[k3] = f1 - f3;
00093             fi[k1] = f1 + f3;
00094             f1 = gi[0] - gi[k1];
00095             f0 = gi[0] + gi[k1];
00096             f3 = SQRT2 * gi[k3];
00097             f2 = SQRT2 * gi[k2];
00098             gi[k2] = f0 - f2;
00099             gi[0] = f0 + f2;
00100             gi[k3] = f1 - f3;
00101             gi[k1] = f1 + f3;
00102             gi += k4;
00103             fi += k4;
00104         } while (fi < fn);
00105         c1 = tri[0];
00106         s1 = tri[1];
00107         for (i = 1; i < kx; i++) {
00108             FLOAT   c2, s2;
00109             c2 = 1 - (2 * s1) * s1;
00110             s2 = (2 * s1) * c1;
00111             fi = fz + i;
00112             gi = fz + k1 - i;
00113             do {
00114                 FLOAT   a, b, g0, f0, f1, g1, f2, g2, f3, g3;
00115                 b = s2 * fi[k1] - c2 * gi[k1];
00116                 a = c2 * fi[k1] + s2 * gi[k1];
00117                 f1 = fi[0] - a;
00118                 f0 = fi[0] + a;
00119                 g1 = gi[0] - b;
00120                 g0 = gi[0] + b;
00121                 b = s2 * fi[k3] - c2 * gi[k3];
00122                 a = c2 * fi[k3] + s2 * gi[k3];
00123                 f3 = fi[k2] - a;
00124                 f2 = fi[k2] + a;
00125                 g3 = gi[k2] - b;
00126                 g2 = gi[k2] + b;
00127                 b = s1 * f2 - c1 * g3;
00128                 a = c1 * f2 + s1 * g3;
00129                 fi[k2] = f0 - a;
00130                 fi[0] = f0 + a;
00131                 gi[k3] = g1 - b;
00132                 gi[k1] = g1 + b;
00133                 b = c1 * g2 - s1 * f3;
00134                 a = s1 * g2 + c1 * f3;
00135                 gi[k2] = g0 - a;
00136                 gi[0] = g0 + a;
00137                 fi[k3] = f1 - b;
00138                 fi[k1] = f1 + b;
00139                 gi += k4;
00140                 fi += k4;
00141             } while (fi < fn);
00142             c2 = c1;
00143             c1 = c2 * tri[0] - s1 * tri[1];
00144             s1 = c2 * tri[1] + s1 * tri[0];
00145         }
00146         tri += 2;
00147     } while (k4 < n);
00148 }
00149 
00150 static const unsigned char rv_tbl[] = {
00151     0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
00152     0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
00153     0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
00154     0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
00155     0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
00156     0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
00157     0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
00158     0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
00159     0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
00160     0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
00161     0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
00162     0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
00163     0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
00164     0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
00165     0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
00166     0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe
00167 };
00168 
00169 #define ch01(index)  (buffer[chn][index])
00170 
00171 #define ml00(f) (window[i        ] * f(i))
00172 #define ml10(f) (window[i + 0x200] * f(i + 0x200))
00173 #define ml20(f) (window[i + 0x100] * f(i + 0x100))
00174 #define ml30(f) (window[i + 0x300] * f(i + 0x300))
00175 
00176 #define ml01(f) (window[i + 0x001] * f(i + 0x001))
00177 #define ml11(f) (window[i + 0x201] * f(i + 0x201))
00178 #define ml21(f) (window[i + 0x101] * f(i + 0x101))
00179 #define ml31(f) (window[i + 0x301] * f(i + 0x301))
00180 
00181 #define ms00(f) (window_s[i       ] * f(i + k))
00182 #define ms10(f) (window_s[0x7f - i] * f(i + k + 0x80))
00183 #define ms20(f) (window_s[i + 0x40] * f(i + k + 0x40))
00184 #define ms30(f) (window_s[0x3f - i] * f(i + k + 0xc0))
00185 
00186 #define ms01(f) (window_s[i + 0x01] * f(i + k + 0x01))
00187 #define ms11(f) (window_s[0x7e - i] * f(i + k + 0x81))
00188 #define ms21(f) (window_s[i + 0x41] * f(i + k + 0x41))
00189 #define ms31(f) (window_s[0x3e - i] * f(i + k + 0xc1))
00190 
00191 
00192 void
00193 fft_short(lame_internal_flags const *const gfc,
00194           FLOAT x_real[3][BLKSIZE_s], int chn, const sample_t * buffer[2])
00195 {
00196     int     i;
00197     int     j;
00198     int     b;
00199 
00200     for (b = 0; b < 3; b++) {
00201         FLOAT  *x = &x_real[b][BLKSIZE_s / 2];
00202         short const k = (576 / 3) * (b + 1);
00203         j = BLKSIZE_s / 8 - 1;
00204         do {
00205             FLOAT   f0, f1, f2, f3, w;
00206 
00207             i = rv_tbl[j << 2];
00208 
00209             f0 = ms00(ch01);
00210             w = ms10(ch01);
00211             f1 = f0 - w;
00212             f0 = f0 + w;
00213             f2 = ms20(ch01);
00214             w = ms30(ch01);
00215             f3 = f2 - w;
00216             f2 = f2 + w;
00217 
00218             x -= 4;
00219             x[0] = f0 + f2;
00220             x[2] = f0 - f2;
00221             x[1] = f1 + f3;
00222             x[3] = f1 - f3;
00223 
00224             f0 = ms01(ch01);
00225             w = ms11(ch01);
00226             f1 = f0 - w;
00227             f0 = f0 + w;
00228             f2 = ms21(ch01);
00229             w = ms31(ch01);
00230             f3 = f2 - w;
00231             f2 = f2 + w;
00232 
00233             x[BLKSIZE_s / 2 + 0] = f0 + f2;
00234             x[BLKSIZE_s / 2 + 2] = f0 - f2;
00235             x[BLKSIZE_s / 2 + 1] = f1 + f3;
00236             x[BLKSIZE_s / 2 + 3] = f1 - f3;
00237         } while (--j >= 0);
00238 
00239         gfc->fft_fht(x, BLKSIZE_s / 2);
00240         /* BLKSIZE_s/2 because of 3DNow! ASM routine */
00241     }
00242 }
00243 
00244 void
00245 fft_long(lame_internal_flags const *const gfc,
00246          FLOAT x[BLKSIZE], int chn, const sample_t * buffer[2])
00247 {
00248     int     i;
00249     int     jj = BLKSIZE / 8 - 1;
00250     x += BLKSIZE / 2;
00251 
00252     do {
00253         FLOAT   f0, f1, f2, f3, w;
00254 
00255         i = rv_tbl[jj];
00256         f0 = ml00(ch01);
00257         w = ml10(ch01);
00258         f1 = f0 - w;
00259         f0 = f0 + w;
00260         f2 = ml20(ch01);
00261         w = ml30(ch01);
00262         f3 = f2 - w;
00263         f2 = f2 + w;
00264 
00265         x -= 4;
00266         x[0] = f0 + f2;
00267         x[2] = f0 - f2;
00268         x[1] = f1 + f3;
00269         x[3] = f1 - f3;
00270 
00271         f0 = ml01(ch01);
00272         w = ml11(ch01);
00273         f1 = f0 - w;
00274         f0 = f0 + w;
00275         f2 = ml21(ch01);
00276         w = ml31(ch01);
00277         f3 = f2 - w;
00278         f2 = f2 + w;
00279 
00280         x[BLKSIZE / 2 + 0] = f0 + f2;
00281         x[BLKSIZE / 2 + 2] = f0 - f2;
00282         x[BLKSIZE / 2 + 1] = f1 + f3;
00283         x[BLKSIZE / 2 + 3] = f1 - f3;
00284     } while (--jj >= 0);
00285 
00286     gfc->fft_fht(x, BLKSIZE / 2);
00287     /* BLKSIZE/2 because of 3DNow! ASM routine */
00288 }
00289 
00290 
00291 void
00292 init_fft(lame_internal_flags * const gfc)
00293 {
00294     int     i;
00295 
00296     /* The type of window used here will make no real difference, but */
00297     /* in the interest of merging nspsytune stuff - switch to blackman window */
00298     for (i = 0; i < BLKSIZE; i++)
00299         /* blackman window */
00300         window[i] = 0.42 - 0.5 * cos(2 * PI * (i + .5) / BLKSIZE) +
00301             0.08 * cos(4 * PI * (i + .5) / BLKSIZE);
00302 
00303     for (i = 0; i < BLKSIZE_s / 2; i++)
00304         window_s[i] = 0.5 * (1.0 - cos(2.0 * PI * (i + 0.5) / BLKSIZE_s));
00305 
00306 #ifdef HAVE_NASM
00307     if (gfc->CPU_features.AMD_3DNow) {
00308         extern void fht_3DN(FLOAT * fz, int n);
00309         gfc->fft_fht = fht_3DN;
00310     }
00311     else if (gfc->CPU_features.SSE) {
00312         extern void fht_SSE(FLOAT * fz, int n);
00313         gfc->fft_fht = fht_SSE;
00314     }
00315     else
00316 #endif
00317     {
00318         gfc->fft_fht = fht;
00319     }
00320 }

Generated on Sun Dec 2 11:34:19 2007 for LAME by  doxygen 1.5.2