// Copyright Epic Games, Inc. All Rights Reserved.

#ifndef __RADRR_COREH__
  #include "rrCore.h"
#endif

#include "binkace.h"

#include <string.h>
#define rrmemsetzero(d,c) memset(d,0,c) // use for small zero clears
#define rrmemmovebig memmove // use for large copies (>512 bytes) - can overlay

#include "radmath.h"
#include "binkace.h"
#include "varbits.h"
#include "popmal.h"
#include "radfft.h"

#define AUDIOFLOAT              F32
#define AUDIOSAMPLE             F32
#define AUDIOTABLESAMPLE        F32

//#define DEBUGSTACKVARS

#ifdef BIG_OLE_FFT // never set for ue binka
#define MAXBUFFERSIZE     4096
#else
#define MAXBUFFERSIZE     2048
#endif
#define MAXBUFFERSIZEHALF   ( MAXBUFFERSIZE / 2 )
#define MAXCHANNELS         2
#define WINDOWRATIO         16

#define TOTBANDS 25

#define FXPBITS 29

#define VQLENGTH 8

#define RLEBITS 4
#define MAXRLE (1<<RLEBITS)

static U8 rlelens[ MAXRLE ] =
{
  2,3,4,5, 6,8,9,10, 11,12,13,14, 15,16,32,64
};

static U32 bandtopfreq[ TOTBANDS ]=
{
  0, 100, 200, 300, 400, 510, 630, 770, 920, 1080, 1270, 1480, 1720, 2000,
  2320, 2700, 3150, 3700, 4400, 5300, 6400, 7700, 9500, 12000, 15500
};

static AUDIOFLOAT RADINLINE Undecibel( AUDIOFLOAT d )
{
  return( ( AUDIOFLOAT ) radpow( 10, d * 0.10f ) );
}

#include "undeci.inc"

// ----

typedef struct BINKAUDIOCOMP
{
  U32 transform_size;
  U32 buffer_size;
  U32 window_size;
  S32 chans;
  U32 flags;
  F32 transform_size_root;
  AUDIOFLOAT threshold_curve_adj;
  S16* inp;
  S16* inpr;
  S16* outp;
  U32 start_frame;
  U32 num_bands;
  U32 given;
  U32 got;
  U32 * bands;
  AUDIOFLOAT * renorm;
  AUDIOFLOAT * last_pow_pha;
  AUDIOFLOAT * ath;
  AUDIOFLOAT * band_ath;
  void* (*memalloc)(UINTa bytes);
  void (*memfree)(void* ptr);
} BINKAUDIOCOMP;


// center frequency of each band (here just for reference)
//static U32 frequency[TOTBANDS]={50,150,250,350,450,570,700,840,1000,1170,1370,1600,1850,
//                                2150,2500,2900,3400,4000,4800,5800,7000,8500,10500,13500,18775};

static AUDIOFLOAT spreadfactors[TOTBANDS*2]=
{
   0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F,
   0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F,
   0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F,
   0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F,
   0.0000000F, 0.0000000F, 0.0000038F, 0.0010497F, 0.1347701F,

   1.0000000F, 0.0787500F, 0.0146941F, 0.0052495F, 0.0005273F,
   0.0000507F, 0.0000048F, 0.0000004F, 0.0000000F, 0.0000000F,
   0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F,
   0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F,
   0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F, 0.0000000F
};


static AUDIOFLOAT RADINLINE Decibel( AUDIOFLOAT d )
{
  return( (AUDIOFLOAT) ( 10.0 * radlog10( d ) ) );
}

//==============================================================================
//   encoding functions
//==============================================================================

static U32 calcbitlevels( U16 * levels, S16 * coeffs, U32 buffersize )
{
  U32 i;
  U16* l = levels;

  for( i = 2 ; i < buffersize ; )
  {
    U32 j,lev;

    j = i + VQLENGTH;
    if ( j > buffersize)
      j = buffersize;

    lev = 0;
    for( ; i < j ; i++ )
    {
      U32 cur = getbitlevelvar( radabs( coeffs[ i ] ) );
      if ( cur > lev )
        lev = cur;
    }
    *l++ = (U16) lev;
  }

  return (U32)( l - levels );
}


static void rlebitlevels( U16 * levels, U32 levlen )
{
  U32 lev, len, maxlen, test;
  S32 rle;
  U16 * levs, * olevs;

  maxlen = levlen;
  levs = levels;
  olevs = levels;
  test = 0;
  do
  {
    rle = -1;
    len = 1;
    lev = levs[ 0 ];

    while ( ( len < maxlen ) && ( rle < ( MAXRLE - 1 ) ) )
    {
      U32 nextlen, j;

      nextlen = rlelens[ rle + 1 ];
      if ( nextlen > maxlen )
        nextlen = maxlen;

      for( j = len ; j < nextlen ; j++ )
        if ( levs[ j ] != lev )
          goto endrle;

      len = nextlen;
      ++rle;
    }

endrle:

    *olevs++ = (U16) ( ( rle << 8 ) + lev );
    maxlen -= len;
    levs += len;
  } while ( maxlen );
}


static void encodebitlevels( VARBITS * vb,
                             U16 * levels,
                             S16 * coeffs,
                             U32 buffersize )
{
  // bink audio 1 encoder -- sign bits directly follow the coeff bits
  U32 i;
  U16* l = levels;

  i = 2;

  do
  {
    U32 rle, len, lev;

    lev = *l++;
    rle = lev >> 8;
    lev &= 255;

    if ( rle == 255 )
    {
      VarBitsPuta0( *vb );
      len = VQLENGTH;
    }
    else 
    {
      VarBitsPuta1( *vb );
      VarBitsPut( *vb, rle, RLEBITS );
      len = ( (U32) rlelens[ rle ] ) * VQLENGTH;
    }
    VarBitsPut( *vb, lev, 4);

    if ( len > ( buffersize - i ) )
      len = buffersize - i;

    if ( lev == 0 )
      i += len;
    else
    {
      while ( len-- )
      {
        VarBitsPut( *vb, radabs( coeffs[ i ] ), lev );

        if ( coeffs[ i ] )
          VarBitsPut1( *vb, ( coeffs[ i ] < 0 ) );

        ++i;
      }
    }

  } while ( i < buffersize );
}

static void encodebitlevels2( VARBITS * vb,
                             U16 * levels,
                             S16 * coeffs,
                             U32 buffersize )
{
    // bink audio 2 encoder -- sign bits are grouped after all coeffs in a run.
	U32 i;
	U16* l = levels;

	i = 2;

	do
	{
		U32 rle, len, lev;

		lev = *l++;
		rle = lev >> 8;
		lev &= 255;

		if (rle == 255)
		{
			VarBitsPuta0(*vb);
			len = VQLENGTH;
		}
		else
		{
			VarBitsPuta1(*vb);
			VarBitsPut(*vb, rle, RLEBITS);
			len = ((U32)rlelens[rle]) * VQLENGTH;
		}
		VarBitsPut(*vb, lev, 4);

		if (len > (buffersize - i))
			len = buffersize - i;

        if (lev == 0)
        {
            i += len;
        }
		else
		{
            U32 start = i;
            U32 startlen = len;
            while (len--)
            {
                VarBitsPut(*vb, radabs(coeffs[i]), lev);
                ++i;
            }

            len = startlen;
            i = start;
			while (len--)
			{
				if (coeffs[i])
					VarBitsPut1(*vb, (coeffs[i] < 0));
				++i;
			}
		}

	} while (i < buffersize);
}


static U32 ftofxp( F32 val )
{
  U32 b, v;
  F32 f;
  f = (F32) radfabs( val );
  v = (U32) radfloor(f);
  b = getbitlevelvar( v );

  b |= ( (U32) ( (F64) f * (F64) ( 1 << ( 23 - b ) ) ) ) << 5;

  if ( val < 0 )
    b |= 0x10000000;

  return( b );
}


static AUDIOFLOAT calctonality( U32 transform_size,
                                U32 startband,
                                U32 endband,
                                AUDIOFLOAT* power,
                                AUDIOFLOAT* phase,
                                AUDIOFLOAT* last1pow )
{
  // simple model described in Applications of Digital Signal Processing
  AUDIOFLOAT t, n;
  U32 i;

  AUDIOFLOAT * last1pha = last1pow + ( transform_size / 2 );
  AUDIOFLOAT * last2pow = last1pha + ( transform_size / 2 );
  AUDIOFLOAT * last2pha = last2pow + ( transform_size / 2 );

  t=0;
  n=0;

  for( i = startband ; i < endband ; i++ )
  {
    AUDIOFLOAT m, mp, pp, c, pow, a, b;

    m = (AUDIOFLOAT) radfsqrt( power[ i ] );

    mp = last1pow[ i ] + last1pow[ i ] - last2pow[ i ];
    pp = last1pha[ i ] + last1pha[ i ] - last2pha[ i ];

    last2pow[ i ] = last1pow[ i ];
    last2pha[ i ] = last1pha[ i ];
    last1pow[ i ] = m;
    last1pha[ i ] = phase[ i ];

    a = mp - m;
    b = pp - phase[ i ];

    pow = (AUDIOFLOAT) ( m + radfabs( mp ) );
    if ( pow < 2.5F )
    {
      c = 0.5f;
    }
    else
    {
      c = (AUDIOFLOAT) ( radfsqrt( a*a + b*b ) / pow );
    }

    if ( c > 0.5F )
      c = 0.5F;
    else if ( c < 0.05F)
      c = 0.05F;

    c = (AUDIOFLOAT)( -0.43F * ranged_log_0p05_to_0p5( c ) ) - 0.29F;

    t += c * m;
    n += m;
  }

  if ( n < ( ( (AUDIOFLOAT) ( endband - startband ) ) * 2.5f ) )
    t= 0.0f;
  else
    t = t / n;

  return( t );
}


static void multiply_samples_by_scalar( AUDIOFLOAT * samples,
                                        U32 number,
                                        F32 scalar )
{
  U32 i;
  AUDIOFLOAT * f;

  f = samples;
  for( i = number; i; i-- )
  {
    (*f) *= ( (AUDIOFLOAT) scalar );
    ++f;
  }
}


static void calc_power_phase( AUDIOFLOAT * samples,
                              AUDIOFLOAT * power,
                              AUDIOFLOAT * phase,
                              U32 num )
{
  U32 i;
  AUDIOFLOAT * f;

  f = samples;
  for( i = 0; i < num; i++ )
  {
    power[ i ] = f[ 0 ] * f[ 0 ] + f[ 1 ] * f[ 1 ];
    if ( power[ i ] <= 0.0005F )
    {
      phase[i] = 0.0F;
      power[i] = 0.0005F;
    }
    else
    {
      // imaginary is sign inverted in our fft
      phase[ i ] = (AUDIOFLOAT) radatan2( -f[ 1 ], f[ 0 ] );
    }
    f += 2;
  }
}


static void calc_band_power( AUDIOFLOAT * band_power,
                             AUDIOFLOAT * power,
                             U32 * bands,
                             U32 num_bands )
{
  U32 i;

  for ( i = 0 ; i < num_bands ; i++ )
  {
    U32 j;

    band_power[ i ] = 0.0;

    for( j = bands[ i ] ; j < bands[ i + 1 ] ; j++ )
    {
      band_power[ i ] += power[ j ];
    }
  }
}


static void calc_band_tonality( AUDIOFLOAT * band_tonality,
                                AUDIOFLOAT * power,
                                AUDIOFLOAT * phase,
                                AUDIOFLOAT * last_pow_pha,
                                U32 * bands,
                                U32 num_bands,
                                U32 transform_size )
{
  U32 i;

  for ( i = 0 ; i < num_bands ; i++ )
  {
    U32 j;
    AUDIOFLOAT tone_vs_noise;

    tone_vs_noise = calctonality( transform_size, bands[i], bands[i+1], power, phase, last_pow_pha );

    band_tonality[ i ] = 0.0;

    for( j = bands[ i ] ; j < bands[ i + 1 ] ; j++ )
    {
      band_tonality[ i ] += power[ j ] * tone_vs_noise;
    }
  }
}


static void simulate_spreading( AUDIOFLOAT * out,
                                AUDIOFLOAT * in,
                                U32 num_bands )
{
  U32 i;

  for ( i = 0 ; i < num_bands ; i++)
  {
    out[ i ] = 0.0;
  }

  for ( i = 0 ; i < num_bands ; i++)
  {
    U32 j;

    // spread across the bands
    for ( j = 0 ; j < num_bands; j++ )
    {
      out[ j ] += ( in[ i ] * spreadfactors[ (S32) j - (S32) i + TOTBANDS ] );
    }
  }
}

#define CAREFULBANDS 8

static void calc_thresholds( AUDIOFLOAT * threshold,
                             AUDIOFLOAT lossy,
                             AUDIOFLOAT threshold_curve_adj,
                             AUDIOFLOAT * spread_tonality,
                             AUDIOFLOAT * spread_power,
                             U32 * bands,
                             U32 num_bands )
{
  U32 i;

  for ( i = 0 ; i < num_bands ; i++ )
  {
    AUDIOFLOAT range, db_level;

    // normalize the tonality (non-normalized after spreading)
    if ( spread_power[ i ] <= 0.0005f )
      range = 0.0f;
    else
      range = ( spread_tonality[ i ] / spread_power[ i ] );

    // clamp the tonality to the range
    if ( range < 0.0F )
      range = 0.0F;
    else if ( range > 1.0F)
      range = 1.0F;

    // calculate the masking difference (which is deltaed from the signal strength)
    db_level = ( range * ( 15.0F + (AUDIOFLOAT) i ) ) +
               ( ( 1.0F - range ) * 6.0F );

    // adjust to move directly off the edge of the curve
    db_level += threshold_curve_adj;

    if ( i < CAREFULBANDS )
      db_level += threshold_curve_adj;

    // factor in our lossy level
    db_level -= lossy * 2;
    if ( db_level < 0.0f ) db_level = 0.0f;
    
//    printf( "%i %12.3f %12.3f\n", i, range, db_level );

    threshold[i]=db_level;
  }
}


static void calc_best_quant( U32 * best_qlevel,
                             AUDIOFLOAT * samples,
                             AUDIOFLOAT * threshold,
                             U32 * bands,
                             U32 num_bands )
{
  U32 i;

  for( i = 0 ; i < num_bands ; i++ )
  {
    U32 ll, hl, nl;  
    AUDIOFLOAT best_diff, max_value, band_power, band_thres;
    U32 j;
    AUDIOFLOAT * f;

    best_qlevel[ i ] = 0;

    ll = 0;
    hl = 96;
    nl = 48;
    max_value = 0.0f;
    band_power = 0.0f;
    best_diff = 0.0f;

    f = samples + ( bands[ i ] * 2 );
    for( j = bands[ i ] ; j < bands[ i + 1 ] ; j++ )
    {
      AUDIOFLOAT fa,fb;

      fa = f[ 0 ];
      fb = f[ 1 ];

      band_power += ( ( fa * fa ) + ( fb * fb ) );

      fa = (AUDIOFLOAT) radfabs( fa );
      fb = (AUDIOFLOAT) radfabs( fb );

      if ( max_value < fa ) max_value = fa;
      if ( max_value < fb ) max_value = fb;

      f += 2;
    }

    // find the top end quantization
    for( j = 94; j > 0 ; j-- )
      if ( max_value >= bink_Undecibel_table[ j ] )
        break;
    hl = j + 2;

    nl = ( hl + ll ) / 2;

    // subtract db from the band_power of the 
    band_thres = band_power / Undecibel( threshold[ i ] );
    
    band_thres = band_thres + ( band_thres * 0.0000001f );

    // we're going to binary search for the best quant level
    for(;;)
    {
      AUDIOFLOAT testthr, testthrdiv, tot_diff;
      U32 l;

      l = nl;

      testthr = bink_Undecibel_table[ l ];
      testthrdiv  = (AUDIOFLOAT) ( 1.0 / testthr );

      // add up the error for this threshold
      f = samples + ( bands[ i ] * 2 );
      tot_diff = (AUDIOFLOAT) 0;

      for( j = bands[ i ] ; j < bands[ i + 1 ] ; j++ )
      {
        AUDIOFLOAT temp, samp;

        
        // quantize and then unquantize
        samp = (AUDIOFLOAT) radfabs( f[ 0 ] );
        temp = (AUDIOFLOAT) radfloor( samp * testthrdiv + 0.5f );
        if ( temp <= -32767.0 )
          temp = -32767.0F;
        else if ( temp >= 32767.0 )
          temp = 32767.0F;
        temp *= testthr;

        // take the difference between orig and quant and square the diff
        temp -= samp;
        tot_diff += (AUDIOFLOAT)( temp * temp );


        // quantize and then unquantize
        samp = (AUDIOFLOAT) radfabs( f[ 1 ] );
        temp = (AUDIOFLOAT) radfloor( samp * testthrdiv  + 0.5f );
        if ( temp <= -32767.0 )
          temp = -32767.0F;
        else if ( temp >= 32767.0 )
          temp = 32767.0F;
        temp *= testthr;

        // take the difference between orig and quant and square the diff
        temp -= samp;
        tot_diff += (AUDIOFLOAT)( temp * temp );
        f += 2;
      }

      // the difference less than our threshold?
      if ( tot_diff <= band_thres )
      {
        // is the difference larger than out best difference (but still less than our threshold?)
        if ( tot_diff >= best_diff )
        {
          best_diff = tot_diff;
          best_qlevel[ i ] = l;
        }

        ll = l;
      }
      else
      {
        // move the binary search down
        hl = l;
      }

      nl = ( ( hl + ll ) / 2 );

      if ( nl == l )
        break;

    }

//{ static U32 highest = 0; if (best_qlevel[i]> highest) highest = best_qlevel[i];  printf("%3i %12.3f %12.3f best: %3i (%3i)\n", i, threshold[i], Undecibel_table[ best_qlevel[ i ] ], best_qlevel[ i ], highest ); }
  }
}


static void quantize_fft_samples( S16 * out,
                                  AUDIOFLOAT * samples,
                                  AUDIOFLOAT * threshold_div,
                                  U32 * bands,
                                  U32 num_bands,
                                  AUDIOFLOAT * ath,
                                  AUDIOFLOAT lossy )
{
  U32 i;
  AUDIOFLOAT * f;
  S16 * op;

  f = samples;
  op = out;

  for ( i = 0; i < num_bands ; i++ )
  {
    U32 j;

    for ( j = ( bands[ i ] * 2 ) ; j < ( bands[ i + 1 ] * 2 ) ; j++ )
    {
      AUDIOFLOAT temp;

      temp = ath[ j ];

      if ( i >= CAREFULBANDS )
        temp += lossy * 10.0f;

      temp = (AUDIOFLOAT) Undecibel( temp );

      if ( ( f[ 0 ] * f[ 0 ] ) < temp )
        *op++ = 0;
      else
      {
        AUDIOFLOAT samp;

        samp = f[ 0 ];
        temp = (AUDIOFLOAT) radfloor( radfabs( samp ) * threshold_div[ i ] + 0.5f );
        if ( temp > 32767 ) temp = 32767;
        if ( samp < 0.0f ) temp = -temp;
        
        *op++ = (S16)temp;
      }

      ++f;
    }
  }
}


static void do_perceptual( AUDIOFLOAT * threshold,
                           AUDIOFLOAT lossy,
                           AUDIOFLOAT threshold_curve_adj,
                           AUDIOFLOAT * samples,
                           U32 transform_size,
                           AUDIOFLOAT * last_pow_pha,
                           U32 * bands,
                           U32 num_bands )
{
  #ifdef DEBUGSTACKVARS

  #ifdef __RADFINAL__
  #error "You have debug stack turned on!"
  #endif
  AUDIOFLOAT * power;
  AUDIOFLOAT * phase;
  AUDIOFLOAT * band_power;
  AUDIOFLOAT * band_tonality;
  AUDIOFLOAT * spread_power;
  AUDIOFLOAT * spread_tonality;
  char pmbuf[ PushMallocBytesForXPtrs( 8 ) ];
  
  pushmallocinit( pmbuf, 8 );
  pushmalloc( pmbuf, &power, 4 * ( transform_size / 2 ) );
  pushmalloc( pmbuf, &phase, 4 * ( transform_size / 2 ) );
  pushmalloc( pmbuf, &band_power, 4 * num_bands );
  pushmalloc( pmbuf, &band_tonality, 4 * num_bands );
  pushmalloc( pmbuf, &spread_power, 4 * num_bands );
  spread_tonality = popmalloc( pmbuf, 4 * num_bands );

  #else

  AUDIOFLOAT power[ MAXBUFFERSIZE ];
  AUDIOFLOAT phase[ MAXBUFFERSIZE ];
  AUDIOFLOAT band_power[ TOTBANDS ];
  AUDIOFLOAT band_tonality[ TOTBANDS ];
  AUDIOFLOAT spread_power[ TOTBANDS ];
  AUDIOFLOAT spread_tonality[ TOTBANDS ];

  #endif

  rrassert( MAXBUFFERSIZE >= ( transform_size / 2 ) );
  rrassert( TOTBANDS >= num_bands );


  // since we separately send these values, clear them out so they don't effect anything
  samples[ 0 ] = 1.0f;
  samples[ 1 ] = 0.0f;
  power[ 0 ] = 1.0F;
  phase[ 0 ] = 0.0F;

  // calculate the power and phase for each sample
  calc_power_phase( samples + 2, power + 1, phase + 1, ( transform_size / 2 ) - 1 ); // minus 1, since we do the first one by hand

  // calculate the total power for each band
  calc_band_power( band_power, power, bands, num_bands );

  // calculate the tonality each band
  calc_band_tonality( band_tonality, power, phase, last_pow_pha, bands, num_bands, transform_size );

  // Simulate the spreading activation of sound in the ear
  simulate_spreading( spread_power, band_power, num_bands );
  simulate_spreading( spread_tonality, band_tonality, num_bands );

  // Generate masking threshold from spread spectral information
  calc_thresholds( threshold, lossy, threshold_curve_adj, spread_tonality, spread_power, bands, num_bands );

  #ifdef DEBUGSTACKVARS
    popfree( spread_tonality );
  #endif
}


static void clamp_best_quant_to_ath( U32 * best_qlevel, AUDIOFLOAT * band_ath, U32 num_bands, AUDIOFLOAT lossy )
{
  U32 i;

  for( i = 0 ; i < num_bands ; i++ )
  {
    AUDIOFLOAT band_db, ath;

    ath = band_ath[ i ] + lossy;

    band_db = ( (AUDIOFLOAT) (U8) best_qlevel[ i ] ) * 0.664F;

    if ( ath > band_db )
    {
      best_qlevel[ i ] = (U32) ( ( ath + 0.3319F ) / 0.664F );
    }
    
    if (best_qlevel[i] > 95)
        best_qlevel[i] = 95;
  }
}


static void encode_one_channel( U32 transform_size,
                                VARBITS * vb,
                                AUDIOFLOAT * samples,
                                U32 * best_qlevel,
                                U32 num_bands,
                                U32 * bands,
                                AUDIOFLOAT * ath,
                                F32 lossy,
                                U32 is_ba2)
{
  U32 i;
  #ifdef DEBUGSTACKVARS
  S16 * coeffs;
  U16 * levels;
  AUDIOFLOAT * threshold_mult;
  char pmbuf[ PushMallocBytesForXPtrs( 8 ) ];
  
  pushmallocinit( pmbuf, 8 );
  pushmalloc( pmbuf, &threshold_mult, 4 * num_bands );
  pushmalloc( pmbuf, &coeffs, 2 * transform_size );
  levels = popmalloc( pmbuf, 2 * ( ( transform_size + VQLENGTH ) / VQLENGTH ) );

  #else

  AUDIOFLOAT threshold_mult[ TOTBANDS ];
  S16 coeffs[ MAXBUFFERSIZE ];
  U16 levels[ ( ( MAXBUFFERSIZE + VQLENGTH ) / VQLENGTH ) ];

  #endif

  rrassert( TOTBANDS >= num_bands );
  rrassert( ( MAXBUFFERSIZE ) >= transform_size );

  // unquant the threshold for compression
  for ( i = 0 ; i < num_bands ; i++ )
  {
    threshold_mult[ i ] = ( 1.0F / bink_Undecibel_table[ best_qlevel[ i ] ] );
  }

  // quantize the samples using the levels we found
  quantize_fft_samples( coeffs, samples, threshold_mult, bands, num_bands, ath, lossy );

  //calculate all of the VQ lengths
  i = calcbitlevels( levels, coeffs, transform_size );

  //rle the bit levels
  rlebitlevels( levels, i );

  //now encode the bits
  if (is_ba2)
    encodebitlevels2(vb, levels, coeffs, transform_size);
  else
    encodebitlevels( vb, levels, coeffs, transform_size );

  #ifdef DEBUGSTACKVARS
  popfree( levels );
  #endif
}


static void load_samples( AUDIOFLOAT* samples,
                          S16* in_data,
                          U32 in_bytes,
                          U32 chan,
                          U32 chans,
                          U32 transform_size )
{
  AUDIOFLOAT * f;
  U32 num_samps;
  S16* id;
  AUDIOFLOAT temp;
  U32 i;

  num_samps = ( in_bytes / 2 ) / chans;

  f = samples;
  id = in_data + chan;
  for ( i = num_samps; i ; i-- )
  {
    *f++ = *id;
    id += chans;
  }

  // pad the out the buffer (duplicate the final sample)
  temp = f[ -1 ];
  for ( i = num_samps ; i < transform_size ; i++ )
  {
    *f++ = temp;
  }
}


static F32 simple_ease( F32 in ) // takes linear 0 to 1, returns smooth curve
{
  F32 sqr = in * in;

  // 3x^2 - 2x^3
  return( ( 3.0f * sqr ) - ( 2.0f * sqr * in ) );
}


static void ramp_samples( AUDIOFLOAT * samples, F32 start_weight, F32 end_weight, U32 num )
{
  U32 i;

  for( i = 0 ; i < num ; i++ )
  {
    samples[ i ] = samples[ i ] *
                   simple_ease( ( ( start_weight * (AUDIOFLOAT) ( num - i ) ) +
                                  ( end_weight * (AUDIOFLOAT) i )
                                ) / (AUDIOFLOAT) num );

    if ( ( samples[ i ] >= -0.001 ) || ( samples[ i ] <= 0.001 ) )
      samples[ i ] = 0;
  }
}


//encode the data into an output buffer and return the length (in bytes)
static U32 Percept( U32 transform_size,
                    F32 transform_size_root,
                    U32 chans,
                    U32 flags,
                    AUDIOFLOAT * last_pow_pha,
                    U32 lossy_level,
                    F32 threshold_curve_adj,
                    void * buf,
                    S16 * in_data,
                    U32 in_bytes,
                    U32 num_bands,
                    U32 * bands,
                    AUDIOFLOAT * band_ath,
                    AUDIOFLOAT * ath,
                    AUDIOFLOAT * renorm )
{
  VARBITS vb;
  U32 i;
  AUDIOFLOAT lossy;

  #ifdef DEBUGSTACKVARS

  U32 * best_qlevel;
  AUDIOFLOAT * samples;
  AUDIOFLOAT * threshold;
  char pmbuf[ PushMallocBytesForXPtrs( 8 ) ];
  
  pushmallocinit( pmbuf, 8 );
  pushmalloc( pmbuf, &best_qlevel, 4 * TOTBANDS );
  pushmalloc( pmbuf, &threshold, 4 * TOTBANDS );
  samples = popmalloc( pmbuf, 4 * transform_size * chans );

  #else

  U32 best_qlevel[ TOTBANDS ];
  AUDIOFLOAT threshold[ TOTBANDS ];
  RAD_ALIGN( AUDIOFLOAT, samples[ MAXBUFFERSIZE * MAXCHANNELS ], RADFFT_ALIGN );
  RAD_ALIGN( AUDIOFLOAT, loadbuf[ MAXBUFFERSIZE ], RADFFT_ALIGN );

  #endif

  rrassert( ( transform_size * chans ) <= ( MAXBUFFERSIZE * MAXCHANNELS ) );

  lossy = (AUDIOFLOAT)lossy_level;
  
  VarBitsOpen( vb, buf );

  //if ( flags & BINKACNEWFORMAT )
  {
    VarBitsPut( vb, 0, 2 );
  }

  for ( i = 0 ; i < chans ; i++ )
  {
    U32 t;

    // Load up the data structure with the samples
    load_samples( loadbuf,
                  in_data,
                  in_bytes,
                  i,
                  chans,
                  transform_size );

    // round the samples on the ends
    ramp_samples( loadbuf, 0.0f, 1.0f, ( transform_size / ( WINDOWRATIO * 2 ) ) );
    ramp_samples( loadbuf + transform_size - ( transform_size / ( WINDOWRATIO * 2 ) ),
                  1.0f, 0.0f, ( transform_size / ( WINDOWRATIO * 2 ) ) );

    // do the fft
    radfft_rfft( (rfft_complex*)samples, loadbuf, transform_size );

    // Normalize the coeffs
    multiply_samples_by_scalar( samples, transform_size, transform_size_root );

    // Do perceptual model
    do_perceptual( threshold, lossy, threshold_curve_adj, samples, transform_size, last_pow_pha, bands, num_bands );

    // Load up the data structure with the samples (again)
    load_samples( loadbuf,
                  in_data,
                  in_bytes,
                  i,
                  chans,
                  transform_size );

    // do the dct
    //if ( flags & BINKACNEWFORMAT )
    {
      radfft_dct( samples, loadbuf, transform_size );
      samples[ 0 ] *= 0.5F;
    }

    // Normalize the coeffs
    multiply_samples_by_scalar( samples, transform_size, transform_size_root );

    // figure out the best threshold for each band
    calc_best_quant( best_qlevel, samples, threshold, bands, num_bands );

    // check any band level is below the band's ath and crank if up if necessary
    clamp_best_quant_to_ath( best_qlevel, band_ath, num_bands, lossy );


    // dump out the DC component and the Nyquist frequency in high resolution
    t = ftofxp( samples[ 0 ] );
    VarBitsPut( vb, t, FXPBITS );
    t = ftofxp( samples[ 1 ] );
    VarBitsPut( vb, t, FXPBITS );

    // output the thresholds
    for ( t = 0 ; t < num_bands ; t++ )
    {
      if (flags & BINKAC20)
      {
        U8 qlevel = (U8)best_qlevel[t];
        if (qlevel > 95)
            qlevel = 95;
        VarBitsPut(vb, qlevel, 7);
      }
      else
      {
        VarBitsPut(vb, (U8)best_qlevel[t], 8);
      }
    }

    // encode a channel
    encode_one_channel( transform_size,
                        &vb,
                        samples,
                        best_qlevel,
                        num_bands,
                        bands,
                        ath,
                        lossy,
                        flags & BINKAC20);
  }

  #ifdef DEBUGSTACKVARS
  popfree( samples );
  #endif

  VarBitsPutAlign( vb );
  return( ( VarBitsSize( vb ) / 8 ) );
}

RADDEFFUNC HBINKAUDIOCOMP RADLINK BinkAudioCompressOpen( U32 rate, U32 chans, U32 flags, BinkAudioCompressAllocFnType* memalloc, BinkAudioCompressFreeFnType* memfree)
{
  U32 i, j;
  U32 transform_size, transform_size_half, buffer_size;
  U32 num_bands;
  S32 nyq;
  AUDIOFLOAT adj_for_old;
  AUDIOFLOAT threshold_curve_adj;

  HBINKAUDIOCOMP ba;

  if ( rate >= 44100 )
    transform_size = 2048;
  else if ( rate >= 22050 )
    transform_size = 1024;
  else
    transform_size = 512;

  // in bytes
  buffer_size = transform_size * chans * 2;

  // by default, no adjustment for old audio codec
  adj_for_old = 1.0f;
  threshold_curve_adj = 12.0f;

  transform_size_half = transform_size / 2;
  nyq = ( rate + 1 ) / 2;

  // calculate the number of bands we'll use
  for( i = 0 ; i < TOTBANDS ; i++ )
  {
    if ( bandtopfreq[ i ] >= (U32) nyq )
      break;
  }
  num_bands = i;

  // allocate our memory
  {
    U32 * bands_ptr;
    AUDIOFLOAT * renorm_ptr;
    AUDIOFLOAT * ath_ptr;
    AUDIOFLOAT * band_ath_ptr;
    AUDIOFLOAT * last_pow_pha_ptr;
    S16 * in_ptr;
    S16* out_ptr;
    char pmbuf[ PushMallocBytesForXPtrs( 16 ) ];
  
    pushmallocinit( pmbuf, 16 );
    pushmalloc( pmbuf, &bands_ptr, 4 * ( num_bands + 1 ) );
    pushmalloc( pmbuf, &renorm_ptr, 4 * num_bands );
    pushmalloc( pmbuf, &last_pow_pha_ptr, 4 * 2 * 2 * chans * transform_size_half ); // 2 for power/phase, the 2 for two frames worth
    pushmalloc( pmbuf, &in_ptr, buffer_size );
    pushmalloc( pmbuf, &ath_ptr, 4 * transform_size );
    pushmalloc( pmbuf, &band_ath_ptr, 4 * num_bands );
    pushmalloc( pmbuf, &out_ptr, buffer_size + ( buffer_size / 2 ) );

    ba = (HBINKAUDIOCOMP) popmalloc( pmbuf, sizeof( BINKAUDIOCOMP ), memalloc );
    if ( ba == 0)
      return( 0 );

    rrmemsetzero( ba, sizeof( BINKAUDIOCOMP ) );

    ba->bands = bands_ptr;
    ba->renorm = renorm_ptr;
    ba->last_pow_pha = last_pow_pha_ptr;
    memset(ba->last_pow_pha, 0, 4 * 2 * 2 * chans * transform_size_half);
    ba->inp = in_ptr;
    ba->outp = out_ptr;
    ba->ath = ath_ptr;
    ba->band_ath = band_ath_ptr;
    ba->memalloc = memalloc;
    ba->memfree = memfree;

    radfft_init();
  }


  ba->flags = flags;
  ba->chans = chans;
  ba->transform_size = transform_size;

  ba->buffer_size = buffer_size;
  ba->window_size = buffer_size / WINDOWRATIO;

  ba->num_bands = num_bands;

  ba->transform_size_root = ( 1.0F / ( (AUDIOFLOAT) radfsqrt( ba->transform_size ) ) );

  // calculate the band ranges
  for( i = 0 ; i < num_bands ; i++ )
  {
    ba->bands[ i ] = ( bandtopfreq[ i ] * transform_size_half ) / nyq;
  }
  ba->bands[ i ] = transform_size_half;

  rrmemsetzero( ba->renorm, ba->num_bands * sizeof( ba->renorm[ 0 ] ) );
  for( i = 0 ; i < ba->num_bands ; i++ )
  {
    for( j = 0 ; j < ba->num_bands ; j++ )
      ba->renorm[ j ] += (AUDIOFLOAT) spreadfactors[ (S32) j - (S32) i + TOTBANDS ];
  }

  for( i = 0 ; i < ba->num_bands ; i++ )
  {
    ba->renorm[ i ] = (AUDIOFLOAT) ( 1.0 / (F64) ba->renorm[ i ] );
  }

  // initialize the ath
  for( i = 0 ; i < transform_size ; i++ )
  {
    AUDIOFLOAT ath, f;

    f = ( (AUDIOFLOAT) mult64anddiv( i, nyq, transform_size ) ) / 1000.0f;

    f = f * adj_for_old;

    ath = 3.64f * (AUDIOFLOAT) radpow( f, -0.8 );
    ath += ( -6.5f * (AUDIOFLOAT) radexp( -0.6f * ( f - 3.3f ) * ( f - 3.3f ) ) );
    ath += ( 0.001f * f * f * f * f );

    ba->ath[ i ] = ath;
  }

  // get the lowest point on the curve for each band
  for( i = 0 ; i < ba->num_bands ; i++ )
  {
    AUDIOFLOAT low = 1000.0f;

    for( j = ( ba->bands[ i ] * 2 ) ; j < ( ba->bands[ i + 1 ] * 2 ) ; j++ )
    {
      AUDIOFLOAT ath, f;

      f = ( (AUDIOFLOAT) mult64anddiv( j, nyq, transform_size ) ) / 1000.0f;

      ath = ba->ath[ j ];

      if ( ath < low )
        low = ath;
    }

    ba->band_ath[ i ] = low;
  }

  ba->threshold_curve_adj = threshold_curve_adj;

  ba->start_frame = 1;

  return( ba );
}


RADDEFFUNC void RADLINK BinkAudioCompressLock( HBINKAUDIOCOMP ba,
                                               void**ptr,
                                               U32*len )
{
  if ( ba->start_frame )
  {
    if ( ptr )
      *ptr = ba->inp;
    if ( len )
      *len = ba->buffer_size;
  }
  else
  {
    if ( ptr )
      *ptr = ( (U8*) ba->inp ) + ba->window_size;
    if ( len )
      *len = ( ba->buffer_size - ba->window_size );
  }
}


RADDEFFUNC void RADLINK BinkAudioCompressUnlock( HBINKAUDIOCOMP ba,
                                                 U32 lossylevel,
                                                 U32 filled,
                                                 void** output,
                                                 U32* outbytes,
                                                 U32* uncompressedbytesused )
{
  U32 in_bytes, out_bytes, used_bytes;

  ba->got += filled;

  in_bytes = filled;

  if ( ba->start_frame )
    ba->start_frame = 0;
  else
    in_bytes += ba->window_size;

  out_bytes = Percept( ba->transform_size,
                       ba->transform_size_root,
                       ba->chans,
                       ba->flags,
                       ba->last_pow_pha,
                       lossylevel,
                       ba->threshold_curve_adj,
                       ba->outp,
                       ba->inp,
                       in_bytes,
                       ba->num_bands,
                       ba->bands,
                       ba->band_ath,
                       ba->ath,
                       ba->renorm );

  // Store end of buffer
  rrmemmovebig( ba->inp,
                ( (U8*) ba->inp ) + ( ba->buffer_size - ba->window_size ),
                ba->window_size );

  // set the output values
  if ( output )
    *output = ba->outp;
  if ( outbytes )
    *outbytes = out_bytes;

  rrassert( out_bytes <= (ba->buffer_size +(ba->buffer_size/2)) );

  // calculate how many bytes of audio we used this call
  used_bytes = ba->buffer_size - ba->window_size;
  if ( ( used_bytes + ba->given ) > ba->got )
    used_bytes = ba->got - ba->given;
  ba->given += used_bytes;

  if ( uncompressedbytesused )
    *uncompressedbytesused = used_bytes;
}


RADDEFFUNC void RADLINK BinkAudioCompressClose(HBINKAUDIOCOMP ba)
{
  popfree( ba, ba->memfree );
}