// (source listing: 2902 lines, 112 KiB, C)
// Copyright Epic Games Tools, LLC. All Rights Reserved.
|
|
////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// RADaudio is a new audio codec made by Epic Games Tools for use in games,
|
|
// optimized for fast SIMD decoding and decent quality (roughly similar to
|
|
// Vorbis).
|
|
//
|
|
// It is a classical MDCT-based codec with two block sizes, and it uses
|
|
// the Oodle Data huffman entropy coder to store data.
|
|
|
|
#include <stdio.h>
|
|
|
|
#define ENCODER_VERSION 0x00000000 // unstable debug version
|
|
//#define ENCODER_VERSION 0x01000001 // use this for first released version
|
|
#define HUFFMAN_ENCODE // get Huffman encode tables
|
|
|
|
#include <math.h>
|
|
#include <stdlib.h>
|
|
#include "radaudio_encoder.h"
|
|
#include "radaudio_encoder_internal.h"
|
|
#include "radaudio_encoder_sse.h"
|
|
#include "radaudio_encoder_neon.h"
|
|
#include "radaudio_common.h"
|
|
#include "radaudio_mdct.h"
|
|
#include "rrCore.h"
|
|
#include "radaudio_common.inl"
|
|
|
|
|
|
// Compile-time size checks:
//  - the stream header must not exceed RADAUDIO_STREAM_HEADER_MAX bytes
//  - radaudio_encoder_state must not be larger than radaudio_encoder
RR_COMPILER_ASSERT(sizeof(radaudio_stream_header) <= RADAUDIO_STREAM_HEADER_MAX);
RR_COMPILER_ASSERT(sizeof(radaudio_encoder_state) <= sizeof(radaudio_encoder));
|
|
|
|
#ifdef RADAUDIO_DEVELOPMENT
// List of profiling zones, written as an X-macro: define PROF(x) and then
// expand PROFILE_ZONES() to generate one item per zone.
#define PROFILE_ZONES()         \
   PROF(encode_all)             \
   PROF(coefficients_sum)       \
   PROF(coefficients_1)         \
   PROF(coefficients_2)         \
   PROF(coefficients_3)         \
   PROF(coefficients_4)         \
   PROF(coefficients_5)         \
   PROF(coefficients_n8)        \
   PROF(coefficients_n16)       \
   PROF(coefficients_normalize) \
   PROF(bands)                  \
   PROF(huffman)                \
   PROF(analysis1)              \
   PROF(mdct)                   \
   PROF(window)                 \
   PROF(subbands)               \
   PROF(varbits)                \
   PROF(transient_analysis)     \
   PROF(check_mono)             \
   PROF(total_count)

// One PROF_<zone> index per zone, followed by PROF__end.
enum
{
   #define PROF(x) PROF_##x,
   PROFILE_ZONES()
   #undef PROF

   PROF__end
};

// Both macros expect a variable named 'es' with a profile_times[] array in
// scope. BEGIN subtracts the current tick count and END adds it back, so each
// BEGIN/END pair accumulates the elapsed ticks for that zone.
#define PROF_BEGIN(var)   es->profile_times[PROF_##var] -= rrGetTicks()
#define PROF_END(var)     es->profile_times[PROF_##var] += rrGetTicks()
#else
// Profiling disabled: both macros expand to nothing.
#define PROF_BEGIN(var)
#define PROF_END(var)
#endif
|
|
|
|
typedef struct
|
|
{
|
|
U8 *bitstream; // dynamic array
|
|
U32 pending_bits;
|
|
int pending_bitcount;
|
|
int capacity;
|
|
int length;
|
|
int error;
|
|
} radaudio_bit_encoder;
|
|
|
|
typedef struct
|
|
{
|
|
radaudio_bit_encoder stream[3]; // HUFF3 encoder
|
|
size_t total_bits;
|
|
} huff3_encoder;
|
|
|
|
// Resets a bit writer so that it writes into 'buffer', which holds
// 'capacity' bytes. Nothing is pending and no error is recorded afterwards.
static void encode_vbstream_init(radaudio_bit_encoder *e, U8 *buffer, int capacity)
{
   // output storage
   e->bitstream = buffer;
   e->capacity  = capacity;
   e->length    = 0;

   // bit accumulator
   e->pending_bitcount = 0;
   e->pending_bits     = 0;

   e->error = 0;
}
|
|
|
|
// Appends the low 'bitlength' bits of 'bits' to the stream.
//
//   e          bit writer to append to
//   bits       value to write; anything above the low 'bitlength' bits is ignored
//   bitlength  number of bits to write, 0..31
//
// When the accumulator could not hold the new bits, all complete pending
// bytes are first moved to the byte buffer. That step requires 4 free bytes;
// if they are not available, e->error is set and the bits are dropped.
//
// NOTE(review): up to 7 bits can remain pending after the drain, and the
// accumulator is 32 bits wide, so only bitlength <= 25 is guaranteed to fit
// in every state. Longer writes rely on the caller; confirm against callers.
static void encode_vbstream_bits(radaudio_bit_encoder *e, U32 bits, int bitlength)
{
   rrAssert(bitlength >= 0 && bitlength < 32);
   if (e->pending_bitcount+bitlength >= 32) {
      // at most 4 bytes output at once
      if (e->length + 4 > e->capacity) {
         e->error = 1;
         return;
      }
      while (e->pending_bitcount >= 8) {
         e->bitstream[e->length++] = (U8) (e->pending_bits & 255);
         e->pending_bits >>= 8;
         e->pending_bitcount -= 8;
      }
   }
   // build the mask in unsigned arithmetic: (1 << 31) overflows a signed int,
   // which is undefined behavior, and bitlength == 31 is a permitted input
   bits &= (((U32) 1 << bitlength) - 1);
   e->pending_bits |= bits << e->pending_bitcount;
   e->pending_bitcount += bitlength;
}
|
|
|
|
// Writes the Huffman code for 'symbol' from table 'h' to bit writer 'e'.
// The symbol must have a code in the table (non-zero code length).
static void encode_vbstream_huff(radaudio_bit_encoder *e, radaudio_huffman *h, U8 symbol)
{
   U32 code   = h->encode[symbol].code;
   int length = h->encode[symbol].length;

   rrAssert(length > 0);
   encode_vbstream_bits(e, code, length);
}
|
|
|
|
// Writes out every pending bit, rounding up to a whole byte, and clears the
// accumulator. Requires 4 free bytes in the buffer regardless of how many
// bits are pending; otherwise e->error is set and nothing is written.
static void encode_vbstream_flush(radaudio_bit_encoder *e)
{
   if (e->length + 4 <= e->capacity) {
      for (; e->pending_bitcount > 0; e->pending_bitcount -= 8) {
         e->bitstream[e->length++] = (U8) (e->pending_bits & 255);
         e->pending_bits >>= 8;
      }
      e->pending_bits = 0;
      e->pending_bitcount = 0;
   } else {
      e->error = 1;
   }
}
|
|
|
|
// Writes raw (non-Huffman) bits and adds them to the running bit total.
//
// Raw bits always go to stream[2]:
//   - extra bits in stream[0] would increase the offset of stream[2]
//   - extra bits in stream[1] would interact with bit reversal
// so stream[2] is the best place for non-huffman data, although it could
// also be spread over multiple streams.
static void encode_bits(huff3_encoder *es, U32 bits, int bitlength)
{
   radaudio_bit_encoder *raw_stream = &es->stream[2];

   es->total_bits += bitlength;
   encode_vbstream_bits(raw_stream, bits, bitlength);
}
|
|
|
|
// Writes the Huffman code for 'symbol' to one of the three streams.
// stream_index must be 0, 1 or 2. Does not update es->total_bits; see
// encode_recompute_stats.
static void encode_huff(huff3_encoder *es, radaudio_huffman *h, U8 symbol, unsigned int stream_index)
{
   // stream_index is unsigned, so only the upper bound needs checking
   rrAssert(stream_index < 3);
   encode_vbstream_huff(es->stream + stream_index, h, symbol);
}
|
|
|
|
// Recomputes es->total_bits from the three streams: 8 bits for every byte
// already stored plus the bits still waiting in each accumulator.
static void encode_recompute_stats(huff3_encoder *es)
{
   int stored_bytes = 0;
   int waiting_bits = 0;

   for (int i=0; i < 3; ++i) {
      stored_bytes += es->stream[i].length;
      waiting_bits += es->stream[i].pending_bitcount;
   }
   es->total_bits = 8 * stored_bytes + waiting_bits;
}
|
|
|
|
// Huffman-encodes an array of symbols, dealing them round-robin across the
// three streams (value i goes to stream i % 3), then refreshes the bit total.
//
//   values/num_values  symbols to encode
//   file, line         not used by this function
//   error              set to 1 if any symbol has no code in 'huff'; such
//                      symbols are skipped but still occupy their slot in
//                      the round-robin order
static void encode_huff_array(huff3_encoder *es, radaudio_huffman *huff, U8 *values, int num_values, char *file, int line, int *error)
{
   for (int idx=0; idx < num_values; ++idx) {
      U8 symbol = values[idx];

      if (huff->encode[symbol].length == 0) {
         *error = 1;
         continue;
      }
      encode_huff(es, huff, symbol, (unsigned int) idx % 3);
   }
   encode_recompute_stats(es);
}
|
|
|
|
// Maps x linearly from the range a..b onto the range c..d (x==a gives c,
// x==b gives d). The result is not clamped, and a must differ from b.
static float linear_remap(float x, float a, float b, float c, float d)
{
   float t = (x-a)/(b-a);   // position of x within a..b
   return t*(d-c)+c;
}
|
|
|
|
// original was 140. @TODO: optimize for different sample rates and bit rates; no decoder change needed
#define TRANSIENT_THRESHOLD 180

// Tunables for the quiet-band pulse scaling in encode_channel: band exponents
// from -15 up to silence_exp_start are remapped linearly onto the scale range
// 0.05 .. silence_exp_value.
static int   silence_exp_start = -11;
static float silence_exp_value = 0.75f;
|
|
|
|
// Inverts the quadratic approximation x = A*y^2 + B*y + 1 (roughly
// 0.345*y^2 + 0.655*y + 1) by taking the positive root of
// A*y^2 + B*y + (1-x) = 0. x is expected to lie in 1..2.
static float inverse_approx_pow_2(float x)
{
   rrAssert(x >= 1 && x <= 2);

   float A = 0.34375f;            // 11/32
   float B = 0.65625f;            // 21/32
   float C = 1-x;
   float B_squared = 0.4306640625f; // B*B, 441/1024

   float discriminant = B_squared - (4*A)*C;
   float root = sqrtf(discriminant);
   return (-B + root) / (2*A);
}
|
|
|
|
// Gathers the 2*n input samples of one block, applies the left and right
// window halves, and runs the forward MDCT on them.
//
//   es              encoder state; supplies es->cpu for the MDCT
//   coeff           output of radaudio_mdct_fft for this block
//   is_short_block  0 = long block, 1 = short block
//   samples         input samples, read at index (position * stride)
//   offset          block position; the block covers positions offset-n .. offset+n-1
//   len             number of valid positions in 'samples' (0 .. len-1)
//   prev_short,
//   next_short      block types of the neighboring blocks; each window half
//                   uses the shorter window of the two blocks it joins
//   stride          distance between successive positions in 'samples' and
//                   in info->padding
//   average         when nonzero, each input value is the average of two
//                   adjacent interleaved channels: samples[p*stride] and
//                   samples[p*stride+1]
//                   NOTE(review): assumes 'samples' points at the first of two
//                   interleaved channels when average is set -- confirm at callers
//   info            info->padding / info->padding_len supply the values used
//                   for positions outside 0 .. len-1; without padding those
//                   positions are 0
//   pad_channel     channel offset used when reading info->padding
static void mdct_block(radaudio_encoder_state *es, float *coeff, int is_short_block, float *samples, int offset, int len, int prev_short, int next_short, int stride, rrbool average, radaudio_encode_info *info, int pad_channel)
{
   PROF_BEGIN(window);
   int i;
   static int block_length[2] = { RADAUDIO_LONG_BLOCK_LEN, RADAUDIO_SHORT_BLOCK_LEN };
   FFT_ALIGN(F32, data[MAX_MDCT_SAMPLES]);
   FFT_ALIGN(F32, workspace[MAX_MDCT_SAMPLES]);
   F32 *window;

   // the window type is the shortest of the two blocks
   int windowleft  = RR_MAX(is_short_block, prev_short);
   int windowright = RR_MAX(is_short_block, next_short);
   int n = block_length[is_short_block];
   int wlen;

   int raw_start = offset - n;
   int raw_end   = offset + n;

   // clamp range to sample, which is defined from 0..len
   int clamped_start = RR_MAX(raw_start, 0);
   int clamped_end   = RR_MIN(raw_end  , len);

   // convert into space where 0 = start of window, i.e. raw_start maps to 0
   int convert_offset = raw_start;
   raw_start     -= convert_offset;
   raw_end       -= convert_offset;
   clamped_start -= convert_offset;
   clamped_end   -= convert_offset;

   rrAssert(raw_start == 0);
   rrAssert(raw_end   == 2*n);
   raw_start = 0;
   raw_end   = 2*n;

   // part of the window that lies before position 0
   if (clamped_start > raw_start) {
      if (info->padding_len > 0) {
         // this could be turned into two loops, one for the padded samples and one for the 0s,
         // but since it only happens in one block at start of file, let's just leave it clearer here
         for (i=raw_start ; i < clamped_start; ++i) {
            // map back into original sample space:
            //    if i= -convert_offset, then sample_pos is 0
            int sample_pos = i+convert_offset;
            // now sample_pos from -padding_len to -1 maps to 0..padlen-1 in padding
            int padding_pos = sample_pos + (int) info->padding_len;
            if (padding_pos >= 0 && padding_pos < (int) info->padding_len)
               data[i] = info->padding[padding_pos*stride+pad_channel];
            else
               data[i] = 0;
         }
      } else {
         for (i=raw_start ; i < clamped_start; ++i)
            data[i] = 0;
      }
   }

   // part of the window that lies inside the input
   if (average) {
      // The two terms must come from different channels: adding a sample to
      // itself and halving it just reproduces the sample, which would make
      // the 'average' flag a no-op.
      for (i=clamped_start; i < clamped_end ; ++i) {
         const float *frame = &samples[(offset-n+i)*stride];
         data[i] = (frame[0] + frame[1]) * 0.5f;
      }
   } else {
      for (i=clamped_start; i < clamped_end ; ++i)
         data[i] = samples[(offset-n+i)*stride];
   }

   #if 0
   if (clamped_start > raw_start) {
      for (i=0; i < raw_end; ++i) {
         printf("%4d %d\n", i, (int) (data[i]*32767));
      }
      printf("seamless fix for channel %d ends at %d\n", pad_channel, clamped_start);
   }
   #endif

   // part of the window that lies at or beyond position len
   if (clamped_end < raw_end) {
      if (info->padding_len > 0) {
         for (i=clamped_end; i < raw_end; ++i) {
            // map back into original sample space:
            //    if i=raw_end, then sample_pos is len
            rrAssert(clamped_end + convert_offset == len);
            int sample_pos = i+convert_offset;
            // now sample_pos from len..len+padlen-1 maps to 0..padlen-1 in padding
            int padding_pos = sample_pos - len;
            if (padding_pos >= 0 && padding_pos < (int) info->padding_len)
               data[i] = info->padding[padding_pos*stride+pad_channel];
            else
               data[i] = 0;
         }
         #if 0
         for (i=0; i < raw_end; ++i) {
            printf("%4d %d\n", i, (int) (data[i]*32767));
         }
         printf("seamless fix for channel %d starts at %d\n", pad_channel, clamped_end);
         #endif
      } else {
         for (i=clamped_end ; i < raw_end ; ++i)
            data[i] = 0;
      }
   }

   // apply left window
   wlen   = block_length[windowleft];
   window = radaudio_windows[windowleft];
   if (wlen < n) {
      // long block with short window: zero up to the centered short window,
      // apply the window, and leave the rest of the first half untouched
      int wstart = RADAUDIO_LONG_BLOCK_LEN/2 - RADAUDIO_SHORT_BLOCK_LEN/2;
      for (i=0; i < wstart; ++i)
         data[i] = 0;
      for (i=0; i < RADAUDIO_SHORT_BLOCK_LEN; ++i)
         data[wstart+i] *= window[i];
   } else {
      for (i=0; i < wlen; ++i)
         data[i] *= window[i];
   }

   // apply right window (the same window table, read back to front)
   wlen   = block_length[windowright];
   window = radaudio_windows[windowright];
   if (wlen < n) {
      // long block with short window
      int wstart = RADAUDIO_LONG_BLOCK_LEN/2 - RADAUDIO_SHORT_BLOCK_LEN/2;
      for (i=0; i < RADAUDIO_SHORT_BLOCK_LEN; ++i)
         data[n+wstart+i] *= window[wlen-1-i];
      for (i=n-wstart; i < n; ++i)
         data[n+i] = 0;
   } else {
      for (i=0; i < wlen; ++i)
         data[n+i] *= window[wlen-1-i];
   }
   PROF_END(window);

   PROF_BEGIN(mdct);
   radaudio_mdct_fft(es->cpu, coeff, (size_t) n, data, data+n, workspace);
   PROF_END(mdct);
}
|
|
|
|
// computing error betweeen two vectors that are normalized to have L2-norm of 1.0,
|
|
// without explicitly computing normalized values
|
|
//
|
|
// Square error is: sum[(normalizedq[i] - desired[i])^2]
|
|
// = sum[(normalizedq[i]^2 - 2*normalizedq[i]*desired[i] + desired[i]^2)]
|
|
// = sum[ normalizedq[i]^2] - 2*sum[normalizedq[i]*desired[i]] + sum[desired[i]^2]
|
|
// = 1 - 2*normalize_scale * sum[unnormalizedq*desired] + 1
|
|
// = 2 - 2*normalize_scale * sum[unnormalizedq*desired]
|
|
// normalize_scale = 1/sqrt(sum(unnormalizedq^2))
|
|
|
|
static float compute_quantized_coefficients(S16 quantized_coeff[], F32 normalized_coeff[], int num_coeff, F32 quantizer)
|
|
{
|
|
float cross_sum=0;
|
|
float unnorm_sum2=0;
|
|
for (int k=0; k < num_coeff; ++k) {
|
|
float unquantized = (normalized_coeff[k] * quantizer);
|
|
int quantized = (int) floor(unquantized+0.5);
|
|
quantized_coeff[k] = (S16) quantized;
|
|
cross_sum += quantized * normalized_coeff[k];
|
|
unnorm_sum2 += (F32) quantized * quantized;
|
|
}
|
|
F32 normalize_scale = 1.0f / sqrtf(unnorm_sum2);
|
|
return 2 - 2 * normalize_scale * cross_sum;
|
|
}
|
|
|
|
// iterate through a range of quantizers and pick the one with the
|
|
// smallest error; all of them will in theory be smaller than the
|
|
// "ideal" quantizer, so we don't bother trading off size vs error,
|
|
// we just accept the best.
|
|
static void compute_best_quantized_coeff8_loop(radaudio_encoder_state *es, S16 best_quantized[], F32 best_so_far, F32 ncoeff[], F32 quantizer, F32 step_quantizer, int num_quantizers)
|
|
{
|
|
RR_UNUSED_VARIABLE(es);
|
|
int n = 8;
|
|
for (int q=0; q < num_quantizers; ++q, quantizer += step_quantizer) {
|
|
S16 quantized_attempt[8];
|
|
|
|
F32 dot_product=0;
|
|
F32 unnorm_sum2=0;
|
|
for (int z=0; z < n; ++z) {
|
|
float unquantized = (ncoeff[z] * quantizer);
|
|
int quantized = (int) floor(unquantized+0.5f);
|
|
quantized_attempt[z] = (S16) quantized;
|
|
dot_product += quantized * ncoeff[z];
|
|
unnorm_sum2 += (F32) quantized * quantized;
|
|
}
|
|
float err2 = (dot_product * dot_product) / unnorm_sum2;
|
|
if (err2 > best_so_far) {
|
|
best_so_far = err2;
|
|
memcpy(best_quantized, quantized_attempt, n * sizeof(best_quantized[0]));
|
|
}
|
|
}
|
|
}
|
|
|
|
// 16-coefficient version of the quantizer search: tries num_quantizers
// quantizers starting at 'quantizer' in steps of 'step_quantizer', scores
// each attempt by (q . ncoeff)^2 / (q . q), and copies the attempt to
// best_quantized[] whenever its score exceeds best_so_far.
//
// Dispatches to the SSE or NEON implementation when one is built in. The
// scalar fallback accumulates in four lanes and combines them in the same
// order as the SIMD code, so the results are bitwise identical.
static void compute_best_quantized_coeff16_loop(radaudio_encoder_state *es, S16 best_quantized[], F32 best_so_far, F32 ncoeff[], F32 quantizer, F32 step_quantizer, int num_quantizers)
{
   RR_UNUSED_VARIABLE(es);
#ifdef DO_BUILD_SSE4
   if (es->cpu.has_sse4_1) {
      radaudio_sse2_compute_best_quantized_coeff16_loop(best_quantized, best_so_far, ncoeff, quantizer, step_quantizer, num_quantizers);
      return;
   }
#endif

#ifdef DO_BUILD_NEON
   radaudio_neon_compute_best_quantized_coeff16_loop(best_quantized, best_so_far, ncoeff, quantizer, step_quantizer, num_quantizers);
   return;
#endif

#ifndef DO_BUILD_NEON // for unreachable code warnings
   for (int attempt=0; attempt < num_quantizers; ++attempt, quantizer += step_quantizer) {
      S16 candidate[16];

      for (int z=0; z < 16; ++z) {
         float scaled = (ncoeff[z] * quantizer);
         int q = (int) floor(scaled+0.5f);
         candidate[z] = (S16) q;
      }

      // same operation order as in SIMD case to be bitwise identical:
      // lane z accumulates elements z, z+4, z+8 and z+12
      F32 dot_lane[4];
      F32 sum2_lane[4];
      for (int lane = 0; lane < 4; lane++)
      {
         F32 q0 = (F32) candidate[0 + lane];
         F32 q1 = (F32) candidate[4 + lane];
         F32 q2 = (F32) candidate[8 + lane];
         F32 q3 = (F32) candidate[12 + lane];

         sum2_lane[lane]  = 1.0e-20f; // avoid divide by 0
         sum2_lane[lane] += q0 * q0;
         sum2_lane[lane] += q1 * q1;
         sum2_lane[lane] += q2 * q2;
         sum2_lane[lane] += q3 * q3;

         dot_lane[lane]  = ncoeff[0 + lane] * q0;
         dot_lane[lane] += ncoeff[4 + lane] * q1;
         dot_lane[lane] += ncoeff[8 + lane] * q2;
         dot_lane[lane] += ncoeff[12 + lane] * q3;
      }
      F32 unnorm_sum2 = (sum2_lane[0] + sum2_lane[2]) + (sum2_lane[1] + sum2_lane[3]);
      F32 dot_product = (dot_lane[0] + dot_lane[2]) + (dot_lane[1] + dot_lane[3]);

      float score = (dot_product * dot_product) / unnorm_sum2;
      if (score > best_so_far) {
         best_so_far = score;
         memcpy(best_quantized, candidate, 16 * sizeof(best_quantized[0]));
      }
   }
#endif // !DO_BUILD_NEON
}
|
|
|
|
// Returns log2(n) rounded to the nearest integer, for n > 0.
//
// frexp() yields an exponent e with value = m * 2^e and m in [0.5,1), so
// applying it to n * sqrt(2)/2 gives the e for which n lies in
// [2^(e-0.5), 2^(e+0.5)).
static int get_rounded_exponent(float n)
{
   const float two_to_one_half = 1.414213562373f;
   int rounded = 0;

   (void) frexp(n * 0.5 * two_to_one_half, &rounded);
   return rounded;
}
|
|
|
|
// Computes, for every band, a scaled energy and its rounded power-of-two
// exponent.
//
//   info           band layout: num_bands, num_coeffs_for_band[], band_scale_encode[]
//   coeff          coefficients, with the bands stored back to back
//   band_exponent  out: rounded log2 of the band energy, clamped to at most 4;
//                  BAND_EXPONENT_NONE when the energy is 0 or the exponent is below -16
//   band_energy    out: sqrt(sum of squares) * band_scale_encode[band];
//                  values below 0.00002 are stored as 0
static void compute_band_energies(radaudio_rate_info *info, float coeff[], int band_exponent[], F32 band_energy[])
{
   float *next = coeff;   // walks through the coefficients band after band

   for (int band=0; band < info->num_bands; ++band) {
      int width = info->num_coeffs_for_band[band];
      float energy = 0;

      for (int i=0; i < width; ++i) {
         float v = *next++;
         energy += v*v;
      }
      energy = sqrtf(energy) * info->band_scale_encode[band]; // average energy over whole band... this allows coding all bands identically
      if (energy < 0.00002f)
         energy = 0;
      band_energy[band] = energy;

      // no need to check the minimum energy separately, as that gets 0'd as an exponent
      int exponent = BAND_EXPONENT_NONE;
      if (energy != 0) {
         exponent = get_rounded_exponent(energy);
         if (exponent < -16) exponent = BAND_EXPONENT_NONE;
         if (exponent >   4) exponent = 4;
      }
      band_exponent[band] = exponent;
   }
}
|
|
|
|
// find the num_data/2'th item if data were sorted
|
|
// can reorder data
|
|
|
|
// Returns the smaller of two ints.
static int imin(int p0, int p1)
{
   if (p1 <= p0)
      return p1;
   return p0;
}
|
|
// Returns the larger of two ints.
static int imax(int p0, int p1)
{
   if (p1 >= p0)
      return p1;
   return p0;
}
|
|
|
|
// Returns the value that would be at index n if data[0..count-1] were sorted
// in ascending order. Runs a selection sort over positions 0..n, so data[]
// is reordered. The selected value is expected to be non-negative.
static int ifind_nth_slow(S16 *data, int count, int n)
{
   for (int pos=0; pos <= n; ++pos) {
      // find the first smallest element in data[pos..count-1]
      int min_at = pos;
      for (int scan=pos+1; scan < count; ++scan)
         if (data[scan] < data[min_at])
            min_at = scan;

      // and move it into position
      S16 displaced = data[pos];
      data[pos] = data[min_at];
      data[min_at] = displaced;
   }
   rrAssert(data[n] >= 0);
   return data[n];
}
|
|
|
|
RR_COMPILER_ASSERT(MAX_COEFF_PER_SUBBAND == 32);
|
|
static int ifind_nth_fast(radaudio_encoder_state *es, S16 *data, int count, int n)
|
|
{
|
|
rrAssert(n <= MAX_COEFF_PER_SUBBAND);
|
|
S16 temp1[MAX_COEFF_PER_SUBBAND],temp2[MAX_COEFF_PER_SUBBAND], *buf1 = temp1, *buf2 = temp2, *(buf[2]);
|
|
buf[0] = buf1;
|
|
buf[1] = buf2;
|
|
int next_buf=0;
|
|
|
|
buf1 = data;
|
|
for(;;) {
|
|
if (count <= 8) {
|
|
int result;
|
|
#ifdef DO_BUILD_SSE4
|
|
if (es->cpu.has_sse4_1)
|
|
result = radaudio_sse4_findnth_of_8_or_less(buf1, count, n);
|
|
else
|
|
#endif
|
|
result = ifind_nth_slow(buf1, count, n);
|
|
return result;
|
|
}
|
|
|
|
int v0 = data[0];
|
|
int v1 = data[count-1];
|
|
int v2 = data[count>>1];
|
|
int lo = imin(v0,v1);
|
|
int hi = imax(v0,v1);
|
|
int clamp_lo = imax(lo,v2);
|
|
int mid = imin(hi,clamp_lo);
|
|
|
|
// count numbers relative to mid
|
|
int below=0;
|
|
int above=MAX_COEFF_PER_SUBBAND;
|
|
|
|
for (int i=0; i < count; ++i)
|
|
rrAssert(buf1[i] >= 0);
|
|
|
|
for (int i=0; i < count; ++i)
|
|
if (buf1[i] < mid)
|
|
buf2[below++] = buf1[i];
|
|
else
|
|
buf2[--above] = buf1[i];
|
|
|
|
if (n < below) {
|
|
if (below == count)
|
|
return ifind_nth_slow(buf1, count, n);
|
|
|
|
// is the nth item in the below section?
|
|
buf1 = buf2;
|
|
count = below;
|
|
} else {
|
|
if (below == 0)
|
|
return ifind_nth_slow(buf1, count, n);
|
|
|
|
buf1 = buf2+above;
|
|
count = MAX_COEFF_PER_SUBBAND-above;
|
|
n -= below;
|
|
}
|
|
buf2 = buf[next_buf];
|
|
next_buf ^= 1;
|
|
}
|
|
}
|
|
|
|
// only actually ever called with:
|
|
// num_data= 4
|
|
// num_data= 8
|
|
// num_data=16
|
|
// num_data=32
|
|
//
|
|
// 8 & 16 are the most important to optimize, since they're the only subband sizes for long blocks;
|
|
// the others come up with short blocks only
|
|
static float find_median(radaudio_encoder_state *es, F32 *data, int num_data)
|
|
{
|
|
// approximate median finder using top 16 bits of floats, because SSE4.1 has a horizontal min we can leverage.
|
|
// floats are all non-negative (they were already fabs()d), so ordering as integer is valid
|
|
// unmeasurable change in PEAQ basic to use this instead of float median
|
|
S32 idata[MAX_COEFF_PER_SUBBAND];
|
|
S16 idata16[MAX_COEFF_PER_SUBBAND];
|
|
memcpy(idata, data, 4*num_data);
|
|
for (int i=0; i < num_data; ++i)
|
|
idata16[i] = (S16) (idata[i] >> 16);
|
|
// since sign bit is clear, we could use U16 and shift 1 less to keep 1 more mantissa bit
|
|
// but we keep it signed in case of limitations in other SIMD implementations
|
|
|
|
S32 result;
|
|
|
|
#ifdef DO_BUILD_SSE4
|
|
if (num_data == 8 && es->cpu.has_sse4_1) {
|
|
result = radaudio_sse4_find_median_4th_of_8(idata16);
|
|
} else if (num_data == 16 && es->cpu.has_sse4_1 && es->cpu.has_popcnt) {
|
|
result = radaudio_sse4popcnt_find_median_8th_of_16(idata16);
|
|
}
|
|
else
|
|
#elif defined(DO_BUILD_NEON)
|
|
if (num_data == 8) {
|
|
result = radaudio_neon_find_median_4th_of_8(idata16);
|
|
} else if (num_data == 16) {
|
|
result = radaudio_neon_find_median_8th_of_16(idata16);
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
result = ifind_nth_fast(es, idata16, num_data, num_data>>1);
|
|
}
|
|
|
|
// most mantissa bits will be 0, but good enough; the median is a heuristic anyway
|
|
result <<= 16;
|
|
F32 int_median;
|
|
memcpy(&int_median, &result, 4);
|
|
return int_median;
|
|
}
|
|
|
|
static void encode_channel(radaudio_encoder_state *es,
|
|
radaudio_block_data *bd,
|
|
F32 *coeff,
|
|
int channel,
|
|
F32 *sbb_energy,
|
|
U32 midside_bands,
|
|
radaudio_block_data *mid // only if side encoding
|
|
) // mid-side encoding
|
|
{
|
|
float band_pulses_raw[2][MAX_BANDS] = { 0 };
|
|
|
|
int is_short_block = es->current_block_short;
|
|
radaudio_rate_info *info = es->info[is_short_block];
|
|
int i,j,k,s,c;
|
|
U8 has_nonempty_subbands[24] = { 0 };
|
|
RAD_ALIGN(int, band_exponent[24], 16);
|
|
RAD_ALIGN(F32, band_energy[24], 16);
|
|
RAD_ALIGN(F32, subband_energy[72], 16);
|
|
|
|
bd->num_bands = info->num_bands;
|
|
bd->num_quantized_subbands = info->num_quantized_subbands;
|
|
bd->num_quantized_coeffs = info->num_quantized_coeffs;
|
|
|
|
int blocksize = is_short_block ? RADAUDIO_SHORT_BLOCK_LEN : RADAUDIO_LONG_BLOCK_LEN;
|
|
|
|
// ramp off the highest frequencies (if they're even used, only 24/32khz)
|
|
int ramp_to_zero_length = 8;
|
|
for (i=1; i < ramp_to_zero_length; ++i)
|
|
coeff[blocksize-i] *= (float) i / ramp_to_zero_length;
|
|
|
|
for (j=info->num_quantized_coeffs; j < info->num_coeffs; ++j)
|
|
bd->quantized_coeff_encode[j] = 0;
|
|
|
|
PROF_BEGIN(bands);
|
|
compute_band_energies(info, coeff, band_exponent, band_energy);
|
|
for (j=0; j < info->num_bands; ++j)
|
|
bd->band_exponent[j] = band_exponent[j];
|
|
|
|
compute_mantissa_bitcount(es->samprate_mode, is_short_block,
|
|
es->mantissa_param,
|
|
bd->band_exponent, bd->band_mantissa_bitcount);
|
|
|
|
// compute fine band energy
|
|
for (j=0; j < info->num_bands; ++j) {
|
|
if (band_exponent[j] == BAND_EXPONENT_NONE)
|
|
bd->band_mantissa[j] = 0;
|
|
else {
|
|
int nb = bd->band_mantissa_bitcount[j];
|
|
if (nb == 0)
|
|
bd->band_mantissa[j] = 0;
|
|
else {
|
|
float be = band_energy[j];
|
|
int raw_fine;
|
|
float x = be / powf(2.0f, (F32) band_exponent[j]);
|
|
rrAssert(x >= 0.70f && x <= 1.42f);
|
|
x *= sqrtf(2.0f);
|
|
if (x < 1) x = 1;
|
|
if (x > 1.9999f) x = 1.9999f;
|
|
x = inverse_approx_pow_2(x); // output is 0..1
|
|
if (x < 0) x = 0;
|
|
if (x > 0.9999f) x = 0.9999f;
|
|
raw_fine = (int) (((1 << MAX_FINE_ENERGY_BITS)-1) * x + 0.5);
|
|
bd->band_mantissa[j] = raw_fine >> (MAX_FINE_ENERGY_BITS-nb); // truncate to get the correct bucket index
|
|
}
|
|
}
|
|
}
|
|
PROF_END(bands);
|
|
|
|
PROF_BEGIN(subbands);
|
|
// subbands:
|
|
if (es->current_block_short) {
|
|
for (j=0; j < info->num_bands; ++j) {
|
|
has_nonempty_subbands[j] = 1;
|
|
subband_energy[j] = band_energy[j];
|
|
}
|
|
} else {
|
|
// compute subband energy
|
|
for (j=0; j < info->num_subbands; ++j) {
|
|
float sbe = 0;
|
|
int start = info->first_coeff_for_subband[j];
|
|
for (i=0; i < info->num_coeffs_for_subband[j]; ++i) {
|
|
float x = coeff[start+i];
|
|
sbe += x*x;
|
|
}
|
|
sbe = sqrtf(sbe);
|
|
subband_energy[j] = sbe;
|
|
}
|
|
|
|
for (j=0; info->num_subbands_for_band[j] == 1; ++j) {
|
|
has_nonempty_subbands[j] = (band_exponent[j] != BAND_EXPONENT_NONE);
|
|
bd->quantized_subbands[j] = (U16) 1;
|
|
}
|
|
|
|
for (; j < info->num_bands; ++j) {
|
|
int count = info->num_subbands_for_band[j];
|
|
int start = info->first_subband_for_band[j];
|
|
int num_pulses = es->subband_predicted_sum[j];
|
|
|
|
rrAssert(count != 1);
|
|
if (num_pulses == 0 || band_exponent[j] == BAND_EXPONENT_NONE) {
|
|
// skipping empty subbands doesn't really save bitrate on normal files,
|
|
// but it helps enormously for silence and for mono-in-stereo (where the side channel is silent after mid-side encoding)
|
|
// for empty bands, we don't save enough bitrate to be worth the complexity of skipping decoded subbands
|
|
// instead output subbands that are optimal to decode
|
|
float subband_pulses = (float) num_pulses / count;
|
|
if (SUBBANDS_SKIP_EMPTY_BANDS)
|
|
subband_pulses = floorf(subband_pulses);
|
|
for (i=0; i < count; ++i)
|
|
bd->quantized_subbands[start+i] = (U16) (subband_pulses + 0.5*(i & 1));
|
|
has_nonempty_subbands[j] = false;
|
|
} else {
|
|
S32 maxsub=0;
|
|
F32 sum = 1.0e-20f, inv_sum;
|
|
for (i=0; i < count; ++i)
|
|
sum += subband_energy[start+i];
|
|
|
|
inv_sum = 1.0f / sum;
|
|
for (i=0; i < count; ++i) {
|
|
U16 val = (U16) ((num_pulses * (subband_energy[start+i]*inv_sum)+0.5));
|
|
bd->quantized_subbands[start+i] = val;
|
|
maxsub = RR_MAX(maxsub,val);
|
|
}
|
|
if (maxsub > 63) {
|
|
// need to scale the subbands down so the largest value isn't too large to be signalled,
|
|
// so remap so that maxsub will be 1 lower than limit (to allow rounding/slop)
|
|
float revised_pulses = num_pulses * 62 / (float) maxsub;
|
|
maxsub = 0;
|
|
for (i=0; i < count; ++i) {
|
|
U16 val = (U16) ((revised_pulses * (subband_energy[start+i]*inv_sum)+0.5));
|
|
bd->quantized_subbands[start+i] = val;
|
|
maxsub = RR_MAX(maxsub,val);
|
|
}
|
|
rrAssert(maxsub <= 63);
|
|
es->stats.block_events[E_subband_renormalize]++;
|
|
}
|
|
|
|
has_nonempty_subbands[j] = false;
|
|
for (i=0; i < count; ++i)
|
|
if (bd->quantized_subbands[start+i] != 0) {
|
|
has_nonempty_subbands[j] = true;
|
|
break;
|
|
}
|
|
|
|
int total=0;
|
|
for (i=0; i < count; ++i)
|
|
total += bd->quantized_subbands[start+i];
|
|
}
|
|
}
|
|
}
|
|
for (j=0; j < info->num_subbands; ++j)
|
|
sbb_energy[j] = subband_energy[j];
|
|
PROF_END(subbands);
|
|
|
|
PROF_BEGIN(analysis1);
|
|
// compute the number of pulses per band for two adjacent quality modes so we can interpolate between them
|
|
float total_desired=0;
|
|
float total_expected=0;
|
|
{
|
|
float weighting[MAX_BANDS] = { 0 };
|
|
float total_weighting=1.0e-20f, recip_total;
|
|
float base_pulses = es->heur.pulse_quality * 100;
|
|
float num_pulses = (base_pulses * base_pulses * (is_short_block ? es->heur.short_block_pulse_scale : 1));
|
|
|
|
if (!is_short_block) {
|
|
if (es->prev_block_short && es->next_block_short)
|
|
num_pulses *= es->heur.short_overlap_scale2;
|
|
else if (es->prev_block_short || es->next_block_short)
|
|
num_pulses *= es->heur.short_overlap_scale1;
|
|
}
|
|
|
|
for (j=0; j < info->num_bands; ++j) {
|
|
float w;
|
|
rrbool midside=false;
|
|
int be = band_exponent[j];
|
|
if (midside_bands & (1 << j)) {
|
|
be = RR_MAX(be, mid->band_exponent[j]); // weight it as if it was the mid weight, if that's larger; later we'll discard samples based on actual exponent
|
|
midside = true;
|
|
}
|
|
if (be == BAND_EXPONENT_NONE)
|
|
w = 0;
|
|
else {
|
|
float exp = (float) be;
|
|
w = powf(es->heur.band_exponent_base[is_short_block], exp);
|
|
}
|
|
|
|
total_expected += 1.0;
|
|
if (be < -14) {
|
|
if (be == -15) total_desired += 0.75f;
|
|
if (be == -16) total_desired += 0.33f;
|
|
} else
|
|
total_desired += 1.0;
|
|
|
|
w *= powf((F32) info->num_coeffs_for_band[j], (F32) es->heur.band_count_exponent[is_short_block]);
|
|
|
|
#define RADA_LERP(t,a,b) ((a)+(t)*((b)-(a)))
|
|
w *= RADA_LERP((float) j/info->num_bands, es->heur.quality_weight_low[is_short_block], 1.0f);
|
|
|
|
#if 0 // this makes 0.002 PEAQ difference even after tuning, not worth the time it would take to tune at all rates
|
|
// inter-band masking
|
|
if (j > 0 && j < info->num_bands-1 && !midside) {
|
|
int exp = band_exponent[j];
|
|
int quietest_neighboring_band_exp = RR_MIN(band_exponent[j-1], band_exponent[j+1]);
|
|
if (exp < quietest_neighboring_band_exp-5) w *= es->heur.band_mask_8[es->samprate_mode][is_short_block][quality];
|
|
else if (exp < quietest_neighboring_band_exp-3) w *= es->heur.band_mask_4[es->samprate_mode][is_short_block][quality];
|
|
else if (exp < quietest_neighboring_band_exp-2) w *= es->heur.band_mask_2[es->samprate_mode][is_short_block][quality];
|
|
else if (exp < quietest_neighboring_band_exp-1) w *= es->heur.band_mask_1[es->samprate_mode][is_short_block][quality];
|
|
}
|
|
#endif
|
|
if (info->num_coeffs_for_band[j] == 1)
|
|
w = 0; // if only one coefficient, only needs one pulse to indicate sign
|
|
|
|
weighting[j] = w;
|
|
total_weighting += weighting[j];
|
|
}
|
|
recip_total = 1.0f/total_weighting;
|
|
|
|
num_pulses *= total_desired / total_expected;
|
|
|
|
for (j=0; j < info->num_bands; ++j) {
|
|
float raw_pulses = num_pulses * weighting[j] * recip_total;
|
|
|
|
float expectation = (band_exponent[j] - es->heur.expectation_base) * es->heur.expectation_scale;
|
|
if (expectation < 0.0f) expectation = 0.0f;
|
|
if (expectation > 1.0f) expectation = 1.0f;
|
|
if (midside_bands & (1 << j)) {
|
|
if (band_exponent[j] < mid->band_exponent[j]) {
|
|
float scale1=1, scale2=1;
|
|
|
|
// we weighted this as if it was the actual mid exponent, now throw away pulses to save space
|
|
|
|
if (midside_bands == 0xffffffff) {
|
|
// if all bands mid-side
|
|
|
|
// phase 1: if exponents are separate by N, start decaying
|
|
// phase 2: if exponent is less than some threshold, start decaying
|
|
|
|
if (band_exponent[j] < mid->band_exponent[j] - es->heur.side_exp_threshold_all) {
|
|
if (band_exponent[j] <= mid->band_exponent[j] - es->heur.side_exp_start2_all)
|
|
scale1 = 0;
|
|
else
|
|
scale1 = linear_remap((float) band_exponent[j], (float) mid->band_exponent[j]-es->heur.side_exp_start2, (float) mid->band_exponent[j]-es->heur.side_exp_threshold, 0.0f, 1.0);
|
|
}
|
|
if (band_exponent[j] < es->heur.side_exp_end_all) {
|
|
if (band_exponent[j] <= es->heur.side_exp_start_all)
|
|
scale2 = 0;
|
|
else
|
|
scale2 = linear_remap((float) band_exponent[j], (float) es->heur.side_exp_start, (float) es->heur.side_exp_end, 0.0f, 1.0);
|
|
}
|
|
} else {
|
|
// if selected bands, which uses a different detector so can have different decay rules
|
|
|
|
// phase 1: if exponents are separate by N, start decaying
|
|
// phase 2: if exponent is less than some threshold, start decaying
|
|
|
|
if (band_exponent[j] < mid->band_exponent[j] - es->heur.side_exp_threshold) {
|
|
if (band_exponent[j] <= mid->band_exponent[j] - es->heur.side_exp_start2)
|
|
scale1 = 0;
|
|
else
|
|
scale1 = linear_remap((float) band_exponent[j], (float) mid->band_exponent[j]-es->heur.side_exp_start2, (float) mid->band_exponent[j]-es->heur.side_exp_threshold, 0.0f, 1.0);
|
|
}
|
|
if (band_exponent[j] < es->heur.side_exp_end) {
|
|
if (band_exponent[j] <= es->heur.side_exp_start)
|
|
scale2 = 0;
|
|
else
|
|
scale2 = linear_remap((float) band_exponent[j], (float) es->heur.side_exp_start , (float) es->heur.side_exp_end, 0.0f, 1.0);
|
|
}
|
|
}
|
|
|
|
if (scale1 > 1.00) scale1=1.00;
|
|
if (scale1 < 0.00) scale1=0.00;
|
|
if (scale2 > 1.00) scale2=1.00;
|
|
if (scale2 < 0.00) scale2=0.00;
|
|
|
|
float scale = RR_MIN(scale1,scale2);
|
|
|
|
expectation *= scale;
|
|
}
|
|
}
|
|
|
|
if (band_exponent[j] < -9) {
|
|
float scale = linear_remap((float) band_exponent[j], -15.0f , (float) silence_exp_start,
|
|
0.05f, (float) silence_exp_value);
|
|
if (scale > 1.00) scale=1.00;
|
|
if (scale < 0.00) scale=0.00;
|
|
if (band_exponent[j] == -16)
|
|
scale = 0;
|
|
expectation *= scale;
|
|
}
|
|
raw_pulses *= expectation;
|
|
|
|
band_pulses_raw[0][j] = raw_pulses;
|
|
}
|
|
}
|
|
PROF_END(analysis1);
|
|
|
|
PROF_BEGIN(coefficients_sum);
|
|
c=s=0; // c = coefficient index, s = subband index
|
|
for (j=0; j < info->num_bands; ++j) {
|
|
float recip;
|
|
int num_pulses;
|
|
|
|
PROF_BEGIN(coefficients_1);
|
|
if (band_exponent[j] == BAND_EXPONENT_NONE)
|
|
num_pulses = 0;
|
|
else if (info->num_coeffs_for_band[j] == 1)
|
|
num_pulses = 1; // just need the sign
|
|
else {
|
|
float base_pulses = band_pulses_raw[0][j];
|
|
num_pulses = (int) (base_pulses * 44100.0 / es->sample_rate);
|
|
}
|
|
|
|
{
|
|
float sum = 1.0e-12f;
|
|
for (i=0; i < info->num_subbands_for_band[j]; ++i) {
|
|
float x = (float) subband_energy[s+i];
|
|
sum += x*x;
|
|
}
|
|
recip = 1.0f/sum;
|
|
}
|
|
PROF_END(coefficients_1);
|
|
|
|
for (i=0; i < info->num_subbands_for_band[j]; ++i) {
|
|
PROF_BEGIN(coefficients_2);
|
|
|
|
float x = (float) subband_energy[s+i];
|
|
float data[32], median;
|
|
int n = info->num_coeffs_for_subband[s+i];
|
|
|
|
// allocate the pulses per subband, based on squared energy
|
|
int sub_pulses = (int) (num_pulses * x*x * recip+0.5);
|
|
|
|
// check if we need to boost the pulses because lots of large coefficients
|
|
if (band_exponent[j] >= -13 && n >= 8) {
|
|
|
|
// find the median
|
|
#ifdef DO_BUILD_SSE4x
|
|
if (es->cpu.has_sse2)
|
|
radaudio_sse2_fabs_coefficients(data, coeff+c, n);
|
|
else
|
|
#endif
|
|
{
|
|
for (k=0; k < n; ++k)
|
|
data[k] = fabsf(coeff[c+k]);
|
|
}
|
|
median = find_median(es, data, n);
|
|
|
|
// count how many coefficients are significantly above the median
|
|
for (k=0; k < n; ++k) {
|
|
if (data[k] > median * es->heur.large_boost_median_test[is_short_block])
|
|
sub_pulses += 2;
|
|
else if (data[k] > median * es->heur.small_boost_median_test[is_short_block])
|
|
sub_pulses += 1;
|
|
}
|
|
}
|
|
PROF_END(coefficients_2);
|
|
|
|
/// now distribute the pulses to the coefficients
|
|
{
|
|
PROF_BEGIN(coefficients_3);
|
|
rrbool no_pulses = false;
|
|
if (sub_pulses == 0)
|
|
no_pulses = true;
|
|
if (band_exponent[j] == BAND_EXPONENT_NONE)
|
|
no_pulses = true;
|
|
|
|
if (!is_short_block)
|
|
// if the subband is going to get 0 energy, then force all coefficients to 0
|
|
if (info->num_subbands_for_band[j] > 1 && bd->quantized_subbands[s+i]==0 && has_nonempty_subbands[j])
|
|
no_pulses = true;
|
|
|
|
if (no_pulses) {
|
|
for (k=0; k < n; ++k)
|
|
bd->quantized_coeff_encode[c+k] = 0;
|
|
PROF_END(coefficients_3);
|
|
} else if (info->num_coeffs_for_subband[s+i] == 1) {
|
|
// subband has only one coefficient, so just need the sign
|
|
bd->quantized_coeff_encode[c] = coeff[c] < 0 ? -1 : 1;
|
|
PROF_END(coefficients_3);
|
|
} else {
|
|
PROF_END(coefficients_3);
|
|
PROF_BEGIN(coefficients_normalize);
|
|
float ncoeff[MAX_COEFF_PER_BAND], sum2=0;
|
|
for (k=0; k < n; ++k) {
|
|
sum2 += coeff[c+k]*coeff[c+k];
|
|
}
|
|
rrAssert(sum2 != 0);
|
|
float sum=0;
|
|
{
|
|
float scale = 1.0f / (sqrtf(sum2) + 1.e-24f);
|
|
for (k=0; k < n; ++k) {
|
|
float v = scale * coeff[c+k];
|
|
ncoeff[k] = v;
|
|
sum += fabsf(v);
|
|
}
|
|
}
|
|
rrAssert(sum != 0); // if coefficients were all 0, then subband should have been all 0, so no_pulses should have been true
|
|
|
|
S16 best_coeff[MAX_COEFF_PER_BAND];
|
|
memset(best_coeff, 0, n*sizeof(best_coeff[0]));
|
|
|
|
// this rather arbitrary computation must be used as is,
|
|
// attempts to alter it to non-integer values always lead
|
|
// to significant quality loss
|
|
float t0,t1;
|
|
float pc = (float) sub_pulses;
|
|
|
|
t0 = (F32) (int) (pc/1.30f + 0.5);
|
|
t1 = (F32) (int) (pc*1.125f + 0.5);
|
|
|
|
//if (t1 > pc+n) t1 = (F32) pc+n;
|
|
|
|
if (t1 > pc+8) t1 = pc+8;
|
|
if (t0 < pc-8) t0 = pc-8;
|
|
|
|
if (t1 > pc+n/2) t1 = pc+n/2;
|
|
if (t0 < pc-n) t0 = pc-n;
|
|
|
|
// the squared error if you use 0 for all coefficients is 1, since squared error vs. 0 is same as squared sum
|
|
float error2_for_zeroes = 1.0f;
|
|
|
|
// but if we transmit all 0s, the decoder replaces with noise, i.e. with random coefficients.
|
|
// the RMSE error due to using random coefficients will be worse, since when the signs mismatch,
|
|
// the error will be even larger, up to (say) 2x larger, and squared that's 4x larger, and signs
|
|
// mismatch half the time.
|
|
//
|
|
// BUT we know the perceptual error from randomness is LESS than using all 0s. so using any error
|
|
// estimate that is LARGER than the error for all zeroes will be perceptually wrong. so we tune
|
|
// this value. maybe should be per-band:
|
|
float min_error2 = error2_for_zeroes / 2; // best value experimentlaly
|
|
PROF_END(coefficients_normalize);
|
|
|
|
PROF_BEGIN(coefficients_4);
|
|
float best_so_far = min_error2;
|
|
|
|
// optimized error calculation avoids a sqrt() by computing this derived, monotonically consistent error instead:
|
|
// see compute_quantized_coefficients() for naive version
|
|
best_so_far -= 2;
|
|
best_so_far *= best_so_far;
|
|
best_so_far /= 4;
|
|
|
|
int num_steps = (int) (t1-t0) + 1;
|
|
float recip_sum = 1.0f / sum;
|
|
float quantizer = t0 * recip_sum;
|
|
float step_quantizer = recip_sum;
|
|
|
|
if (n == 8) {
|
|
PROF_END(coefficients_4);
|
|
PROF_BEGIN(coefficients_n8);
|
|
compute_best_quantized_coeff8_loop(es, best_coeff, best_so_far, ncoeff, quantizer, step_quantizer, num_steps);
|
|
PROF_END(coefficients_n8);
|
|
} else if (n == 16) {
|
|
PROF_END(coefficients_4);
|
|
PROF_BEGIN(coefficients_n16);
|
|
compute_best_quantized_coeff16_loop(es, best_coeff, best_so_far, ncoeff, quantizer, step_quantizer, num_steps);
|
|
PROF_END(coefficients_n16);
|
|
} else {
|
|
for (int q=0; q < num_steps; ++q, quantizer += step_quantizer) {
|
|
S16 quantized_attempt[MAX_COEFF_PER_SUBBAND];
|
|
|
|
F32 cross_sum=0;
|
|
F32 unnorm_sum2=0;
|
|
for (int z=0; z < n; ++z) {
|
|
float unquantized = (ncoeff[z] * quantizer);
|
|
int quantized = (int) floor(unquantized+0.5f);
|
|
quantized_attempt[z] = (S16) quantized;
|
|
cross_sum += quantized * ncoeff[z];
|
|
unnorm_sum2 += (F32) quantized * quantized;
|
|
}
|
|
// this optimized computation resembles normalizing and computing a dot product
|
|
float err2 = (cross_sum*cross_sum) / unnorm_sum2;
|
|
if (err2 > best_so_far) {
|
|
best_so_far = err2;
|
|
memcpy(best_coeff, quantized_attempt, n * sizeof(best_coeff[0]));
|
|
}
|
|
}
|
|
PROF_END(coefficients_4);
|
|
}
|
|
|
|
PROF_BEGIN(coefficients_5);
|
|
// check if coefficients are too large
|
|
int largest = 0;
|
|
for (k=0; k < n; ++k)
|
|
largest = RR_MAX(largest, abs(best_coeff[k]));
|
|
|
|
if (largest > 112) {
|
|
++es->stats.block_events[E_coefficients_renormalize];
|
|
|
|
float flargest = 0.00001f;
|
|
for (k=0; k < n; ++k)
|
|
if (fabsf(ncoeff[k]) > flargest)
|
|
flargest = fabsf(ncoeff[k]);
|
|
|
|
int target = 112;
|
|
float scale = target / flargest; // scale * flargest = target
|
|
for (k=0; k < n; ++k) {
|
|
best_coeff[k] = (S16) floorf(ncoeff[k] * scale + 0.5f);
|
|
}
|
|
}
|
|
|
|
for (k=0; k < n; ++k)
|
|
bd->quantized_coeff_encode[c+k] = (S16) best_coeff[k];
|
|
|
|
PROF_END(coefficients_5);
|
|
}
|
|
}
|
|
c += info->num_coeffs_for_subband[s+i];
|
|
}
|
|
s += info->num_subbands_for_band[j];
|
|
}
|
|
PROF_END(coefficients_sum);
|
|
}
|
|
|
|
// Forward declaration; being static, the detector itself must be defined
// elsewhere in this translation unit.
static int transient_analysis(F32 *in, int N, int stride, F32 threshold);

// Runs transient_analysis() on a fixed 1024-sample window of a single channel.
//
//   in        - base pointer of the channel to analyze
//   offset    - index of the first sample of the window, counted in samples
//               (scaled by 'stride' to get the float index)
//   stride    - distance, in floats, between consecutive samples of the channel
//   threshold - passed through to transient_analysis() unchanged
//
// Returns the value produced by transient_analysis(); callers in this file
// treat any nonzero result as "use a short block".
static int transient_analysis_wrapper(F32 *in, int offset, int stride, F32 threshold)
{
   // NOTE(review): the literal 1024 presumably matches RADAUDIO_LONG_BLOCK_LEN -- confirm
   F32 *window_start = in + offset*stride;

   return transient_analysis(window_start, 1024, stride, threshold);
}
|
|
|
|
// Reports how many channels are really needed for the samples around 'offset'.
//
//   stereo_input  - interleaved samples, two floats per sample index
//   input_len     - number of sample pairs available in stereo_input
//   offset        - center of the window to inspect, in sample pairs
//   num_samples   - half-width of the window, in sample pairs
//   mono_detection_aggressiveness - tolerance; the allowed per-pair difference
//                   is (aggressiveness + 0.5) / 32768
//
// The window [offset-num_samples, offset+num_samples) is clipped to
// [0, input_len); pairs outside the input are never read, so they can never
// make the block look like stereo.
//
// Returns 2 as soon as one pair differs by more than the tolerance, else 1.
static int stereo_count_effective_channels(float *stereo_input, size_t input_len, size_t offset, int num_samples, int mono_detection_aggressiveness)
{
   const float threshold = ((float) mono_detection_aggressiveness + 0.5f) / 32768.0f;
   const size_t half_window = (size_t) num_samples;

   // clip the left edge at the start of the input
   size_t first = 0;
   if (!(offset < half_window))
      first = offset - half_window;

   // clip the right edge at the end of the input
   size_t last = offset + half_window;
   if (last > input_len)
      last = input_len;

   for (size_t pair = first; pair < last; ++pair) {
      float left  = stereo_input[pair*2 + 0];
      float right = stereo_input[pair*2 + 1];

      // one sufficiently different pair is enough to call it stereo
      if (fabsf(left - right) > threshold)
         return 2;
   }

   // every inspected pair matched closely enough: mono
   return 1;
}
|
|
|
|
RADDEFFUNC int radaudio_determine_preferred_next_block_length(radaudio_encoder *rae,
|
|
radaudio_blocktype firsttype,
|
|
F32 *input,
|
|
size_t input_len,
|
|
size_t offset)
|
|
{
|
|
int cur_short;
|
|
radaudio_encoder_state *es = (radaudio_encoder_state *) rae;
|
|
|
|
if (es->block_number == 0)
|
|
cur_short = (firsttype == RADAUDIO_BLOCKTYPE_short);
|
|
else
|
|
cur_short = es->current_block_short;
|
|
int num_samples = cur_short ? RADAUDIO_SHORT_BLOCK_LEN : RADAUDIO_LONG_BLOCK_LEN;
|
|
|
|
if (offset + num_samples + RADAUDIO_LONG_BLOCK_LEN >= input_len)
|
|
return RADAUDIO_BLOCKTYPE_short;
|
|
else {
|
|
int stride = es->num_channels;
|
|
if (transient_analysis_wrapper(input, (int) offset + num_samples, stride, TRANSIENT_THRESHOLD))
|
|
return RADAUDIO_BLOCKTYPE_short;
|
|
if (es->num_channels == 2) {
|
|
if (transient_analysis_wrapper(input+1, (int) offset + num_samples, stride, TRANSIENT_THRESHOLD))
|
|
return RADAUDIO_BLOCKTYPE_short;
|
|
}
|
|
}
|
|
return RADAUDIO_BLOCKTYPE_long;
|
|
}
|
|
|
|
// Picks the block type the encoder would prefer for the very first block,
// which is centered at sample 0.
//
//   rae       - encoder; only num_channels is read from its state
//   input     - sample data; for two channels the second channel starts at input+1
//   input_len - length of the input in samples
//
// Returns RADAUDIO_BLOCKTYPE_short when the input is no longer than one long
// block or when transient analysis fires on any channel; otherwise
// RADAUDIO_BLOCKTYPE_long.
RADDEFFUNC radaudio_blocktype radaudio_determine_preferred_first_block_length(radaudio_encoder *rae,
                                                                              F32 *input,
                                                                              size_t input_len)
{
   radaudio_encoder_state *es = (radaudio_encoder_state *) rae;
   const size_t first_center = 0;

   // input too short to hold a long block
   if (first_center + RADAUDIO_LONG_BLOCK_LEN >= input_len)
      return RADAUDIO_BLOCKTYPE_short;

   if (transient_analysis_wrapper(input, (int) first_center, es->num_channels, TRANSIENT_THRESHOLD))
      return RADAUDIO_BLOCKTYPE_short;

   // the second channel is only examined when the first one found nothing
   if (es->num_channels == 2 &&
       transient_analysis_wrapper(input+1, (int) first_center, es->num_channels, TRANSIENT_THRESHOLD))
      return RADAUDIO_BLOCKTYPE_short;

   return RADAUDIO_BLOCKTYPE_long;
}
|
|
|
|
RADDEFFUNC int radaudio_encode_block(radaudio_encoder *rae,
|
|
float *input,
|
|
size_t input_len, // in samples (stereo pairs count as one)
|
|
size_t *poffset , // in samples (stereo pairs count as one)
|
|
U8 *encode_buffer, // recommend MAX_ENCODED_BLOCK_SIZE
|
|
size_t encode_buffer_size)
|
|
{
|
|
radaudio_encode_info info = { 0 };
|
|
return radaudio_encode_block_ext(rae, input, input_len, poffset, encode_buffer, encode_buffer_size, &info);
|
|
}
|
|
|
|
// Measures how many bits the zero-run-length symbols would take for the
// coefficients [start, num_quantized_coeffs) of every output channel, using
// the code lengths in rada_zero_runlength_huff.
//
//   es           - encoder state; the coefficient count is taken from es->info[0]
//   bd           - per-channel block data whose quantized_coeff_encode[] is scanned
//   out_channels - number of channels to scan
//   start        - index of the first coefficient to include
//   numsym       - if non-NULL, receives the number of symbols counted
//
// A run is only charged when a nonzero coefficient ends it, so zeros trailing
// the last nonzero coefficient of a channel cost nothing here. With two
// output channels, one END_OF_ZERORUN symbol is charged after channel 0.
//
// Returns the total bit count.
static int compute_rle_length(radaudio_encoder_state *es, radaudio_block_data bd[2], int out_channels, int start, int *numsym)
{
   radaudio_rate_info *rate = es->info[0];
   int total_bits = 0;
   int total_symbols = 0;

   for (int ch = 0; ch < out_channels; ++ch) {
      int zero_run = 0;

      for (int pos = start; pos < rate->num_quantized_coeffs; ++pos) {
         if (bd[ch].quantized_coeff_encode[pos] == 0) {
            ++zero_run;
            continue;
         }

         // runs too long for a single symbol are split into MAX_RUNLEN pieces
         for (; zero_run >= MAX_RUNLEN; zero_run -= MAX_RUNLEN) {
            total_bits += rada_zero_runlength_huff.encode[MAX_RUNLEN].length;
            ++total_symbols;
         }

         if (zero_run >= COARSE_RUNLEN_THRESHOLD) {
            // coarse symbol carries the run rounded down to a multiple of 4;
            // the low two bits are charged as 2 raw bits
            total_bits += rada_zero_runlength_huff.encode[zero_run & ~3].length + 2;
         } else {
            total_bits += rada_zero_runlength_huff.encode[zero_run].length;
         }
         ++total_symbols;

         zero_run = 0;
      }

      // separator between the two channels
      if (ch == 0 && out_channels == 2) {
         total_bits += rada_zero_runlength_huff.encode[END_OF_ZERORUN].length;
         ++total_symbols;
      }
   }

   if (numsym != NULL)
      *numsym = total_symbols;

   return total_bits;
}
|
|
|
|
// returns number of bytes encoded;
|
|
// @TODO: reverse the below, 0 should be "done" and -1 should be "too small"
|
|
// returns 0 if output buffer isn't big enough;
|
|
// returns -1 if stream is done
|
|
// returns -2 on internal error
|
|
int radaudio_encode_block_ext(radaudio_encoder *rae,
|
|
F32 *input,
|
|
size_t input_len,
|
|
size_t *poffset,
|
|
U8 *encode_buffer,
|
|
size_t encode_buffer_max,
|
|
radaudio_encode_info *info)
|
|
{
|
|
int force_first_block = -1;
|
|
int force_next_block = -1;
|
|
radaudio_encoder_state *es = (radaudio_encoder_state *) rae;
|
|
size_t offset = *poffset;
|
|
|
|
// 'offset' is the middle of the region we're going to encode, and also
|
|
// the offset of the samples we will fully encode when we finish encoding this block
|
|
|
|
PROF_BEGIN(encode_all);
|
|
|
|
radaudio_rate_info *bi;
|
|
|
|
RAD_ALIGN(radaudio_block_data, bd[2], 16);
|
|
int num_samples;
|
|
|
|
if (info->force_first_blocktype == RADAUDIO_BLOCKTYPE_short)
|
|
force_first_block = 1;
|
|
else if (info->force_first_blocktype == RADAUDIO_BLOCKTYPE_long)
|
|
force_first_block = 0;
|
|
|
|
if (info->force_next_blocktype == RADAUDIO_BLOCKTYPE_short)
|
|
force_next_block = 1;
|
|
else if (info->force_next_blocktype == RADAUDIO_BLOCKTYPE_long)
|
|
force_next_block = 0;
|
|
|
|
// do transient analysis on the current block
|
|
if (es->block_number == 0) {
|
|
if (force_first_block >= 0)
|
|
es->current_block_short = force_first_block;
|
|
else {
|
|
if (offset + RADAUDIO_LONG_BLOCK_LEN >= input_len)
|
|
es->current_block_short = 1;
|
|
else {
|
|
PROF_BEGIN(transient_analysis);
|
|
es->current_block_short = (U8) transient_analysis_wrapper(input, (int) offset, es->num_channels, TRANSIENT_THRESHOLD);
|
|
if (es->num_channels == 2) {
|
|
if (transient_analysis_wrapper(input+1, (int) offset, es->num_channels, TRANSIENT_THRESHOLD))
|
|
es->current_block_short = 1;
|
|
}
|
|
PROF_END(transient_analysis);
|
|
}
|
|
}
|
|
}
|
|
|
|
num_samples = es->current_block_short ? RADAUDIO_SHORT_BLOCK_LEN : RADAUDIO_LONG_BLOCK_LEN;
|
|
|
|
if (force_next_block >= 0)
|
|
es->next_block_short = force_next_block;
|
|
else {
|
|
// we need to lookahead to the NEXT block to know how to window the right side of our block
|
|
//
|
|
// so our current block is [offset-num_samples, off+num_samples)
|
|
// and the next block will be centered at offset+num_samples, and
|
|
// if it's LONG_BLOCK it will be [offset+num_samples-LONG_BLOCK, offset+num_samples+LONG_BLOCK)
|
|
if (offset + num_samples + RADAUDIO_LONG_BLOCK_LEN >= input_len)
|
|
es->next_block_short = 1;
|
|
else {
|
|
int stride = es->num_channels;
|
|
es->next_block_short = (U8) transient_analysis_wrapper(input, (int) offset + num_samples, stride, TRANSIENT_THRESHOLD);
|
|
if (es->num_channels == 2) {
|
|
if (transient_analysis_wrapper(input+1, (int) offset + num_samples, stride, TRANSIENT_THRESHOLD))
|
|
es->next_block_short = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// if the leftmost sample we would encode is off the end of the input, we're done
|
|
if (offset >= input_len + RADAUDIO_SHORT_BLOCK_LEN) {
|
|
PROF_END(encode_all);
|
|
return RADAUDIOENC_AT_EOF;
|
|
}
|
|
|
|
// always end with a short block, to avoid weird overlap rules. actually, two short blocks
|
|
if (offset >= input_len + RADAUDIO_SHORT_BLOCK_LEN*2 + RADAUDIO_LONG_BLOCK_LEN)
|
|
es->next_block_short = 1;
|
|
|
|
bi = es->info[es->current_block_short];
|
|
|
|
PROF_BEGIN(check_mono);
|
|
int num_channels = es->num_channels;
|
|
int out_channels = num_channels == 1 ? 1 : stereo_count_effective_channels(input, input_len, offset, num_samples, 12);
|
|
if (out_channels == 1 && es->num_channels == 2)
|
|
++es->stats.block_events[E_stereo_as_mono];
|
|
PROF_END(check_mono);
|
|
|
|
F32 subband_energy[2][256];
|
|
|
|
rrbool do_mid_side = false;
|
|
rrbool do_mid_side_bands = false;
|
|
|
|
U32 mid_side_bands=0; // set bit (1<<j) for band j if it's mid-side encoded
|
|
|
|
int error = 0;
|
|
|
|
if (out_channels == 1) {
|
|
FFT_ALIGN(F32, coeff[MAX_COEFFS]);
|
|
mdct_block(es, coeff, es->current_block_short, input, (int) offset, (int) input_len, es->prev_block_short, es->next_block_short, es->num_channels, es->num_channels==2, info, 0);
|
|
encode_channel(es, &bd[0], coeff, 0, subband_energy[0], 0, NULL);
|
|
} else {
|
|
FFT_ALIGN(F32, coeff1[MAX_COEFFS]);
|
|
FFT_ALIGN(F32, coeff2[MAX_COEFFS]);
|
|
mdct_block(es, coeff1, es->current_block_short, input , (int) offset, (int) input_len, es->prev_block_short, es->next_block_short, es->num_channels, false, info, 0);
|
|
mdct_block(es, coeff2, es->current_block_short, input+1, (int) offset, (int) input_len, es->prev_block_short, es->next_block_short, es->num_channels, false, info, 1);
|
|
if (es->allow_mid_side) {
|
|
// mid-side encoding intentionally encodes the side channel with less accuracy,
|
|
// so we should only use it if the side is significantly quieter than the mid
|
|
FFT_ALIGN(F32, coeff_mid [MAX_COEFFS]);
|
|
FFT_ALIGN(F32, coeff_side[MAX_COEFFS]);
|
|
for (int i=0; i < bi->num_coeffs; ++i) {
|
|
coeff_mid [i] = (coeff1[i] + coeff2[i]) * 0.5f;
|
|
coeff_side[i] = (coeff1[i] - coeff2[i]) * 1.0f;
|
|
}
|
|
RAD_ALIGN(F32, band_energy_dummy [MAX_BANDS], 16);
|
|
RAD_ALIGN(int, band_exponent_mid [MAX_BANDS], 16);
|
|
RAD_ALIGN(int, band_exponent_side[MAX_BANDS], 16);
|
|
compute_band_energies(bi, coeff_mid , band_exponent_mid , band_energy_dummy);
|
|
compute_band_energies(bi, coeff_side, band_exponent_side, band_energy_dummy);
|
|
int tiny=0, smaller=0, much_smaller=0, larger=0;
|
|
for (int j=0; j < bi->num_bands; ++j) {
|
|
if (band_exponent_side[j] <= es->heur.mid_side_tiny)
|
|
++tiny;
|
|
else if (band_exponent_side[j] <= band_exponent_mid[j]+es->heur.mid_side_offset || (band_exponent_side[j] < band_exponent_mid[j] && band_exponent_mid[j] <= es->heur.mid_side_threshold))
|
|
++much_smaller;
|
|
else if (band_exponent_side[j] < band_exponent_mid[j])
|
|
++smaller;
|
|
else
|
|
++larger;
|
|
}
|
|
|
|
if (larger == 0 && smaller < es->heur.mid_side_max_bad_bands) {
|
|
// most are tiny or much_smaller
|
|
do_mid_side = true;
|
|
encode_channel(es, &bd[0], coeff_mid , 0, subband_energy[0], 0, NULL);
|
|
encode_channel(es, &bd[1], coeff_side, 1, subband_energy[1], 0xffffffff, &bd[0]);
|
|
} else if (!es->current_block_short) {
|
|
// consider doing it per-band
|
|
int do_band[MAX_BANDS] = { 0 };
|
|
for (int j=0; j < bi->num_bands; ++j) {
|
|
if (band_exponent_side[j] < es->heur.side_exp_end) // mid_side_tiny)
|
|
do_band[j] = 2;
|
|
else if (band_exponent_side[j] < band_exponent_mid[j]-es->heur.side_exp_threshold) // + mid_side_offset || (band_exponent_side[j] < band_exponent_mid[j] && band_exponent_mid[j] <= mid_side_threshold))
|
|
do_band[j] = 2;
|
|
else if (band_exponent_side[j] <= es->heur.side_exp_end) // mid_side_tiny)
|
|
do_band[j] = 1;
|
|
else if (band_exponent_side[j] <= band_exponent_mid[j]-es->heur.side_exp_threshold) // + mid_side_offset || (band_exponent_side[j] < band_exponent_mid[j] && band_exponent_mid[j] <= mid_side_threshold))
|
|
do_band[j] = 1;
|
|
}
|
|
do_band[bi->num_bands] = do_band[bi->num_bands+1] = true;
|
|
int count = 0;
|
|
U32 actual_bands = 0;
|
|
int k=0;
|
|
// we signal in groups of 3 bands, so have to find groups of 3 bands where it's ok to mid-side
|
|
for (int j=0; j < bi->num_bands; j += 3, ++k) {
|
|
if (do_band[j+0]+do_band[j+1]+do_band[j+2] >= 5) { // if at least two of the three get reduced, and the third one is close
|
|
++count;
|
|
mid_side_bands |= (1 << k);
|
|
actual_bands |= (7 << j);
|
|
} else {
|
|
do_band[j+0] = do_band[j+1] = do_band[j+2] = 0;
|
|
}
|
|
}
|
|
|
|
if (count >= 1) {
|
|
do_mid_side_bands = true;
|
|
// create composite coefficient arrays that have a mix of either one or the other in each band
|
|
for (int j=0; j < bi->num_bands; ++j) {
|
|
if (do_band[j]) {
|
|
int start = bi->first_coeff_for_band[j];
|
|
int num_coeffs_for_band = bi->num_coeffs_for_band[j];
|
|
memcpy(coeff1 + start, coeff_mid + start, sizeof(coeff_mid[0]) * num_coeffs_for_band);
|
|
memcpy(coeff2 + start, coeff_side + start, sizeof(coeff_mid[0]) * num_coeffs_for_band);
|
|
}
|
|
}
|
|
encode_channel(es, &bd[0], coeff1, 0, subband_energy[0], 0, NULL);
|
|
encode_channel(es, &bd[1], coeff2, 1, subband_energy[1], actual_bands, &bd[0]);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!do_mid_side && !do_mid_side_bands) {
|
|
encode_channel(es, &bd[0], coeff1, 0, subband_energy[0], 0, NULL);
|
|
encode_channel(es, &bd[1], coeff2, 1, subband_energy[1], 0, NULL);
|
|
}
|
|
}
|
|
|
|
int c;
|
|
|
|
U8 band_exponents[32*2];
|
|
int num_band_exponents=0;
|
|
rrbool stereo_predict_exponent = false;
|
|
|
|
// band exponents
|
|
|
|
for (c=0; c < out_channels; ++c) {
|
|
int lastn = PREDICT_FIRST_BAND_EXP;
|
|
|
|
for (int j=0; j < bi->num_bands; ++j) {
|
|
int n = bd[c].band_exponent[j];
|
|
rrAssert(n == BAND_EXPONENT_NONE || (n >= -32 && n < 32));
|
|
if (BAND_EXPONENT_NONE == -17)
|
|
rrAssert(n >= -17 && n < 32);
|
|
|
|
band_exponents[num_band_exponents++] = (U8) (n - lastn);
|
|
lastn = n;
|
|
}
|
|
}
|
|
|
|
if (out_channels == 2) {
|
|
// try stereo predicting exponents
|
|
int right_cost_nostereo=0, right_cost_stereo=0;
|
|
for (int j=0; j < bi->num_bands; ++j) {
|
|
right_cost_nostereo += rada_band_exponent_correct_huff.encode[band_exponents[bi->num_bands + j]].length;
|
|
|
|
int predict = bd[1].band_exponent[j] - bd[0].band_exponent[j];
|
|
int cost = rada_band_exponent_stereo_correct_huff.encode[(U8) predict].length;
|
|
if (cost == 0)
|
|
right_cost_stereo = 256*bi->num_bands; // if invalid, can't use this path
|
|
else
|
|
right_cost_stereo += cost;
|
|
}
|
|
|
|
if (right_cost_stereo < right_cost_nostereo) {
|
|
stereo_predict_exponent = true;
|
|
num_band_exponents >>= 1;
|
|
for (int i=0; i < bi->num_bands; ++i)
|
|
band_exponents[32+i] = (U8) (bd[1].band_exponent[i] - bd[0].band_exponent[i]);
|
|
}
|
|
}
|
|
|
|
U8 subband_value[128*2];
|
|
U8 subband_correction[32*2];
|
|
S8 big_coefficients[1024*2];
|
|
U8 nonzero_coefficients[1024*2];
|
|
U8 runlength_data[1025*2];
|
|
U8 runlength_bottom[1024*2];
|
|
U8 nonzero_flagbits[1024*2/8*16];
|
|
U8 subband_stereo_correct[128];
|
|
|
|
int num_subband_values0=0;
|
|
int num_subband_corrections=0;
|
|
int num_subband_stereo_correct=0;
|
|
int num_big_coefficients=0;
|
|
int num_nonzero_coefficients=0;
|
|
int num_runlength_data=0;
|
|
int num_runlength_bottom=0;
|
|
|
|
rrbool disable_subband_predict = false;
|
|
// compute subband predictions
|
|
for (c=0; c < out_channels; ++c) {
|
|
for (int j=0; j < bi->num_bands; ++j) {
|
|
// skip subband data if subband occupies whole band
|
|
if (bi->num_subbands_for_band[j] == 1)
|
|
continue;
|
|
|
|
int start = bi->first_subband_for_band[j];
|
|
int predicted_sum = es->subband_predicted_sum[j];
|
|
int actual_sum = 0;
|
|
for (int i=0; i < bi->num_subbands_for_band[j]; ++i) {
|
|
int v = bd[c].quantized_subbands[start+i];
|
|
actual_sum += v;
|
|
}
|
|
int predict = (actual_sum - predicted_sum);
|
|
if (predict < -128 || predict > 127) {
|
|
disable_subband_predict = true;
|
|
}
|
|
else if (rada_subband_value_last_in_band_correct_huff.encode[(U8) predict].length == 0) {
|
|
disable_subband_predict = true;
|
|
}
|
|
}
|
|
}
|
|
es->stats.block_events[E_subband_nopredict] += disable_subband_predict ? 1 : 0;
|
|
|
|
rrbool stereo_predict_subbands = false;
|
|
if (!es->current_block_short) {
|
|
if (out_channels == 2) {
|
|
// try stereo predicting subbands
|
|
int right_cost_nostereo=0, right_cost_stereo=0;
|
|
for (int j=0; j < bi->num_bands; ++j) {
|
|
if (bi->num_subbands_for_band[j] == 1)
|
|
continue;
|
|
|
|
if (bd[1].band_exponent[j] == BAND_EXPONENT_NONE && SUBBANDS_SKIP_EMPTY_BANDS)
|
|
continue;
|
|
|
|
int start = bi->first_subband_for_band[j];
|
|
int num = bi->num_subbands_for_band[j];
|
|
|
|
int bias = es->subband_bias[j];
|
|
|
|
int actual_sum=0;
|
|
for (int i=0; i < num; ++i) {
|
|
actual_sum += bd[1].quantized_subbands[start+i];
|
|
if (i == num-1 && !disable_subband_predict)
|
|
right_cost_nostereo += rada_subband_value_last_in_band_correct_huff.encode[(U8) (actual_sum - es->subband_predicted_sum[j])].length;
|
|
else
|
|
right_cost_nostereo += rada_subband_value_huff.encode[(bias + bd[1].quantized_subbands[start+i])&63].length;
|
|
|
|
int correct = bd[1].quantized_subbands[start+i] - bd[0].quantized_subbands[start+i];
|
|
int cost = rada_subband_value_stereo_correct_huff.encode[(U8) correct].length;
|
|
if (cost == 0)
|
|
right_cost_stereo = 256*bi->num_subbands; // if invalid, can't use this path
|
|
else
|
|
right_cost_stereo += cost;
|
|
}
|
|
}
|
|
|
|
if (right_cost_stereo < right_cost_nostereo) {
|
|
stereo_predict_subbands = true;
|
|
}
|
|
}
|
|
|
|
// compute subband value encodings
|
|
for (c=0; c < out_channels; ++c) {
|
|
for (int j=0; j < bi->num_bands; ++j) {
|
|
// skip subband data if subband occupies whole band, should be first 12
|
|
if (bi->num_subbands_for_band[j] == 1)
|
|
continue;
|
|
|
|
int start = bi->first_subband_for_band[j];
|
|
int num_to_write = bi->num_subbands_for_band[j];
|
|
|
|
if (bd[c].band_exponent[j] == BAND_EXPONENT_NONE && SUBBANDS_SKIP_EMPTY_BANDS)
|
|
continue;
|
|
|
|
if (c == 1 && stereo_predict_subbands) {
|
|
for (int i = 0; i < num_to_write; ++i) {
|
|
int correct = bd[1].quantized_subbands[start+i] - bd[0].quantized_subbands[start+i];
|
|
subband_stereo_correct[num_subband_stereo_correct++] = (U8) correct;
|
|
}
|
|
} else {
|
|
int predicted_sum = es->subband_predicted_sum[j];
|
|
int bias = es->subband_bias[j];
|
|
int actual_sum = 0;
|
|
|
|
if (!disable_subband_predict)
|
|
--num_to_write;
|
|
|
|
for (int i=0; i < num_to_write; ++i) {
|
|
int v = bd[c].quantized_subbands[start+i];
|
|
actual_sum += v;
|
|
int code = (v + bias)&63;
|
|
if (code < 0 || code > LARGEST_BIASED_SUBBAND) fprintf(stderr, "encoded subband value %d (value %d) was outside of range 0..%d\n", code, v, LARGEST_BIASED_SUBBAND);
|
|
subband_value[num_subband_values0++] = (U8) code;
|
|
}
|
|
|
|
if (!disable_subband_predict) {
|
|
actual_sum += bd[c].quantized_subbands[start+num_to_write];
|
|
U8 correct = (U8) (actual_sum - predicted_sum);
|
|
subband_correction[num_subband_corrections++] = correct;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
int nz_mode = 0; // for short blocks, use 0 value to avoid sending extra header flags
|
|
|
|
if (!es->current_block_short) {
|
|
// encode the nonzero coefficient positions in bitarray
|
|
for (c=0; c < out_channels; ++c) {
|
|
int p=c*(1024/8);
|
|
for (int j=0; j < 1024; j += 8) {
|
|
U8 flags=0;
|
|
for (int k=0; k < 8; ++k) {
|
|
int bits = bd[c].quantized_coeff_encode[j+k];
|
|
if (bits != 0) {
|
|
flags |= (1 << k);
|
|
}
|
|
}
|
|
nonzero_flagbits[p++] = flags;
|
|
}
|
|
}
|
|
|
|
int best_no_rle=0, best_num=0;
|
|
int best_mode=0, best_cost=0x7fffffff;
|
|
// for each mode, measure the cost
|
|
for (int m=0; m < 4; ++m) {
|
|
radaudio_nonzero_blockmode_descriptor *nz_desc = &es->nz_desc[m];
|
|
int bits= (m==0 ? 0 : 6); // anything other than mode 0 forces an extra header byte, though we might already be paying it for other reasons so approximate as 3/4ths of the header byte
|
|
int nc = nz_desc->num_8byte_chunks;
|
|
int num=0, numsym=0;
|
|
int base;
|
|
int bitc[8] = { 0 };
|
|
//bits += compute_rle_length(es, bd, out_channels, nc*64);
|
|
bits = compute_rle_length(es, bd, out_channels, nc*64, &numsym);
|
|
base = bits;
|
|
for (c=0; c < out_channels; ++c) {
|
|
int p = (1024/8)*c;
|
|
for (int i=0; i < nc; ++i) {
|
|
radaudio_huffman *h = rada_nonzero_bitflags_huff[nz_desc->huffman_table_for_chunk[i]];
|
|
num += 8;
|
|
int pre = bits;
|
|
|
|
if (nz_desc->invert_chunk[i])
|
|
for (int j=0; j < 8; ++j)
|
|
bits += h->encode[255^nonzero_flagbits[p++]].length;
|
|
else
|
|
for (int j=0; j < 8; ++j)
|
|
bits += h->encode[ nonzero_flagbits[p++]].length;
|
|
if (m == 0)
|
|
bitc[nz_desc->huffman_table_for_chunk[i]] += (bits - pre);
|
|
}
|
|
}
|
|
|
|
if (bits < best_cost) {
|
|
best_cost = bits;
|
|
best_mode = m;
|
|
}
|
|
if (m == 0) {
|
|
best_no_rle = bits - base;
|
|
best_num = num;
|
|
}
|
|
}
|
|
|
|
nz_mode = best_mode;
|
|
}
|
|
es->stats.block_events[E_nzmode0 + nz_mode] += 1;
|
|
|
|
// encode non-zero coefficient locations
|
|
for (c=0; c < out_channels; ++c) {
|
|
int i,j,k;
|
|
|
|
if (!es->current_block_short && nz_mode != 3) {
|
|
int count = es->nz_desc[nz_mode].num_8byte_chunks*64;
|
|
|
|
// encode the first `count` coefficients as non-zero based on 1-bit flags to be compressed later
|
|
for (j=0; j < count; j += 8) {
|
|
U8 flags=0;
|
|
for (k=0; k < 8; ++k) {
|
|
int bits = bd[c].quantized_coeff_encode[j+k];
|
|
if (bits != 0) {
|
|
flags |= (1 << k);
|
|
if (abs(bits) > 7) {
|
|
nonzero_coefficients[num_nonzero_coefficients++] = 0;
|
|
big_coefficients [num_big_coefficients++ ] = (S8) bits;
|
|
} else {
|
|
nonzero_coefficients[num_nonzero_coefficients++] = (U8) (bits+8);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// encode the remaining non-zero coefficient locations by run-length compressing the intervening 0s
|
|
|
|
k=0;
|
|
int skip = es->nz_desc[nz_mode].num_8byte_chunks * 64;
|
|
for (j=0; j < bi->num_bands; ++j) {
|
|
int start = bi->first_coeff_for_band[j];
|
|
int count = bi->num_coeffs_for_band[j];
|
|
|
|
rrAssert(bi->num_coeffs <= 1024);
|
|
for (i=0; i < count; ++i) {
|
|
// skip the coefficients we encoded in the previous loop above
|
|
if (!es->current_block_short && nz_mode != 3 && start+i < skip)
|
|
continue;
|
|
|
|
int bits = bd[c].quantized_coeff_encode[start+i];
|
|
if (bits == 0)
|
|
++k;
|
|
else {
|
|
// new zero-run-length encoding
|
|
int zr = k;
|
|
|
|
while (zr >= MAX_RUNLEN) {
|
|
runlength_data[num_runlength_data++] = (U8) MAX_RUNLEN;
|
|
zr -= MAX_RUNLEN;
|
|
}
|
|
if (zr >= COARSE_RUNLEN_THRESHOLD) {
|
|
int coarse = zr & ~3;
|
|
runlength_data[num_runlength_data++] = (U8) coarse;
|
|
zr -= coarse;
|
|
runlength_bottom[num_runlength_bottom++] = (U8) zr;
|
|
} else
|
|
runlength_data[num_runlength_data++] = (U8) zr;
|
|
|
|
if (abs(bits) > 7) {
|
|
nonzero_coefficients[num_nonzero_coefficients++] = 0;
|
|
big_coefficients [num_big_coefficients++ ] = (S8) bits;
|
|
} else {
|
|
nonzero_coefficients[num_nonzero_coefficients++] = (U8) (bits+8);
|
|
}
|
|
k = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
// end of channel's data
|
|
if (c != out_channels-1)
|
|
runlength_data[num_runlength_data++] = (U8) END_OF_ZERORUN; // end of run marker
|
|
// don't bother outputting the terminating end of zerorun marker, we can infer that from length we have to use with OodleData huffman
|
|
}
|
|
|
|
U8 coeff_pairs[1024/2 * 2];
|
|
if ((num_nonzero_coefficients & 1) != 0)
|
|
nonzero_coefficients[num_nonzero_coefficients++] = 7; // cheapest symbol to encode
|
|
|
|
int num_coeff_pairs = (num_nonzero_coefficients+1)/2;
|
|
for (int i=0; i < num_coeff_pairs; ++i)
|
|
coeff_pairs[i] = nonzero_coefficients[i*2+0] | (nonzero_coefficients[i*2+1]<<4);
|
|
|
|
int nz_selector = es->current_block_short ? 4 : nz_mode;
|
|
|
|
huff3_encoder he;
|
|
encode_vbstream_init(&he.stream[0], &es->buffer[0] , 1000);
|
|
encode_vbstream_init(&he.stream[1], &es->buffer[0]+1024*1, 1000);
|
|
encode_vbstream_init(&he.stream[2], &es->buffer[0]+1024*2, 3000);
|
|
he.total_bits = 0;
|
|
|
|
size_t prev_bitcount;
|
|
prev_bitcount = he.total_bits;
|
|
|
|
// band exponents
|
|
PROF_BEGIN(huffman);
|
|
encode_huff_array(&he, &rada_band_exponent_correct_huff, band_exponents, num_band_exponents, __FILE__, __LINE__, &error);
|
|
if (stereo_predict_exponent)
|
|
encode_huff_array(&he, &rada_band_exponent_stereo_correct_huff, &band_exponents[32], num_band_exponents, __FILE__, __LINE__, &error);
|
|
PROF_END(huffman);
|
|
|
|
es->stats.bit_allocation[S_band_exponent] += (he.total_bits - prev_bitcount);
|
|
|
|
// band mantissas -- pack them directly into varbits-array decoder
|
|
U8 m_sizes[64];
|
|
U32 m_values[64];
|
|
int num_mantissas=0;
|
|
|
|
prev_bitcount = he.total_bits;
|
|
for (c=0; c < out_channels; ++c) {
|
|
for (int j=0; j < bi->num_bands; ++j) {
|
|
m_values[num_mantissas] = bd[c].band_mantissa[j];
|
|
m_sizes [num_mantissas] = (U8) bd[c].band_mantissa_bitcount[j];
|
|
++num_mantissas;
|
|
}
|
|
}
|
|
|
|
for (int j=0; j < num_mantissas; ++j)
|
|
encode_vbstream_bits(&he.stream[2], m_values[j], m_sizes[j]);
|
|
|
|
encode_recompute_stats(&he);
|
|
es->stats.bit_allocation[S_band_mantissa] += (he.total_bits - prev_bitcount);
|
|
|
|
PROF_BEGIN(huffman);
|
|
if (!es->current_block_short) {
|
|
// subband values
|
|
prev_bitcount = he.total_bits;
|
|
encode_huff_array(&he, &rada_subband_value_huff , subband_value , num_subband_values0 , __FILE__, __LINE__, &error);
|
|
if (!disable_subband_predict)
|
|
encode_huff_array(&he, &rada_subband_value_last_in_band_correct_huff, subband_correction , num_subband_corrections , __FILE__, __LINE__, &error);
|
|
if (stereo_predict_subbands)
|
|
encode_huff_array(&he, &rada_subband_value_stereo_correct_huff , subband_stereo_correct, num_subband_stereo_correct, __FILE__, __LINE__, &error);
|
|
es->stats.bit_allocation[S_subband] += (he.total_bits - prev_bitcount);
|
|
}
|
|
PROF_END(huffman);
|
|
|
|
if (!es->current_block_short && nz_mode != 3) {
|
|
U8 nonzero_flag_huff[1024*2/8*16];
|
|
radaudio_nonzero_blockmode_descriptor *nz_desc = &es->nz_desc[nz_mode];
|
|
|
|
// encode the non-zero flag bits
|
|
// reorder the bits into an array where each huffman encoding is contiguous
|
|
// also invert any chunks needing inversion
|
|
int p=0, s=out_channels-1;
|
|
for (c=0; c < out_channels; ++c) {
|
|
p = (1024/8/8) * c;
|
|
for (int i=0; i < nz_desc->num_8byte_chunks; ++i) {
|
|
int off = nz_desc->source_pos[s+c][i];
|
|
U64 xor = (U64)0 - nz_desc->invert_chunk[i]; // ~0 if invert_chunk, else 0 (assuming invert_chunk is either 0 or 1)
|
|
RR_PUT64_NATIVE(&nonzero_flag_huff[8*off], xor ^ RR_GET64_NATIVE(&nonzero_flagbits[8*p]));
|
|
p++;
|
|
}
|
|
}
|
|
|
|
// now output the huffman arrays
|
|
prev_bitcount = he.total_bits;
|
|
p=0;
|
|
PROF_BEGIN(huffman);
|
|
for (int i=0; i < NUM_NZ_HUFF; ++i) {
|
|
int nc = nz_desc->num_chunks_per_huff[i];
|
|
if (nc != 0) {
|
|
encode_huff_array(&he, rada_nonzero_bitflags_huff[i], &nonzero_flag_huff[p], nc*8*out_channels, __FILE__, __LINE__, &error);
|
|
p += nc * 8 * out_channels;
|
|
}
|
|
}
|
|
PROF_END(huffman);
|
|
es->stats.bit_allocation[S_coeff_location] += (he.total_bits - prev_bitcount);
|
|
}
|
|
|
|
PROF_BEGIN(huffman);
|
|
prev_bitcount = he.total_bits;
|
|
encode_huff_array(&he, &rada_zero_runlength_huff, runlength_data, num_runlength_data, __FILE__, __LINE__, &error);
|
|
es->stats.bit_allocation[S_coeff_location] += (he.total_bits - prev_bitcount);
|
|
|
|
// coefficients -- need to have decoded the runlength data to know how many coefficients
|
|
prev_bitcount = he.total_bits;
|
|
int tp = es->nz_correlated_huffman_selectors[HS_COEFF_PAIR][nz_selector];
|
|
encode_huff_array(&he, rada_nonzero_coefficient_pair_huff[tp], coeff_pairs, num_coeff_pairs, __FILE__, __LINE__, &error);
|
|
es->stats.bit_allocation[S_coeff_value] += (he.total_bits - prev_bitcount);
|
|
PROF_END(huffman);
|
|
|
|
PROF_BEGIN(varbits);
|
|
// encode bottom runlength bits to vbstream 2
|
|
prev_bitcount = he.total_bits;
|
|
for (int j=0; j < num_runlength_bottom; ++j)
|
|
encode_vbstream_bits(&he.stream[2], runlength_bottom[j], 2);
|
|
es->stats.bit_allocation[S_coeff_location] += (he.total_bits - prev_bitcount);
|
|
PROF_END(varbits);
|
|
|
|
PROF_BEGIN(huffman);
|
|
// huffman encode big coefficients
|
|
prev_bitcount = he.total_bits;
|
|
int tb = es->nz_correlated_huffman_selectors[HS_COEFF_BIG][nz_selector];
|
|
encode_huff_array(&he, rada_nonzero_coefficient_big_huff[tb], (U8*) big_coefficients, num_big_coefficients, __FILE__, __LINE__, &error);
|
|
es->stats.bit_allocation[S_coeff_value_large] += (he.total_bits - prev_bitcount);
|
|
PROF_END(huffman);
|
|
|
|
prev_bitcount = he.total_bits;
|
|
for (int i=0; i < 3; ++i)
|
|
encode_vbstream_flush(&he.stream[i]);
|
|
encode_recompute_stats(&he);
|
|
es->stats.bit_allocation[S_padding] += (he.total_bits - prev_bitcount);
|
|
|
|
// assemble the final block
|
|
radaudio_block_header_unpacked bh = { 0 };
|
|
|
|
// advance to the new center, which means advance by half of the current and half of the next
|
|
int completed_samples = es->info[es->current_block_short]->num_coeffs/2
|
|
+ es->info[es->next_block_short ]->num_coeffs/2;
|
|
|
|
*poffset = offset + completed_samples;
|
|
|
|
if (error)
|
|
return RADAUDIOENC_INTERNAL_ERROR;
|
|
|
|
// this is the condition for being done:
|
|
// if (offset >= input_len + RADAUDIO_SHORT_BLOCK_LEN)
|
|
// so the last block is the one that puts us in that state
|
|
// Note this is the condition because we already update offset to point to the middle of the next block;
|
|
// the next block minus RADAUDIO_SHORT_BLOCK_LEN is the number of overlapped samples that block will make if it or this block is short, so every sample before that is complete.
|
|
// that's not quite right if the this block and the next block are long, but we never make long blocks at end of file, so
|
|
// even if one long block ends right on the input, the next block will be short
|
|
|
|
bh.final_block = (*poffset >= input_len + RADAUDIO_SHORT_BLOCK_LEN);
|
|
int discard_samples = 0;
|
|
if (bh.final_block)
|
|
discard_samples = (int) (*poffset-RADAUDIO_SHORT_BLOCK_LEN - input_len); // number of fully decoded samples we didn't use
|
|
|
|
bh.this_block_short = es->current_block_short;
|
|
bh.next_block_short = es->next_block_short;
|
|
bh.num_channels_encoded = out_channels;
|
|
bh.final_samples_discard = discard_samples;
|
|
|
|
bh.vbstream0_length = he.stream[0].length;
|
|
bh.num_runlength_array = num_runlength_data;
|
|
bh.nonzero_bitarray_mode = nz_mode;
|
|
bh.disable_final_subband_predict = disable_subband_predict;
|
|
bh.predict_stereo_subband = stereo_predict_subbands;
|
|
bh.predict_stereo_exponent = stereo_predict_exponent;
|
|
bh.mid_side_encoded = do_mid_side;
|
|
bh.mid_side_bands = do_mid_side_bands;
|
|
|
|
size_t byte_offset = 0;
|
|
|
|
if (encode_buffer_max < 10)
|
|
return RADAUDIOENC_INSUFFICIENT_BUFFER;
|
|
|
|
size_t midside_len = do_mid_side_bands ? (24/MACRO_BAND_SIZE+7)/8 : 0;
|
|
|
|
size_t non_header_length = he.stream[0].length + he.stream[1].length + he.stream[2].length + midside_len;
|
|
bh.block_bytes = (U32) non_header_length;
|
|
|
|
int header_size = radaudio_encode_block_header(encode_buffer, &es->biases, &bh);
|
|
rrAssert(header_size >= 0);
|
|
if (header_size < 0)
|
|
return RADAUDIOENC_INTERNAL_ERROR;
|
|
|
|
byte_offset = header_size;
|
|
es->stats.bit_allocation[S_header] += byte_offset * 8;
|
|
rrAssert(encode_buffer[0] != 0);
|
|
|
|
#ifdef _DEBUG
|
|
size_t total_size = byte_offset + non_header_length;
|
|
#endif
|
|
|
|
if (byte_offset + midside_len > encode_buffer_max)
|
|
return RADAUDIOENC_INSUFFICIENT_BUFFER;
|
|
for (int i=0; i < (int)midside_len; ++i)
|
|
encode_buffer[byte_offset++] = (U8) ((mid_side_bands >> (8*i)) & 255);
|
|
|
|
if (byte_offset + he.stream[0].length > encode_buffer_max)
|
|
return RADAUDIOENC_INSUFFICIENT_BUFFER;
|
|
for (int i=0; i < he.stream[0].length; ++i)
|
|
encode_buffer[byte_offset++] = he.stream[0].bitstream[i];
|
|
|
|
if (byte_offset + he.stream[2].length > encode_buffer_max)
|
|
return RADAUDIOENC_INSUFFICIENT_BUFFER;
|
|
for (int i=0; i < he.stream[2].length; ++i)
|
|
encode_buffer[byte_offset++] = he.stream[2].bitstream[i];
|
|
|
|
if (byte_offset + he.stream[1].length > encode_buffer_max)
|
|
return RADAUDIOENC_INSUFFICIENT_BUFFER;
|
|
for (int i=he.stream[1].length-1; i >= 0; --i)
|
|
encode_buffer[byte_offset++] = he.stream[1].bitstream[i];
|
|
|
|
if (he.stream[0].error || he.stream[1].error | he.stream[2].error)
|
|
return RADAUDIOENC_INTERNAL_ERROR;
|
|
|
|
rrAssert(encode_buffer[0] != 0);
|
|
|
|
es->prev_block_short = es->current_block_short;
|
|
es->current_block_short = es->next_block_short;
|
|
es->samples_fully_coded = (int) *poffset;
|
|
++es->block_number;
|
|
|
|
es->lastblock_vbstream0_length = bh.vbstream0_length;
|
|
es->lastblock_num_runlength_array = bh.num_runlength_array;
|
|
es->lastblock_block_bytes = bh.block_bytes;
|
|
|
|
rrAssert(byte_offset == total_size);
|
|
|
|
PROF_END(encode_all);
|
|
|
|
return (int) byte_offset;
|
|
}
|
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Transient analysis
|
|
//
|
|
|
|
#define TA_MAX(a,b) ((a) > (b) ? (a) : (b))
|
|
|
|
int transient_analysis(float* input, int N, int stride, float transient_threshold)
|
|
{
|
|
RAD_ALIGN(F32, w[512], 16);
|
|
|
|
rrAssert(N >= 40 && N <= 1024 && N%2 == 0);
|
|
int N2 = N/2;
|
|
|
|
// IIR highpass, combined with 2:1 decimation
|
|
float x1 = 0, x2 = 0;
|
|
for (int i = 0; i < N2; i++)
|
|
{
|
|
float x0 = *input + x1 - 0.5f * x2;
|
|
float ya = 0.625f * (x0 + x2) - 1.25f * x1;
|
|
x2 = x1;
|
|
x1 = x0;
|
|
input += stride;
|
|
|
|
x0 = *input + x1 - 0.5f * x2;
|
|
float yb = 0.625f * (x0 + x2) - 1.25f * x1;
|
|
x2 = x1;
|
|
x1 = x0;
|
|
input += stride;
|
|
|
|
w[i] = ya*ya + yb*yb;
|
|
}
|
|
|
|
// clear boundary samples
|
|
for (int i = 0; i < 6; i++)
|
|
{
|
|
w[i] = 0;
|
|
}
|
|
|
|
// compute sum, do forward exponential moving average
|
|
float sum_sq = 0;
|
|
float ema = 0;
|
|
for (int i = 6; i < N2; i++)
|
|
{
|
|
sum_sq += w[i];
|
|
const float k = 1.f / 16.f;
|
|
ema = ema + k * (w[i] - ema);
|
|
w[i] = ema;
|
|
}
|
|
|
|
// do backwards exponential moving average, find maximum
|
|
ema = 0;
|
|
float max_sq = 0;
|
|
for (int i = N2-1; i >= 0; i--)
|
|
{
|
|
const float k = 1.f / 8.f;
|
|
ema = ema + k * (w[i] - ema);
|
|
w[i] = ema;
|
|
max_sq = TA_MAX(ema, max_sq);
|
|
}
|
|
|
|
// set up for final pass
|
|
const float eps = 1e-15f;
|
|
float geomean = sqrtf(sum_sq * (max_sq * N2 * 0.5f));
|
|
float scale = N2 / (geomean + eps);
|
|
|
|
// subsampled harmonic mean
|
|
const float min_threshold = 1.5f / 64.f;
|
|
float rcp_sum = 0;
|
|
for (int i = 12; i < N2-5; i += 4)
|
|
{
|
|
rcp_sum += 1.f / TA_MAX((w[i] + eps) * scale, min_threshold);
|
|
}
|
|
const int num_summed = (N2 - 17 + 3) >> 2;
|
|
|
|
return rcp_sum > (transient_threshold / 64.f) * num_summed;
|
|
}
|
|
|
|
// BAND_EXPONENT_BASE0/1
//    initial guess 1.43
// This constant affects how many coefficient pulses are assigned per band by
// weighting each band with this number raised to the computed band exponent
// (e.g. if this number were 2.0, then it would weight by the band energy, and 1.0 weights evenly)
//
// Layout is [4][2][10]. NOTE(review): presumably [sample-rate mode][0/1 variant][quality 0..9],
// matching the other tuning tables in this file -- confirm where the table is read.
static float band_exponent_base[4][2][10] =
{  // 1.1 instead of 1.2 has 1% better average ODG, but 8% higher standard deviation
   {
      { 1.2f, 1.2f, 1.2f, 1.2f, 1.2f, 1.18f, 1.14f, 1.10f, 1.08f, 0.96f },
      { 1.6f, 1.6f, 1.6f, 1.6f, 1.6f, 1.60f, 1.50f, 1.40f, 1.30f, 1.20f },
   },
   {
      { 1.2f, 1.2f, 1.2f, 1.2f, 1.2f, 1.15f, 1.13f, 1.11f, 1.08f, 1.05f },
      { 1.6f, 1.6f, 1.6f, 1.6f, 1.6f, 1.60f, 1.50f, 1.40f, 1.30f, 1.20f },
   },
   {
      { 1.2f, 1.2f, 1.2f, 1.2f, 1.2f, 1.20f, 1.16f, 1.08f, 1.00f, 0.98f },
      { 1.6f, 1.6f, 1.6f, 1.6f, 1.6f, 1.60f, 1.50f, 1.40f, 1.30f, 1.20f },
   },
   {
      { 1.2f, 1.2f, 1.2f, 1.2f, 1.2f, 1.30f, 1.20f, 1.06f, 1.04f, 1.02f },
      { 1.6f, 1.6f, 1.6f, 1.6f, 1.6f, 1.60f, 1.50f, 1.40f, 1.30f, 1.20f },
   },
};
|
|
|
|
// BAND_COUNT_EXPONENT0/1
// This constant affects how many coefficient pulses are assigned per band by
// weighting each band with the number of coefficients in the band raised to this power
// (if this number is 1.0, then they're weighted evenly by # coefficients_sum)
//    initial guess ~0.85.
//    std.dev appears to be minimized closer to 1.0, depending on the mode
//
// Layout is [4][2][10]. NOTE(review): presumably [sample-rate mode][0/1 variant][quality 0..9]
// -- confirm where the table is read.
static float band_count_exponent[4][2][10] =
{
   {
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.96f, 0.98f, 0.98f, 0.96f, 0.95f },
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f },
   },
   {
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.88f, 0.90f, 0.94f, 0.96f, 0.98f },
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f },
   },
   {
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.98f, 0.98f, 0.98f, 0.98f, 0.98f },
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f },
   },
   {
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.96f, 0.96f, 0.96f, 0.95f, 0.95f },
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f, 0.86f },
   },
};
|
|
|
|
// weight to assign to lowest bands, lerps towards 1.0 at highest bands
// QUALITY_WEIGHT_LOW0/1
//
// Layout is [4][2][10]; the column header below numbers the last index 0..9.
// NOTE(review): presumably [sample-rate mode][0/1 variant][quality] -- confirm
// where the table is read.
static float quality_weight_low[4][2][10] =
{
   //    0      1      2      3      4      5      6      7      8      9
   {
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 7.00f, 7.00f, 5.50f, 1.75f, 1.50f },
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 4.00f, 4.00f, 4.00f, 4.00f, 4.00f },
   },
   {  // 6.5f
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 5.25f, 4.50f, 3.25f, 2.25f, 2.00f },
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 4.00f, 4.00f, 4.00f, 4.00f, 4.00f },
   },
   {
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 5.50f, 4.25f, 3.50f, 3.50f, 3.50f },
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 4.00f, 4.00f, 4.00f, 4.00f, 4.00f },
   },
   {
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 2.50f, 2.50f, 2.00f, 1.50f, 1.50f },
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 4.00f, 4.00f, 4.00f, 4.00f, 4.00f },
   },
};
|
|
|
|
// SHORT_BLOCK_PULSES - initial guess 0.2
//
// Layout is [4][10]; the column header below numbers the last index 0..9.
// NOTE(review): presumably [sample-rate mode][quality] -- confirm where the table is read.
static float short_block_pulses[4][10] =
{
   //   0      1      2      3      4      5      6      7      8      9
   { 0.11f, 0.11f, 0.11f, 0.11f, 0.11f, 0.11f, 0.11f, 0.13f, 0.15f, 0.080f },
   { 0.12f, 0.12f, 0.12f, 0.12f, 0.12f, 0.12f, 0.13f, 0.14f, 0.18f, 0.075f },
   { 0.09f, 0.09f, 0.09f, 0.09f, 0.09f, 0.09f, 0.10f, 0.13f, 0.17f, 0.045f },
   { 0.08f, 0.08f, 0.08f, 0.08f, 0.08f, 0.08f, 0.09f, 0.09f, 0.09f, 0.060f },
};
|
|
|
|
// EXTRA_THRESHOLD_BIG0/1
//
// Layout is [4][2][10]. NOTE(review): presumably [sample-rate mode][0/1 variant][quality 0..9]
// -- confirm where the table is read.
static float extra_threshold_big[4][2][10] =
{
   {
      { 16.0,16.0,16.0,16.0,16.0,   16.0,16.0,16.0,16.0,16.0, },
      { 16.0,16.0,16.0,16.0,16.0,   16.0,16.0,16.0,16.0,16.0, },
   },
   {
      { 16.0,16.0,16.0,16.0,16.0,   16.0,16.0,16.0,16.0,16.0, },
      { 16.0,16.0,16.0,16.0,16.0,   16.0,16.0,16.0,16.0,16.0, },
   },
   {
      { 16.0,16.0,16.0,16.0,16.0,   16.0, 8.0, 6.0, 6.0, 6.0, },
      { 16.0,16.0,16.0,16.0,16.0,   16.0,16.0,16.0,16.0,16.0, },
   },
   {
      { 16.0,16.0,16.0,16.0,16.0,   16.0,16.0,16.0,16.0,16.0, },
      { 16.0,16.0,16.0,16.0,16.0,   12.0,16.0,16.0,16.0,16.0, },
   },
};
|
|
|
|
// EXTRA_THRESHOLD_SMALL0/1
//
// Layout is [4][2][10]. NOTE(review): presumably [sample-rate mode][0/1 variant][quality 0..9]
// -- confirm where the table is read.
static float extra_threshold_small[4][2][10] =
{
   {
      { 8.0, 8.0, 8.0, 8.0, 8.0,   8.0, 8.0, 8.0, 8.0, 8.0, },
      { 2.0, 2.0, 2.0, 2.0, 2.0,   8.0, 8.0, 8.0, 8.0, 8.0, },
   },
   {
      { 8.0, 8.0, 8.0, 8.0, 8.0,   8.0, 8.0, 8.0, 8.0, 8.0, },
      { 2.0, 2.0, 2.0, 2.0, 2.0,   8.0, 8.0, 8.0, 8.0, 8.0, },
   },
   {
      { 8.0, 8.0, 8.0, 8.0, 8.0,   8.0, 4.0, 3.0, 1.5, 2.0, },
      { 2.0, 2.0, 2.0, 2.0, 2.0,   6.0, 8.0, 8.0, 8.0, 8.0, },
   },
   {
      { 8.0, 8.0, 8.0, 8.0, 8.0,   8.0, 8.0, 8.0, 8.0, 8.0, },
      { 2.0, 2.0, 2.0, 2.0, 2.0,   8.0, 8.0, 8.0, 8.0, 8.0, },
   },
};
|
|
|
|
#define MANTISSA_SCALE  1.00f // used while tuning, env=MANTISSA_SCALE

// Layout is [11][2][6]. The development-only environment loader in this file
// reads it as [quality mode][0 = long, 1 = short][band/4], multiplies the
// value by 8 and stores it as an S8 header mantissa parameter.
static float band_mantissa_base[11][2][6] =
{
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 6.00f,6.00f,6.00f,6.00f,6.00f,5.50f }, { 3.50f,3.50f,3.50f,3.50f,3.25f,3.00f }, },
   { { 6.50f,6.50f,6.50f,6.50f,6.00f,5.50f }, { 4.00f,4.00f,4.00f,4.00f,3.75f,3.25f }, },
   { { 6.50f,6.50f,6.50f,6.50f,6.50f,5.50f }, { 4.00f,4.00f,4.00f,4.00f,3.75f,3.25f }, },
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
};
|
|
|
|
// Layout is [11][2][6]. The development-only environment loader in this file
// reads it as [quality mode][0 = long, 1 = short][band/4], multiplies the
// value by 256 and stores it as an S8 header mantissa parameter.
static float band_mantissa_decay[11][2][6] =
{
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.19f,0.18f,0.17f,0.16f,0.15f,0.14f }, { 0.12f,0.11f,0.10f,0.09f,0.09f,0.08f }, },
   { { 0.21f,0.20f,0.19f,0.18f,0.16f,0.15f }, { 0.13f,0.12f,0.10f,0.11f,0.10f,0.09f }, },
   { { 0.21f,0.20f,0.19f,0.18f,0.16f,0.15f }, { 0.13f,0.12f,0.10f,0.11f,0.10f,0.09f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
};
|
|
|
|
// Same [11][2][6] shape as band_mantissa_base. The visible differences are in
// the first element of rows: every [q][1][0] entry is raised, and [6][0][0],
// [9][0][0] and [10][0][0] are raised as well.
// NOTE(review): presumably indexed [quality mode][0 = long, 1 = short][band/4]
// like band_mantissa_base -- the code that reads this table is outside this chunk.
static float band_mantissa_base_improved_short_dc[11][2][6] =
{
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 5.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 5.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 5.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 5.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 5.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 4.50f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 5.75f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 5.00f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
   { { 6.00f,6.00f,6.00f,6.00f,6.00f,5.50f }, { 5.50f,3.50f,3.50f,3.50f,3.25f,3.00f }, },
   { { 6.50f,6.50f,6.50f,6.50f,6.00f,5.50f }, { 6.00f,4.00f,4.00f,4.00f,3.75f,3.25f }, },
   { { 7.00f,6.50f,6.50f,6.50f,6.50f,5.50f }, { 6.50f,4.00f,4.00f,4.00f,3.75f,3.25f }, },
   { { 6.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 7.50f,3.25f,3.25f,3.25f,2.95f,2.66f }, },
};
|
|
|
|
// Same [11][2][6] shape as band_mantissa_decay. The visible differences are
// [9][0][0] and the first element of the [q][1] rows for q = 5..9.
// NOTE(review): presumably indexed [quality mode][0 = long, 1 = short][band/4]
// like band_mantissa_decay -- the code that reads this table is outside this chunk.
static float band_mantissa_decay_improved_short_dc[11][2][6] =
{
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.13f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.15f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
   { { 0.19f,0.18f,0.17f,0.16f,0.15f,0.14f }, { 0.17f,0.11f,0.10f,0.09f,0.09f,0.08f }, },
   { { 0.21f,0.20f,0.19f,0.18f,0.16f,0.15f }, { 0.19f,0.12f,0.10f,0.11f,0.10f,0.09f }, },
   { { 0.22f,0.20f,0.19f,0.18f,0.16f,0.15f }, { 0.21f,0.12f,0.10f,0.11f,0.10f,0.09f }, },
   { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, },
};
|
|
|
|
|
|
#define SUBBAND_PULSES_SCALE   1.0  // SUBBAND_SCALE

// Layout is [4][10][24]. The development-only environment loader in this file
// reads it as [sample-rate mode][quality mode][band] and multiplies the value
// by the band's subband count to get the predicted subband sum.
// Rows list fewer than 24 values (21 for the first two rate modes, 23 for the
// last two); the unlisted trailing elements are zero-initialized.
static float subband_pulses_for_band[4][10][24] =
{
   {
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f }, // 6
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f }, // 7
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 19.0f,18.4f,17.6f,17.0f,10.0f,9.00f,8.00f,6.00f,5.00f }, // 8
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 23.8f,23.0f,22.0f,21.3f,12.5f,11.3f,10.0f,7.50f,6.25f }, // 9
   },
   {
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f }, // 6
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f }, // 7
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 19.0f,18.4f,17.6f,17.0f,10.0f,9.00f,8.00f,6.00f,5.00f }, // 8
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 23.8f,23.0f,22.0f,21.3f,12.5f,11.3f,10.0f,7.50f,6.25f }, // 9
   },
   {
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f,2.50f,2.50f }, // 6
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f,3.50f,3.50f }, // 7
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 19.0f,18.4f,17.6f,17.0f,10.0f,9.00f,8.00f,6.00f,5.00f,3.50f,3.50f }, // 8
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 23.8f,23.0f,22.0f,21.3f,12.5f,11.3f,10.0f,7.50f,6.25f,4.50f,4.50f }, // 9
   },
   {
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f },
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f,2.50f,2.50f }, // 6
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f,3.50f,3.50f }, // 7
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 19.0f,18.4f,17.6f,17.0f,10.0f,9.00f,8.00f,6.00f,5.00f,3.50f,3.50f }, // 8
      { 0,0,0,0, 0,0,0,0, 0,0,0,0, 23.8f,23.0f,22.0f,21.3f,12.5f,11.3f,10.0f,7.50f,6.25f,4.50f,4.50f }, // 9
   }
};
|
|
|
|
// Ten entries, zero for indices 0..4.
// NOTE(review): presumably indexed by quality mode 0..9 like the other tuning
// tables -- the code that reads this table is outside this chunk.
static U16 header_size_bias[10] =
{
   0,0,0,0,0, 90,140,250,500,1000
};
|
|
|
|
// Layout is [4][10]; the first five columns are identical in all four rows.
// NOTE(review): presumably [sample-rate mode][quality mode] -- the code that
// reads this table is outside this chunk.
static float pulse_quality_table[4][10] =
{
   { 0.03f,0.05f,0.08f,0.1500f,0.1650f,  0.1908f, 0.2131f, 0.2559f, 0.3383f, 0.5489f },
   { 0.03f,0.05f,0.08f,0.1500f,0.1650f,  0.1910f, 0.2124f, 0.2541f, 0.3405f, 0.5622f },
   { 0.03f,0.05f,0.08f,0.1500f,0.1650f,  0.1935f, 0.2075f, 0.2415f, 0.3104f, 0.6206f },
   { 0.03f,0.05f,0.08f,0.1500f,0.1650f,  0.1998f, 0.2228f, 0.2696f, 0.3841f, 0.7721f },
};
|
|
|
|
#ifdef RADAUDIO_DEVELOPMENT
// Development-only tuning hook: overrides encoder heuristics from environment
// variables.
//
//   es   encoder state; es->heur fields are overwritten for every variable
//        that is set, and es->quality_mode / es->samprate_mode select the
//        table rows used below
//   h    unpacked stream header; mantissa_param and subband_predicted_sum are
//        rewritten for every band
//
// Values are parsed with atoi/strtof and are not validated.
//
// NOTE(review): the two per-band loops run unconditionally, so the header
// fields are recomputed from band_mantissa_base/band_mantissa_decay and
// subband_pulses_for_band even when no variable is set -- confirm this is the
// intended baseline for development builds.
static void radaudio_load_heuristics_from_environment_variables(radaudio_encoder_state *es, radaudio_stream_header_unpacked *h)
{
   char *env;

   int qmode = es->quality_mode;
   int rate_mode = es->samprate_mode;
   RR_UNUSED_VARIABLE(qmode);
   RR_UNUSED_VARIABLE(rate_mode);

   // start2 and end are stored as absolute values but tuned as offsets from
   // threshold and start, so capture the current offsets first
   int side_exp_spacing  = es->heur.side_exp_start2 - es->heur.side_exp_threshold;
   int side_exp_deadzone = es->heur.side_exp_end    - es->heur.side_exp_start;

   env = getenv("SIDE_EXP_START"    ); if (env) es->heur.side_exp_start     = atoi(env);
   env = getenv("SIDE_EXP_THRESHOLD"); if (env) es->heur.side_exp_threshold = atoi(env);
   env = getenv("SIDE_EXP_DEADZONE" ); if (env) side_exp_deadzone           = atoi(env);
   env = getenv("SIDE_EXP_SPACING"  ); if (env) side_exp_spacing            = atoi(env);

   // rebuild the absolute values from the (possibly overridden) offsets
   es->heur.side_exp_start2 = es->heur.side_exp_threshold + side_exp_spacing;
   es->heur.side_exp_end    = es->heur.side_exp_start     + side_exp_deadzone;

   env = getenv("MID_SIDE_TINY"         ); if (env) es->heur.mid_side_tiny          = atoi(env);
   env = getenv("MID_SIDE_OFFSET"       ); if (env) es->heur.mid_side_offset        = atoi(env);
   env = getenv("MID_SIDE_THRESHOLD"    ); if (env) es->heur.mid_side_threshold     = atoi(env);
   env = getenv("MID_SIDE_MAX_BAD_BANDS"); if (env) es->heur.mid_side_max_bad_bands = atoi(env);

   env = getenv("EXPECTATION_SCALE"     ); if (env) es->heur.expectation_scale      = strtof(env,NULL);
   env = getenv("EXPECTATION_BASE"      ); if (env) es->heur.expectation_base       = strtof(env,NULL);
   env = getenv("SHORT_OVERLAP_SCALE1"  ); if (env) es->heur.short_overlap_scale1   = strtof(env,NULL);
   env = getenv("SHORT_OVERLAP_SCALE2"  ); if (env) es->heur.short_overlap_scale2   = strtof(env,NULL);

   env = getenv("BAND_EXPONENT_BASE0"   ); if (env) es->heur.band_exponent_base     [0] = strtof(env,NULL);
   env = getenv("BAND_EXPONENT_BASE1"   ); if (env) es->heur.band_exponent_base     [1] = strtof(env,NULL);
   env = getenv("BAND_COUNT_EXPONENT0"  ); if (env) es->heur.band_count_exponent    [0] = strtof(env,NULL);
   env = getenv("BAND_COUNT_EXPONENT1"  ); if (env) es->heur.band_count_exponent    [1] = strtof(env,NULL);
   env = getenv("QUALITY_WEIGHT_LOW0"   ); if (env) es->heur.quality_weight_low     [0] = strtof(env,NULL);
   env = getenv("QUALITY_WEIGHT_LOW1"   ); if (env) es->heur.quality_weight_low     [1] = strtof(env,NULL);
   env = getenv("EXTRA_THRESHOLD_BIG0"  ); if (env) es->heur.large_boost_median_test[0] = strtof(env,NULL);
   env = getenv("EXTRA_THRESHOLD_BIG1"  ); if (env) es->heur.large_boost_median_test[1] = strtof(env,NULL);
   env = getenv("EXTRA_THRESHOLD_SMALL0"); if (env) es->heur.small_boost_median_test[0] = strtof(env,NULL);
   env = getenv("EXTRA_THRESHOLD_SMALL1"); if (env) es->heur.small_boost_median_test[1] = strtof(env,NULL);

   env = getenv("SHORT_BLOCK_PULSES"    ); if (env) es->heur.short_block_pulse_scale = strtof(env,NULL);
  #if 0
   env = getenv("BAND_MASK_8"           ); if (env) es->heur.band_mask_8 [0] = strtof(env,NULL);
   env = getenv("BAND_MASK_4"           ); if (env) es->heur.band_mask_4 [0] = strtof(env,NULL);
   env = getenv("BAND_MASK_2"           ); if (env) es->heur.band_mask_2 [0] = strtof(env,NULL);
   env = getenv("BAND_MASK_1"           ); if (env) es->heur.band_mask_1 [0] = strtof(env,NULL);
  #endif

   // index 0 scales the long-block parameters, index 1 the short-block ones
   float mantissa_scale[2] = { 1.00,1.00f };
   env = getenv("MANTISSA_SCALE_LONG"   ); if (env) mantissa_scale[0] = strtof(env,NULL);
   env = getenv("MANTISSA_SCALE_SHORT"  ); if (env) mantissa_scale[1] = strtof(env,NULL);

   // optional override of one group of four bands (slot = band/4), long-block
   // values only; slot -1 never matches
   float band_mantissa_base0 = 0, band_mantissa_decay0 = 0;
   int band_mantissa_slot=-1;

   env = getenv("BAND_MANTISSA_BASE0_I" ); if (env) band_mantissa_base0  = strtof(env,NULL);
   env = getenv("BAND_MANTISSA_DECAY0_I"); if (env) band_mantissa_decay0 = strtof(env,NULL);
   env = getenv("BAND_MANTISSA_I"       ); if (env) band_mantissa_slot   = atoi(env);

   for (int i=0; i < radaudio_rateinfo[0][rate_mode].num_bands; ++i) {
      F32 base0  = band_mantissa_base [qmode][0][i/4];
      F32 base1  = band_mantissa_base [qmode][1][i/4];
      F32 decay0 = band_mantissa_decay[qmode][0][i/4];
      F32 decay1 = band_mantissa_decay[qmode][1][i/4];
      if (i/4 == band_mantissa_slot) {
         base0  = band_mantissa_base0;
         decay0 = band_mantissa_decay0;
      }
      base0  *= mantissa_scale[0];
      base1  *= mantissa_scale[1];
      decay0 *= mantissa_scale[0];
      decay1 *= mantissa_scale[1];

      // stored rounded to nearest: base in 1/8 units, decay in 1/256 units
      h->mantissa_param[0][i][0] = (S8) ( base0 *   8 + 0.5);
      h->mantissa_param[1][i][0] = (S8) ( base1 *   8 + 0.5);
      h->mantissa_param[0][i][1] = (S8) (decay0 * 256 + 0.5);
      h->mantissa_param[1][i][1] = (S8) (decay1 * 256 + 0.5);
   }

   // optional override of pulses-per-subband over a band range; the default
   // range of -1..-1 matches no band
   float pulse_value_lo = 5.0;
   float pulse_value_hi = 5.0;
   int   pulse_range_lo = -1;
   int   pulse_range_hi = -1;

   env = getenv("PULSE_VALUE_LO"        ); if (env) pulse_value_lo = strtof(env,NULL);
   pulse_value_hi = pulse_value_lo; // HI follows LO unless set explicitly
   env = getenv("PULSE_VALUE_HI"        ); if (env) pulse_value_hi = strtof(env,NULL);
   env = getenv("PULSE_RANGE_LO"        ); if (env) pulse_range_lo = atoi(env);
   env = getenv("PULSE_RANGE_HI"        ); if (env) pulse_range_hi = atoi(env);

   float subband_scale = 1.0f;
   env = getenv("SUBBAND_SCALE"         ); if (env) subband_scale = strtof(env,NULL);
   for (int i=0; i < radaudio_rateinfo[0][rate_mode].num_bands; ++i) {
      float pulses_per = subband_pulses_for_band[rate_mode][qmode][i] * subband_scale;
      if (i >= pulse_range_lo && i <= pulse_range_hi) {
         // NOTE(review): with PULSE_RANGE_LO == PULSE_RANGE_HI the remap is
         // given a zero-width input range -- check how linear_remap handles that
         pulses_per = linear_remap((float) i, (float) pulse_range_lo, (float) pulse_range_hi, (float) pulse_value_lo, (float) pulse_value_hi);
      }
      int predicted_subband_sum = (int) (pulses_per * radaudio_rateinfo[0][rate_mode].num_subbands_for_band[i] + 0.5f);
      h->subband_predicted_sum[i] = (U8) RR_MIN(255, predicted_subband_sum); // clamp to the U8 field
   }
}
#endif
|
|
|
|
// Non-zero-flag block mode descriptors, laid out as
// [sample rate][bitrate/quality 0..9][selector mode]. Entries for 0..4 are
// all zero.
//
// NOTE(review): each initializer is { count, { list }, { list } }. Presumably
// these are the chunk count, the per-chunk huffman table index and the
// per-chunk invert flag -- confirm against the struct definition, which is
// outside this chunk.
//
// NOTE(review): two entries have a leading count that differs from the number
// of values listed after it: 32 Khz bitrate:5 second entry (count 5, six
// values) and 24 Khz bitrate:8 third entry (count 7, five values). They are
// kept exactly as found; verify whether either is intentional.
static radaudio_nonzero_blockmode_descriptor nz_encode[RADAUDIO_NUM_RATES][10][NUM_SELECTOR_MODES] =
{
   // 48 Khz
   {
      { { 0 } }, // 0
      { { 0 } }, // 1
      { { 0 } }, // 2
      { { 0 } }, // 3
      { { 0 } }, // 4
      // bitrate:5
      {
         { 4, { 0,2,1,1 }, },
         { 6, { 0,2,1,1,1,1 }, },
         { 2, { 2,1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:6
      {
         { 6, { 0,2,1,1,1,1 }, },
         { 4, { 0,2,1,1 }, },
         { 2, { 2,1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:7
      {
         { 8, { 0,0,2,1,1,1,1,1, }, },
         { 6, { 0,2,1,1,1,1 }, },
         { 2, { 5,1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:8
      {
         { 12, { 0,0,0,0, 0,0,2,1, 2,1,2,1 }, { 0,0,0,0, 0,0,0,1, }, },
         { 11, { 0,0,1,1,1,1, 1,1,1,1,1 }, { 0,0,1,1, 1,1,1 }, },
         { 4, { 5,5,1,1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:9
      {
         { 12, { 4,4,4,4, 4,4,4,4, 4,1,4,0 }, { 0,0,0,0, 0,0,0,0, 0,1,0,0 } },
         { 12, { 4,4,5,5, 5,5,5,5, 5,1,5,1 }, { 0,0,0,0, 0,0,0,0, 0,0,0,1 } },
         { 6, { 5,5,5,5, 1,1 }, },
         { 0, },
         { 0, }
      },
   },

   // 44.1 Khz
   {
      { { 0 } }, // bitrate:0
      { { 0 } }, // 1
      { { 0 } }, // 2
      { { 0 } }, // 3
      { { 0 } }, // 4
      // bitrate:5
      {
         { 4, { 0,2,1,1 }, },
         { 2, { 2,1 }, },
         { 1, { 1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:6
      {
         { 5, { 0,2,1,1,1 }, },
         { 3, { 0,2,1, }, },
         { 2, { 5,1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:7
      {
         { 7, { 0,0,2,2,1,1,1 }, },
         { 6, { 0,2,1,1,1,1 }, },
         { 2, { 5,1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:8
      {
         { 12, { 4,0,0,0, 2,2,2,3, 2,2,2,2, }, },
         { 11, { 4,0,5,5, 5,1,1,1, 1,1,1 }, },
         { 5, { 5,5,1,1, 1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:9
      {
         { 12, { 3,3,4,4,4,4,4,0,4,0,5,4 }, },
         { 12, { 3,4,5,5,5,5,5,5,5,5,5,5 }, },
         { 6, { 5,5,5,5,5,1 }, },
         { 0, },
         { 0, }
      },
   },

   // 32 Khz
   {
      { { 0 } }, // bitrate:0
      { { 0 } }, // 1
      { { 0 } }, // 2
      { { 0 } }, // 3
      { { 0 } }, // 4
      // bitrate:5
      {
         { 6, { 0,2,2,1,1,1 }, },
         { 5, { 0,2,1,1,1,1 }, },
         { 2, { 5,1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:6
      {
         { 8, { 0,0,2,1,1,1,1,1 }, },
         { 6, { 0,2,1,1,1,1 }, },
         { 2, { 5,1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:7
      {
         { 12, { 0,0,2,2, 2,1,1,1, 1,1,1,1 }, },
         { 10, { 0,2,1,1, 1,1,1,1, 1,1 }, { 0,0,1 } },
         { 5, { 0,1,1,1, 1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:8
      {
         { 12, { 4,0,0,0, 0,0,0,2, 0,2,2,5 }, },
         { 12, { 0,0,1,1, 1,1,1,1, 1,1,1,1 }, { 0,0,1,1,1,1,1 } },
         { 3, { 5,5,1, }, },
         { 0, },
         { 0, }
      },
      // bitrate:9
      {
         { 12, { 3,3,4,4,4,4,4,4,4,4,4,4 }, },
         { 12, { 3,4,5,5,5,5,5,5,5,5,5,5 }, },
         { 6, { 4,5,5,5,5,5 }, },
         { 0, },
         { 0, }
      },
   },

   // 24 Khz
   {
      { { 0 } }, // bitrate:0
      { { 0 } }, // 1
      { { 0 } }, // 2
      { { 0 } }, // 3
      { { 0 } }, // 4
      // bitrate:5
      {
         { 9, { 0,0,1,1, 1,1,1,1, 1 }, { 0,0,1 } },
         { 6, { 0,2,1,1,1,1 }, },
         { 2, { 5,1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:6
      {
         { 12, { 0,0,1,1, 1,1,1,1, 1,1,1,1 }, { 0,0,1 } },
         { 12, { 0,0,2,2, 2,1,1,1, 1,1,1,1 }, },
         { 7, { 0,2,1,1, 1,1,1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:7
      {
         { 12, { 0,0,5,5, 5,5,1,1, 1,1,1,1 }, },
         { 12, { 0,0,0,0, 0,2,2,2, 2,1,2,2 }, { 0,0,0,0, 0,0,0,0, 0,1,0,0 } },
         { 5, { 5,5,1,1, 1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:8
      {
         { 12, { 4,4,4,4, 4,4,4,4, 0,4,4,5 }, },
         { 12, { 4,4,5,5, 5,5,5,5, 5,5,5,5 }, },
         { 7, { 5,5,1,1, 1 }, },
         { 0, },
         { 0, }
      },
      // bitrate:9
      {
         { 12, { 3,3,3,3,3,4,4,3,3,4,3,3 }, },
         { 12, { 3,3,5,5,5,5,5,5,5,5,5,5 }, },
         { 12, { 5,5,5,5,5,5,5,5,5,5,5,5 }, },
         { 0, },
         { 0, }
      },
   },
};
|
|
|
|
// Selector values copied into the HS_COEFF_PAIR row of an encoder's
// nz_correlated_huffman_selectors by set_nz_desc().
// Indexed as [ratemode][quality_mode][selector mode].
// set_nz_desc() raises any quality below 5 to 5 before indexing, so the
// quality 0..4 rows are not reached through that path.
// Each row spells out five values; if NUM_SELECTOR_MODES is larger, the
// remaining entries are implicitly zero.
static U8 nz_mode_correlated_selectors_pair[4][10][NUM_SELECTOR_MODES] =
{
   {  // ratemode 0
      { 0,0,2,2,0 },   // quality 0
      { 0,0,2,2,0 },   // quality 1
      { 0,0,2,2,0 },   // quality 2
      { 0,0,2,2,0 },   // quality 3
      { 0,0,2,2,0 },   // quality 4
      { 0,0,2,2,0 },   // quality 5
      { 0,2,1,2,0 },   // quality 6
      { 3,2,1,1,3 },   // quality 7
      { 1,1,1,1,1 },   // quality 8
      { 1,1,1,3,1 },   // quality 9
   },
   {  // ratemode 1
      { 0,2,2,2,0 },   // quality 0
      { 0,2,2,2,0 },   // quality 1
      { 0,2,2,2,0 },   // quality 2
      { 0,2,2,2,0 },   // quality 3
      { 0,2,2,2,0 },   // quality 4
      { 0,2,2,2,0 },   // quality 5
      { 0,2,2,1,0 },   // quality 6
      { 3,2,1,1,3 },   // quality 7
      { 1,1,1,1,1 },   // quality 8
      { 1,1,1,3,1 },   // quality 9
   },
   {  // ratemode 2
      { 0,2,1,1,0 },   // quality 0
      { 0,2,1,1,0 },   // quality 1
      { 0,2,1,1,0 },   // quality 2
      { 0,2,1,1,0 },   // quality 3
      { 0,2,1,1,0 },   // quality 4
      { 0,2,1,1,0 },   // quality 5
      { 0,2,1,1,0 },   // quality 6
      { 3,2,1,1,3 },   // quality 7
      { 1,1,1,1,1 },   // quality 8
      { 1,1,1,3,1 },   // quality 9
   },
   {  // ratemode 3
      { 3,1,1,1,3 },   // quality 0
      { 3,1,1,1,3 },   // quality 1
      { 3,1,1,1,3 },   // quality 2
      { 3,1,1,1,3 },   // quality 3
      { 3,1,1,1,3 },   // quality 4
      { 3,1,1,1,3 },   // quality 5
      { 2,3,1,1,3 },   // quality 6
      { 1,1,1,1,1 },   // quality 7
      { 1,1,1,1,1 },   // quality 8
      { 1,1,1,1,1 },   // quality 9
   }
};
|
|
|
|
// Selector values copied into the HS_COEFF_BIG row of an encoder's
// nz_correlated_huffman_selectors by set_nz_desc().
// Indexed as [ratemode][quality_mode][selector mode].
// set_nz_desc() raises any quality below 5 to 5 before indexing, so the
// quality 0..4 rows are not reached through that path.
// Each row spells out five values; if NUM_SELECTOR_MODES is larger, the
// remaining entries are implicitly zero.
static U8 nz_mode_correlated_selectors_big[4][10][NUM_SELECTOR_MODES] =
{
   {  // ratemode 0
      { 0,0,0,1,0 },   // quality 0
      { 0,0,0,1,0 },   // quality 1
      { 0,0,0,1,0 },   // quality 2
      { 0,0,0,1,0 },   // quality 3
      { 0,0,0,1,0 },   // quality 4
      { 0,0,0,1,0 },   // quality 5
      { 0,0,1,1,0 },   // quality 6
      { 0,1,1,2,0 },   // quality 7
      { 0,1,1,2,1 },   // quality 8
      { 1,2,2,2,1 },   // quality 9
   },
   {  // ratemode 1
      { 0,0,1,1,0 },   // quality 0
      { 0,0,1,1,0 },   // quality 1
      { 0,0,1,1,0 },   // quality 2
      { 0,0,1,1,0 },   // quality 3
      { 0,0,1,1,0 },   // quality 4
      { 0,0,1,1,0 },   // quality 5
      { 0,0,1,1,0 },   // quality 6
      { 0,1,1,2,0 },   // quality 7
      { 0,1,2,2,1 },   // quality 8
      { 2,2,2,3,2 },   // quality 9
   },
   {  // ratemode 2
      { 0,1,2,2,0 },   // quality 0
      { 0,1,2,2,0 },   // quality 1
      { 0,1,2,2,0 },   // quality 2
      { 0,1,2,2,0 },   // quality 3
      { 0,1,2,2,0 },   // quality 4
      { 0,1,2,2,0 },   // quality 5
      { 0,1,2,2,0 },   // quality 6
      { 0,1,2,2,0 },   // quality 7
      { 1,2,2,2,1 },   // quality 8
      { 2,3,3,3,2 },   // quality 9
   },
   {  // ratemode 3
      { 1,2,2,2,1 },   // quality 0
      { 1,2,2,2,1 },   // quality 1
      { 1,2,2,2,1 },   // quality 2
      { 1,2,2,2,1 },   // quality 3
      { 1,2,2,2,1 },   // quality 4
      { 1,2,2,2,1 },   // quality 5
      { 1,1,2,2,1 },   // quality 6
      { 2,0,2,2,1 },   // quality 7
      { 1,2,2,3,1 },   // quality 8
      { 3,3,3,3,3 },   // quality 9
   }
};
|
|
|
|
// Loads the nonzero-coding tables for one (ratemode, quality) configuration.
//
//   nz_desc                          receives NUM_NZ_MODE descriptors copied from nz_encode
//   nz_correlated_huffman_selectors  zeroed, then its HS_COEFF_PAIR and HS_COEFF_BIG rows
//                                    are filled from the two selector tables
//   ratemode, quality_mode           table indices; no upper-bound check is done here
static void set_nz_desc(radaudio_nonzero_blockmode_descriptor nz_desc[NUM_NZ_MODE], U8 nz_correlated_huffman_selectors[NUM_NZ_SELECTOR][NUM_SELECTOR_MODES], int ratemode, int quality_mode)
{
   // quality under 5 is untuned, so those settings reuse the quality-5 entries
   const int tuned_quality = (quality_mode < 5) ? 5 : quality_mode;

   // block-mode descriptors
   memcpy(nz_desc, nz_encode[ratemode][tuned_quality], NUM_NZ_MODE * sizeof(nz_desc[0]));

   // selectors: clear every row first, then fill the two rows that have tuned data
   memset(nz_correlated_huffman_selectors, 0, NUM_NZ_SELECTOR*NUM_SELECTOR_MODES);
   memcpy(nz_correlated_huffman_selectors[HS_COEFF_BIG ],
          nz_mode_correlated_selectors_big [ratemode][tuned_quality],
          NUM_SELECTOR_MODES);
   memcpy(nz_correlated_huffman_selectors[HS_COEFF_PAIR],
          nz_mode_correlated_selectors_pair[ratemode][tuned_quality],
          NUM_SELECTOR_MODES);
}
|
|
|
|
// returns 1 on success, or 0 if inputs are invalid or internal error
|
|
size_t radaudio_encode_create_internal(radaudio_encoder *rae,
|
|
U8 header[RADAUDIO_STREAM_HEADER_MAX],
|
|
int num_channels, // 1..2
|
|
int sample_rate, // in HZ
|
|
int qmode, // 0..9
|
|
float quality_pulse,
|
|
U32 flags) // used for ratesearch during development
|
|
{
|
|
radaudio_encoder_state *es = (radaudio_encoder_state *) rae;
|
|
int i, rate_mode;
|
|
size_t pack_length, unpack_length;
|
|
|
|
if (qmode > 9)
|
|
qmode = 9;
|
|
if (qmode < 0)
|
|
qmode = 0;
|
|
|
|
// need to know samprate mode before we can fill full header, so do a first conversion
|
|
|
|
rate_mode = radaudio_code_sample_rate(sample_rate);
|
|
if (rate_mode < 0)
|
|
return 0;
|
|
|
|
memset(es, 0, sizeof(*es));
|
|
|
|
es->quality_mode = (U8) qmode;
|
|
es->cpu = cpu_detect();
|
|
|
|
es->num_channels = num_channels;
|
|
es->sample_rate = sample_rate;
|
|
es->samprate_mode = rate_mode;
|
|
es->allow_mid_side = true;
|
|
|
|
if (quality_pulse == 0)
|
|
es->heur.pulse_quality = pulse_quality_table[es->samprate_mode][es->quality_mode];
|
|
else
|
|
es->heur.pulse_quality = quality_pulse / 100.0f;
|
|
|
|
for (i=0; i < 2; ++i) {
|
|
es->heur.band_exponent_base [i] = band_exponent_base [rate_mode][i][qmode];
|
|
es->heur.band_count_exponent[i] = band_count_exponent[rate_mode][i][qmode];
|
|
es->heur.quality_weight_low [i] = quality_weight_low [rate_mode][i][qmode];
|
|
es->heur.large_boost_median_test[i] = extra_threshold_big [rate_mode][i][qmode];
|
|
es->heur.small_boost_median_test[i] = extra_threshold_small[rate_mode][i][qmode];
|
|
}
|
|
|
|
es->heur.short_block_pulse_scale = short_block_pulses[rate_mode][qmode];
|
|
|
|
es->heur.side_exp_threshold_all = 3;
|
|
es->heur.side_exp_start2_all = 6;
|
|
es->heur.side_exp_threshold = 2;
|
|
es->heur.side_exp_end_all = -13;
|
|
es->heur.side_exp_start_all = -15;
|
|
es->heur.side_exp_start = -15;
|
|
es->heur.side_exp_start2 = 4;
|
|
es->heur.side_exp_end = -13;
|
|
|
|
es->heur.mid_side_tiny = -15;
|
|
es->heur.mid_side_offset = - 4;
|
|
es->heur.mid_side_threshold = -16;
|
|
es->heur.mid_side_max_bad_bands = 6;
|
|
|
|
es->heur.expectation_base = -16;
|
|
es->heur.expectation_scale = 0.195f;
|
|
es->heur.short_overlap_scale1 = 1.0f;
|
|
es->heur.short_overlap_scale2 = 1.0f;
|
|
|
|
set_nz_desc(es->nz_desc, es->nz_correlated_huffman_selectors, rate_mode, qmode);
|
|
|
|
radaudio_stream_header_unpacked h;
|
|
memset(&h, 0, sizeof(h));
|
|
|
|
h.num_channels = num_channels;
|
|
h.sample_rate = sample_rate;
|
|
h.version = ENCODER_VERSION;
|
|
h.bytes_bias = header_size_bias[qmode];
|
|
compute_bias_set(&es->biases, h.bytes_bias);
|
|
|
|
for (i=0; i < radaudio_rateinfo[0][rate_mode].num_bands; ++i) {
|
|
F32 base0,base1,decay0,decay1;
|
|
if (flags & RADAUDIO_ENC_FLAG_improve_seamless_loop) {
|
|
base0 = band_mantissa_base_improved_short_dc [qmode][0][i/4];
|
|
base1 = band_mantissa_base_improved_short_dc [qmode][1][i/4];
|
|
decay0 = band_mantissa_decay_improved_short_dc[qmode][0][i/4];
|
|
decay1 = band_mantissa_decay_improved_short_dc[qmode][1][i/4];
|
|
} else {
|
|
base0 = band_mantissa_base [qmode][0][i/4];
|
|
base1 = band_mantissa_base [qmode][1][i/4];
|
|
decay0 = band_mantissa_decay[qmode][0][i/4];
|
|
decay1 = band_mantissa_decay[qmode][1][i/4];
|
|
}
|
|
|
|
h.mantissa_param[0][i][0] = (S8) ( base0 * 8 + 0.5);
|
|
h.mantissa_param[1][i][0] = (S8) ( base1 * 8 + 0.5);
|
|
h.mantissa_param[0][i][1] = (S8) (decay0 * 256 + 0.5);
|
|
h.mantissa_param[1][i][1] = (S8) (decay1 * 256 + 0.5);
|
|
}
|
|
|
|
for (i=0; i < radaudio_rateinfo[0][rate_mode].num_bands; ++i) {
|
|
float pulses_per = subband_pulses_for_band[rate_mode][qmode][i];
|
|
int predicted_subband_sum = (int) (pulses_per * radaudio_rateinfo[0][rate_mode].num_subbands_for_band[i] + 0.5f);
|
|
h.subband_predicted_sum[i] = (U8) RR_MIN(255, predicted_subband_sum);
|
|
}
|
|
|
|
for (i=0; i < NUM_NZ_MODE; ++i) {
|
|
h.nzmode_num64[i] = es->nz_desc[i].num_8byte_chunks;
|
|
for (int j=0; j < MAX_NZ_BLOCKS; ++j)
|
|
h.nzmode_huff[i][j] = es->nz_desc[i].huffman_table_for_chunk[j] | (es->nz_desc[i].invert_chunk[j] ? NZ_MODE_INVERT : 0);
|
|
}
|
|
for (int j=0; j < NUM_NZ_SELECTOR; ++j)
|
|
for (i=0; i < NUM_SELECTOR_MODES; ++i)
|
|
h.nzmode_selectors[j][i] = es->nz_correlated_huffman_selectors[j][i];
|
|
|
|
#ifdef RADAUDIO_DEVELOPMENT
|
|
radaudio_load_heuristics_from_environment_variables(es, &h);
|
|
#endif
|
|
|
|
pack_length = radaudio_pack_stream_header(header, &h);
|
|
if (pack_length == 0)
|
|
return 0;
|
|
|
|
unpack_length = radaudio_unpack_stream_header(header, RADAUDIO_STREAM_HEADER_MAX, &h);
|
|
if (unpack_length != pack_length)
|
|
return 0;
|
|
|
|
|
|
memcpy(es->subband_predicted_sum, h.subband_predicted_sum, 24);
|
|
memcpy(es->mantissa_param , h.mantissa_param , sizeof(es->mantissa_param ));
|
|
memcpy(es->subband_bias , h.subband_bias , 24*sizeof(es->subband_bias[0]));
|
|
|
|
for (i=0; i < 2; ++i)
|
|
es->info[i] = &radaudio_rateinfo[i][es->samprate_mode];
|
|
|
|
radaudio_init_nz_desc(es->nz_desc);
|
|
|
|
return unpack_length;
|
|
}
|
|
|
|
// Creates an encoder by forwarding to radaudio_encode_create_internal() with
// quality_pulse set to 0, which makes that routine take the pulse quality from
// its tuned table. Returns whatever the internal routine returns.
size_t radaudio_encode_create(radaudio_encoder *es, U8 header[RADAUDIO_STREAM_HEADER_MAX], int num_channels, int sample_rate, int quality, U32 flags)
{
   const float table_pulse_quality = 0.0f;
   size_t header_length;

   header_length = radaudio_encode_create_internal(es, header,
                                                   num_channels, sample_rate,
                                                   quality, table_pulse_quality,
                                                   flags);
   return header_length;
}
|
|
|
|
#ifdef RADAUDIO_DEVELOPMENT
|
|
// internal use
|
|
int RadAudioCompressGetProfileData(radaudio_encoder *hradaud, radaudio_eprofile_value *profile, int num_profile)
|
|
{
|
|
radaudio_encoder_state *es = (radaudio_encoder_state *) hradaud;
|
|
int n = RR_MIN(num_profile, PROF_total_count);
|
|
static const char *names[] = {
|
|
#define PROF(x) #x,
|
|
PROFILE_ZONES()
|
|
#undef PROF
|
|
};
|
|
for (int i=0; i < n; ++i) {
|
|
profile[i].name = names[i];
|
|
profile[i].time = rrTicksToSeconds(es->profile_times[i]);
|
|
}
|
|
return n;
|
|
}
|
|
#else
|
|
int RadAudioCompressGetProfileData(radaudio_encoder *hradaud, radaudio_eprofile_value *profile, int num_profile)
|
|
{
|
|
RR_UNUSED_VARIABLE(hradaud); RR_UNUSED_VARIABLE(profile); RR_UNUSED_VARIABLE(num_profile);
|
|
return 0;
|
|
}
|
|
#endif
|