// Copyright Epic Games Tools, LLC. All Rights Reserved.

#include "rrCore.h"

#include "rada_encode.h"
#include "rada_file_header.h"
#include "radaudio_encoder.h"

#include <stdio.h>
#include <string.h>

#define MAX_STREAMS (RADA_MAX_CHANNELS / 2)

namespace {

static_assert(sizeof(size_t) == sizeof(uint64_t), "requires 64 bit size_t"); // we cast between these freely

static void sanity(bool to_test)
{
    if (to_test == false)
        *(volatile int*)0 = 0;
}

typedef void* (allocator_fn_type)(uintptr_t bytes);
typedef void (free_fn_type)(void* ptr);

struct UInt64Array
{
    uint64_t* data;
    size_t count;
    size_t allocated;
    allocator_fn_type* memalloc;
    free_fn_type* memfree;
    void construct(allocator_fn_type* _memalloc, free_fn_type* _memfree) { data = nullptr; count = allocated = 0; memalloc = _memalloc; memfree = _memfree; }
    void destroy() { memfree(data); }
};

static size_t UInt64ArrayLowerBound(UInt64Array* sa, uint64_t search_value)
{
    uint64_t* first = sa->data;
    size_t count = sa->count;
    while (count > 0)
    {
        uint64_t* it = first;
        size_t step = count / 2;
        it += step;
        if (*it < search_value)
        {
            first = it + 1;
            count -= step + 1;
        }
        else
            count = step;
    }
    return first - sa->data;
}

static void UInt64ArrayMakeFit(UInt64Array* sa, size_t count_to_add)
{
    if (sa->count + count_to_add <= sa->allocated)
        return;

    size_t new_allocated = sa->allocated * 2;
    if (new_allocated < sa->count + count_to_add)
        new_allocated = sa->count + count_to_add;
    if (new_allocated < 16)
        new_allocated = 16;

    uint64_t* new_data = (uint64_t*)sa->memalloc(sizeof(uint64_t) * new_allocated);
    memcpy(new_data, sa->data, sizeof(uint64_t) * sa->count);
    sa->memfree(sa->data);
    sa->data = new_data;
    sa->allocated = new_allocated;
}

static void UInt64ArrayAdd(UInt64Array* sa, size_t to_add)
{
    UInt64ArrayMakeFit(sa, 1);
    sa->data[sa->count] = to_add;
    sa->count++;
}

static void UInt64ArrayFree(UInt64Array* sa)
{
    sa->memfree(sa->data);
    sa->data = 0; sa->count = 0; sa->allocated = 0;
}

struct MemBufferEntry
{
    size_t allocated;
    size_t count;
    MemBufferEntry* next;
    uint8_t bytes[1];

    size_t extra_needed(size_t request_size) const { return (count + request_size <= allocated) ? 0 : (request_size - (allocated - count)); }
};

struct MemBuffer
{
    allocator_fn_type* memalloc;
    free_fn_type* memfree;
    MemBufferEntry* head;
    MemBufferEntry* tail;
    size_t total_bytes;

    void construct(allocator_fn_type* _memalloc, free_fn_type* _memfree) { head = tail = nullptr; total_bytes = 0; memalloc = _memalloc; memfree = _memfree; }
    void destroy()
    {
        MemBufferEntry* entry = head;
        while (entry)
        {
            MemBufferEntry* next = entry->next;
            memfree(entry);
            entry = next;
        }
        total_bytes = 0;
        head = 0;
        tail = 0;
    }
};

static const size_t membuffer_default_buffer_size = 64 << 10;

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
static MemBufferEntry* MemBufferMakeFit(MemBuffer* mem, size_t amount_needed)
{
    if (mem->tail == nullptr)
    {
        // first buffer.
        size_t allocate_amount = membuffer_default_buffer_size;
        if (amount_needed > allocate_amount)
            allocate_amount = amount_needed;

        // allocate the buffer to hold the entire thing.
        MemBufferEntry* entry = (MemBufferEntry*)mem->memalloc(sizeof(MemBufferEntry) + allocate_amount);
        entry->allocated = allocate_amount;
        entry->count = 0;
        entry->next = 0;
        mem->head = entry;
        mem->tail = entry;
        return entry;
    }

    // buffer exists - does it fit in the current allocation?
    MemBufferEntry* existing = mem->tail;
    size_t extra_needed = existing->extra_needed(amount_needed);
    if (extra_needed == 0)
        return existing;

    // doesn't fit, need another one.
    // make sure the next buffer is at least the min size.
    if (extra_needed < membuffer_default_buffer_size)
        extra_needed = membuffer_default_buffer_size;

    MemBufferEntry* new_entry = (MemBufferEntry*)mem->memalloc(sizeof(MemBufferEntry) + extra_needed);
    new_entry->allocated = extra_needed;
    new_entry->count = 0;
    new_entry->next = 0;
    mem->tail->next = new_entry;
    mem->tail = new_entry;

    // return the first buffer that has space
    if (existing->allocated == existing->count)
        return new_entry; // no space, it all goes in the new entry.
    return existing; // at least some bytes can go in the existing one.
}

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
static void MemBufferAdd(MemBuffer* mem, void* data, size_t data_len)
{
    mem->total_bytes += data_len;

    MemBufferEntry* dest = MemBufferMakeFit(mem, data_len);
    size_t extra_needed = dest->extra_needed(data_len);
    if (extra_needed == 0)
    {
        // single memcpy
        memcpy(dest->bytes + dest->count, data, data_len);
        dest->count += data_len;
        return;
    }

    // split across the current entry and the next one.
    size_t bytes_first = data_len - extra_needed;
    memcpy(dest->bytes + dest->count, data, bytes_first);
    dest->count += bytes_first;
    dest = dest->next;
    memcpy(dest->bytes, (uint8_t*)data + bytes_first, extra_needed);
    dest->count += extra_needed;
}

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
static MemBufferEntry* MemBufferCopy(MemBuffer* dest, MemBuffer* source, MemBufferEntry* source_entry, size_t start_offset, size_t byte_count, size_t* end_offset)
{
    // copy starting at source_entry->bytes + start_offset.
    while (byte_count)
    {
        size_t available_bytes = source_entry->count - start_offset;
        sanity(available_bytes != 0);
        if (available_bytes == 0)
            return nullptr;
        size_t from_this = byte_count;
        if (available_bytes < from_this)
            from_this = available_bytes;

        MemBufferAdd(dest, source_entry->bytes + start_offset, from_this);

        byte_count -= from_this;
        start_offset += from_this;

        if (start_offset == source_entry->count)
        {
            source_entry = source_entry->next;
            start_offset = 0;
        }
    }

    *end_offset = start_offset;
    return source_entry;
}

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
static uint8_t* MemBufferCanCopyDirect(MemBuffer* mem, size_t data_len)
{
    MemBufferEntry* dest = MemBufferMakeFit(mem, data_len);
    size_t extra_needed = dest->extra_needed(data_len);
    if (extra_needed == 0)
        return dest->bytes + dest->count;
    return nullptr;
}

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
static void MemBufferCommitDirect(MemBuffer* mem, size_t data_len)
{
    // if we wrote direct, then by definition it's the last entry. committing
    // more than was requested is undefined.
    MemBufferEntry* dest = mem->tail;
    dest->count += data_len;
    mem->total_bytes += data_len;
}
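
// Illustrative sketch (not built): typical MemBuffer usage. Small writes go
// through MemBufferAdd, which splits across entries as needed; bounded writes
// can use the direct path to skip a staging copy when the tail entry has
// contiguous room. The allocator pair here is whatever the caller owns.
#if 0
static void MemBufferExample(allocator_fn_type* my_alloc, free_fn_type* my_free)
{
    MemBuffer buf;
    buf.construct(my_alloc, my_free);

    uint32_t value = 0x12345678;
    MemBufferAdd(&buf, &value, sizeof(value));

    // Reserve up to 256 contiguous bytes, write into them, commit what was used.
    uint8_t* dest = MemBufferCanCopyDirect(&buf, 256);
    if (dest)
    {
        memset(dest, 0, 16);
        MemBufferCommitDirect(&buf, 16); // never commit more than was requested
    }

    buf.destroy();
}
#endif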

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
static uint8_t* MemBufferWriteBuffer(MemBuffer* mem, uint8_t* buffer)
{
    uint8_t* write_cursor = buffer;
    MemBufferEntry* entry = mem->head;
    while (entry)
    {
        MemBufferEntry* next = entry->next;
        memcpy(write_cursor, entry->bytes, entry->count);
        write_cursor += entry->count;
        entry = next;
    }
    return write_cursor;
}

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
static bool SeekTableBufferTrim(UInt64Array* samples, UInt64Array* offsets, size_t max_entry_count)
{
    sanity(offsets->count == samples->count);

    while (samples->count > max_entry_count)
    {
        // collapse pairs.
        size_t read_index = 0;
        size_t write_index = 0;
        while (read_index < samples->count)
        {
            { // samples
                uint64_t total_samples = samples->data[read_index];
                if (read_index + 1 < samples->count)
                {
                    total_samples += samples->data[read_index + 1];
                }

                if (total_samples > 65535)
                {
                    // we can't fit the sample count in the table - fail to trim, this file
                    // is too big with the given max seek table size.
                    return false;
                }

                samples->data[write_index] = total_samples;
            }
            { // offsets
                uint64_t total_offsets = offsets->data[read_index];
                if (read_index + 1 < offsets->count)
                {
                    total_offsets += offsets->data[read_index + 1];
                }

                if (total_offsets > 65535)
                {
                    // we can't fit the offset in the table - fail to trim, this file is too big
                    // with the given max seek table size.
                    return false;
                }

                offsets->data[write_index] = total_offsets;
            }
            write_index++;
            read_index += 2;
        }

        samples->count = write_index;
        offsets->count = write_index;
    }

    return true;
}
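
// For example, samples {1024, 1024, 1024, 1024} with max_entry_count 2 collapse
// in one pass to {2048, 2048}; trimming only fails if a merged value would no
// longer fit in the table's 16-bit fields.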

struct BitContainer
{
    UInt64Array container;
    size_t total_bit_count;
    uint64_t working_bits;
    uint32_t working_bit_count;

    void construct(allocator_fn_type* allocator_fn, free_fn_type* free_fn)
    {
        container.construct(allocator_fn, free_fn);
        working_bits = 0;
        working_bit_count = 0;
        total_bit_count = 0;
    }
    void destroy()
    {
        container.destroy();
    }
};

static uint64_t BitContainerExtract(BitContainer* bit_container, uint64_t bit_position, uint32_t bit_count)
{
    // extract the bits.
    uint64_t base_bit = bit_position;
    uint64_t base_word = base_bit / 64;
    uint64_t base_offset = base_bit - base_word * 64;

    sanity(base_word < bit_container->container.count);
    uint64_t output = 0;

    uint64_t bits_avail = 64 - base_offset;

    uint64_t bits_from_first = bit_count;
    if (bits_from_first > bits_avail)
        bits_from_first = bits_avail;

    uint64_t bits_to_clear = 64 - (base_offset + bits_from_first);
    output = (bit_container->container.data[base_word] << bits_to_clear) >> (bits_to_clear + base_offset);

    if (bits_from_first != bit_count)
    {
        // need some from the next word
        base_word++;
        base_offset = 0;
        bits_to_clear = 64 - (bit_count - bits_from_first);

        output |= ((bit_container->container.data[base_word] << bits_to_clear) >> bits_to_clear) << bits_from_first;
    }
    return output;
}

static void BitContainerPut(BitContainer* bit_container, uint64_t bits, uint32_t bit_count_to_add)
{
    if (bit_count_to_add == 0)
        return; // the sanitizing shift below would be a 64-bit (undefined) shift.

    bit_container->total_bit_count += bit_count_to_add;

    // clear any bits we aren't adding.
    uint64_t sanitized_bits = (bits << (64 - bit_count_to_add)) >> (64 - bit_count_to_add);

    // add the bits in at our position... any bits off the "end" are lost, we handle them below.
    bit_container->working_bits |= sanitized_bits << bit_container->working_bit_count;

    // update the position
    bit_container->working_bit_count += bit_count_to_add;

    // did we fill the working word?
    if (bit_container->working_bit_count >= 64)
    {
        // flush the working bits.
        UInt64ArrayAdd(&bit_container->container, bit_container->working_bits);

        // reset our working state.
        bit_container->working_bit_count -= 64;
        bit_container->working_bits = 0;

        // if there are some left over, we need to add them to our new working word.
        if (bit_container->working_bit_count)
        {
            bit_container->working_bits = sanitized_bits >> (bit_count_to_add - bit_container->working_bit_count);
        }
    }
}

// ensure all working bits are in the container.
static void BitContainerFlush(BitContainer* bit_container)
{
    uint32_t bits_remaining_in_working = (64 - bit_container->working_bit_count) & (64 - 1);
    if (bits_remaining_in_working)
    {
        BitContainerPut(bit_container, 0, bits_remaining_in_working);
    }
}
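
// Illustrative sketch (not built): a pack/unpack round trip. Positions are in
// bits from the start of the container, low bits first; BitContainerFlush pads
// the final partial word so container.data holds every bit that was put.
#if 0
static void BitContainerExample(allocator_fn_type* my_alloc, free_fn_type* my_free)
{
    BitContainer bits;
    bits.construct(my_alloc, my_free);

    BitContainerPut(&bits, 0x5, 3);   // occupies bits 0..2
    BitContainerPut(&bits, 0x2a, 40); // occupies bits 3..42
    BitContainerFlush(&bits);

    sanity(BitContainerExtract(&bits, 0, 3) == 0x5);
    sanity(BitContainerExtract(&bits, 3, 40) == 0x2a);

    bits.destroy();
}
#endif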

// Scope guard to set up FP state as desired and reset it on exit
struct FPStateScope
{
    U32 saved_state;

    FPStateScope();
    ~FPStateScope();
};

#if defined(__RADSSE2__)

FPStateScope::FPStateScope()
{
    saved_state = _mm_getcsr();
    // Set up our expected FP state: no exception flags set,
    // all exceptions masked (suppressed), round to nearest,
    // flush to zero and denormals are zero both off.
    _mm_setcsr(_MM_MASK_MASK /* all exceptions masked */ | _MM_ROUND_NEAREST | _MM_FLUSH_ZERO_OFF);
}

FPStateScope::~FPStateScope()
{
    _mm_setcsr(saved_state);
}
#elif defined(__RADARM__) && defined(__RAD64__)

#ifdef _MSC_VER

#include <intrin.h>
static U32 read_fpcr()
{
    // The system register R/W instructions use 64-bit GPRs, but the
    // architectural FPCR is 32b
    return (U32)_ReadStatusReg(ARM64_FPCR);
}

static void write_fpcr(U32 state)
{
    _WriteStatusReg(ARM64_FPCR, state);
}

#elif defined(__clang__) || defined(__GNUC__)

static U32 read_fpcr()
{
    // The system register R/W instructions use 64-bit GPRs, but the
    // architectural FPCR is 32b
    U64 value;
    __asm__ volatile("mrs %0, fpcr" : "=r"(value));
    return (U32)value;
}

static void write_fpcr(U32 state)
{
    U64 state64 = state;
    __asm__ volatile("msr fpcr, %0" : : "r"(state64));
}

#else

#error Unsupported compiler - expected MSVC, clang, or GCC.

#endif

FPStateScope::FPStateScope()
{
    saved_state = read_fpcr();

    // IEEE compliant mode in FPCR is just all-0
    write_fpcr(0);
}

FPStateScope::~FPStateScope()
{
    write_fpcr(saved_state);
}

#else // neither SSE2 nor ARM64

FPStateScope::FPStateScope()
    : saved_state(0)
{
}

FPStateScope::~FPStateScope()
{
}

#endif

static uint8_t GetBitsToShift(uint64_t val) // count trailing zeros, pulled from rrbits.
{
    // ctz4(x)
    static uint8_t const lut[16] = { 4,0,1,0, 2,0,1,0, 3,0,1,0, 2,0,1,0 };

    uint8_t nz = 0;
    if ((val & 0xffffffff) == 0) { nz += 32; val >>= 32; }
    if ((val & 0x0000ffff) == 0) { nz += 16; val >>= 16; }
    if ((val & 0x000000ff) == 0) { nz += 8; val >>= 8; }
    if ((val & 0x0000000f) == 0) { nz += 4; val >>= 4; }
    return nz + lut[val & 0xf];
}

static uint8_t GetBitsRequiredToSend(uint64_t value)
{
    uint8_t bits = 0;

    if (value & 0xffffffff00000000ULL) { bits += 32; value >>= 32; }
    if (value & 0xffff0000U) { bits += 16; value >>= 16; }
    if (value & 0xff00) { bits += 8; value >>= 8; }
    if (value & 0xf0) { bits += 4; value >>= 4; }
    static U8 table[16] = {
        // 0  1  10  11  100  101  110  111
           0, 1,  2,  2,   3,   3,   3,   3,
        // 1000 1001 1010 1011 1100 1101 1110 1111
              4,   4,   4,   4,   4,   4,   4,   4 };

    return bits + table[value & 0xf];
}

static size_t align4(size_t in) { return (in + 3) & ~3; }
static size_t align64(size_t in) { return (in + 63) & ~63; }
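
// For example: GetBitsToShift(40) == 3 (40 == 101000b), GetBitsRequiredToSend(5) == 3
// (5 == 101b), align4(5) == 8, and align64(65) == 128.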

struct LineEquation
{
    int64_t slope_numerator, slope_denominator;
    int64_t intercept;
    int64_t bias;
};

static void LinearRegressionDeltasToBits(UInt64Array* resolved_deltas,
    uint8_t delta_shift, uint64_t* deltas, size_t count,
    LineEquation* out_regression, uint8_t* out_bits_per_entry)
{
    resolved_deltas->count = 0;

    if (count == 0)
    {
        out_regression->intercept = 0;
        out_regression->slope_numerator = 0;
        out_regression->slope_denominator = 1;
        out_regression->bias = 0;
        *out_bits_per_entry = 0;
        return;
    }

    // We are delivered deltas, but we want to regress on the resolved positions, so sum in to
    // our temp space.
    uint64_t delta_sum = 0;
    uint64_t resolved_delta_sum = 0;
    for (size_t index = 0; index < count; index++)
    {
        UInt64ArrayAdd(resolved_deltas, delta_sum);

        resolved_delta_sum += resolved_deltas->data[index];
        delta_sum += deltas[index] >> delta_shift;
    }

    int64_t resolved_delta_avg = resolved_delta_sum / count;
    int64_t index_avg = count / 2;

    // beta = sum( (xi - xavg)(yi - yavg) ) / sum( (xi - xavg)(xi - xavg) )
    int64_t beta_numerator = 0;
    int64_t beta_denominator = 0;
    for (size_t index = 0; index < count; index++)
    {
        beta_numerator += ((int64_t)index - index_avg) * ((int64_t)resolved_deltas->data[index] - resolved_delta_avg);
        beta_denominator += ((int64_t)index - index_avg) * ((int64_t)index - index_avg);
    }
    if (beta_denominator == 0)
        beta_denominator = 1; // single entry - the numerator is zero too, so the slope is just 0.

    // y = a + Bx
    // a = resolved_delta_avg - (beta_numerator / beta_denominator)*index_avg
    // B = (beta_numerator / beta_denominator)
    // y = resolved_delta_avg - (beta_numerator / beta_denominator)*index_avg + (beta_numerator / beta_denominator)*X
    // y = resolved_delta_avg + (beta_numerator / beta_denominator)*(X - index_avg)
    // y = resolved_delta_avg + (beta_numerator * (X - index_avg)) / beta_denominator

    // To send bits we need to bias in to positive, so find the most-negative error so we can offset it.
    int64_t bias = resolved_deltas->data[0] - (resolved_delta_avg + (beta_numerator * (0 - index_avg)) / beta_denominator);
    for (size_t index = 1; index < count; index++)
    {
        int64_t estimated_seek_table = resolved_delta_avg + (beta_numerator * ((int64_t)index - index_avg)) / beta_denominator;
        int64_t error = resolved_deltas->data[index] - estimated_seek_table;

        if (error < bias)
            bias = error;
    }

    // Find how many bits we need to encode the residuals
    uint8_t max_bits_required = 0;
    for (size_t index = 0; index < count; index++)
    {
        int64_t estimated_seek_table = resolved_delta_avg + (beta_numerator * ((int64_t)index - index_avg)) / beta_denominator;
        int64_t error = resolved_deltas->data[index] - estimated_seek_table;
        int64_t biased_error = error - bias;

        uint8_t bits_required = GetBitsRequiredToSend(biased_error);

        if (bits_required > max_bits_required)
            max_bits_required = bits_required;
    }

    out_regression->intercept = resolved_delta_avg;
    out_regression->slope_numerator = beta_numerator;
    out_regression->slope_denominator = beta_denominator;
    out_regression->bias = bias;
    *out_bits_per_entry = max_bits_required;
}
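
// Illustrative sketch (not built): inverting the regression. A reader recovers
// entry i of the resolved (prefix-summed) positions from the stored line plus
// the per-entry residual; this mirrors the verification pass further down.
#if 0
static int64_t ReconstructEntry(const LineEquation* line, int64_t index,
    int64_t entry_count, uint64_t biased_residual)
{
    int64_t index_avg = entry_count / 2;
    int64_t predicted = line->intercept +
        (line->slope_numerator * (index - index_avg)) / line->slope_denominator;
    // undo the bias, then add the residual back onto the fitted line.
    return predicted + line->bias + (int64_t)biased_residual;
}
#endif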

struct SeekTableInfo
{
    LineEquation sample_line;
    LineEquation byte_line;
    uint8_t sample_bits_per_entry, byte_bits_per_entry;

    RadASeekTableHeader get_header()
    {
        RadASeekTableHeader header = {};
        header.byte_bias = byte_line.bias;
        header.byte_intercept = byte_line.intercept;
        header.byte_line_slope[0] = byte_line.slope_numerator;
        header.byte_line_slope[1] = byte_line.slope_denominator;

        header.sample_bias = sample_line.bias;
        header.sample_intercept = sample_line.intercept;
        header.sample_line_slope[0] = sample_line.slope_numerator;
        header.sample_line_slope[1] = sample_line.slope_denominator;
        return header;
    }

    void construct(allocator_fn_type* _memalloc, free_fn_type* _memfree)
    {
        (void)_memalloc;
        (void)_memfree;
        memset(this, 0, sizeof(*this));
    }
    void destroy()
    {
    }
};

struct stream_info
{
    free_fn_type* free_fn;
    float* samples;
    uint8_t channels; // 1 or 2.
    uint8_t channel_offset; // where our channels start in the source stream
    uint8_t header_size;

    radaudio_encoder encoder;
    unsigned char encoder_header_buffer[128];
    radaudio_blocktype first_block_type;

    uint64_t current_offset_frame;
    size_t consumed_frames_last_block;

    UInt64Array bytes_in_block;
    UInt64Array samples_in_block;
    MemBuffer encoded_data;

    uint64_t block_count;
    MemBufferEntry* writing_entry;
    uint64_t writing_entry_offset;

    void construct(allocator_fn_type* allocator_fn, free_fn_type* in_free_fn)
    {
        free_fn = in_free_fn;
        bytes_in_block.construct(allocator_fn, in_free_fn);
        samples_in_block.construct(allocator_fn, in_free_fn);
        encoded_data.construct(allocator_fn, in_free_fn);
        samples = nullptr;
        channels = 0;
        channel_offset = 0;
        current_offset_frame = 0;
        writing_entry = nullptr;
        writing_entry_offset = 0;
        block_count = 0;
        header_size = 0;
        first_block_type = (radaudio_blocktype)0;
        consumed_frames_last_block = 0;
    }
    void destroy()
    {
        bytes_in_block.destroy();
        samples_in_block.destroy();
        encoded_data.destroy();
        free_fn(samples);
    }
};

} // end anon namespace

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
uint8_t EncodeRadAFile(
    void* WavData, uint64_t WavDataLen, uint32_t WavRate, uint8_t WavChannels,
    uint8_t Quality, uint8_t SeamlessLooping, uint8_t GenerateSeekTable, uint16_t SeekTableMaxEntries,
    RadACompressAllocFnType* MemAlloc, RadACompressFreeFnType* MemFree,
    void** OutData, uint64_t* OutDataLen)
{
    FPStateScope FixFloatingPoint;

    if (WavChannels == 0 || WavChannels > (MAX_STREAMS * 2))
        return RADA_COMPRESS_ERROR_CHANS;
    if (Quality > 9 || Quality < 1)
        return RADA_COMPRESS_ERROR_QUALITY;
    if (MemAlloc == nullptr ||
        MemFree == nullptr)
        return RADA_COMPRESS_ERROR_ALLOCATORS;
    if (OutData == nullptr ||
        OutDataLen == nullptr)
        return RADA_COMPRESS_ERROR_OUTPUT;
    if (GenerateSeekTable && SeekTableMaxEntries < 2)
        return RADA_COMPRESS_ERROR_SEEKTABLE;

    {
        uint32_t rate_index = 0;
        for (; rate_index < sizeof(RADA_VALID_RATES) / sizeof(RADA_VALID_RATES[0]); rate_index++)
        {
            if (RADA_VALID_RATES[rate_index] == WavRate)
                break;
        }
        if (rate_index == sizeof(RADA_VALID_RATES) / sizeof(RADA_VALID_RATES[0]))
            return RADA_COMPRESS_ERROR_RATE;
    }

    //
    // Deinterleave the input.
    //
    uint64_t SamplesPerChannel = WavDataLen / (sizeof(short) * WavChannels);
    uint8_t StreamCount = (WavChannels / 2) + (WavChannels & 1);
    if (SamplesPerChannel == 0)
        return RADA_COMPRESS_ERROR_SAMPLES;
    if (StreamCount > MAX_STREAMS)
        return RADA_COMPRESS_ERROR_CHANS;

    //
    // Input wav data needs to be converted to float and grouped in to stereo pairs.
    //

    stream_info* streams = (stream_info*)MemAlloc(sizeof(stream_info) * StreamCount);

    for (S32 i = 0; i < StreamCount; i++)
    {
        streams[i].construct(MemAlloc, MemFree);

        if (i)
            streams[i].channel_offset = streams[i-1].channel_offset + streams[i-1].channels;
        streams[i].channels = (WavChannels - streams[i].channel_offset) > 1 ? 2 : 1;
        streams[i].samples = (float*)MemAlloc(sizeof(float) * SamplesPerChannel * streams[i].channels);

        int16_t* input_samples = (int16_t*)WavData;

        if (streams[i].channels == 2)
        {
            for (uint64_t sample = 0; sample < SamplesPerChannel; sample++)
            {
                streams[i].samples[2*sample + 0] = input_samples[WavChannels*sample + streams[i].channel_offset + 0] / 32768.0f;
                streams[i].samples[2*sample + 1] = input_samples[WavChannels*sample + streams[i].channel_offset + 1] / 32768.0f;
            }
        }
        else
        {
            for (uint64_t sample = 0; sample < SamplesPerChannel; sample++)
            {
                streams[i].samples[sample] = input_samples[WavChannels*sample + streams[i].channel_offset + 0] / 32768.0f;
            }
        }
    }

    size_t encoder_total_size = 0;
    for (S32 i = 0; i < StreamCount; i++)
    {
        size_t result = radaudio_encode_create(
            &streams[i].encoder, streams[i].encoder_header_buffer, streams[i].channels,
            WavRate, Quality, SeamlessLooping ? RADAUDIO_ENC_FLAG_improve_seamless_loop : 0);
        if (result == 0)
        {
            for (S32 j = 0; j < StreamCount; j++)
                streams[j].destroy();
            MemFree(streams);
            return RADA_COMPRESS_ERROR_ENCODER;
        }
        encoder_total_size += align4(result);
        sanity(result <= 255);
        streams[i].header_size = (uint8_t)align4(result);
    }

    uint32_t block_header_bytes = 1;
    if (StreamCount > 1)
        block_header_bytes += 2;

    radaudio_blocktype first_block_type = radaudio_determine_preferred_first_block_length(&streams[0].encoder, streams[0].samples, SamplesPerChannel);
    for (uint8_t StreamIndex = 1; StreamIndex < StreamCount; StreamIndex++)
    {
        radaudio_blocktype check_block_type = radaudio_determine_preferred_first_block_length(&streams[StreamIndex].encoder, streams[StreamIndex].samples, SamplesPerChannel);
        if (check_block_type == RADAUDIO_BLOCKTYPE_short)
            first_block_type = RADAUDIO_BLOCKTYPE_short;
    }
    for (uint8_t StreamIndex = 0; StreamIndex < StreamCount; StreamIndex++)
        streams[StreamIndex].first_block_type = first_block_type;

    // track how much data we'll need to read in to start getting audio out.
    size_t bytes_for_initial_data = 0;
    uint32_t max_compressed_size = 0;
    for (;;)
    {
        bool all_done = false;

        // Determine the block size we need to use. We need to keep all streams in sync, so we check all
        // streams and if any want to be short, then we are short, otherwise we are long.
        radaudio_blocktype block_type_for_all_streams = RADAUDIO_BLOCKTYPE_long;

        for (uint8_t StreamIndex = 0; StreamIndex < StreamCount; StreamIndex++)
        {
            stream_info* stream = streams + StreamIndex;
            radaudio_blocktype block_type = (radaudio_blocktype)radaudio_determine_preferred_next_block_length(&stream->encoder, stream->first_block_type, stream->samples, SamplesPerChannel, stream->current_offset_frame);
            if (block_type == RADAUDIO_BLOCKTYPE_short)
                block_type_for_all_streams = RADAUDIO_BLOCKTYPE_short;
        }

        uint32_t compressed_size_sum = 0;

        for (uint8_t StreamIndex = 0; StreamIndex < StreamCount; StreamIndex++)
        {
            stream_info* stream = streams + StreamIndex;
            uint8_t encoded_buffer[MAX_ENCODED_BLOCK_SIZE];

            radaudio_encode_info encode_info = {};
            encode_info.force_first_blocktype = first_block_type;
            encode_info.force_next_blocktype = block_type_for_all_streams;

            // If desired for looping, set up the padding so that we get seamless playback across the whole file.
            if (SeamlessLooping)
            {
                if (SamplesPerChannel < 2048)
                {
                    // it's so short we just always provide the whole thing
                    encode_info.padding = stream->samples;
                    encode_info.padding_len = SamplesPerChannel;
                }
                else if (stream->current_offset_frame < 1024)
                {
                    // We're at the beginning, so we want the padding to be the end of the stream.
                    encode_info.padding = stream->samples + (SamplesPerChannel - 2048) * stream->channels;
                    encode_info.padding_len = 2048;
                }
                else // just always provide the beginning, it won't use it if it doesn't need it.
                {
                    encode_info.padding = stream->samples;
                    encode_info.padding_len = SamplesPerChannel;
                }
            }

            uint8_t* contiguous_dest = MemBufferCanCopyDirect(&stream->encoded_data, MAX_ENCODED_BLOCK_SIZE);
            uint8_t* encode_dest = encoded_buffer;
            if (contiguous_dest)
                encode_dest = contiguous_dest;

            size_t starting_offset_frame = stream->current_offset_frame;
            int32_t encode_result = radaudio_encode_block_ext(&stream->encoder,
                stream->samples,
                SamplesPerChannel,
                (size_t*)&stream->current_offset_frame,
                encode_dest,
                MAX_ENCODED_BLOCK_SIZE,
                &encode_info);

            if (encode_result == RADAUDIOENC_AT_EOF)
            {
                // done.
                all_done = true;
                break;
            }
            else if (encode_result <= 0)
            {
                // misc error
                all_done = true;
                break;
            }

            if (contiguous_dest)
                MemBufferCommitDirect(&stream->encoded_data, encode_result);
            else
                MemBufferAdd(&stream->encoded_data, encoded_buffer, encode_result);

            compressed_size_sum += encode_result;

            // we need 2 decodes to start getting data.
            if (stream->block_count <= 1)
                bytes_for_initial_data += encode_result;

            UInt64ArrayAdd(&stream->bytes_in_block, encode_result);

            size_t produced_samples = stream->consumed_frames_last_block;
            UInt64ArrayAdd(&stream->samples_in_block, produced_samples);

            stream->consumed_frames_last_block = stream->current_offset_frame - starting_offset_frame;
            stream->block_count++;
        }

        if (compressed_size_sum > max_compressed_size)
            max_compressed_size = compressed_size_sum;

        if (all_done)
            break;
    }

    // since all streams advanced in lockstep, they should have the same block count.
    for (uint8_t stream_index = 1; stream_index < StreamCount; stream_index++)
    {
        sanity(streams[stream_index].block_count == streams[0].block_count);
    }

    //
    // Create an interleaved output of the encoded streams. The streams will
    // have different sized blocks, but we need to ensure that when a decoder
    // runs a stream out of data, its next block is the next thing ready to decode.
    //

    // for encoding, just add in whichever one is farthest behind on the output
    // sample.

    MemBuffer encoded_stream;
    encoded_stream.construct(MemAlloc, MemFree);

    for (uint8_t stream = 0; stream < StreamCount; stream++)
    {
        streams[stream].writing_entry = streams[stream].encoded_data.head;
    }

    for (size_t current_block_index = 0; current_block_index < streams[0].block_count; current_block_index++)
    {
        // Get the block size for each stream. We can get the first stream from the bitstream
        // itself, but we need to be able to get the total chunk size in a small amount of data,
        // so multistream files get a slightly bigger header.

        uint64_t multi_stream_bytes = 0;
        for (uint8_t stream_index = 1; stream_index < StreamCount; stream_index++)
        {
            stream_info* stream = streams + stream_index;

            uint64_t block_bytes = stream->bytes_in_block.data[current_block_index];
            sanity(block_bytes < 4096);

            multi_stream_bytes += block_bytes;
        }

        uint8_t block_header = RADA_SYNC_BYTE;
        MemBufferAdd(&encoded_stream, &block_header, 1);

        if (StreamCount > 1)
        {
            sanity(multi_stream_bytes < 65535);
            uint16_t multi_stream_bytes_16 = (uint16_t)multi_stream_bytes;
            MemBufferAdd(&encoded_stream, &multi_stream_bytes_16, 2);
        }

        for (uint8_t stream_index = 0; stream_index < StreamCount; stream_index++)
        {
            stream_info* stream = streams + stream_index;

            uint64_t block_bytes = stream->bytes_in_block.data[current_block_index];
            sanity(block_bytes < 4096);

            stream->writing_entry = MemBufferCopy(&encoded_stream, &stream->encoded_data, stream->writing_entry, stream->writing_entry_offset, block_bytes, (size_t*)&stream->writing_entry_offset);
        }
    }
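
    // Resulting layout for each chunk of the encoded stream:
    //   [1 byte RADA_SYNC_BYTE]
    //   [2 byte total size of streams 1..N-1, only present when StreamCount > 1]
    //   [stream 0 block][stream 1 block]...[stream N-1 block]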

    // Seeking requires priming the decoder with a chunk, so we can't actually seek with
    // fewer than 3 chunks - 2 chunks just means you're starting at the beginning.
    if (streams[0].samples_in_block.count <= 2)
        GenerateSeekTable = 0;

    // \todo The seek table ends up returning the block size for the seek for the entire seek
    // chunk, not just what's needed to decode the _next_ block. Fixing this properly requires
    // a whole separate stream, which is likely not worth it. So instead we cap the seek table
    // request with the max encoded block size.

    UInt64Array seek_table_bytes, seek_table_samples;
    seek_table_bytes.construct(MemAlloc, MemFree);
    seek_table_samples.construct(MemAlloc, MemFree);

    // Collapse the seek tables to the correct size.
    uint8_t Result = RADA_COMPRESS_SUCCESS;

    //
    // The seek table for an MDCT codec is a bit more complex because we need to
    // adjust the target such that we decode an extra block. This block emits no samples,
    // so we don't need to touch the sample counts at all.
    //
    // This functionally just means that the byte position that we emit is
    // current position + [-1, blocks_per_entry-1] rather than [0, blocks_per_entry].

    size_t seek_table_entries = 0;
    if (GenerateSeekTable)
    {
        uint64_t frames_per_seek_table_block = SamplesPerChannel / SeekTableMaxEntries;

        // we don't want to emit a _ton_ of seek table entries if we have a bunch of small blocks,
        // so sanity limit the density. We're expecting basically everything to be at 48000, and for
        // anything that isn't we're not likely to be interested in seek density, so this makes for
        // roughly 170ms max density.
        if (frames_per_seek_table_block < 8192)
            frames_per_seek_table_block = 8192;

        uint64_t current_seek_table_frame = 0;

        size_t current_block_index = 1;
        for (; current_block_index < streams[0].samples_in_block.count; )
        {
            // Since we are tracking on an uneven chunk size, we can end up
            // with more than desired by one or two - just cap.
            if (seek_table_bytes.count == SeekTableMaxEntries)
                break;

            //
            // The first entry already compensates for this as there was no way for the encoder
            // to emit anything... so we don't need to do this adjustment for the first block.
            //

            // find how many blocks we need to catch up to where we should be in frames.
            size_t end = current_block_index + 1;
            uint64_t end_seek_table_frame = current_seek_table_frame;
            for (; end < streams[0].samples_in_block.count; end++)
            {
                end_seek_table_frame += streams[0].samples_in_block.data[end];
                if (end_seek_table_frame > frames_per_seek_table_block * seek_table_bytes.count)
                    break;
            }
            size_t blocks_per_entry = end - current_block_index;

            current_seek_table_frame = end_seek_table_frame;

            uint64_t entry_bytes = 0;
            uint64_t entry_samples = 0;

            size_t entry_block_start_index = current_block_index ? current_block_index - 1 : current_block_index;
            size_t entry_block_end_index = current_block_index + blocks_per_entry - 1;
            if (entry_block_end_index >= streams[0].samples_in_block.count)
                entry_block_end_index = streams[0].samples_in_block.count - 1;

            size_t entry_block_end_index_unadjusted = current_block_index + blocks_per_entry;
            if (entry_block_end_index_unadjusted >= streams[0].samples_in_block.count)
                entry_block_end_index_unadjusted = streams[0].samples_in_block.count - 1;

            for (uint8_t stream_index = 0; stream_index < StreamCount; stream_index++)
            {
                for (size_t entry_block_index = entry_block_start_index; entry_block_index < entry_block_end_index; entry_block_index++)
                    entry_bytes += streams[stream_index].bytes_in_block.data[entry_block_index];
            }
            for (size_t entry_block_index = current_block_index; entry_block_index < entry_block_end_index_unadjusted; entry_block_index++)
                entry_samples += streams[0].samples_in_block.data[entry_block_index];

            entry_bytes += block_header_bytes * (entry_block_end_index - entry_block_start_index);

            current_block_index = entry_block_end_index + 1;
            UInt64ArrayAdd(&seek_table_bytes, entry_bytes);
            UInt64ArrayAdd(&seek_table_samples, entry_samples);
        }

        seek_table_entries = seek_table_samples.count;
    }

    // find the shift for the sample counts - they're likely all multiples of 64, possibly larger.
    uint8_t seek_table_samples_shift_bits = GenerateSeekTable ? 255 : 0;
    for (size_t entry_index = 0; entry_index < seek_table_entries; entry_index++)
    {
        uint8_t shift_bits = GetBitsToShift(seek_table_samples.data[entry_index]);
        if (shift_bits < seek_table_samples_shift_bits)
            seek_table_samples_shift_bits = shift_bits;
    }

    // NOTE: Tried to subtract off the min seek table size and it didn't really save much.
    SeekTableInfo seek_table_info;
    seek_table_info.construct(MemAlloc, MemFree);
    BitContainer encoded_residuals;
    encoded_residuals.construct(MemAlloc, MemFree);

    // We do a linear regression on the sample and byte locations of the seek table entries,
    // then encode the residuals.
    {
        UInt64Array sample_positions;
        UInt64Array byte_positions;
        sample_positions.construct(MemAlloc, MemFree);
        byte_positions.construct(MemAlloc, MemFree);

        LinearRegressionDeltasToBits(&sample_positions, seek_table_samples_shift_bits, seek_table_samples.data, seek_table_entries, &seek_table_info.sample_line, &seek_table_info.sample_bits_per_entry);
        LinearRegressionDeltasToBits(&byte_positions, 0, seek_table_bytes.data, seek_table_entries, &seek_table_info.byte_line, &seek_table_info.byte_bits_per_entry);

        // Encode the residuals.
        int64_t index_avg = seek_table_entries / 2;
        for (size_t index = 0; index < seek_table_entries; index++)
        {
            //printf("%zd - %lld %lld\n", index, sample_positions.data[index] << seek_table_samples_shift_bits, byte_positions.data[index]);

            int64_t predicted_sample = seek_table_info.sample_line.intercept + (seek_table_info.sample_line.slope_numerator * ((int64_t)index - index_avg)) / seek_table_info.sample_line.slope_denominator;
            int64_t unbiased_sample_error = sample_positions.data[index] - predicted_sample;
            int64_t biased_sample_error = unbiased_sample_error - seek_table_info.sample_line.bias;

            int64_t predicted_byte = seek_table_info.byte_line.intercept + (seek_table_info.byte_line.slope_numerator * ((int64_t)index - index_avg)) / seek_table_info.byte_line.slope_denominator;
            int64_t unbiased_byte_error = byte_positions.data[index] - predicted_byte;
            int64_t biased_byte_error = unbiased_byte_error - seek_table_info.byte_line.bias;

            BitContainerPut(&encoded_residuals, biased_sample_error, seek_table_info.sample_bits_per_entry);
            BitContainerPut(&encoded_residuals, biased_byte_error, seek_table_info.byte_bits_per_entry);
        }
        BitContainerFlush(&encoded_residuals);

        sample_positions.destroy();
        byte_positions.destroy();

        if (false) // decode-side verification, enable when debugging the seek table math.
        {
            // Now, make sure it worked!
            uint64_t current_sample_sum = 0;
            uint64_t current_byte_sum = 0;
            for (int64_t entry_index = 0; entry_index < (int64_t)seek_table_entries; entry_index++)
            {
                int64_t avg_index = seek_table_entries / 2;

                int64_t estimated_seek_table = seek_table_info.sample_line.intercept + (entry_index - avg_index) * seek_table_info.sample_line.slope_numerator / seek_table_info.sample_line.slope_denominator;

                uint64_t biased_error = BitContainerExtract(&encoded_residuals, (seek_table_info.sample_bits_per_entry + seek_table_info.byte_bits_per_entry) * entry_index, seek_table_info.sample_bits_per_entry);
                int64_t unbiased_error = biased_error + seek_table_info.sample_line.bias;
                uint64_t sample_result = (estimated_seek_table + unbiased_error) << seek_table_samples_shift_bits;

                estimated_seek_table = seek_table_info.byte_line.intercept + (entry_index - avg_index) * seek_table_info.byte_line.slope_numerator / seek_table_info.byte_line.slope_denominator;

                biased_error = BitContainerExtract(&encoded_residuals, (seek_table_info.sample_bits_per_entry + seek_table_info.byte_bits_per_entry) * entry_index + seek_table_info.sample_bits_per_entry, seek_table_info.byte_bits_per_entry);
                unbiased_error = biased_error + seek_table_info.byte_line.bias;
                uint64_t byte_result = estimated_seek_table + unbiased_error;

                if (byte_result != current_byte_sum ||
                    sample_result != current_sample_sum)
                {
                    // error!
                    printf("miss\n");
                }

                current_byte_sum += seek_table_bytes.data[entry_index];
                current_sample_sum += seek_table_samples.data[entry_index];
            }
        }
    }

    size_t seek_table_header_bytes = GenerateSeekTable ? sizeof(RadASeekTableHeader) : 0;
    size_t packed_seek_table_bytes = encoded_residuals.container.count * sizeof(uint64_t);

    size_t bytes_to_data = sizeof(RadAFileHeader);
    bytes_to_data += encoder_total_size;
    bytes_to_data += packed_seek_table_bytes;
    bytes_to_data += seek_table_header_bytes;

    uint64_t output_file_size = bytes_to_data + encoded_stream.total_bytes;

    if (encoder_total_size >= 65535)
        Result = RADA_COMPRESS_ERROR_SIZE;
    if (output_file_size > ~0U)
        Result = RADA_COMPRESS_ERROR_SIZE;
    if (bytes_to_data + bytes_for_initial_data > ~0U)
        Result = RADA_COMPRESS_ERROR_SIZE;

    if (Result == RADA_COMPRESS_SUCCESS)
    {
        RadAFileHeader Header = {};
        Header.tag = 'RADA';
        Header.version = 1;
        Header.channels = (U8)WavChannels;
        Header.rada_header_bytes = (uint16_t)encoder_total_size;
        Header.frame_count = SamplesPerChannel;
        Header.bits_for_seek_table_bytes = seek_table_info.byte_bits_per_entry;
        Header.shift_bits_for_seek_table_samples = seek_table_samples_shift_bits;
        Header.bits_for_seek_table_samples = seek_table_info.sample_bits_per_entry;
        Header.seek_table_entry_count = (uint16_t)seek_table_entries;

        switch (WavRate)
        {
        case 24000: Header.sample_rate = ERadASampleRate::Rate_24000; break;
        case 32000: Header.sample_rate = ERadASampleRate::Rate_32000; break;
        case 44100: Header.sample_rate = ERadASampleRate::Rate_44100; break;
        case 48000: Header.sample_rate = ERadASampleRate::Rate_48000; break;
        default: sanity(0); // encoding would have failed before here on a bad rate.
        }

        sanity(packed_seek_table_bytes == 8 * (align64((seek_table_info.byte_bits_per_entry + seek_table_info.sample_bits_per_entry) * seek_table_entries) / 64));

        // Save off the header byte count + the initial data chunk size.
        Header.bytes_for_first_decode = (uint32_t)(bytes_to_data + bytes_for_initial_data);

        //printf("seek table total bytes: %zd\n", packed_seek_table_bytes);

        Header.file_size = output_file_size;

        uint32_t block_header_size = 1;
        if (StreamCount > 1)
            block_header_size += 2;
        Header.max_block_size = (uint16_t)(block_header_size + max_compressed_size);

        // File layout:
        // [FileHeader]
        // [SeekTableHeader] (only when a seek table was generated)
        // Nx[EncoderHeader]
        // SeekTableSampleDeltas,SeekTableByteOffsets
        // Blocks

        // Create the actual output buffer.
        uint8_t* Output = (uint8_t*)MemAlloc(Header.file_size);
        if (Output == 0)
        {
            Result = RADA_COMPRESS_ERROR_ALLOCATION;
        }
        else
        {
            uint8_t* Current = Output;
            memcpy(Current, &Header, sizeof(RadAFileHeader));
            Current += sizeof(RadAFileHeader);

            if (GenerateSeekTable)
            {
                RadASeekTableHeader seek_table_header = seek_table_info.get_header();
                memcpy(Current, &seek_table_header, sizeof(RadASeekTableHeader));
                Current += sizeof(RadASeekTableHeader);
            }

            for (uint8_t stream_index = 0; stream_index < StreamCount; stream_index++)
            {
                memcpy(Current, streams[stream_index].encoder_header_buffer, streams[stream_index].header_size);
                Current += streams[stream_index].header_size;
            }

            memcpy(Current, encoded_residuals.container.data, packed_seek_table_bytes);
            Current += packed_seek_table_bytes;

            Current = MemBufferWriteBuffer(&encoded_stream, Current);

            sanity(Current == Output + Header.file_size);

            *OutData = Output;
            *OutDataLen = Header.file_size;

            Result = RADA_COMPRESS_SUCCESS;
        }
    }

    encoded_residuals.destroy();
    seek_table_info.destroy();
    seek_table_bytes.destroy();
    seek_table_samples.destroy();

    encoded_stream.destroy();
    for (S32 i = 0; i < StreamCount; i++)
    {
        streams[i].destroy();
    }
    MemFree(streams);

    return Result;
}
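
// Illustrative sketch (not built): driving the encoder end to end. The input is
// interleaved 16-bit PCM; "my_alloc"/"my_free" stand in for whatever allocator
// pair the caller owns, and the parameter choices here are just examples.
#if 0
#include <stdlib.h>

static void* my_alloc(uintptr_t bytes) { return malloc(bytes); }
static void my_free(void* ptr) { free(ptr); }

static bool EncodeExample(const int16_t* pcm_stereo, uint64_t frame_count)
{
    void* encoded = nullptr;
    uint64_t encoded_len = 0;
    uint8_t err = EncodeRadAFile(
        (void*)pcm_stereo, frame_count * 2 * sizeof(int16_t),
        48000, 2,   // rate, channels
        5,          // quality, 1..9
        0,          // seamless looping off
        1, 1024,    // generate a seek table with up to 1024 entries
        my_alloc, my_free,
        &encoded, &encoded_len);
    if (err != RADA_COMPRESS_SUCCESS)
    {
        printf("encode failed: %s\n", RadAErrorString(err));
        return false;
    }
    // ... write encoded/encoded_len to disk ...
    my_free(encoded);
    return true;
}
#endif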

const char* RadAErrorString(uint8_t InError)
{
    switch (InError)
    {
    case RADA_COMPRESS_SUCCESS: return "No Error";
    case RADA_COMPRESS_ERROR_CHANS: return "Invalid channel count supplied (max 32 channels)";
    case RADA_COMPRESS_ERROR_SAMPLES: return "No samples provided";
    case RADA_COMPRESS_ERROR_RATE: return "Invalid sample rate provided: only 48khz, 44.1khz, 32khz, and 24khz are allowed.";
    case RADA_COMPRESS_ERROR_QUALITY: return "Invalid quality value specified: must be within 1 and 9, inclusive.";
    case RADA_COMPRESS_ERROR_ALLOCATORS: return "No allocators provided.";
    case RADA_COMPRESS_ERROR_OUTPUT: return "No output provided.";
    case RADA_COMPRESS_ERROR_SEEKTABLE: return "Invalid seek table size requested.";
    case RADA_COMPRESS_ERROR_SIZE: return "Output file is too big to fit in the container.";
    case RADA_COMPRESS_ERROR_ENCODER: return "Internal encoder error (please report!)";
    case RADA_COMPRESS_ERROR_ALLOCATION: return "Allocator returned null pointer.";
    }
    return "Invalid RadA Error Code";
}

uint32_t RadAGetBuildVersion()
{
    return RADA_BUILD_VERSION;
}

#if 0

/* to build this fuzzed:
 * "C:\Program Files\LLVM\bin\clang" -v -O0 -o rada_fuzz.exe -g rada_encode.cpp -DRADAUDIO_WRAP=UERA
 *     -Ipath\to\radrtl -Ipath\to\radaudio_encoder.h
 *     -fsanitize=address,fuzzer path/to/radaudio_encoder_win64.lib
 */
#include <malloc.h>

struct fuzzer_input
{
    uint32_t Rate;
    uint8_t Chans;
    uint8_t Quality;
    uint8_t Seamless;
    uint8_t GenTable;
    uint16_t GenTableMaxEntries;
};

static int counter = 0;
static void* AllocThunk(uintptr_t ByteCount) { counter++; return malloc(ByteCount); }
static void FreeThunk(void* Ptr) { if (Ptr) counter--; free(Ptr); }

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
{
    if (Size <= sizeof(fuzzer_input))
        return 0;
    if (Size > 0xFFFFFFFFU)
        return 0;

    fuzzer_input* input = (fuzzer_input*)Data;

    void* CompressedData = 0;
    uint64_t CompressedLen = 0;

    EncodeRadAFile((void*)(Data + sizeof(fuzzer_input)), Size - sizeof(fuzzer_input),
        input->Rate, input->Chans, input->Quality, input->Seamless, input->GenTable, input->GenTableMaxEntries,
        AllocThunk, FreeThunk,
        &CompressedData, &CompressedLen);

    FreeThunk(CompressedData);

    sanity(counter == 0);

    return 0;
}
#endif