// Copyright Epic Games Tools, LLC. All Rights Reserved.
|
|
#ifndef INCLUDE_RADAUDIO_ENCODER_H
|
|
#define INCLUDE_RADAUDIO_ENCODER_H
|
|
|
|
#include "egttypes.h"
|
|
#include <stddef.h>
|
|
|
|
// Approximate data rate in kilobits (presumably per second) for each quality setting,
// for feedback to the user. The actual rate varies, and is sample rate dependent.
//
// Rows are the channel configuration (mono, then stereo); columns are quality settings 0..9.
// The 0th value is supported but should never be used; it allows you to hear what the
// native artifacts sound like.
static const int approximate_data_rate_for_quality_setting_in_kilobits[2][10] =
{
   { 30, 36, 39, 43, 48, 55, 65, 85,125,205 }, // mono (@TODO: measure these, currently just ~half of stereo)
   { 55, 64, 72, 80, 90,100,120,160,240,400 }, // stereo
};
|
|
|
|
// encode context
|
|
typedef struct
|
|
{
|
|
// placeholder fields to make it large enough
|
|
RAD_U64 dummy[200];
|
|
U8 buffer[5000];
|
|
} radaudio_encoder;
|
|
|
|
// Special (non-positive) return values of radaudio_encode_block().
// Negative values are parenthesized so the macros are safe inside any expression.
#define RADAUDIOENC_AT_EOF                 0
#define RADAUDIOENC_INSUFFICIENT_BUFFER  (-1)
#define RADAUDIOENC_INTERNAL_ERROR       (-2)
#define RADAUDIOENC_MAX_OUTPUT_SAMPLES_PER_CHANNEL_PER_CHUNK 1024
|
|
|
|
RADDEFSTART
|
|
|
|
// RADAUDIO compatibility version. The exported symbol names carry this number, so two
// copies of the library with the same version export the same names; we expect that
// whichever copy the linker selects, they all work.
#define RADAUDIO_ENC_LIBRARY_VERSION 1
|
|
|
|
// Three-way token pasting. The intermediate DELAY level lets macro arguments be fully
// expanded before `##` is applied in the IMMEDIATE level.
#ifndef RR_STRING_JOIN3
#define RR_STRING_JOIN3(arg1, arg2, arg3)            RR_STRING_JOIN_DELAY3(arg1, arg2, arg3)
#define RR_STRING_JOIN_DELAY3(arg1, arg2, arg3)      RR_STRING_JOIN_IMMEDIATE3(arg1, arg2, arg3)
#define RR_STRING_JOIN_IMMEDIATE3(arg1, arg2, arg3)  arg1 ## arg2 ## arg3
#endif
|
|
|
|
// RADAUDIO_ENC_NAME(name) builds the exported symbol name: `name`, an underscore, and
// RADAUDIO_ENC_LIBRARY_VERSION, with RADAUDIO_WRAP prepended when that macro is defined.
// RR_STRING_JOIN is not defined in this header; presumably it comes from egttypes.h.
#ifdef RADAUDIO_WRAP
#define RADAUDIO_ENC_NAME(name) RR_STRING_JOIN3(RADAUDIO_WRAP, name##_, RADAUDIO_ENC_LIBRARY_VERSION )
#else
#define RADAUDIO_ENC_NAME(name) RR_STRING_JOIN( name##_, RADAUDIO_ENC_LIBRARY_VERSION )
#endif
|
|
|
|
// Map each public entry point to its versioned (and optionally wrapped) exported name.
#define radaudio_encode_create                           RADAUDIO_ENC_NAME(radaudio_encode_create)
#define radaudio_encode_block                            RADAUDIO_ENC_NAME(radaudio_encode_block)
#define radaudio_encode_create_internal                  RADAUDIO_ENC_NAME(radaudio_encode_create_internal)
#define RadAudioCompressGetProfileData                   RADAUDIO_ENC_NAME(RadAudioCompressGetProfileData)
#define radaudio_determine_preferred_next_block_length   RADAUDIO_ENC_NAME(radaudio_determine_preferred_next_block_length)
#define radaudio_determine_preferred_first_block_length  RADAUDIO_ENC_NAME(radaudio_determine_preferred_first_block_length)
#define radaudio_encode_block_ext                        RADAUDIO_ENC_NAME(radaudio_encode_block_ext)
|
|
|
|
// radaudio_encode_create()
|
|
//
|
|
// Creates a new RADaudio encoder.
|
|
//
|
|
// Pass in an uninitialized radaudio_encoder structure, space to output the stream header,
|
|
// and then a description of the stream and the quality settings desired.
|
|
//
|
|
// - `num_channels` must be 1 or 2
|
|
// - `sample_rate` must be 48000, 44100, 32000, or 24000
|
|
// - if the automated quality measurement is to be trusted, then 32K and 24K encodings are
|
|
// lower quality at same size than converting the files to 44.1K or 48K
|
|
// (but ogg vorbis shows same behavior, so maybe automatic quality measurement is wrong for 32K and 24K)
|
|
// - `quality` should be 1..9.
|
|
// - larger values are higher quality and larger files
|
|
// - 5 is recommended setting; it gives comparable results to vorbis 96 kbps
|
|
// - going above 5 gives larger, higher quality files; could default to 6 if paranoid
|
|
// - settings below 5 have not been tuned, as they are not expected to be "transparent" quality
|
|
// - they are only provided for people who want to experiment and explore what it sounds like
|
|
// - `header` is a buffer to store the header
|
|
// - supply RADAUDIO_STREAM_HEADER_MAX
|
|
// - the actual size of the header is returned
|
|
//
|
|
// returns size of header on success, otherwise returns 0
|
|
//
|
|
// there is no destroy() function as the structure holds no other resources
|
|
// just free the memory you passed in as necessary
|
|
#define RADAUDIO_STREAM_HEADER_MAX 128
|
|
RADDEFFUNC size_t radaudio_encode_create( radaudio_encoder *rae,
|
|
U8 header[RADAUDIO_STREAM_HEADER_MAX],
|
|
int num_channels,
|
|
int sample_rate,
|
|
int quality,
|
|
U32 flags);
|
|
|
|
#define RADAUDIO_ENC_FLAG_improve_seamless_loop 1 // actually boosts precision of short block low-frequency coefficients
|
|
|
|
|
|
// radaudio_encode_block()
|
|
//
|
|
// Generate a block of audio data in RADaudio format.
|
|
//
|
|
// Pass in a radaudio_encoder initialized by radaudio_encode_init.
|
|
//
|
|
// - `encode_buffer` : storage where the encoder will write compressed data
|
|
// - `encode_buffer_max` : size of encode buffer
|
|
// - `input` : an array of audio samples
|
|
// - values from [-1..1] (can exceed this range, but will distort if you go too far)
|
|
// - for stereo input, interleave the channels in a single array
|
|
// - `input_len` : the number of mono samples or stereo sample pairs in the input
|
|
// - `offset` : the current encoding offset within the input buffer (in mono samples or stereo pairs)
|
|
// - initialize to 0 for the first call
|
|
// - the encoder will encode samples in the range up to [offset-1024, offset+1024)
|
|
// - the encoder will potentially look at samples up to [offset-1024, offset+2047)
|
|
// - the encoder will update `*offset` to reflect the number of samples partially encoded
|
|
// - samples up to `*offset` are only fully encoded after the NEXT encoder step
|
|
//
|
|
// returns:
|
|
// n > 0 number of bytes of data output for this block
|
|
// RADAUDIOENC_AT_EOF at end of stream, previously generated block is last block
|
|
// RADAUDIOENC_INSUFFICIENT_BUFFER if output buffer isn't large enough
|
|
// RADAUDIOENC_INTERNAL_ERROR an internal error occurred. this is probably a bug
|
|
RADDEFFUNC int radaudio_encode_block(radaudio_encoder *es,
|
|
float *input,
|
|
size_t input_len, // in samples (stereo pairs count as one)
|
|
size_t *offset , // in samples (stereo pairs count as one)
|
|
unsigned char *encode_buffer, // recommend MAX_ENCODED_BLOCK_SIZE
|
|
size_t encode_buffer_size);
|
|
// In normal use, just load an audio file, convert to float, and pass the obvious
// values to input / input_len, and allow the encoder to control the value of `*offset`.
//
// It is possible to use this API to stream input data by controlling
// `input`, `input_len`, and `*offset`.
//
//   - you can freely manipulate the value of `*offset`.
//   - the encoder will look at samples i in [*offset-1024, *offset+1024)
//      - with the caveat that 0 <= i < input_len (samples outside this range are treated as 0, and reaching input_len ends the stream)
//   - The initial value of `*offset` must be 0.
//
// So, you could do something like:
//
//   - Keep track of the actual offset within the file where `*offset` indexes; call this `fo`, set to 0.
//   - Iterate:
//      - set `*offset` to 1024, so [input..input+2048) is used, aka input[offset-1024, offset+1024)
//      - set `input_len` to (actual length of the file in mono samples or stereo pairs) - `fo` + `*offset`
//      - load the file data from [fo-1024,fo+1024) into input[0..2048)
//      - encode a block
//      - update `fo` by the delta change in `*offset`
//   - note that the encoder will read the overlapping samples in each pair of successive blocks
//      - it is crucial that those samples be identical each time
//      - the naive approach, and the above strategy, both make sure this happens
|
|
|
|
// Recommended size in bytes for the encode buffer: the worst case computed below is
// 4816 bytes, padded to 5000 as slop due to forgetting to count stuff.
#define MAX_ENCODED_BLOCK_SIZE 5000
// upper bound on largest size; in reality, 400 Kbps is less than 2KB per block
//
// largest possible block:
//      max header size                          =>                        8 bytes
//   48 largest-encoding band exponents@11b      =>    480 +   48 bits
//   48 largest-encoding band mantissa @16b      =>                       96 bytes
//  112 largest-encoding subbands      @11b      =>   1120 +  112 bits
// 1536 run-length entries of 0                  =>           1536 bits
//      run-length end of stream code  @7b       =>              7 bits
//  512 1-bit coefficient locations w/huff       =>            576 bits
// 1024 signals for 1-byte coefficients@11b      =>  10240 + 1024 bits
// 2048 coefficients encoded 11 bits             =>  20480 + 2048 bits
//    3 streams of padding                       =>                        3 bytes
//                                                   32320 + 5351 bits + 107 bytes
//                                                   == 4816 bytes
|
|
|
|
// Block type, as reported by the radaudio_determine_preferred_*_block_length functions
// and as requested through radaudio_encode_info.
typedef enum
{
   RADAUDIO_BLOCKTYPE_short   = 1,
   RADAUDIO_BLOCKTYPE_long    = 2,

   // radaudio_encode_block_ext only
   RADAUDIO_BLOCKTYPE_default = 0  // use transient detector like normal
} radaudio_blocktype;
|
|
|
|
typedef struct
|
|
{
|
|
radaudio_blocktype force_first_blocktype; // only used on first call
|
|
radaudio_blocktype force_next_blocktype; // used on all calls
|
|
|
|
// you can achieve seamless looping using the following fields:
|
|
// use this as the data to pad the beginning and/or end of the stream with, instead of 0s
|
|
F32 *padding;
|
|
size_t padding_len; // in samples (stereo pairs count as one). Set to 0 to disable.
|
|
//
|
|
// At the beginning and end of the stream, based on offset going from 0..input_len, the above
|
|
// data will be used to "pad" the audio stream instead of the 0 value used by default.
|
|
//
|
|
// - at the beginning of the stream, the END of the padding block will be used, pushed up against the start of the stream audio
|
|
// - at the end of the stream, the BEGINNING of the padding block will be used, pushed right up against the end of the stream audio
|
|
//
|
|
// for best results when looping, supply 2048 samples, or the entire stream if the stream is shorter than 2048 samples.
|
|
// the above logic should do the right thing if the entire stream is very small, even smaller than a block
|
|
//
|
|
// In theory the logic should be something like this (assuming the stream is > 2048 samples):
|
|
// if (offset < 1024)
|
|
// point padding at end of stream
|
|
// if (offset > length-2048)
|
|
// point padding at beginning of stream
|
|
//
|
|
// In theory the padding data should only be needed in the first block and the last two blocks, but
|
|
// in debugging I actually saw the data accessed in the last 3 blocks. I didn't investigate since the
|
|
// output was correct, but I suspect when this happened it was on short/long block transitions in which
|
|
// the data in the third-to-last-block was used but windowed to 0 anyway, so it wouldn't matter if it
|
|
// wasn't available. Also because it requires a short/long transition the above logic should cover the
|
|
// case anyway. But maybe I'm misunderstanding and the end logic might need to be triggered further from the end--stb
|
|
} radaudio_encode_info;
|
|
|
|
RADDEFFUNC int radaudio_encode_block_ext(radaudio_encoder *es,
|
|
float *input,
|
|
size_t input_len, // in samples (stereo pairs count as one)
|
|
size_t *offset , // in samples (stereo pairs count as one)
|
|
unsigned char *encode_buffer, // recommend MAX_ENCODED_BLOCK_SIZE
|
|
size_t encode_buffer_size,
|
|
radaudio_encode_info *info);
|
|
|
|
// determine the preferred blocktype for the first block in the stream;
|
|
// pass this in as radaudio_encode_info.force_first_blocktype.
|
|
// you can specify a different value than this function returns by overriding radaudio_encode_info,
|
|
// but specifying long when short is requested will decrease quality, and
|
|
// specifying short when long is requested will increase rate
|
|
RADDEFFUNC radaudio_blocktype radaudio_determine_preferred_first_block_length(radaudio_encoder *rae,
|
|
F32 *input,
|
|
size_t input_len);
|
|
|
|
// determine the preferred blocktype for the next block in the stream;
|
|
// this is stateful; it assumes `offset` points into the middle of the block to encode, as described for encode_block
|
|
// you can specify a different value than this function returns by overriding radaudio_encode_info,
|
|
// but specifying long when short is requested will decrease quality, and
|
|
// specifying short when long is requested will increase rate
|
|
RADDEFFUNC int radaudio_determine_preferred_next_block_length(radaudio_encoder *rae,
|
|
radaudio_blocktype firsttype,
|
|
F32 *input,
|
|
size_t input_len,
|
|
size_t offset);
|
|
|
|
|
|
RADDEFEND
|
|
|
|
#endif//INCLUDE_RADAUDIO_ENCODER_H
|