// Copyright Epic Games Tools, LLC. All Rights Reserved. #ifndef INCLUDE_RADAUDIO_ENCODER_H #define INCLUDE_RADAUDIO_ENCODER_H #include "egttypes.h" #include // for feedback to user. actual rate varies, and is sample rate dependent // the 0th value is support but should never be used; it allows you to hear what the native artifacts sound like static const int approximate_data_rate_for_quality_setting_in_kilobits[2][10] = { { 30, 36, 39, 43, 48, 55, 65, 85,125,205 }, // mono (@TODO: measure these, currently just ~half of stereo) { 55, 64, 72, 80, 90,100,120,160,240,400 }, // stereo }; // encode context typedef struct { // placeholder fields to make it large enough RAD_U64 dummy[200]; U8 buffer[5000]; } radaudio_encoder; #define RADAUDIOENC_AT_EOF 0 #define RADAUDIOENC_INSUFFICIENT_BUFFER -1 #define RADAUDIOENC_INTERNAL_ERROR -2 #define RADAUDIOENC_MAX_OUTPUT_SAMPLES_PER_CHANNEL_PER_CHUNK 1024 RADDEFSTART // RADAUDIO compatibility version - if these are the same the exports are named the same so we expect // that if the linker selects a different copy they all work. #define RADAUDIO_ENC_LIBRARY_VERSION 1 #ifndef RR_STRING_JOIN3 #define RR_STRING_JOIN3(arg1, arg2, arg3) RR_STRING_JOIN_DELAY3(arg1, arg2, arg3) #define RR_STRING_JOIN_DELAY3(arg1, arg2, arg3) RR_STRING_JOIN_IMMEDIATE3(arg1, arg2, arg3) #define RR_STRING_JOIN_IMMEDIATE3(arg1, arg2, arg3) arg1 ## arg2 ## arg3 #endif #ifdef RADAUDIO_WRAP #define RADAUDIO_ENC_NAME(name) RR_STRING_JOIN3(RADAUDIO_WRAP, name##_, RADAUDIO_ENC_LIBRARY_VERSION ) #else #define RADAUDIO_ENC_NAME(name) RR_STRING_JOIN( name##_, RADAUDIO_ENC_LIBRARY_VERSION ) #endif #define radaudio_encode_create RADAUDIO_ENC_NAME(radaudio_encode_create) #define radaudio_encode_block RADAUDIO_ENC_NAME(radaudio_encode_block) #define radaudio_encode_create_internal RADAUDIO_ENC_NAME(radaudio_encode_create_internal) #define RadAudioCompressGetProfileData RADAUDIO_ENC_NAME(RadAudioCompressGetProfileData) #define radaudio_determine_preferred_next_block_length RADAUDIO_ENC_NAME(radaudio_determine_preferred_next_block_length) #define radaudio_determine_preferred_first_block_length RADAUDIO_ENC_NAME(radaudio_determine_preferred_first_block_length) #define radaudio_encode_block_ext RADAUDIO_ENC_NAME(radaudio_encode_block_ext) // radaudio_encode_create() // // Creates a new RADaudio encoder. // // Pass in an uninitialized radaudio_encoder structure, space to output the stream header, // and then a description of the stream and the quality settings desired. // // - `num_channels` must be 1 or 2 // - `sample_rate` must be 48000, 44100, 32000, or 24000 // - if the automated quality measurement is to be trusted, then 32K and 24K encodings are // lower quality at same size than converting the files to 44.1K or 48K // (but ogg vorbis shows same behavior, so maybe automatic quality measurement is wrong for 32K and 24K) // - `quality` should be 1..9. // - larger values are higher quality and larger files // - 5 is recommended setting; it gives comparable results to vorbis 96 kbps // - going above 5 gives larger, higher quality files; could default to 6 if paranoid // - settings below 5 have not been tuned, as they are not expected to be "transparent" quality // - they are only provided for people who want to experiment and explore what it sounds like // - `header` is a buffer to store the header // - supply RADAUDIO_STREAM_HEADER_MAX // - the actual size of the header is returned // // returns size of header on success, otherwise returns 0 // // there is no destroy() function as the structure holds no other resources // just free the memory you passed in as necessary #define RADAUDIO_STREAM_HEADER_MAX 128 RADDEFFUNC size_t radaudio_encode_create( radaudio_encoder *rae, U8 header[RADAUDIO_STREAM_HEADER_MAX], int num_channels, int sample_rate, int quality, U32 flags); #define RADAUDIO_ENC_FLAG_improve_seamless_loop 1 // actually boosts precision of short block low-frequency coefficients // radaudio_encode_block() // // Generate a block of audio data in RADaudio format. // // Pass in a radaudio_encoder initialized by radaudio_encode_init. // // - `encode_buffer` : storage where the encoder will write compressed data // - `encode_buffer_max` : size of encode buffer // - `input` : an array of audio samples // - values from [-1..1] (can exceed this range, but will distort if you go too far) // - for stereo input, interleave the channels in a single array // - `input_len` : the number of mono samples or stereo sample pairs in the input // - `offset` : the current encoding offset within the input buffer (in mono samples or stereo pairs) // - initialize to 0 for the first call // - the encoder will encode samples in the range up to [offset-1024, offset+1024) // - the encoder will potentially look at samples up to [offset-1024, offset+2047) // - the encoder will update `*offset` to reflect the number of samples partially encoded // - samples up to `*offset` are only fully encoded after the NEXT encoder step // // returns: // n > 0 number of bytes of data output for this block // RADAUDIOENC_AT_EOF at end of stream, previously generated block is last block // RADAUDIOENC_INSUFFICIENT_BUFFER if output buffer isn't large enough // RADAUDIOENC_INTERNAL_ERROR an internal error occurred. this is probably a bug RADDEFFUNC int radaudio_encode_block(radaudio_encoder *es, float *input, size_t input_len, // in samples (stereo pairs count as one) size_t *offset , // in samples (stereo pairs count as one) unsigned char *encode_buffer, // recommend MAX_ENCODED_BLOCK_SIZE size_t encode_buffer_size); // In normal use, just load an audio file, convert to float, and pass the obvious // values to input / input_len, and allow the encoder to control the value of `*offset`. // // It is possible to use this API to stream input data by controlling // `input`, `input_len, and `*offset`. // // - you can freely manipulate the value of `*offset`. // - the encoder will look at samples i in [*offset-1024, *offset+1024) // - with the caveat that 0 <= i < input_len (samples outside this range are treated as 0, and reaching input_len ends the stream) // - The initial value of `*offset` must be 0. // // So, you could do something like: // // - Keep track of the actual offset within the file where `*offset` indexes; call this `fo`, set to 0. // - Iterate: // - set `*offset` to 1024, so [input..input+2048) is used, aka input[offset-1024, offset+1024) // - set `input_len` = subtract `fo` from the actual length of the file in mono samples or stereo pairs, add *offset // - load the file data from [fo-1024,fo+1024) into input[0..2048) // - encode a block // - update `fo` by the delta change in `*offset` // - note that the encoder will read the overlapping samples in each pair of successive blocks // - it is crucial that those samples be identical each time // - the naive approach, and the above strategy, both make sure this happens #define MAX_ENCODED_BLOCK_SIZE 5000 // 5000 plus slop due to forgetting to count stuff // upper bound on largest size; in reality, 400 Kbps is less than 2KB per block // // largest possible block: // max header size => 8 bytes // 48 largest-encoding band exponents@11b => 480 + 48 bits // 48 largest-encoding band mantissa @16b => 96 bytes // 112 largest-encoding subbands @11b => 1120 + 112 bits // 1536 run-length entries of 0 => 1536 bits // run-length end of stream code @7b => 7 bits // 512 1-bit coefficient locations w/huff => 576 bits // 1024 signals for 1-byte coefficients@11b => 10240 + 1024 bits // 2048 coefficients encoded 11 bits => 20480 + 2048 bits // 3 streams of padding => 3 bytes // 32320 + 5351 bits + 107 bytes // == 4816 bytes typedef enum { RADAUDIO_BLOCKTYPE_short = 1, RADAUDIO_BLOCKTYPE_long = 2, // radaudio_encode_block_ext only RADAUDIO_BLOCKTYPE_default = 0 // use transient detector like normal } radaudio_blocktype; typedef struct { radaudio_blocktype force_first_blocktype; // only used on first call radaudio_blocktype force_next_blocktype; // used on all calls // you can achieve seamless looping using the following fields: // use this as the data to pad the beginning and/or end of the stream with, instead of 0s F32 *padding; size_t padding_len; // in samples (stereo pairs count as one). Set to 0 to disable. // // At the beginning and end of the stream, based on offset going from 0..input_len, the above // data will be used to "pad" the audio stream instead of the 0 value used by default. // // - at the beginning of the stream, the END of the padding block will be used, pushed up against the start of the stream audio // - at the end of the stream, the BEGINNING of the padding block will be used, pushed right up against the end of the stream audio // // for best results when looping, supply 2048 samples, or the entire stream if the stream is shorter than 2048 samples. // the above logic should do the right thing if the entire stream is very small, even smaller than a block // // In theory the logic should be something like this (assuming the stream is > 2048 samples): // if (offset < 1024) // point padding at end of stream // if (offset > length-2048) // point padding at beginning of stream // // In theory the padding data should only be needed in the first block and the last two blocks, but // in debugging I actually saw the data accessed in the last 3 blocks. I didn't investigate since the // output was correct, but I suspect when this happened it was on short/long block transitions in which // the data in the third-to-last-block was used but windowed to 0 anyway, so it wouldn't matter if it // wasn't available. Also because it requires a short/long transition the above logic should cover the // case anyway. But maybe I'm misunderstanding and the end logic might need to be triggered further from the end--stb } radaudio_encode_info; RADDEFFUNC int radaudio_encode_block_ext(radaudio_encoder *es, float *input, size_t input_len, // in samples (stereo pairs count as one) size_t *offset , // in samples (stereo pairs count as one) unsigned char *encode_buffer, // recommend MAX_ENCODED_BLOCK_SIZE size_t encode_buffer_size, radaudio_encode_info *info); // determine the preferred blocktype for the first block in the stream; // pass this in as radaudio_encode_info.force_first_blocktype. // you can specify a different value than this function returns by overriding radaudio_encode_info, // but specifying long when short is requested will decrease quality, and // specifying short when long is requested will increase rate RADDEFFUNC radaudio_blocktype radaudio_determine_preferred_first_block_length(radaudio_encoder *rae, F32 *input, size_t input_len); // determine the preferred blocktype for the next block in the stream; // this is stateful; it assumes `offset` points into the middle of the block to encode, as described for encode_block // you can specify a different value than this function returns by overriding radaudio_encode_info, // but specifying long when short is requested will decrease quality, and // specifying short when long is requested will increase rate RADDEFFUNC int radaudio_determine_preferred_next_block_length(radaudio_encoder *rae, radaudio_blocktype firsttype, F32 *input, size_t input_len, size_t offset); RADDEFEND #endif//INCLUDE_RADAUDIO_ENCODER_H