230 lines
9.7 KiB
C++
230 lines
9.7 KiB
C++
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "DSP/VectorLinearResampler.h"
|
|
|
|
//
|
|
// There's a lot here that's about making sure we never have
|
|
// to convert to float in the loop. So we keep two advancing
|
|
// states - the position in INT space, and the fraction in
|
|
// VECTOR space. The difficulty is we need to mask off the
|
|
// upper bits as we advance the fraction. The trickery is
|
|
// to do this in the actual IEEE float representation. Since
|
|
// the most the step can advance is by 65535.0f / 65536.0f,
|
|
// we can place the addition operation in an area of the float
|
|
// space that prevents the exponent from changing in response
|
|
// to that addition. In this case, [2, 3). Addition can only
|
|
// ever change the highest mantissa bit, so we can just directly
|
|
// mask off that single bit (0xffbfffff).
|
|
//
|
|
// To do this, we have to take advantage of the mod math identity:
|
|
// (a + b) mod c == ((a mod c) + (b mod c)) mod c.
|
|
// so we precompute the frac steps, all modded off (0xffff), and the
|
|
// step advance vector, also modded off, then scale them all down to [0, 1).
|
|
//
|
|
// Now, getting the lerp fraction is an AND, and a SUB to get from [2,3) to
|
|
// [0, 1).
|
|
//
|
|
// On top of that, we use 64 bit loads to get the adjacent samples
|
|
// for lerping.
|
|
//
|
|
/**
 * Linearly resamples one channel of audio, stepping through the source in 16.16 fixed point.
 *
 * The read position starts at CurrentFrameFraction and advances by FixedPointSampleRate for
 * every output frame. Each output frame is a lerp between the source sample at the integer
 * part of the position and the sample immediately after it, weighted by the low 16 bits.
 *
 * @param OutputFramesNeeded	Number of frames to write to OutputFrames.
 * @param FixedPointSampleRate	Source advance per output frame, 16.16 fixed point (65536 == one source frame).
 * @param SourceFrames			Source samples. For every output frame, both the sample at the integer
 *								position and the one after it are read.
 * @param OutputFrames			Destination; receives OutputFramesNeeded samples.
 * @return The integer source position (position >> 16) of the last output frame produced, or 0 when
 *		   OutputFramesNeeded is 0. On return CurrentFrameFraction holds the start position of the next
 *		   run expressed relative to that source frame.
 *		   NOTE(review): presumably the caller advances its source pointer by this amount - confirm against callers.
 *
 * NOTE(review): all position math is uint32, so CurrentFrameFraction + OutputFramesNeeded * FixedPointSampleRate
 * has to fit in 32 bits; nothing here checks that.
 */
uint32 Audio::FVectorLinearResampler::ResampleMono(uint32 OutputFramesNeeded, uint32 FixedPointSampleRate, float const* SourceFrames, float* OutputFrames)
{
	// With zero frames requested, (OutputFramesNeeded - 1) below would wrap around and we would
	// return a bogus source position and write a bogus CurrentFrameFraction. Nothing is produced
	// and nothing is consumed, so leave the state alone.
	if (OutputFramesNeeded == 0)
	{
		return 0;
	}

	// we CANT mask off the upper bits, as it's possible the fraction could
	// jump an entire sample and we need to retain that offset.
	const uint32 FarthestPositionFixed = CurrentFrameFraction + (OutputFramesNeeded - 1) * FixedPointSampleRate;
	const uint32 NextRunStartPositionFixed = FarthestPositionFixed + FixedPointSampleRate;

	float const* LeftSamples = SourceFrames;
	uint32 OutputFrame = 0;
	uint32 CurrentFrameFixed = CurrentFrameFraction;

#if (PLATFORM_ENABLE_VECTORINTRINSICS || PLATFORM_ENABLE_VECTORINTRINSICS_NEON) // SIMD path.
	{
		// Only the low 16 bits feed the lerp fraction; the integer part is tracked in CurrentFrameFixed.
		float CurrentFrameFractionFloat = (float)(CurrentFrameFraction & 0xffff);

		VectorRegister4Float ScaleVec = VectorSetFloat1(1.0f / 65536.0f);
		VectorRegister4Float TwoVec = VectorSetFloat1(2.0f);
		VectorRegister4Float ThreeVec = VectorSetFloat1(3.0f);

		// Per-lane fractional offsets for the four frames of one iteration, each already wrapped to 16 bits.
		VectorRegister4Float CurFracStepVec = VectorSet(
			(float)((0 * FixedPointSampleRate) & 0xffff),
			(float)((1 * FixedPointSampleRate) & 0xffff),
			(float)((2 * FixedPointSampleRate) & 0xffff),
			(float)((3 * FixedPointSampleRate) & 0xffff));

		// Starting fractions, scaled from 16 bit integers to floats and biased by 2. The sum of two
		// wrapped values can exceed 1.0 here; the mask at the top of the loop wraps it back.
		VectorRegister4Float CurFracVec = VectorSetFloat1(CurrentFrameFractionFloat);
		CurFracVec = VectorAdd(CurFracVec, CurFracStepVec);
		CurFracVec = VectorMultiply(CurFracVec, ScaleVec);
		CurFracVec = VectorAdd(CurFracVec, TwoVec);

		// Clears the top mantissa bit: maps [3, 4) back onto [2, 3), i.e. the fraction modulo 1.
		VectorRegister4Float MaskVec = MakeVectorRegisterFloat(0xffbfffff, 0xffbfffff, 0xffbfffff, 0xffbfffff);

		// Fractional advance for a whole iteration (four frames), wrapped to 16 bits and scaled to [0, 1).
		VectorRegister4Float StepVec = VectorSetFloat1((float)((FixedPointSampleRate * 4) & 0xffff));
		StepVec = VectorMultiply(StepVec, ScaleVec);

		// Largest multiple of four not exceeding the request; the scalar loop below handles the rest.
		uint32 OutputFramesNeededSIMD = OutputFramesNeeded & ~3;

		for (; OutputFrame < OutputFramesNeededSIMD; OutputFrame += 4)
		{
			// Integer source indices for the four output frames of this iteration.
			const uint32 SourceOffsets[4] = {
				CurrentFrameFixed >> 16,
				(CurrentFrameFixed + FixedPointSampleRate) >> 16,
				(CurrentFrameFixed + FixedPointSampleRate + FixedPointSampleRate) >> 16,
				(CurrentFrameFixed + FixedPointSampleRate + FixedPointSampleRate + FixedPointSampleRate) >> 16
			};

			// [0, 0+1, 1, 1+1]
			VectorRegister4Float LeftSamples01 = VectorLoadTwoPairsFloat(LeftSamples + SourceOffsets[0], LeftSamples + SourceOffsets[1]);
			// [2, 2+1, 3, 3+1]
			VectorRegister4Float LeftSamples23 = VectorLoadTwoPairsFloat(LeftSamples + SourceOffsets[2], LeftSamples + SourceOffsets[3]);

			// [0, 1, 2, 3]
			// [0+1, 1+1, 2+1, 3+1]
			VectorRegister4Float LeftSamplesFrom, LeftSamplesTo;
			VectorDeinterleave(LeftSamplesFrom, LeftSamplesTo, LeftSamples01, LeftSamples23);

			// our lerp vector is CurFracVec, masking off the 1 bit in the mantissa, subtract 2.
			CurFracVec = VectorBitwiseAnd(CurFracVec, MaskVec);
			VectorRegister4Float LerpFactor = VectorSubtract(CurFracVec, TwoVec);
			// 3 - (2 + f) == 1 - f
			VectorRegister4Float OneMinusLerpFactor = VectorSubtract(ThreeVec, CurFracVec);

			// Advance the fraction for the next iteration; it gets masked at the top of the loop.
			CurFracVec = VectorAdd(CurFracVec, StepVec);
			VectorRegister4Float OutputVec = VectorAdd(VectorMultiply(LeftSamplesFrom, OneMinusLerpFactor), VectorMultiply(LeftSamplesTo, LerpFactor));

			VectorStore(OutputVec, OutputFrames);

			CurrentFrameFixed += 4 * FixedPointSampleRate;
			OutputFrames += 4;
		}
	}
#endif

	// Remnants: the frames left over after the SIMD loop, or every frame when there is no SIMD path.
	for (; OutputFrame < OutputFramesNeeded; OutputFrame++)
	{
		uint32 SourceOffset = CurrentFrameFixed >> 16;
		uint32 CurFrac = CurrentFrameFixed & 0xffff;

		float LerpFactor = (float)(CurFrac) * (1.0f / 65536.0f);

		CurrentFrameFixed += FixedPointSampleRate;

		float Sample1 = LeftSamples[SourceOffset];
		float Sample2 = LeftSamples[SourceOffset + 1];
		OutputFrames[0] = Sample1 * (1 - LerpFactor) + (LerpFactor)* Sample2;

		OutputFrames++;
	}

	// Store the next run's start position relative to the source frame index we return below.
	// This can be 0x10000 or more, which is why the upper bits are never masked off.
	CurrentFrameFraction = NextRunStartPositionFixed - (FarthestPositionFixed & ~0xffff);

	return FarthestPositionFixed >> 16;
}
|
|
|
|
/**
 * Linearly resamples two channels of audio with a shared read position, stepping through the
 * source in 16.16 fixed point.
 *
 * The read position starts at CurrentFrameFraction and advances by FixedPointSampleRate for
 * every output frame. Both channels use the same source indices and the same lerp weights.
 *
 * @param OutputFramesNeeded		Number of frames to write per channel.
 * @param FixedPointSampleRate		Source advance per output frame, 16.16 fixed point (65536 == one source frame).
 * @param SourceFrames				Start of the first (left) channel's source samples.
 * @param SourceFramesStrideFloats	Offset, in floats, from SourceFrames to the second (right) channel's source samples.
 * @param OutputFrames				Start of the first (left) channel's output.
 * @param OutputFramesStrideFloats	Offset, in floats, from OutputFrames to the second (right) channel's output.
 * @return The integer source position (position >> 16) of the last output frame produced, or 0 when
 *		   OutputFramesNeeded is 0. On return CurrentFrameFraction holds the start position of the next
 *		   run expressed relative to that source frame.
 *		   NOTE(review): presumably the caller advances its source pointer by this amount - confirm against callers.
 *
 * NOTE(review): all position math is uint32, so CurrentFrameFraction + OutputFramesNeeded * FixedPointSampleRate
 * has to fit in 32 bits; nothing here checks that.
 */
uint32 Audio::FVectorLinearResampler::ResampleStereo(uint32 OutputFramesNeeded, uint32 FixedPointSampleRate, float const* SourceFrames, uint32 SourceFramesStrideFloats, float* OutputFrames, uint32 OutputFramesStrideFloats)
{
	// With zero frames requested, (OutputFramesNeeded - 1) below would wrap around and we would
	// return a bogus source position and write a bogus CurrentFrameFraction. Nothing is produced
	// and nothing is consumed, so leave the state alone.
	if (OutputFramesNeeded == 0)
	{
		return 0;
	}

	// we CANT mask off the upper bits, as it's possible the fraction could
	// jump an entire sample and we need to retain that offset.
	const uint32 FarthestPositionFixed = CurrentFrameFraction + (OutputFramesNeeded - 1) * FixedPointSampleRate;
	const uint32 NextRunStartPositionFixed = FarthestPositionFixed + FixedPointSampleRate;

	float const* LeftSamples = SourceFrames;
	float const* RightSamples = SourceFrames + SourceFramesStrideFloats;
	uint32 OutputFrame = 0;
	uint32 CurrentFrameFixed = CurrentFrameFraction;

#if (PLATFORM_ENABLE_VECTORINTRINSICS || PLATFORM_ENABLE_VECTORINTRINSICS_NEON) // SIMD path.
	{
		// Only the low 16 bits feed the lerp fraction; the integer part is tracked in CurrentFrameFixed.
		float CurrentFrameFractionFloat = (float)(CurrentFrameFraction & 0xffff);

		VectorRegister4Float ScaleVec = VectorSetFloat1(1.0f / 65536.0f);
		VectorRegister4Float TwoVec = VectorSetFloat1(2.0f);
		VectorRegister4Float ThreeVec = VectorSetFloat1(3.0f);

		// Per-lane fractional offsets for the four frames of one iteration, each already wrapped to 16 bits.
		VectorRegister4Float CurFracStepVec = VectorSet(
			(float)((0 * FixedPointSampleRate) & 0xffff),
			(float)((1 * FixedPointSampleRate) & 0xffff),
			(float)((2 * FixedPointSampleRate) & 0xffff),
			(float)((3 * FixedPointSampleRate) & 0xffff));

		// Starting fractions, scaled from 16 bit integers to floats and biased by 2. The sum of two
		// wrapped values can exceed 1.0 here; the mask at the top of the loop wraps it back.
		VectorRegister4Float CurFracVec = VectorSetFloat1(CurrentFrameFractionFloat);
		CurFracVec = VectorAdd(CurFracVec, CurFracStepVec);
		CurFracVec = VectorMultiply(CurFracVec, ScaleVec);
		CurFracVec = VectorAdd(CurFracVec, TwoVec);

		// Clears the top mantissa bit: maps [3, 4) back onto [2, 3), i.e. the fraction modulo 1.
		VectorRegister4Float MaskVec = MakeVectorRegisterFloat(0xffbfffff, 0xffbfffff, 0xffbfffff, 0xffbfffff);

		// Fractional advance for a whole iteration (four frames), wrapped to 16 bits and scaled to [0, 1).
		VectorRegister4Float StepVec = VectorSetFloat1((float)((FixedPointSampleRate * 4) & 0xffff));
		StepVec = VectorMultiply(StepVec, ScaleVec);

		// Largest multiple of four not exceeding the request; the scalar loop below handles the rest.
		uint32 OutputFramesNeededSIMD = OutputFramesNeeded & ~3;

		for (; OutputFrame < OutputFramesNeededSIMD; OutputFrame += 4)
		{
			// Integer source indices for the four output frames of this iteration (shared by both channels).
			const uint32 SourceOffsets[4] = {
				CurrentFrameFixed >> 16,
				(CurrentFrameFixed + FixedPointSampleRate) >> 16,
				(CurrentFrameFixed + FixedPointSampleRate + FixedPointSampleRate) >> 16,
				(CurrentFrameFixed + FixedPointSampleRate + FixedPointSampleRate + FixedPointSampleRate) >> 16
			};

			// [0, 0+1, 1, 1+1]
			VectorRegister4Float LeftSamples01 = VectorLoadTwoPairsFloat(LeftSamples + SourceOffsets[0], LeftSamples + SourceOffsets[1]);
			VectorRegister4Float RightSamples01 = VectorLoadTwoPairsFloat(RightSamples + SourceOffsets[0], RightSamples + SourceOffsets[1]);

			// [2, 2+1, 3, 3+1]
			VectorRegister4Float LeftSamples23 = VectorLoadTwoPairsFloat(LeftSamples + SourceOffsets[2], LeftSamples + SourceOffsets[3]);
			VectorRegister4Float RightSamples23 = VectorLoadTwoPairsFloat(RightSamples + SourceOffsets[2], RightSamples + SourceOffsets[3]);

			// want [0, 1, 2, 3]
			//      [0+1, 1+1, 2+1, 3+1]
			VectorRegister4Float LeftSamplesFrom, LeftSamplesTo;
			VectorDeinterleave(LeftSamplesFrom, LeftSamplesTo, LeftSamples01, LeftSamples23);
			VectorRegister4Float RightSamplesFrom, RightSamplesTo;
			VectorDeinterleave(RightSamplesFrom, RightSamplesTo, RightSamples01, RightSamples23);

			// our lerp vector is CurFracVec, masking off the 1 bit in the mantissa, subtract 2.
			CurFracVec = VectorBitwiseAnd(CurFracVec, MaskVec);
			VectorRegister4Float LerpFactor = VectorSubtract(CurFracVec, TwoVec);
			// 3 - (2 + f) == 1 - f
			VectorRegister4Float OneMinusLerpFactor = VectorSubtract(ThreeVec, CurFracVec);

			// Advance the fraction for the next iteration; it gets masked at the top of the loop.
			CurFracVec = VectorAdd(CurFracVec, StepVec);

			VectorRegister4Float LeftOutputVec = VectorAdd(VectorMultiply(LeftSamplesFrom, OneMinusLerpFactor), VectorMultiply(LeftSamplesTo, LerpFactor));
			VectorRegister4Float RightOutputVec = VectorAdd(VectorMultiply(RightSamplesFrom, OneMinusLerpFactor), VectorMultiply(RightSamplesTo, LerpFactor));

			VectorStore(LeftOutputVec, OutputFrames);
			VectorStore(RightOutputVec, OutputFrames + OutputFramesStrideFloats);

			CurrentFrameFixed += 4 * FixedPointSampleRate;
			OutputFrames += 4;
		}
	}
#endif

	// Remnants: the frames left over after the SIMD loop, or every frame when there is no SIMD path.
	for (; OutputFrame < OutputFramesNeeded; OutputFrame++)
	{
		uint32 SourceOffset = CurrentFrameFixed >> 16;
		uint32 CurFrac = CurrentFrameFixed & 0xffff;

		float LerpFactor = (float)(CurFrac) * (1.0f / 65536.0f);

		CurrentFrameFixed += FixedPointSampleRate;

		float Sample1 = LeftSamples[SourceOffset];
		float Sample2 = LeftSamples[SourceOffset + 1];
		OutputFrames[0] = Sample1 * (1 - LerpFactor) + (LerpFactor)* Sample2;

		Sample1 = RightSamples[SourceOffset];
		Sample2 = RightSamples[SourceOffset + 1];
		OutputFrames[OutputFramesStrideFloats] = Sample1 * (1 - LerpFactor) + (LerpFactor)* Sample2;

		OutputFrames++;
	}

	// Store the next run's start position relative to the source frame index we return below.
	// This can be 0x10000 or more, which is why the upper bits are never masked off.
	CurrentFrameFraction = NextRunStartPositionFixed - (FarthestPositionFixed & ~0xffff);

	return FarthestPositionFixed >> 16;
}
|