Files
UnrealEngine/Engine/Source/Runtime/SignalProcessing/Private/VectorLinearResampler.cpp
2025-05-18 13:04:45 +08:00

230 lines
9.7 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "DSP/VectorLinearResampler.h"
//
// There's a lot here that's about making sure we never have
// to convert to float in the loop. So we keep two advancing
// states - the position in INT space, and the fraction in
// VECTOR space. The difficulty is we need to mask off the
// upper bits as we advance the fraction. The trickery is
// to do this in the actual IEEE float representation. Since
// the most the step can advance is by 65535.0f / 65536.0f,
// we can place the addition operation in an area of the float
// space that prevents the exponent from changing in response
// to that addition. In this case, [2, 3). An addition that carries
// past 3.0 can only ever set the highest mantissa bit (the exponent is
// untouched), so we can just directly mask off that single bit (0xffbfffff).
//
// To do this, we have to take advantage of the mod math identity:
// (a + b) mod c == ((a mod c) + (b mod c)) mod c.
// so we precompute the frac steps, all modded off (0xffff), and the
// step advance vector, also modded off, then scale them all down to [0, 1).
//
// Now, getting the lerp fraction is an AND, and a SUB to get from [2,3) to
// [0, 1).
//
// On top of that, we use 64 bit loads to get the adjacent samples
// for lerping.
//
// Linearly resamples one channel, stepping through the source in 16.16 fixed point.
//
// OutputFramesNeeded   - number of frames to write to OutputFrames.
// FixedPointSampleRate - source advance per output frame in 16.16 fixed point
//                        (0x10000 advances exactly one source frame per output frame).
// SourceFrames         - source samples. Each output frame reads the sample at the integer
//                        part of its position and the sample after it, so the buffer must
//                        be readable one sample past the returned index.
// OutputFrames         - destination for OutputFramesNeeded resampled frames.
//
// Returns the integer source index used by the last output frame, or 0 if no frames
// were requested. CurrentFrameFraction is updated to where the next run starts,
// expressed relative to that returned index (so it may exceed 0xffff).
// NOTE(review): presumably the caller advances its source pointer by the return value
// before the next call - confirm against the call sites.
//
// Positions are 32 bit, so only 16 bits of integer source index are available per call.
uint32 Audio::FVectorLinearResampler::ResampleMono(uint32 OutputFramesNeeded, uint32 FixedPointSampleRate, float const* SourceFrames, float* OutputFrames)
{
	// Nothing to produce. Bail out before (OutputFramesNeeded - 1) below wraps around,
	// which would corrupt both the carried fraction and the returned source index.
	if (OutputFramesNeeded == 0)
	{
		return 0;
	}

	// we CANT mask off the upper bits, as it's possible the fraction could
	// jump an entire sample and we need to retain that offset.
	const uint32 FarthestPositionFixed = CurrentFrameFraction + (OutputFramesNeeded - 1) * FixedPointSampleRate;
	const uint32 NextRunStartPositionFixed = FarthestPositionFixed + FixedPointSampleRate;

	float const* LeftSamples = SourceFrames;
	uint32 OutputFrame = 0;
	uint32 CurrentFrameFixed = CurrentFrameFraction;

#if (PLATFORM_ENABLE_VECTORINTRINSICS || PLATFORM_ENABLE_VECTORINTRINSICS_NEON) // SIMD path.
	{
		float CurrentFrameFractionFloat = (float)(CurrentFrameFraction & 0xffff);

		VectorRegister4Float ScaleVec = VectorSetFloat1(1.0f / 65536.0f);
		VectorRegister4Float TwoVec = VectorSetFloat1(2.0f);
		VectorRegister4Float ThreeVec = VectorSetFloat1(3.0f);

		// Per-lane fractional offsets for the four frames of a group, each reduced mod 65536.
		VectorRegister4Float CurFracStepVec = VectorSet(
			(float)((0 * FixedPointSampleRate) & 0xffff),
			(float)((1 * FixedPointSampleRate) & 0xffff),
			(float)((2 * FixedPointSampleRate) & 0xffff),
			(float)((3 * FixedPointSampleRate) & 0xffff));

		// Starting fractions, scaled to [0, 2) and then biased by 2 so the mask in the
		// loop can fold them back into [2, 3).
		VectorRegister4Float CurFracVec = VectorSetFloat1(CurrentFrameFractionFloat);
		CurFracVec = VectorAdd(CurFracVec, CurFracStepVec);
		CurFracVec = VectorMultiply(CurFracVec, ScaleVec);
		CurFracVec = VectorAdd(CurFracVec, TwoVec);

		// Clears the top mantissa bit, i.e. subtracts 1.0 from anything in [3, 4).
		VectorRegister4Float MaskVec = MakeVectorRegisterFloat(0xffbfffff, 0xffbfffff, 0xffbfffff, 0xffbfffff);

		// Fractional advance for a whole group of four frames, scaled to [0, 1).
		VectorRegister4Float StepVec = VectorSetFloat1((float)((FixedPointSampleRate * 4) & 0xffff));
		StepVec = VectorMultiply(StepVec, ScaleVec);

		// Largest multiple of four not exceeding the request; the rest is done scalar below.
		uint32 OutputFramesNeededSIMD = OutputFramesNeeded & ~3;
		for (; OutputFrame < OutputFramesNeededSIMD; OutputFrame += 4)
		{
			// Integer source positions come from the fixed point counter, not the float fraction.
			uint32 SourceOffsets[4] = {
				CurrentFrameFixed >> 16,
				(CurrentFrameFixed + FixedPointSampleRate) >> 16,
				(CurrentFrameFixed + FixedPointSampleRate + FixedPointSampleRate) >> 16,
				(CurrentFrameFixed + FixedPointSampleRate + FixedPointSampleRate + FixedPointSampleRate) >> 16
			};

			// [0, 0+1, 1, 1+1]
			VectorRegister4Float LeftSamples01 = VectorLoadTwoPairsFloat(LeftSamples + SourceOffsets[0], LeftSamples + SourceOffsets[1]);
			// [2, 2+1, 3, 3+1]
			VectorRegister4Float LeftSamples23 = VectorLoadTwoPairsFloat(LeftSamples + SourceOffsets[2], LeftSamples + SourceOffsets[3]);

			// [0, 1, 2, 3]
			// [0+1, 1+1, 2+1, 3+1]
			VectorRegister4Float LeftSamplesFrom, LeftSamplesTo;
			VectorDeinterleave(LeftSamplesFrom, LeftSamplesTo, LeftSamples01, LeftSamples23);

			// our lerp vector is CurFracVec, masking off the 1 bit in the mantissa, subtract 2.
			CurFracVec = VectorBitwiseAnd(CurFracVec, MaskVec);
			VectorRegister4Float LerpFactor = VectorSubtract(CurFracVec, TwoVec);
			// 3 - x == 1 - (x - 2), so this is (1 - LerpFactor) without a dependent subtract.
			VectorRegister4Float OneMinusLerpFactor = VectorSubtract(ThreeVec, CurFracVec);
			CurFracVec = VectorAdd(CurFracVec, StepVec);

			VectorRegister4Float OutputVec = VectorAdd(VectorMultiply(LeftSamplesFrom, OneMinusLerpFactor), VectorMultiply(LeftSamplesTo, LerpFactor));
			VectorStore(OutputVec, OutputFrames);

			CurrentFrameFixed += 4 * FixedPointSampleRate;
			OutputFrames += 4;
		}
	}
#endif

	// Remnants
	for (; OutputFrame < OutputFramesNeeded; OutputFrame++)
	{
		uint32 SourceOffset = CurrentFrameFixed >> 16;
		uint32 CurFrac = CurrentFrameFixed & 0xffff;
		float LerpFactor = (float)(CurFrac) * (1.0f / 65536.0f);
		CurrentFrameFixed += FixedPointSampleRate;

		float Sample1 = LeftSamples[SourceOffset];
		float Sample2 = LeftSamples[SourceOffset + 1];
		OutputFrames[0] = Sample1 * (1 - LerpFactor) + (LerpFactor)* Sample2;
		OutputFrames++;
	}

	// Carry the next start position forward, rebased onto the source index we return.
	CurrentFrameFraction = NextRunStartPositionFixed - (FarthestPositionFixed & ~0xffff);
	return FarthestPositionFixed >> 16;
}
// Linearly resamples two planar channels with a shared position, stepping through the
// source in 16.16 fixed point.
//
// OutputFramesNeeded       - number of frames to write per channel.
// FixedPointSampleRate     - source advance per output frame in 16.16 fixed point
//                            (0x10000 advances exactly one source frame per output frame).
// SourceFrames             - first channel's source samples. Each output frame reads the
//                            sample at the integer part of its position and the sample
//                            after it, so each channel must be readable one sample past
//                            the returned index.
// SourceFramesStrideFloats - offset, in floats, from SourceFrames to the second channel.
// OutputFrames             - first channel's destination.
// OutputFramesStrideFloats - offset, in floats, from OutputFrames to the second channel's
//                            destination.
//
// Returns the integer source index used by the last output frame, or 0 if no frames
// were requested. CurrentFrameFraction is updated to where the next run starts,
// expressed relative to that returned index (so it may exceed 0xffff).
// NOTE(review): presumably the caller advances its source pointer by the return value
// before the next call - confirm against the call sites.
//
// Positions are 32 bit, so only 16 bits of integer source index are available per call.
uint32 Audio::FVectorLinearResampler::ResampleStereo(uint32 OutputFramesNeeded, uint32 FixedPointSampleRate, float const* SourceFrames, uint32 SourceFramesStrideFloats, float* OutputFrames, uint32 OutputFramesStrideFloats)
{
	// Nothing to produce. Bail out before (OutputFramesNeeded - 1) below wraps around,
	// which would corrupt both the carried fraction and the returned source index.
	if (OutputFramesNeeded == 0)
	{
		return 0;
	}

	// we CANT mask off the upper bits, as it's possible the fraction could
	// jump an entire sample and we need to retain that offset.
	const uint32 FarthestPositionFixed = CurrentFrameFraction + (OutputFramesNeeded - 1) * FixedPointSampleRate;
	const uint32 NextRunStartPositionFixed = FarthestPositionFixed + FixedPointSampleRate;

	float const* LeftSamples = SourceFrames;
	float const* RightSamples = SourceFrames + SourceFramesStrideFloats;
	uint32 OutputFrame = 0;
	uint32 CurrentFrameFixed = CurrentFrameFraction;

#if (PLATFORM_ENABLE_VECTORINTRINSICS || PLATFORM_ENABLE_VECTORINTRINSICS_NEON) // SIMD path.
	{
		float CurrentFrameFractionFloat = (float)(CurrentFrameFraction & 0xffff);

		VectorRegister4Float ScaleVec = VectorSetFloat1(1.0f / 65536.0f);
		VectorRegister4Float TwoVec = VectorSetFloat1(2.0f);
		VectorRegister4Float ThreeVec = VectorSetFloat1(3.0f);

		// Per-lane fractional offsets for the four frames of a group, each reduced mod 65536.
		VectorRegister4Float CurFracStepVec = VectorSet(
			(float)((0 * FixedPointSampleRate) & 0xffff),
			(float)((1 * FixedPointSampleRate) & 0xffff),
			(float)((2 * FixedPointSampleRate) & 0xffff),
			(float)((3 * FixedPointSampleRate) & 0xffff));

		// Starting fractions, scaled to [0, 2) and then biased by 2 so the mask in the
		// loop can fold them back into [2, 3).
		VectorRegister4Float CurFracVec = VectorSetFloat1(CurrentFrameFractionFloat);
		CurFracVec = VectorAdd(CurFracVec, CurFracStepVec);
		CurFracVec = VectorMultiply(CurFracVec, ScaleVec);
		CurFracVec = VectorAdd(CurFracVec, TwoVec);

		// Clears the top mantissa bit, i.e. subtracts 1.0 from anything in [3, 4).
		VectorRegister4Float MaskVec = MakeVectorRegisterFloat(0xffbfffff, 0xffbfffff, 0xffbfffff, 0xffbfffff);

		// Fractional advance for a whole group of four frames, scaled to [0, 1).
		VectorRegister4Float StepVec = VectorSetFloat1((float)((FixedPointSampleRate * 4) & 0xffff));
		StepVec = VectorMultiply(StepVec, ScaleVec);

		// Largest multiple of four not exceeding the request; the rest is done scalar below.
		uint32 OutputFramesNeededSIMD = OutputFramesNeeded & ~3;
		for (; OutputFrame < OutputFramesNeededSIMD; OutputFrame += 4)
		{
			// Integer source positions come from the fixed point counter, not the float
			// fraction, and are shared by both channels.
			const uint32 SourceOffsets[4] = {
				CurrentFrameFixed >> 16,
				(CurrentFrameFixed + FixedPointSampleRate) >> 16,
				(CurrentFrameFixed + FixedPointSampleRate + FixedPointSampleRate) >> 16,
				(CurrentFrameFixed + FixedPointSampleRate + FixedPointSampleRate + FixedPointSampleRate) >> 16
			};

			// [0, 0+1, 1, 1+1]
			VectorRegister4Float LeftSamples01 = VectorLoadTwoPairsFloat(LeftSamples + SourceOffsets[0], LeftSamples + SourceOffsets[1]);
			VectorRegister4Float RightSamples01 = VectorLoadTwoPairsFloat(RightSamples + SourceOffsets[0], RightSamples + SourceOffsets[1]);
			// [2, 2+1, 3, 3+1]
			VectorRegister4Float LeftSamples23 = VectorLoadTwoPairsFloat(LeftSamples + SourceOffsets[2], LeftSamples + SourceOffsets[3]);
			VectorRegister4Float RightSamples23 = VectorLoadTwoPairsFloat(RightSamples + SourceOffsets[2], RightSamples + SourceOffsets[3]);

			// want [0, 1, 2, 3]
			//      [0+1, 1+1, 2+1, 3+1]
			VectorRegister4Float LeftSamplesFrom, LeftSamplesTo;
			VectorDeinterleave(LeftSamplesFrom, LeftSamplesTo, LeftSamples01, LeftSamples23);
			VectorRegister4Float RightSamplesFrom, RightSamplesTo;
			VectorDeinterleave(RightSamplesFrom, RightSamplesTo, RightSamples01, RightSamples23);

			// our lerp vector is CurFracVec, masking off the 1 bit in the mantissa, subtract 2.
			CurFracVec = VectorBitwiseAnd(CurFracVec, MaskVec);
			VectorRegister4Float LerpFactor = VectorSubtract(CurFracVec, TwoVec);
			// 3 - x == 1 - (x - 2), so this is (1 - LerpFactor) without a dependent subtract.
			VectorRegister4Float OneMinusLerpFactor = VectorSubtract(ThreeVec, CurFracVec);
			CurFracVec = VectorAdd(CurFracVec, StepVec);

			VectorRegister4Float LeftOutputVec = VectorAdd(VectorMultiply(LeftSamplesFrom, OneMinusLerpFactor), VectorMultiply(LeftSamplesTo, LerpFactor));
			VectorRegister4Float RightOutputVec = VectorAdd(VectorMultiply(RightSamplesFrom, OneMinusLerpFactor), VectorMultiply(RightSamplesTo, LerpFactor));
			VectorStore(LeftOutputVec, OutputFrames);
			VectorStore(RightOutputVec, OutputFrames + OutputFramesStrideFloats);

			CurrentFrameFixed += 4 * FixedPointSampleRate;
			OutputFrames += 4;
		}
	}
#endif

	// Remnants
	for (; OutputFrame < OutputFramesNeeded; OutputFrame++)
	{
		uint32 SourceOffset = CurrentFrameFixed >> 16;
		uint32 CurFrac = CurrentFrameFixed & 0xffff;
		float LerpFactor = (float)(CurFrac) * (1.0f / 65536.0f);
		CurrentFrameFixed += FixedPointSampleRate;

		float Sample1 = LeftSamples[SourceOffset];
		float Sample2 = LeftSamples[SourceOffset + 1];
		OutputFrames[0] = Sample1 * (1 - LerpFactor) + (LerpFactor)* Sample2;

		Sample1 = RightSamples[SourceOffset];
		Sample2 = RightSamples[SourceOffset + 1];
		OutputFrames[OutputFramesStrideFloats] = Sample1 * (1 - LerpFactor) + (LerpFactor)* Sample2;
		OutputFrames++;
	}

	// Carry the next start position forward, rebased onto the source index we return.
	CurrentFrameFraction = NextRunStartPositionFixed - (FarthestPositionFixed & ~0xffff);
	return FarthestPositionFixed >> 16;
}