// Copyright Epic Games, Inc. All Rights Reserved. #include "Speech2Face.h" #include "Misc/AutomationTest.h" #include "Interfaces/IPluginManager.h" #include "UObject/Package.h" #include "Misc/FileHelper.h" #include "Serialization/JsonSerializer.h" #include "Misc/Paths.h" #if WITH_DEV_AUTOMATION_TESTS && WITH_EDITOR BEGIN_DEFINE_SPEC(TSpeech2FaceTest, "Speech2Face", EAutomationTestFlags::EditorContext | EAutomationTestFlags::ProductFilter | EAutomationTestFlags::MediumPriority) static constexpr float DefaultTolerance = 0.01f; FString TestDataDir; FString TestAssetDir; bool ProcessAudio(const FString& InAssetName, uint32 InExpectedFrameCount, bool bGenerateBlinks = false, bool bInMixChannels = false, uint32 InAudioChannelIndex = 0, float InOutputAnimationFps = 50.0f); bool ProcessAudioAndCompareOutput(const FString& InAssetName, const FString& InValidationDataJson, bool bGenerateBlinks = false, bool bInMixChannels = false, uint32 InAudioChannelIndex = 0, float InOutputAnimationFps = 50.0f, float InTolerance = DefaultTolerance); bool ProcessAudioInternal(const FString& InAssetName, bool bGenerateBlinks, bool bInMixChannels, uint32 InAudioChannelIndex, float InOutputAnimationFps, TArray& OutAnimation, TArray& OutHeadAnimation); void GenerateValidationData(const FString& InPathToJson, const TArray& InValidationData); bool LoadValidationDataFromJsonFile(const FString& InPathToJson, TArray& OutValidationData); bool IsAnimationDataEqualIsh(const TArray& InLeft, const TArray& InRight, float InTolerance); bool IsAnimationFrameEqualIsh(int32 InFrame, const FSpeech2Face::FAnimationFrame& InLeft, const FSpeech2Face::FAnimationFrame& InRight, float InTolerance); END_DEFINE_SPEC(TSpeech2FaceTest) bool TSpeech2FaceTest::ProcessAudio(const FString& InAssetName, uint32 InExpectedFrameCount, bool bGenerateBlinks, bool bInMixChannels, uint32 InAudioChannelIndex, float InOutputAnimationFps) { TArray Animation; TArray HeadAnimation; bool bIsSuccess = ProcessAudioInternal(InAssetName, bGenerateBlinks, bInMixChannels, InAudioChannelIndex, InOutputAnimationFps, Animation, HeadAnimation); if(!bIsSuccess) { return false; } UTEST_EQUAL(TEXT("Correct number of frames was generated for the face"), Animation.Num(), InExpectedFrameCount); UTEST_EQUAL(TEXT("Correct number of frames was generated for the head"), HeadAnimation.Num(), InExpectedFrameCount); return true; } bool TSpeech2FaceTest::ProcessAudioAndCompareOutput(const FString& InAssetName, const FString& InValidationDataJson, bool bGenerateBlinks, bool bInMixChannels, uint32 InAudioChannelIndex, float InOutputAnimationFps, float InTolerance) { TArray Animation; TArray UnusedHeadAnimation; bool bIsSuccess = ProcessAudioInternal(InAssetName, bGenerateBlinks, bInMixChannels, InAudioChannelIndex, InOutputAnimationFps, Animation, UnusedHeadAnimation); if (!bIsSuccess) { return false; } FString ValidationJsonPath = TestDataDir + InValidationDataJson; TArray ValidationData; bIsSuccess = LoadValidationDataFromJsonFile(ValidationJsonPath, ValidationData); UTEST_TRUE(TEXT("Validation data loaded successfully"), bIsSuccess); UTEST_TRUE(TEXT("Produced animation matches validation data with tolerance of ") + FString::Format(TEXT("{0}"), { InTolerance }), IsAnimationDataEqualIsh(ValidationData, Animation, InTolerance)); return true; } bool TSpeech2FaceTest::ProcessAudioInternal(const FString& InAssetName, bool bGenerateBlinks, bool bInMixChannels, uint32 InAudioChannelIndex, float InOutputAnimationFps, TArray& OutAnimation, TArray& OutHeadAnimation) { TUniquePtr Speech2Face = FSpeech2Face::Create(); UTEST_TRUE(TEXT("FSpeech2Face::Create succeeded"), Speech2Face != nullptr); FString AssetPath = TestAssetDir + InAssetName; USoundWave* SoundSample = LoadObject(GetTransientPackage(), AssetPath.GetCharArray().GetData()); UTEST_TRUE(TEXT("Test audio asset loaded successfully"), SoundSample != nullptr); FSpeech2Face::FAudioParams AudioParams(SoundSample, FSpeech2Face::AudioEncoderWarmUpSec, bInMixChannels, InAudioChannelIndex); bool bIsSuccess = Speech2Face->GenerateFaceAnimation(AudioParams, InOutputAnimationFps, bGenerateBlinks, [](){return false;}, OutAnimation, OutHeadAnimation); UTEST_TRUE(TEXT("FSpeech2Face::GenerateFaceAnimation succeeded"), bIsSuccess); return true; } void TSpeech2FaceTest::GenerateValidationData(const FString& InPathToJson, const TArray& InValidationData) { TSharedRef RootJsonObject = MakeShareable(new FJsonObject()); TSharedRef SequenceJsonObject = MakeShareable(new FJsonObject()); RootJsonObject->SetObjectField("sequence", SequenceJsonObject); for (int32 FrameIndex = 0; FrameIndex < InValidationData.Num(); ++FrameIndex) { const FSpeech2Face::FAnimationFrame& FrameData = InValidationData[FrameIndex]; TSharedRef FrameJsonObject = MakeShareable(new FJsonObject()); SequenceJsonObject->SetObjectField(FString::FromInt(FrameIndex), FrameJsonObject); for (const TPair& ControlValue : FrameData) { FrameJsonObject->SetNumberField(ControlValue.Key, ControlValue.Value); } } FString JsonString; TSharedRef> JsonWriter = TJsonWriterFactory<>::Create(&JsonString, 0); FJsonSerializer::Serialize(RootJsonObject, JsonWriter, true); FFileHelper::SaveStringToFile(JsonString, *InPathToJson); } bool TSpeech2FaceTest::LoadValidationDataFromJsonFile(const FString& InPathToJson, TArray& OutValidationData) { FString TestDataString; bool bIsSuccess = FFileHelper::LoadFileToString(TestDataString, InPathToJson.GetCharArray().GetData()); UTEST_TRUE(TEXT("Loading JSON validation file succeeded"), bIsSuccess); TSharedPtr TestDataJson; bIsSuccess = FJsonSerializer::Deserialize(TJsonReaderFactory<>::Create(TestDataString), TestDataJson); UTEST_TRUE(TEXT("Parsing JSON validation file succeeded"), bIsSuccess); TSharedPtr SequenceObject = TestDataJson->GetObjectField(TEXT("sequence")); UTEST_TRUE(TEXT("Find 'sequence' object in JSON validation file"), SequenceObject != nullptr); int32 FrameCountJson = SequenceObject->Values.Num(); OutValidationData.Reset(FrameCountJson); for (int32 I = 0; I < FrameCountJson; ++I) { TSharedPtr FrameObject = SequenceObject->GetObjectField(FString::FromInt(I)); TArray FrameObjectKeys; FrameObject->Values.GetKeys(FrameObjectKeys); FSpeech2Face::FAnimationFrame AnimFrame; for (const FString& Key : FrameObjectKeys) { AnimFrame.Add(Key, static_cast(FrameObject->GetNumberField(Key))); } OutValidationData.Emplace(MoveTemp(AnimFrame)); } return true; } bool TSpeech2FaceTest::IsAnimationDataEqualIsh(const TArray& InExpected, const TArray& InGenerated, float InTolerance) { UTEST_TRUE(TEXT("Generated animation length is the same as in validation data"), InExpected.Num() == InGenerated.Num()); for (int32 I = 0; I < InExpected.Num(); ++I) { UTEST_TRUE(TEXT("Compare animation frame number ") + FString::FromInt(I), IsAnimationFrameEqualIsh(I, InExpected[I], InGenerated[I], InTolerance)); } return true; } bool TSpeech2FaceTest::IsAnimationFrameEqualIsh(int32 InFrame, const FSpeech2Face::FAnimationFrame& InExpected, const FSpeech2Face::FAnimationFrame& InGenerated, float InTolerance) { UTEST_TRUE(TEXT("Check that number of controls in an animation frame is the same: ") + FString::FromInt(InExpected.Num()) + TEXT(", ") + FString::FromInt(InGenerated.Num()), InExpected.Num() == InGenerated.Num()); TArray Controls; InExpected.GetKeys(Controls); for (const TPair& ExpectedControlValue : InExpected) { const float* GeneratedControlValue = InGenerated.Find(ExpectedControlValue.Key); UTEST_TRUE(TEXT("Rig control ") + ExpectedControlValue.Key + TEXT(" is present in the generated frame("), GeneratedControlValue != nullptr); if (GeneratedControlValue) { UTEST_EQUAL_TOLERANCE(TEXT("Rig control (") + ExpectedControlValue.Key + TEXT(") value matches expected"), ExpectedControlValue.Value, *GeneratedControlValue, InTolerance); } } return true; } void TSpeech2FaceTest::Define() { FString PluginDir = IPluginManager::Get().FindPlugin(TEXT(UE_PLUGIN_NAME))->GetContentDir(); TestDataDir = PluginDir / "TestData/Audio/"; TestAssetDir = TEXT("/MetaHuman/TestData/Audio/"); Describe("GenerateAnimation()", [this]() { It("should process mono audio, 44100 Hz and pass checks", [this]() { const uint32 ExpectedFrameCount = 308; return ProcessAudio(TEXT("44_1kHz_1channel.44_1kHz_1channel"), ExpectedFrameCount); }); It("should process mono audio, 16 kHz and match validation data", [this]() { return ProcessAudioAndCompareOutput(TEXT("16kHz_1channel.16kHz_1channel"), TEXT("16kHz_1channel.json")); }); It("should process stereo audio, second channel, 16 kHz and match validation data", [this]() { const bool bGenerateBlinks = false; const bool bMixChannels = false; const int32 AudioChannel = 1; return ProcessAudioAndCompareOutput(TEXT("16kHz_2channels.16kHz_2channels"), TEXT("16kHz_2channels.json"), bGenerateBlinks, bMixChannels, AudioChannel); }); It("should process stereo audio, mix channels, 16 kHz and match validation data", [this]() { const bool bGenerateBlinks = false; const bool bMixChannels = true; return ProcessAudioAndCompareOutput(TEXT("16kHz_2channels.16kHz_2channels"), TEXT("16kHz_2channels_mixed.json"), bGenerateBlinks, bMixChannels); }); It("should process mono audio, 16 kHz, resample to 60 fps and match validation data", [this]() { const bool bGenerateBlinks = false; const bool bMixChannels = false; const int32 AudioChannel = 0; const float OutputFPS = 60; return ProcessAudioAndCompareOutput(TEXT("16kHz_1channel.16kHz_1channel"), TEXT("16kHz_1channel_resample_to_60fps.json"), bGenerateBlinks, bMixChannels, AudioChannel, OutputFPS); }); It("should process mono 16kHz audio, generate blinks and match validation data", [this]() { const bool bGenerateBlinks = true; const bool bMixChannels = false; return ProcessAudioAndCompareOutput(TEXT("16kHz_1channel.16kHz_1channel"), TEXT("16kHz_1channel_blinks.json"), bGenerateBlinks, bMixChannels); }); It("should process mono 16kHz audio, generate blinks, resample to 60pfs and match validation data", [this]() { const bool bGenerateBlinks = true; const bool bMixChannels = false; const int32 AudioChannel = 0; const float OutputFPS = 60; return ProcessAudioAndCompareOutput(TEXT("16kHz_1channel.16kHz_1channel"), TEXT("16kHz_1channel_blinks_resample_to_60fps.json"), bGenerateBlinks, bMixChannels, AudioChannel, OutputFPS); }); }); } #endif