// Copyright Epic Games, Inc. All Rights Reserved. #pragma once #include "LearningArray.h" #include "LearningLog.h" #include "LearningTrainer.h" #include "LearningSharedMemory.h" #include "Misc/Guid.h" #include "Templates/SharedPointer.h" #include "UObject/NameTypes.h" class FJsonObject; class FSocket; class ULearningNeuralNetworkData; namespace UE::Learning { struct FReplayBuffer; enum class ECompletionMode : uint8; /** Interface for communicating with an external trainer process. */ struct IExternalTrainer { virtual ~IExternalTrainer() {} /** Returns true if this trainer is valid. Otherwise, false. */ virtual bool IsValid() = 0; /** Terminate the trainer immediately. */ virtual void Terminate() = 0; /** Signal for the trainer to stop. */ virtual ETrainerResponse SendStop() = 0; /** * Wait for the trainer to finish. * * @param Timeout Timeout to wait in seconds * @returns Trainer response */ virtual ETrainerResponse Wait() = 0; /** Returns true if we can receive a network or training completed. Otherwise, false. */ virtual bool HasNetworkOrCompleted() = 0; /** * Sends the given json configs to the trainer process. * * @param DataConfigObject The config to send with meta-data * @param TrainerConfigObject The config to send with trainer-specific settings * @param LogSettings The log verbosity level * @returns Trainer response */ virtual ETrainerResponse SendConfigs( const TSharedRef& DataConfigObject, const TSharedRef& TrainerConfigObject, const ELogSetting LogSettings = Trainer::DefaultLogSettings) = 0; /** * Adds the network to this external trainer. Allocates buffers, etc. * Must be called for each network prior to calling Send/Receive. * * @params Network The network to be added * @returns The network's unique id */ virtual int32 AddNetwork(const ULearningNeuralNetworkData& Network) = 0; /** * Wait for the trainer to push an updated network. * * @param NetworkId Unique network id * @param OutNetwork Network to update * @param Timeout Timeout to wait in seconds * @param NetworkLock Lock to use when updating network * @param LogSettings The log verbosity level * @returns Trainer response */ virtual ETrainerResponse ReceiveNetwork( const int32 NetworkId, ULearningNeuralNetworkData& OutNetwork, FRWLock* NetworkLock = nullptr, const ELogSetting LogSettings = Trainer::DefaultLogSettings) = 0; /** * Wait for the trainer to push an array of updated networks. * * @param NetworkIds Unique network ids * @param OutNetworks Networks to update * @param Timeout Timeout to wait in seconds * @param NetworkLocks Locks to use when updating networks * @param LogSettings The log verbosity level * @returns Trainer response */ virtual TArray ReceiveNetworks( const TArray& NetworkIds, TArray> Networks, TArray NetworkLocks = TArray(), const ELogSetting LogSettings = Trainer::DefaultLogSettings) = 0; /** * Wait for the trainer to be ready and push the current policy network. * * @param NetworkId Unique network id * @param Network Network to push * @param Timeout Timeout to wait in seconds * @param NetworkLock Lock to use when pushing network * @param LogSettings The log verbosity level * @returns Trainer response */ virtual ETrainerResponse SendNetwork( const int32 NetworkId, const ULearningNeuralNetworkData& Network, FRWLock* NetworkLock = nullptr, const ELogSetting LogSettings = Trainer::DefaultLogSettings) = 0; /** * Adds a named replay buffer to this external trainer. * Must be called for each buffer prior to calling SendReplayBuffer. * * @params ReplayBuffer The buffer to be added * @returns The replay buffer's unique id */ virtual int32 AddReplayBuffer(const FReplayBuffer& ReplayBuffer) = 0; /** * Wait for the trainer to be ready and send new experience. * * @params ReplayBufferId Unique replay buffer id * @params Name The unique name of the buffer, used as a key * @param ReplayBuffer Replay buffer to send * @param Timeout Timeout to wait in seconds * @param LogSettings The log verbosity level * @returns Trainer response */ virtual ETrainerResponse SendReplayBuffer( const int32 ReplayBufferId, const FReplayBuffer& ReplayBuffer, const ELogSetting LogSettings = Trainer::DefaultLogSettings) = 0; }; /** * This object allows you to launch the FSharedMemoryTrainer server as a subprocess, * which is convenient when you want to train locally. */ struct LEARNINGTRAINING_API FSharedMemoryTrainerServerProcess { /** * Creates a training server as a subprocess using shared memory for communication. This will no-op if this UE * process has a non-zero "LearningProcessIdx". * * @param TaskName The name of this training task (used to disambiguate filenames, etc.) * @param CustomTrainerPath Path to check for custom trainer files * @param TrainerFileName The name of the training file to use * @param PythonExecutablePath Path to the python executable used for training. In general should be the * python shipped with Unreal Editor. * @param PythonContentPath Path to the Python Content folder provided by the Learning plugin * @param InIntermediatePath Path to the intermediate folder to write temporary files, logs, and * snapshots to * @param TrainingProcessFlags Training server subprocess flags * @param LogSettings Logging settings to use */ FSharedMemoryTrainerServerProcess( const FString& TaskName, const FString& CustomTrainerPath, const FString& TrainerFileName, const FString& PythonExecutablePath, const FString& PythonContentPath, const FString& InIntermediatePath, const float InTimeout = Trainer::DefaultTimeout, const ESubprocessFlags TrainingProcessFlags = ESubprocessFlags::None, const ELogSetting LogSettings = ELogSetting::Normal); /** Check if the server process is still running. */ bool IsRunning() const; /** * Wait for the server process to end * * @param Timeout Timeout to wait in seconds * @returns true if successful, otherwise false if it times out */ bool Wait(); /** Terminate the server process. */ void Terminate(); /** Get the Controls shared memory array view. */ TSharedMemoryArrayView<1, volatile int32> GetControlsSharedMemoryArrayView() const; /** Get the intermediate path. */ const FString& GetIntermediatePath() const; /** Get the config path. */ const FString& GetConfigPath() const; /** Get the training subprocess. */ FSubprocess* GetTrainingSubprocess(); private: /** Free and deallocate all shared memory. */ void Deallocate(); FString IntermediatePath; FString ConfigPath; TSharedMemoryArrayView<1, volatile int32> Controls; // Mark as volatile to avoid compiler optimizing away reads without writes etc. FSubprocess TrainingProcess; float Timeout = Trainer::DefaultTimeout; }; /** * Trainer that connects to an external training server to perform training * * This trainer can be used to allow the python training process the run * on a different machine to the experience gathering process. */ struct LEARNINGTRAINING_API FSharedMemoryTrainer : public IExternalTrainer { struct FSharedMemoryExperienceContainer { TSharedMemoryArrayView<1, int32> EpisodeStarts; TSharedMemoryArrayView<1, int32> EpisodeLengths; TSharedMemoryArrayView<1, ECompletionMode> EpisodeCompletionModes; TArray, TInlineAllocator<1>> EpisodeFinalObservations; TArray, TInlineAllocator<1>> EpisodeFinalMemoryStates; TArray, TInlineAllocator<1>> Observations; TArray, TInlineAllocator<1>> Actions; TArray, TInlineAllocator<1>> ActionModifiers; TArray, TInlineAllocator<1>> MemoryStates; TArray, TInlineAllocator<1>> Rewards; /** Free and deallocate all shared memory. */ void Deallocate(); }; /** * Creates a new SharedMemory trainer * * @param InTaskName Unique name for this training task - used to avoid config filename conflicts * @param ExternalTrainerProcess Shared memory used for communicating status to the trainer server process * @param bUseExternalTrainingProcess If true, attach to existing external training process. * @param ControlsGuid If bUseExternalTrainingProcess is true, attach to this controls memory. * @param InConfigPath The path to write the config to. * @param InIntermediatePath The Intermediate Path - for writing temporary files. * @param InTimeout Timeout to wait in seconds for connection and initial data transfer */ FSharedMemoryTrainer( const FString& InTaskName, const TSharedPtr& ExternalTrainerProcess, const bool bUseExternalTrainingProcess = false, const FGuid ControlsGuid = FGuid(), const FString& InConfigPath = FString(), const FString& InIntermediatePath = FString(), const float InTimeout = Trainer::DefaultTimeout); ~FSharedMemoryTrainer(); virtual bool IsValid() override final; virtual void Terminate() override final; virtual ETrainerResponse SendStop() override final; virtual ETrainerResponse Wait() override final; virtual bool HasNetworkOrCompleted() override final; virtual ETrainerResponse SendConfigs( const TSharedRef& DataConfigObject, const TSharedRef& TrainerConfigObject, const ELogSetting LogSettings = Trainer::DefaultLogSettings) override final; virtual int32 AddNetwork(const ULearningNeuralNetworkData& Network) override final; virtual ETrainerResponse ReceiveNetwork( const int32 NetworkId, ULearningNeuralNetworkData& OutNetwork, FRWLock* NetworkLock = nullptr, const ELogSetting LogSettings = Trainer::DefaultLogSettings) override final; virtual TArray ReceiveNetworks( const TArray& NetworkIds, TArray> Networks, TArray NetworkLocks = TArray(), const ELogSetting LogSettings = Trainer::DefaultLogSettings) override final; virtual ETrainerResponse SendNetwork( const int32 NetworkId, const ULearningNeuralNetworkData& Network, FRWLock* NetworkLock = nullptr, const ELogSetting LogSettings = Trainer::DefaultLogSettings) override final; virtual int32 AddReplayBuffer(const FReplayBuffer& ReplayBuffer) override final; virtual ETrainerResponse SendReplayBuffer( const int32 ReplayBufferId, const FReplayBuffer& ReplayBuffer, const ELogSetting LogSettings = Trainer::DefaultLogSettings) override final; private: /** Free and deallocate all shared memory. */ void Deallocate(); FString TaskName; FString IntermediatePath; FString ConfigPath; TSharedPtr TrainingProcess = nullptr; float Timeout = Trainer::DefaultTimeout; bool bUseExternalTrainer = false; TSharedMemoryArrayView<1, volatile int32> Controls; TArray> NeuralNetworkSharedMemoryArrayViews; TArray SharedMemoryExperienceContainers; }; /** * This object allows you to launch the FSocketTrainer server as a subprocess, * which is convenient when you want to train using it locally. */ struct LEARNINGTRAINING_API FSocketTrainerServerProcess { /** * Creates a training server as a subprocess * * @param TaskName The name of this training task * @param CustomTrainerPath Path to check for custom trainer files * @param TrainerFileName The name of the training file to use * @param PythonExecutablePath Path to the python executable used for training. In general should be the python shipped with Unreal Editor. * @param PythonContentPath Path to the Python Content folder provided by the Learning plugin * @param IntermediatePath Path to the intermediate folder to write temporary files, logs, and snapshots to * @param IpAddress Ip address to bind the listening socket to. For a local server you will want to use 127.0.0.1 * @param Port Port to use for the listening socket. * @param TrainingProcessFlags Training server subprocess flags * @param LogSettings Logging settings to use */ FSocketTrainerServerProcess( const FString& TaskName, const FString& CustomTrainerPath, const FString& TrainerFileName, const FString& PythonExecutablePath, const FString& PythonContentPath, const FString& IntermediatePath, const TCHAR* IpAddress = Trainer::DefaultIp, const uint32 Port = Trainer::DefaultPort, const float InTimeout = Trainer::DefaultTimeout, const ESubprocessFlags TrainingProcessFlags = ESubprocessFlags::None, const ELogSetting LogSettings = ELogSetting::Normal); /** * Check if the server process is still running */ bool IsRunning() const; /** * Wait for the server process to end * * @param Timeout Timeout to wait in seconds * @returns true if successful, otherwise false if it times out */ bool Wait(); /** * Terminate the server process */ void Terminate(); /** Get the training subprocess. */ FSubprocess* GetTrainingSubprocess(); private: FSubprocess TrainingProcess; float Timeout = Trainer::DefaultTimeout; }; /** * Trainer that connects to an external training server to perform training * * This trainer can be used to allow the python training process the run * on a different machine to the experience gathering process. */ struct LEARNINGTRAINING_API FSocketTrainer : public IExternalTrainer { /** * Creates a new Socket trainer * * @param OutResponse Response to the initial connection * @param ExternalTrainerProcess The external trainer process * @param IpAddress Server Ip address * @param Port Server Port * @param Timeout Timeout to wait in seconds for connection and initial data transfer */ FSocketTrainer( ETrainerResponse& OutResponse, const TSharedPtr& ExternalTrainerProcess, const bool bUseExternalTrainerProcess, const TCHAR* IpAddressOrHostname = Trainer::DefaultIp, const uint32 Port = Trainer::DefaultPort, const float InTimeout = Trainer::DefaultTimeout, const bool IsHostname = false); ~FSocketTrainer(); virtual bool IsValid() override final; virtual void Terminate() override final; virtual ETrainerResponse SendStop() override final; virtual ETrainerResponse Wait() override final; virtual bool HasNetworkOrCompleted() override final; virtual ETrainerResponse SendConfigs( const TSharedRef& DataConfigObject, const TSharedRef& TrainerConfigObject, const ELogSetting LogSettings = Trainer::DefaultLogSettings) override final; virtual int32 AddNetwork(const ULearningNeuralNetworkData& Network) override final; virtual ETrainerResponse ReceiveNetwork( const int32 NetworkId, ULearningNeuralNetworkData& OutNetwork, FRWLock* NetworkLock = nullptr, const ELogSetting LogSettings = Trainer::DefaultLogSettings) override final; virtual TArray ReceiveNetworks( const TArray& NetworkIds, TArray> Networks, TArray NetworkLocks = TArray(), const ELogSetting LogSettings = Trainer::DefaultLogSettings) override final; virtual TArray ReceiveQueuedNetworks( const TArray& NetworkIds, TArray> Networks, TArray NetworkLocks = TArray(), const ELogSetting LogSettings = Trainer::DefaultLogSettings); virtual ETrainerResponse SendNetwork( const int32 NetworkId, const ULearningNeuralNetworkData& Network, FRWLock* NetworkLock = nullptr, const ELogSetting LogSettings = Trainer::DefaultLogSettings) override final; virtual int32 AddReplayBuffer(const FReplayBuffer& ReplayBuffer) override final; virtual ETrainerResponse SendReplayBuffer( const int32 ReplayBufferId, const FReplayBuffer& ReplayBuffer, const ELogSetting LogSettings = Trainer::DefaultLogSettings) override final; private: TArray> NetworkBuffers; int32 LastReplayBufferId = -1; float Timeout = Trainer::DefaultTimeout; bool bUseExternalTrainer = false; TSharedPtr TrainingProcess = nullptr; FSocket* Socket = nullptr; int32 NetworksVersion = -1; }; }