// Copyright Epic Games, Inc. All Rights Reserved. #pragma once #include "Containers/Array.h" #include "Containers/BitArray.h" #include "Containers/Map.h" #include "Containers/SparseArray.h" #include "Containers/UnrealString.h" #include "CoreMinimal.h" #include "Delegates/IDelegateInstance.h" #include "FileCacheUtilities.h" #include "HAL/Platform.h" #include "HAL/PlatformTime.h" #include "HAL/ThreadSafeBool.h" #include "HAL/ThreadSafeCounter.h" #include "IDirectoryWatcher.h" #include "Misc/AssertionMacros.h" #include "Misc/DateTime.h" #include "Misc/Guid.h" #include "Misc/Optional.h" #include "Misc/SecureHash.h" #include "Serialization/Archive.h" #include "Templates/Function.h" #include "Templates/SharedPointer.h" #include "Templates/UnrealTemplate.h" struct FFileChangeData; struct FGuid; namespace DirectoryWatcher { /** Custom serialization version for FFileCache */ struct DIRECTORYWATCHER_API FFileCacheCustomVersion { static const FGuid Key; enum Type { Initial, IncludeFileHash, Latest = IncludeFileHash }; }; /** Structure representing specific information about a particular file */ struct FFileData { /** Constructiors */ FFileData() : Timestamp(0) {} FFileData(const FDateTime& InTimestamp, const FMD5Hash& InFileHash) : Timestamp(InTimestamp), FileHash(InFileHash) {} friend bool operator==(const FFileData& LHS, const FFileData& RHS) { return LHS.Timestamp == RHS.Timestamp && LHS.FileHash == RHS.FileHash; } friend bool operator!=(const FFileData& LHS, const FFileData& RHS) { return LHS.Timestamp != RHS.Timestamp || LHS.FileHash != RHS.FileHash; } /** Serializer for this type */ friend FArchive& operator<<(FArchive& Ar, FFileData& Data) { if (Ar.CustomVer(FFileCacheCustomVersion::Key) >= FFileCacheCustomVersion::Initial) { Ar << Data.Timestamp; } if (Ar.CustomVer(FFileCacheCustomVersion::Key) >= FFileCacheCustomVersion::IncludeFileHash) { Ar << Data.FileHash; } return Ar; } /** The cached timestamp of the file on disk */ FDateTime Timestamp; /** The cached MD5 hash of the file on disk */ FMD5Hash FileHash; }; /** Structure representing the file data for a number of files in a directory */ struct FDirectoryState { /** Default construction */ FDirectoryState() {} /** Move construction */ FDirectoryState(FDirectoryState&& In) : Rules(MoveTemp(In.Rules)), Files(MoveTemp(In.Files)) {} FDirectoryState& operator=(FDirectoryState&& In) { Swap(Rules, In.Rules); Swap(Files, In.Files); return *this; } /** The rules that define what this state applies to */ FMatchRules Rules; /** Filename -> data map */ TMap Files; /** Serializer for this type */ friend FArchive& operator<<(FArchive& Ar, FDirectoryState& State) { Ar.UsingCustomVersion(FFileCacheCustomVersion::Key); // Ignore any old versions to data to ensure that we generate a new cache if (Ar.CustomVer(FFileCacheCustomVersion::Key) >= FFileCacheCustomVersion::IncludeFileHash) { Ar << State.Rules; // Number of files int32 Num = State.Files.Num(); Ar << Num; if (Ar.IsLoading()) { State.Files.Reserve(Num); } Ar << State.Files; } return Ar; } }; enum class EFileAction : uint8 { Added, Modified, Removed, Moved }; /** A transaction issued by FFileCache to describe a change to the cache. The change is only committed once the transaction is returned to the cache (see FFileCache::CompleteTransaction). */ struct FUpdateCacheTransaction { /** The path of the file to which this transaction relates */ FImmutableString Filename; /** In the case of a moved file, this represents the path the file was moved from */ FImmutableString MovedFromFilename; /** File data pertaining to this change at the time of dispatch */ FFileData FileData; /** The type of action that prompted this transaction */ EFileAction Action; /** Publically moveable */ FUpdateCacheTransaction(FUpdateCacheTransaction&& In) : Filename(MoveTemp(In.Filename)), MovedFromFilename(MoveTemp(In.MovedFromFilename)), FileData(MoveTemp(In.FileData)), Action(MoveTemp(In.Action)) {} FUpdateCacheTransaction& operator=(FUpdateCacheTransaction&& In) { Swap(Filename, In.Filename); Swap(MovedFromFilename, In.MovedFromFilename); Swap(FileData, In.FileData); Swap(Action, In.Action); return *this; } private: friend class FFileCache; /** Construction responsibility is held by FFileCache */ FUpdateCacheTransaction(FImmutableString InFilename, EFileAction InAction, const FFileData& InFileData = FFileData()) : Filename(MoveTemp(InFilename)), FileData(InFileData), Action(InAction) {} /** Construction responsibility is held by FFileCache */ FUpdateCacheTransaction(FImmutableString InMovedFromFilename, FImmutableString InMovedToFilename, const FFileData& InFileData) : Filename(MoveTemp(InMovedToFilename)), MovedFromFilename(MoveTemp(InMovedFromFilename)), FileData(InFileData), Action(EFileAction::Moved) {} /** Not Copyable */ FUpdateCacheTransaction(const FUpdateCacheTransaction&) = delete; FUpdateCacheTransaction& operator=(const FUpdateCacheTransaction&) = delete; }; /** Enum specifying whether a path should be relative or absolute */ enum EPathType { /** Paths should be cached relative to the root cache directory */ Relative, /** Paths should be cached as absolute file system paths */ Absolute }; struct IAsyncFileCacheTask : public TSharedFromThis { enum class EProgressResult { Finished, Pending }; IAsyncFileCacheTask() : StartTime(FPlatformTime::Seconds()) {} virtual ~IAsyncFileCacheTask() {} /** Tick this task. Only to be called on the task thread. */ virtual EProgressResult Tick(const FTimeLimit& TimeLimit) = 0; /** Check whether this task is complete. Must be implemented in a thread-safe manner. */ virtual bool IsComplete() const = 0; /** Get the age of this task in seconds */ double GetAge() const { return FPlatformTime::Seconds() - StartTime; } protected: /** The time this task started */ double StartTime; }; /** Simple struct that encapsulates a filename and its associated MD5 hash */ struct FFilenameAndHash { FString AbsoluteFilename; FMD5Hash FileHash; FFilenameAndHash(){} FFilenameAndHash(const FString& File) : AbsoluteFilename(File) {} }; /** Async task responsible for MD5 hashing a number of files, reporting completed hashes to the client when done */ struct FAsyncFileHasher : public IAsyncFileCacheTask { /** Constructor */ FAsyncFileHasher(TArray InFilesThatNeedHashing); /** Return any completed filenames and their corresponding hashes */ TArray GetCompletedData(); /** Returns true when this task has finished hashing all its files */ virtual bool IsComplete() const override; protected: /** Tick this reader (hashes as many files as possible in the time allowed). Returns progress state. */ virtual EProgressResult Tick(const FTimeLimit& Limit) override; /** The array of data that we will process */ TArray Data; /** The number of items we have returned to the client. Only accessed from the main thread. */ int32 NumReturned; /** The number of files that we have hashed on the task thread. Atomic - safe to access from any thread. */ FThreadSafeCounter CurrentIndex; /** Scratch buffer used for reading in files */ TArray ScratchBuffer; }; /** * Class responsible for 'asynchronously' scanning a folder for files and timestamps. * Example usage: * FAsyncDirectoryReader Reader(TEXT("C:\\Path"), EPathType::Relative); * * while(!Reader.IsComplete()) * { * FPlatformProcess::Sleep(1); * Reader.Tick(FTimedSignal(1)); // Do 1 second of work * } * TOptional State = Reader.GetFinalState(); */ struct FAsyncDirectoryReader : public IAsyncFileCacheTask { /** Constructor that sets up the directory reader to the specified directory */ FAsyncDirectoryReader(const FString& InDirectory, EPathType InPathType); /** Set what files are relevant to this reader. Calling this once the reader starts results in undefined behaviour. */ void SetMatchRules(const FMatchRules& InRules) { if (LiveState.IsSet()) { LiveState->Rules = InRules; } } /** * Get the state of the directory once finished. Relinquishes the currently stored directory state to the client. * Returns nothing if incomplete, or if GetLiveState() has already been called. */ TOptional GetLiveState(); /** Retrieve the cached state supplied to this class through UseCachedState(). */ TOptional GetCachedState(); /** Retrieve the cached state supplied to this class through UseCachedState(). */ TArray GetFilesThatNeedHashing() { TArray Swapped; Swap(FilesThatNeedHashing, Swapped); return Swapped; } /** Instruct the directory reader to use the specified cached state to lookup file hashes, where timestamps haven't changed */ void UseCachedState(FDirectoryState InCachedState) { CachedState = MoveTemp(InCachedState); } /** Returns true when this directory reader has finished scanning the directory */ virtual bool IsComplete() const override; /** Tick this reader (discover new directories / files). Returns progress state. */ virtual EProgressResult Tick(const FTimeLimit& Limit) override; private: /** Non-recursively scan a single directory for its contents. Adds results to Pending arrays. */ void ScanDirectory(const FString& InDirectory); /** Returns version of path to store */ FString GetPathToStore(const FString& InPath) const; /** Path to the root directory we want to scan */ FString RootPath; /** Standardized path to the root directory we want to scan */ FString StandardRootPath; /** Whether we should return relative or absolute paths */ EPathType PathType; /** The currently discovered state of the directory - reset once relinquished to the client through GetLiveState */ TOptional LiveState; /** The previously cached state of the directory, optional */ TOptional CachedState; /** An array of files that need hashing */ TArray FilesThatNeedHashing; /** A list of directories we have recursively found on our travels */ TArray PendingDirectories; /** A list of files we have recursively found on our travels */ TArray PendingFiles; /** Thread safe flag to signify when this class has finished reading */ FThreadSafeBool bIsComplete; }; /** Configuration structure required to construct a FFileCache */ struct FFileCacheConfig { /** Enum that specifies what changes are required for a change to be reported. When combined, any valid change is reported. */ enum EChangeDetection { /** Report modifications when the timestamp of a file changes */ Timestamp, /** Report modifications when the contents of a file changes */ FileHash, }; FFileCacheConfig(FString InDirectory, FString InCacheFile) : Directory(InDirectory), CacheFile(InCacheFile), PathType(EPathType::Relative), bDetectChangesSinceLastRun(false) , ChangeDetectionBits(false, 2) { DetectMoves(true); ChangeDetectionBits[EChangeDetection::Timestamp] = true; } /** String specifying the directory on disk that the cache should reflect */ FString Directory; /** String specifying the file that the cache should be saved to. When empty, no cache file will be maintained (thus only an in-memory cache is used) */ FString CacheFile; /** List of rules which define what we will be watching */ FMatchRules Rules; /** Path type to return, relative to the directory or absolute. */ EPathType PathType; /** When true, changes to the directory since the cache shutdown will be detected and reported. When false, said changes will silently be applied to the serialized cache. */ bool bDetectChangesSinceLastRun; /** User-specified custom change detection logic. Called when a file's timestamp has been modified, to see if we should report this change to the client */ TFunction(const FImmutableString&, const FFileData&)> CustomChangeLogic; /** Set up this cache to detect moves */ FFileCacheConfig& DetectMoves(bool bInDetectMoves) { bDetectMoves = bInDetectMoves; if (bDetectMoves) { bRequireFileHashes = true; } return *this; } /** Set up this cache to generate MD5 hashes for its constituent files */ FFileCacheConfig& RequireFileHashes(bool bInRequireFileHashes) { if (ensureMsgf(bInRequireFileHashes || !bDetectMoves, TEXT("Unable to disable file hashing when move detection is enabled"))) { bRequireFileHashes = bInRequireFileHashes; } return *this; } /** Instruct the cache to report the specified changes to files */ FFileCacheConfig& DetectChangesFor(EChangeDetection ChangeType, bool Value) { ChangeDetectionBits[ChangeType] = Value; return *this; } private: /** True to detect moves and renames (based on file hash), false otherwise. Implies bRequireFileHashes. */ bool bDetectMoves; /** When true, the cache will also calculate MD5 hashes for files. When true, an additional thread will be launched on startup to harvest unknown MD5 hashes for the directory. */ bool bRequireFileHashes; /** Bitfied specifying how we will be detecting changes. See EChangeDetection. */ TBitArray<> ChangeDetectionBits; friend class FFileCache; }; /** * A class responsible for scanning a directory, and maintaining a cache of its state (files and timestamps). * Changes in the cache can be retrieved through GetOutstandingChanges(). Changes will be reported for any * change in the cached state even between runs of the process. */ class DIRECTORYWATCHER_API FFileCache { public: /** Construction from a config */ FFileCache(const FFileCacheConfig& InConfig); FFileCache(const FFileCache&) = delete; FFileCache& operator=(const FFileCache&) = delete; /** Destructor */ ~FFileCache(); /** Destroy this cache. Cleans out the in-memory state and deletes the cache file, if present. */ void Destroy(); /** Get the absolute path of the directory this cache reflects */ const FString& GetDirectory() const { return Config.Directory; } /** Check whether this file cache has finished starting up yet. Does not imply rename/move detection is fully initialized. (see MoveDetectionInitialized()) */ bool HasStartedUp() const; /** Check whether this move/rename detection has been initiated or not. This can take much longer than startup, so can be checked separately */ bool MoveDetectionInitialized() const; /** Attempt to locate file data pertaining to the specified filename. * @param InFilename The filename to find data for - either relative to the directory, or absolute, depending on Config.PathType. * @return The current cached file data pertaining to the specified filename, or nullptr. May not be completely up to date if there are outstanding transactions. */ const FFileData* FindFileData(FImmutableString InFilename) const; /** Tick this FileCache */ void Tick(); /** Write out the cached file, if we have any changes to write */ void WriteCache(); /** * Return a transaction to the cache for completion. Will update the cached state with the change * described in the transaction, and mark the cache as needing to be saved. */ void CompleteTransaction(FUpdateCacheTransaction&& Transaction); /** Report an external change to the manager, such that a subsequent equal change reported by the os be ignored */ void IgnoreNewFile(const FString& Filename); void IgnoreFileModification(const FString& Filename); void IgnoreMovedFile(const FString& SrcFilename, const FString& DstFilename); void IgnoreDeletedFile(const FString& Filename); /** Get the number of pending changes to the cache. */ int32 GetNumDirtyFiles() const { return DirtyFiles.Num(); } /** * Iterate the number of pending changes to the cache with the specified predicate. Assumes Tick has been called (and thus, PendingTransactions is up-to-date) * @param InIter A predicate to match files against (returning true to continue iteration, false to break), based on: 1) the pending transaction 2) the time the change was detected */ void IterateOutstandingChanges(TFunctionRef InPredicate) const; /** Get pending changes to the cache. Transactions must be returned to CompleteTransaction to update the cache. * Filter predicate receives a transaction and the time the change was reported. */ TArray FilterOutstandingChanges(TFunctionRef InPredicate); TArray GetOutstandingChanges(); private: /** Called when the directory we are monitoring has been changed in some way */ void OnDirectoryChanged(const TArray& FileChanges); /** Diff the specified set of dirty files (absolute paths, or relative to the monitor directory), adding transactions to the specified array if necessary. * Optionally takes a directory state from which we can retrieve current file system state, without having to ask the FS directly. * Optionally exclude files that have changed since the specified threshold, to ensure that related events get grouped together correctly. */ void DiffDirtyFiles(TMap& InDirtyFiles, TArray& OutTransactions, const FDirectoryState* InFileSystemState = nullptr) const; /** Detect a rename for the specified file */ void DetectRename(); /** Get the absolute path from a transaction filename */ FString GetAbsolutePath(const FString& InTransactionPath) const; /** Get the transaction path from an absolute filename. Returns nothing if the absolute path is invalid (not under the correct folder, or not applicable) */ TOptional GetTransactionPath(const FString& InAbsolutePath) const; /** Unbind the watcher from the directory. Called on destruction. */ void UnbindWatcher(); /** Read the cache file data and return the contents */ TOptional ReadCache() const; /** Called when the initial async reader has finished harvesting file system timestamps */ void ReadStateFromAsyncReader(); /** Called to harvest any file hashes that have been generated by the DirtyFileHasher thread task */ void HarvestDirtyFileHashes(); void RescanForDirtyFileHashes(); /** Update our cache of pending transactions (to prevent diffing against the directory unnecessarily) */ void UpdatePendingTransactions(); /** Returns version of path to store */ FString GetPathToStore(const FFilenameAndHash& InData) const; private: /** Configuration settings applied on construction */ FFileCacheConfig Config; /** Asynchronous directory reader responsible for gathering all file/timestamp information recursively from our cache directory */ TSharedPtr DirectoryReader; /** Asynchronous task used to harvest the MD5 hashes of a set of filenames */ TSharedPtr AsyncFileHasher; /** Asynchronous task used to harvest the MD5 hashes of a set of recently changed filenames */ TSharedPtr DirtyFileHasher; /** A map of dirty files that we will use to report changes to the user. Timestamp value of FFileData here pertains to the *time of the change* not the timestamp of the file. */ TMap DirtyFiles; /** Our in-memory view of the cached directory state. */ FDirectoryState CachedDirectoryState; /** Handle to the directory watcher delegate so we can delete it properly */ FDelegateHandle WatcherDelegate; /** True when the cached state we have in memory is more up to date than the serialized file. Enables WriteCache() when true. */ bool bSavedCacheDirty; /** The time we last retrieved file hashes from the thread */ double LastFileHashGetTime; /** List of cached pending transactions */ bool bPendingTransactionsDirty; TArray PendingTransactions; FString ConfigDirectoryStandardized; }; } // namespace DirectoryWatcher