Files
UnrealEngine/Engine/Source/Programs/UnrealCloudDDC/Jupiter/Implementation/Blob/FileSystemStore.cs
2025-05-18 13:04:45 +08:00

556 lines
18 KiB
C#

// Copyright Epic Games, Inc. All Rights Reserved.
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using EpicGames.Core;
using EpicGames.Horde.Storage;
using Jupiter.Common;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using OpenTelemetry.Trace;
namespace Jupiter.Implementation
{
public class FileSystemStore : IBlobStore, IBlobCleanup
{
private readonly IServiceProvider _provider;
private readonly INamespacePolicyResolver _namespacePolicyResolver;
private readonly IOptionsMonitor<FilesystemSettings> _settings;
private readonly Tracer _tracer;
private readonly ILogger<FileSystemStore> _logger;
private readonly ConcurrentDictionary<NamespaceId, FileStorageBackend> _backends = new ConcurrentDictionary<NamespaceId, FileStorageBackend>();
public FileSystemStore(IOptionsMonitor<FilesystemSettings> settings, Tracer tracer, ILogger<FileSystemStore> logger, IServiceProvider provider, INamespacePolicyResolver namespacePolicyResolver)
{
_settings = settings;
_tracer = tracer;
_logger = logger;
_provider = provider;
_namespacePolicyResolver = namespacePolicyResolver;
}
private FileStorageBackend GetBackend(NamespaceId ns)
{
return _backends.GetOrAdd(ns, x => ActivatorUtilities.CreateInstance<FileStorageBackend>(_provider, GetFilesystemPath(x), x));
}
private string GetRootDir()
{
return PathUtil.ResolvePath(_settings.CurrentValue.RootDir);
}
public static string GetFilesystemPath(BlobId blob)
{
const int CountOfCharactersPerDirectory = 2;
string objectName = blob.ToString();
string firstPart = objectName.Substring(0, CountOfCharactersPerDirectory);
string secondPart = objectName.Substring(CountOfCharactersPerDirectory, CountOfCharactersPerDirectory);
string fileName = objectName;
return Path.Combine(firstPart, secondPart, fileName);
}
public static FileInfo GetFilesystemPath(string rootDir, NamespaceId ns, BlobId blob)
{
return new FileInfo(Path.Combine(rootDir, ns.ToString(), GetFilesystemPath(blob)));
}
public DirectoryReference GetFilesystemPath(NamespaceId ns)
{
return DirectoryReference.Combine(new DirectoryReference(GetRootDir()), ns.ToString());
}
public Task<Uri?> GetObjectByRedirectAsync(NamespaceId ns, BlobId identifier)
{
// not supported
return Task.FromResult<Uri?>(null);
}
public Task<BlobMetadata> GetObjectMetadataAsync(NamespaceId ns, BlobId blobId)
{
try
{
string path = GetFilesystemPath(blobId);
return GetBackend(ns).GetMetadata(path);
}
catch (FileNotFoundException)
{
throw new BlobNotFoundException(ns, blobId);
}
}
public Task CopyBlobAsync(NamespaceId ns, NamespaceId targetNamespace, BlobId blobId)
{
throw new NotImplementedException();
}
public Task<Uri?> PutObjectWithRedirectAsync(NamespaceId ns, BlobId identifier)
{
// not supported
return Task.FromResult<Uri?>(null);
}
public async Task<BlobId> PutObjectAsync(NamespaceId ns, ReadOnlyMemory<byte> content, BlobId blobIdentifier)
{
using EpicGames.Core.ReadOnlyMemoryStream stream = new EpicGames.Core.ReadOnlyMemoryStream(content);
return await PutObjectAsync(ns, stream, blobIdentifier);
}
public async Task<BlobId> PutObjectAsync(NamespaceId ns, Stream content, BlobId blobIdentifier)
{
string path = GetFilesystemPath(blobIdentifier);
await GetBackend(ns).WriteAsync(path, content, CancellationToken.None);
return blobIdentifier;
}
public async Task<BlobId> PutObjectAsync(NamespaceId ns, byte[] content, BlobId blobIdentifier)
{
using MemoryStream stream = new MemoryStream(content);
return await PutObjectAsync(ns, stream, blobIdentifier);
}
public async Task<BlobContents> GetObjectAsync(NamespaceId ns, BlobId blob, LastAccessTrackingFlags flags, bool supportsRedirectUri = false)
{
string path = GetFilesystemPath(blob);
BlobContents? contents = await GetBackend(ns).TryReadAsync(path, flags, CancellationToken.None);
if (contents == null)
{
throw new BlobNotFoundException(ns, blob);
}
return contents;
}
public async Task<bool> ExistsAsync(NamespaceId ns, BlobId blob, bool forceCheck)
{
string path = GetFilesystemPath(blob);
return await GetBackend(ns).ExistsAsync(path, CancellationToken.None);
}
public async Task DeleteObjectAsync(NamespaceId ns, BlobId objectName)
{
string path = GetFilesystemPath(objectName);
await GetBackend(ns).DeleteAsync(path, CancellationToken.None);
}
public async Task DeleteObjectAsync(IEnumerable<NamespaceId> namespaces, BlobId objectName)
{
string path = GetFilesystemPath(objectName);
foreach (NamespaceId ns in namespaces)
{
await GetBackend(ns).DeleteAsync(path, CancellationToken.None);
}
}
public Task DeleteNamespaceAsync(NamespaceId ns)
{
DirectoryInfo namespaceDirectory = GetFilesystemPath(ns).ToDirectoryInfo();
if (namespaceDirectory.Exists)
{
namespaceDirectory.Delete(true);
}
return Task.CompletedTask;
}
public async IAsyncEnumerable<(BlobId, DateTime)> ListObjectsAsync(NamespaceId ns)
{
IStorageBackend backend = GetBackend(ns);
await foreach ((string path, DateTime time) in backend.ListAsync())
{
string name = path.Substring(path.LastIndexOf('/') + 1);
yield return (new BlobId(name), time);
}
}
public bool ShouldRun()
{
return true;
}
public async Task<ulong> CleanupAsync(CancellationToken cancellationToken)
{
return await CleanupInternalAsync(cancellationToken);
}
/// <summary>
/// Clean the store from expired files
///
/// Uses the configuration to remove the least recently accessed (modified) blobs
/// </summary>
/// <param name="cancellationToken">Cancellation token</param>
/// <returns></returns>
public async Task<ulong> CleanupInternalAsync(CancellationToken cancellationToken)
{
using TelemetrySpan scope = _tracer.StartActiveSpan("gc.filesystem")
.SetAttribute("operation.name", "gc.filesystem");
bool perNamespaceGC = _settings.CurrentValue.PerNamespaceGC;
ulong countOfBlobsRemoved = 0;
if (perNamespaceGC)
{
await foreach (NamespaceId ns in ListNamespaces().WithCancellation(cancellationToken))
{
countOfBlobsRemoved += await RunGCAsync(ns, cancellationToken);
}
}
else
{
// legacy global GC
countOfBlobsRemoved += await RunGCAsync(ns: null, cancellationToken);
}
return countOfBlobsRemoved;
}
private async Task<ulong> RunGCAsync(NamespaceId? ns, CancellationToken cancellationToken)
{
ulong countOfBlobsRemoved = 0;
(long triggerSize, long targetSize) = GetTriggerAndTargetSize(ns);
// Perform a maximum of 5 clean up runs
for (int i = 0; i < 5; i++)
{
long size = await CalculateDiskSpaceUsedAsync(ns);
string nsString = ns != null ? ns.ToString()! : "All";
// first check to see if we should trigger at all, this happens for each run but only really matters for the first attempt
if (size < triggerSize)
{
_logger.LogInformation("Filesystem cleanup not running. Disksize used: '{UsedDiskSize}'. Namespace: '{Namespace}'. Trigger size was {TriggerSize}", size, nsString, triggerSize);
return countOfBlobsRemoved;
}
// then check if we have reached the target size, if not we should continue running
if (size <= targetSize)
{
_logger.LogInformation("Filesystem cleanup reached target size. Disksize used: '{UsedDiskSize}'. Namespace: '{Namespace}'. Target size was {TargetSize}", size, nsString, targetSize);
return countOfBlobsRemoved;
}
_logger.LogInformation("Filesystem cleanup running. Disksize used: '{UsedDiskSize}'.Namespace: '{Namespace}'. Trigger size was {TriggerSize}", size, nsString, triggerSize);
// define progressively shorter windows of how long we keep data around for, based on their last write time
DateTime[] cutoffPeriods = new DateTime[]
{
DateTime.Now.AddDays(-14),
DateTime.Now.AddDays(-7),
DateTime.Now.AddDays(-3),
DateTime.Now.AddDays(-1),
DateTime.Now.AddHours(-12),
DateTime.Now /* This is a bit extreme as it will just throw out any object that exists right now, but if we get this far we need to really remove something */
};
bool hadFiles = false;
long totalBytesDeleted = 0;
foreach (DateTime cutoff in cutoffPeriods)
{
IEnumerable<FileInfo> fileInfos = GetObjectsOlderThen(cutoff, ns);
foreach (FileInfo fi in fileInfos)
{
hadFiles = true;
try
{
totalBytesDeleted += fi.Length;
fi.Delete();
++countOfBlobsRemoved;
long currentSize = size - totalBytesDeleted;
if (currentSize <= targetSize || cancellationToken.IsCancellationRequested)
{
return countOfBlobsRemoved;
}
}
catch (FileNotFoundException)
{
// if the file was gced while running we can just ignore it
}
catch (DirectoryNotFoundException)
{
// if the directory was deleted then we can ignore it to, end result is the blob is gone which is what matters
}
}
}
if (!hadFiles)
{
return countOfBlobsRemoved;
}
}
return countOfBlobsRemoved;
}
private (long triggerSize, long targetSize) GetTriggerAndTargetSize(NamespaceId? ns)
{
if (ns == null)
{
ulong maxSizeBytes = _settings.CurrentValue.MaxSizeBytes;
long triggerSize = (long)(maxSizeBytes * _settings.CurrentValue.TriggerThresholdPercentage);
long targetSize = (long)(maxSizeBytes * _settings.CurrentValue.TargetThresholdPercentage); // Target to shrink to if triggered
return (triggerSize, targetSize);
}
else
{
NamespacePolicy policy = _namespacePolicyResolver.GetPoliciesForNs(ns.Value);
ulong? maxSizeBytesPerNs = policy.MaxFilesystemStorageBytes;
ulong maxSizeBytes = maxSizeBytesPerNs ?? _settings.CurrentValue.MaxSizeBytes;
long triggerSize = (long)(maxSizeBytes * _settings.CurrentValue.TriggerThresholdPercentage);
long targetSize = (long)(maxSizeBytes * _settings.CurrentValue.TargetThresholdPercentage); // Target to shrink to if triggered
return (triggerSize, targetSize);
}
}
public IEnumerable<FileInfo> GetObjectsOlderThen(DateTime cutoff, NamespaceId? ns = null)
{
string path = ns != null ? Path.Combine(GetRootDir(), ns.ToString()!) : GetRootDir();
DirectoryInfo di = new DirectoryInfo(path);
if (!di.Exists)
{
return Array.Empty<FileInfo>();
}
return di.EnumerateFiles("*", SearchOption.AllDirectories).Where(x => x.LastWriteTime < cutoff);
}
/// <summary>
/// Get least recently accessed objects
/// Assumes files on disk have the their last access timestamp up-to-date
/// </summary>
/// <param name="ns">Namespace, if set to null all namespaces will be scanned</param>
/// <param name="maxResults">Max results to return. Note that the entire namespace will be scanned no matter what.</param>
/// <param name="maxCountOfObjectsScanned">Max count of objects scanned before we stop.</param>
/// <returns>Enumerable of least recently accessed objects as FileInfos</returns>
public IEnumerable<FileInfo> GetLeastRecentlyAccessedObjects(NamespaceId? ns = null, int maxResults = 10_000, int maxCountOfObjectsScanned = 40_000_000)
{
// TODO: The maxCountOfObjectsScanned is not a ideal solution, we should likely find a solution were we do not have to read all objects into memory like this but rather can scan over them to determine a reasonable last write time cutoff
string path = ns != null ? Path.Combine(GetRootDir(), ns.ToString()!) : GetRootDir();
DirectoryInfo di = new DirectoryInfo(path);
if (!di.Exists)
{
return Array.Empty<FileInfo>();
}
return di.EnumerateFiles("*", SearchOption.AllDirectories).Take(maxCountOfObjectsScanned).OrderBy(x => x.LastWriteTime).Take(maxResults);
}
/// <summary>
/// Calculate the total size of blobs on disk for given namespace
/// </summary>
/// <param name="ns">Namespace, if set to null the total size of all namespaces will be returned</param>
/// <returns>Total size of blobs in bytes</returns>
public async Task<long> CalculateDiskSpaceUsedAsync(NamespaceId? ns = null)
{
using TelemetrySpan scope = _tracer.StartActiveSpan("gc.filesystem.calc_disc_usage")
.SetAttribute("operation.name", "gc.filesystem.calc_disc_usage");
string path = ns != null ? Path.Combine(GetRootDir(), ns.ToString()!) : GetRootDir();
DirectoryInfo di = new DirectoryInfo(path);
if (!di.Exists)
{
return 0;
}
return await Task.Run(() => di.EnumerateFiles("*", SearchOption.AllDirectories).Sum(x =>
{
try
{
return x.Length;
}
catch (FileNotFoundException)
{
// if the file has been gced we just ignore it
return 0;
}
}));
}
public IAsyncEnumerable<NamespaceId> ListNamespaces()
{
DirectoryInfo di = new DirectoryInfo(GetRootDir());
if (!di.Exists)
{
return AsyncEnumerable.Empty<NamespaceId>();
}
return di.GetDirectories().Select(x => new NamespaceId(x.Name)).ToAsyncEnumerable();
}
}
public class FileStorageBackend : IStorageBackend
{
private readonly ILogger _logger;
private readonly DirectoryReference _baseDir;
private readonly NamespaceId _namespaceId;
private readonly IOptionsMonitor<FilesystemSettings> _settings;
private const int DefaultBufferSize = 4096;
public FileStorageBackend(DirectoryReference baseDir, NamespaceId ns, ILogger<FileStorageBackend> logger, IOptionsMonitor<FilesystemSettings> settings)
{
_logger = logger;
_baseDir = baseDir;
_namespaceId = ns;
_settings = settings;
}
private string GetRootDir()
{
return PathUtil.ResolvePath(_settings.CurrentValue.RootDir);
}
public static FileInfo GetFilesystemPath(string rootDir, NamespaceId ns, BlobId blob)
{
const int CountOfCharactersPerDirectory = 2;
string objectName = blob.ToString();
string firstPart = objectName.Substring(0, CountOfCharactersPerDirectory);
string secondPart = objectName.Substring(CountOfCharactersPerDirectory, CountOfCharactersPerDirectory);
string fileName = objectName;
return new FileInfo(Path.Combine(rootDir, ns.ToString(), firstPart, secondPart, fileName));
}
public FileInfo GetFilesystemPath(string path)
{
return FileReference.Combine(_baseDir, path).ToFileInfo();
}
public DirectoryInfo GetFilesystemPath(NamespaceId ns)
{
return new DirectoryInfo(Path.Combine(GetRootDir(), ns.ToString()));
}
static readonly string s_processSuffix = Guid.NewGuid().ToString();
static int s_uniqueId = 0;
public async Task WriteAsync(string path, Stream content, CancellationToken cancellationToken)
{
FileInfo filePath = GetFilesystemPath(path);
filePath.Directory?.Create();
if (!filePath.Exists)
{
int uniqueId = Interlocked.Increment(ref s_uniqueId);
string tempFilePath = $"{filePath.FullName}.{s_processSuffix}.{uniqueId}";
await using (FileStream fs = new FileStream(tempFilePath, FileMode.Create, FileAccess.Write, FileShare.Read, DefaultBufferSize, FileOptions.Asynchronous | FileOptions.SequentialScan))
{
await content.CopyToAsync(fs, cancellationToken);
}
try
{
File.Move(tempFilePath, filePath.FullName, true);
}
catch (IOException) when (File.Exists(filePath.FullName))
{
}
filePath.Refresh();
if (filePath.Length == 0)
{
_logger.LogWarning("0 byte file written as {Path} {Method}", path, "Stream");
}
}
UpdateLastWriteTime(filePath.FullName, DateTime.UnixEpoch);
}
/// <summary>
/// Update the last modified/write time that is used for determining when file was last accessed
///
/// Using access time is tricky as many file systems disable that for performance reasons.
/// A new blob written to disk should be set with the oldest possible write time.
/// This will ensure sorting of least recently accessed files during garbage collection works as intended.
/// The write time update will happen async without any waiting to prevent blocking the critical path
/// as it's best-effort only.
/// </summary>
/// <param name="filePath"></param>
/// <param name="lastAccessed">Time the file was last accessed</param>
private static void UpdateLastWriteTime(string filePath, DateTime lastAccessed)
{
try
{
File.SetLastWriteTimeUtc(filePath, lastAccessed);
}
catch (FileNotFoundException)
{
// it is okay if the file does not exist anymore, that just means it got gced
}
}
public Task<BlobContents?> TryReadAsync(string path, LastAccessTrackingFlags flags, CancellationToken cancellationToken)
{
FileInfo filePath = GetFilesystemPath(path);
if (!filePath.Exists)
{
return Task.FromResult<BlobContents?>(null);
}
if (flags == LastAccessTrackingFlags.DoTracking)
{
UpdateLastWriteTime(filePath.FullName, DateTime.UtcNow);
}
FileStream fs = new FileStream(filePath.FullName, FileMode.Open, FileAccess.Read, FileShare.Read, DefaultBufferSize, FileOptions.Asynchronous | FileOptions.SequentialScan);
return Task.FromResult<BlobContents?>(new BlobContents(fs, fs.Length, $"{_namespaceId}/{path}"));
}
public Task<bool> ExistsAsync(string path, CancellationToken cancellationToken)
{
FileInfo filePath = GetFilesystemPath(path);
return Task.FromResult(filePath.Exists);
}
public Task DeleteAsync(string path, CancellationToken cancellationToken)
{
FileInfo filePath = GetFilesystemPath(path);
if (filePath.Exists)
{
filePath.Delete();
}
return Task.CompletedTask;
}
public IAsyncEnumerable<(string, DateTime)> ListAsync(CancellationToken cancellationToken)
{
return DoListOldObjects().ToAsyncEnumerable();
}
private IEnumerable<(string, DateTime)> DoListOldObjects()
{
DirectoryInfo di = _baseDir.ToDirectoryInfo();
if (!di.Exists)
{
yield break;
}
foreach (FileInfo file in di.EnumerateFiles("*", SearchOption.AllDirectories))
{
string path = new FileReference(file).MakeRelativeTo(_baseDir).Replace(Path.DirectorySeparatorChar, '/');
yield return (path, file.LastWriteTime);
}
}
public Task<BlobMetadata> GetMetadata(string path)
{
FileInfo fileInfo = GetFilesystemPath(path);
return Task.FromResult(new BlobMetadata(fileInfo.Length, fileInfo.CreationTime));
}
}
}