UnrealEngine/Engine/Source/Programs/UnrealCloudDDC/Jupiter/Implementation/Blob/AmazonS3Store.cs

// Copyright Epic Games, Inc. All Rights Reserved.
using System;
using System.Buffers;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;
using System.Xml;
using Amazon.S3;
using Amazon.S3.Model;
using Amazon.S3.Transfer;
using Amazon.S3.Util;
using EpicGames.Horde.Storage;
using Jupiter.Common;
using Jupiter.Common.Implementation;
using Jupiter.Common.Utils;
using Jupiter.Implementation.Blob;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using OpenTelemetry.Trace;
using Exception = System.Exception;
using KeyNotFoundException = System.Collections.Generic.KeyNotFoundException;
namespace Jupiter.Implementation
{
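/// <summary>
/// Blob store backed by Amazon S3. Each namespace resolves to a bucket via its storage pool (see GetBucketName)
/// and blobs are stored content-addressed under a short hash prefix (see BlobIdentifierExtensions.AsS3Key).
/// </summary>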
public class AmazonS3Store : IBlobStore, IMultipartBlobStore
{
private readonly IAmazonS3 _amazonS3;
private readonly IBlobIndex _blobIndex;
private readonly INamespacePolicyResolver _namespacePolicyResolver;
private readonly ILogger<AmazonS3Store> _logger;
private readonly IServiceProvider _provider;
private readonly S3Settings _settings;
private readonly ConcurrentDictionary<NamespaceId, AmazonStorageBackend> _backends = new ConcurrentDictionary<NamespaceId, AmazonStorageBackend>();
public AmazonS3Store(IAmazonS3 amazonS3, IOptionsMonitor<S3Settings> settings, IBlobIndex blobIndex, INamespacePolicyResolver namespacePolicyResolver, ILogger<AmazonS3Store> logger, IServiceProvider provider)
{
_amazonS3 = amazonS3;
_blobIndex = blobIndex;
_namespacePolicyResolver = namespacePolicyResolver;
_logger = logger;
_provider = provider;
_settings = settings.CurrentValue;
}
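// backends are created lazily and cached per namespace, as each namespace can resolve to a different bucket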
AmazonStorageBackend GetBackend(NamespaceId ns)
{
return _backends.GetOrAdd(ns, x => ActivatorUtilities.CreateInstance<AmazonStorageBackend>(_provider, GetBucketName(x)));
}
public async Task<Uri?> GetObjectByRedirectAsync(NamespaceId ns, BlobId identifier)
{
Uri? uri = await GetBackend(ns).GetReadRedirectAsync(identifier.AsS3Key());
return uri;
}
public async Task<BlobMetadata> GetObjectMetadataAsync(NamespaceId ns, BlobId identifier)
{
BlobMetadata? result = await GetBackend(ns).GetMetadataAsync(identifier.AsS3Key());
if (result == null)
{
throw new BlobNotFoundException(ns, identifier);
}
return result;
}
public async Task CopyBlobAsync(NamespaceId ns, NamespaceId targetNamespace, BlobId blobId)
{
string targetBucketName = GetBackend(targetNamespace).GetBucketName();
await GetBackend(ns).CopyBlobAsync(blobId.AsS3Key(), targetBucketName, blobId.AsS3Key());
}
public async Task<Uri?> PutObjectWithRedirectAsync(NamespaceId ns, BlobId identifier)
{
Uri? uri = await GetBackend(ns).GetWriteRedirectAsync(identifier.AsS3Key());
return uri;
}
public async Task<BlobId> PutObjectAsync(NamespaceId ns, ReadOnlyMemory<byte> content, BlobId objectName)
{
await using MemoryStream stream = new MemoryStream(content.ToArray());
return await PutObjectAsync(ns, stream, objectName);
}
public async Task<BlobId> PutObjectAsync(NamespaceId ns, Stream stream, BlobId objectName)
{
await GetBackend(ns).WriteAsync(objectName.AsS3Key(), stream, CancellationToken.None);
return objectName;
}
public async Task<BlobId> PutObjectAsync(NamespaceId ns, byte[] content, BlobId objectName)
{
await using MemoryStream stream = new MemoryStream(content);
return await PutObjectAsync(ns, stream, objectName);
}
private string GetBucketName(NamespaceId ns)
{
try
{
NamespacePolicy policy = _namespacePolicyResolver.GetPoliciesForNs(ns);
string storagePool = policy.StoragePool;
// if the bucket to use for the storage pool has been overridden we use the override
if (_settings.StoragePoolBucketOverride.TryGetValue(storagePool, out string? containerOverride))
{
return containerOverride;
}
// by default we use the storage pool as a suffix to determine the bucket for that pool
string storagePoolSuffix = string.IsNullOrEmpty(storagePool) ? "" : $"-{storagePool}";
return $"{_settings.BucketName}{storagePoolSuffix}";
}
catch (KeyNotFoundException)
{
throw new NamespaceNotFoundException(ns);
}
}
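// read path: optionally return a presigned redirect uri so the caller can fetch the blob directly from S3 instead of streaming it through the service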
public async Task<BlobContents> GetObjectAsync(NamespaceId ns, BlobId blob, LastAccessTrackingFlags flags = LastAccessTrackingFlags.DoTracking, bool supportsRedirectUri = false)
{
NamespacePolicy policies = _namespacePolicyResolver.GetPoliciesForNs(ns);
try
{
if (supportsRedirectUri && policies.AllowRedirectUris)
{
Uri? redirectUri = await GetBackend(ns).GetReadRedirectAsync(blob.AsS3Key());
if (redirectUri != null)
{
return new BlobContents(redirectUri);
}
}
BlobContents? contents = await GetBackend(ns).TryReadAsync(blob.AsS3Key(), flags);
if (contents == null)
{
throw new BlobNotFoundException(ns, blob);
}
return contents;
}
catch (AmazonS3Exception e)
{
// log information about the failed request, except for 404 as it's valid to not find objects in S3
if (e.StatusCode != HttpStatusCode.NotFound)
{
_logger.LogWarning("Exception raised from S3 {Exception}. {RequestId} {Id}", e, e.RequestId, e.AmazonId2);
}
// rethrow the exception, we just wanted to log more information about the failed request for further debugging
throw;
}
}
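// existence checks can be answered from the blob index when the namespace policy and settings allow it; forceCheck always queries S3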
public async Task<bool> ExistsAsync(NamespaceId ns, BlobId blobIdentifier, bool forceCheck)
{
NamespacePolicy policies = _namespacePolicyResolver.GetPoliciesForNs(ns);
if (_settings.UseBlobIndexForExistsCheck && policies.UseBlobIndexForSlowExists && !forceCheck)
{
return await _blobIndex.BlobExistsInRegionAsync(ns, blobIdentifier);
}
else
{
return await GetBackend(ns).ExistsAsync(blobIdentifier.AsS3Key(), CancellationToken.None);
}
}
public async Task DeleteNamespaceAsync(NamespaceId ns)
{
string bucketName = GetBucketName(ns);
try
{
await _amazonS3.DeleteBucketAsync(bucketName);
}
catch (AmazonS3Exception e)
{
// if the bucket does not exist we get a not found status code
if (e.StatusCode == HttpStatusCode.NotFound)
{
// deleting a non-existent bucket counts as a success
return;
}
// something else happened, let's just process it as usual
}
}
public async IAsyncEnumerable<(BlobId, DateTime)> ListObjectsAsync(NamespaceId ns)
{
IStorageBackend backend = GetBackend(ns);
await foreach ((string path, DateTime time) in backend.ListAsync())
{
// ignore objects in the temp prefix
if (path.StartsWith("Temp", StringComparison.OrdinalIgnoreCase))
{
continue;
}
string identifierString = path.Substring(path.LastIndexOf("/", StringComparison.Ordinal) + 1);
yield return (new BlobId(identifierString), time);
}
}
public async Task DeleteObjectAsync(NamespaceId ns, BlobId blobIdentifier)
{
IStorageBackend backend = GetBackend(ns);
await backend.DeleteAsync(blobIdentifier.AsS3Key());
}
public async Task DeleteObjectAsync(IEnumerable<NamespaceId> namespaces, BlobId blob)
{
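// namespaces that share a storage pool also share a bucket, so the blob only needs to be deleted once per storage pool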
List<NamespaceId> namespaceIds = namespaces.ToList();
List<string> storagePools = namespaceIds.Select(ns => _namespacePolicyResolver.GetPoliciesForNs(ns).StoragePool).Distinct().ToList();
Dictionary<string, NamespaceId> storagePoolsToClean = storagePools.ToDictionary(storagePool => storagePool, storagePool => namespaceIds.FirstOrDefault(id => _namespacePolicyResolver.GetPoliciesForNs(id).StoragePool == storagePool));
foreach ((string _, NamespaceId ns) in storagePoolsToClean)
{
await GetBackend(ns).DeleteAsync(blob.AsS3Key(), CancellationToken.None);
}
}
#region IMultipartBlobStore
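// multipart uploads are staged under this temporary prefix and renamed to their final content-addressed key once completed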
private const string TempMultipartPrefix = "Temp/Multipart";
public Task<string> StartMultipartUploadAsync(NamespaceId ns, string blobName)
{
return GetBackend(ns).StartMultipartUploadAsync($"{TempMultipartPrefix}/{blobName}");
}
public Task CompleteMultipartUploadAsync(NamespaceId ns, string blobName, string uploadId, List<string> partIds)
{
return GetBackend(ns).CompleteMultipartUploadAsync($"{TempMultipartPrefix}/{blobName}", uploadId, partIds);
}
public Task<Uri?> GetWriteRedirectForPartAsync(NamespaceId ns, string blobName, string uploadId, string partIdentifier)
{
return Task.FromResult(GetBackend(ns).GetWriteRedirectForPart($"{TempMultipartPrefix}/{blobName}", uploadId, partIdentifier));
}
public async Task<BlobContents?> GetMultipartObjectByNameAsync(NamespaceId ns, string blobName)
{
return await GetBackend(ns).TryReadAsync($"{TempMultipartPrefix}/{blobName}", LastAccessTrackingFlags.SkipTracking);
}
public async Task RenameMultipartBlobAsync(NamespaceId ns, string blobName, BlobId blobId)
{
await GetBackend(ns).RenameMultipartBlobAsync($"{TempMultipartPrefix}/{blobName}", blobId.AsS3Key());
}
public Task PutMultipartPartAsync(NamespaceId ns, string blobName, string uploadId, string partIdentifier, byte[] blob)
{
return GetBackend(ns).PutMultipartPartAsync($"{TempMultipartPrefix}/{blobName}", uploadId, partIdentifier, blob);
}
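// splits a blob into equally sized ranges, one per S3 part; part ids are the 1-based part numbers S3 expects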
public List<MultipartByteRange> GetMultipartRanges(NamespaceId ns, string uploadId, ulong blobLength)
{
ulong countOfChunks = (blobLength / IdealS3ChunkSize) + 1;
if (Math.Min(countOfChunks, MaxS3Chunks) == MaxS3Chunks)
{
throw new Exception("Multipart blob would use more then max chunks in S3, this is not supported");
}
List<MultipartByteRange> parts = new();
ulong firstByte = 0;
ulong lastByte = IdealS3ChunkSize; // last byte is inclusive as per http range requests
for (int i = 1; i < (int)countOfChunks+1; i++)
{
// s3 parts start at 1 and max is 10_000 (inclusive)
// s3 part ids are simply incrementing numbers
parts.Add(new MultipartByteRange() {FirstByte = firstByte, LastByte = lastByte, PartId=i.ToString()});
firstByte += IdealS3ChunkSize;
lastByte = Math.Min(lastByte + IdealS3ChunkSize, blobLength);
}
return parts;
}
public MultipartLimits GetMultipartLimits(NamespaceId ns)
{
return new MultipartLimits { IdealChunkSize = IdealS3ChunkSize, MaxCountOfChunks = (int)MaxS3Chunks, MinChunkSize = 8 * 1024 * 1024 };
}
private const ulong MaxS3Chunks = 10_000;
private const int IdealS3ChunkSize = 32 * 1024 * 1024; // 32 MB parts - this means the largest file we can upload to S3 is 312GB
#endregion
}
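/// <summary>
/// Storage backend for a single S3 bucket: handles bucket creation and public access blocking, single and
/// multipart uploads/downloads, listing, presigned urls and multipart upload bookkeeping.
/// </summary>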
public class AmazonStorageBackend : IStorageBackend
{
private readonly IAmazonS3 _amazonS3;
private readonly string _bucketName;
private readonly IOptionsMonitor<S3Settings> _settings;
private readonly Tracer _tracer;
private readonly ILogger<AmazonStorageBackend> _logger;
private readonly BufferedPayloadFactory _payloadFactory;
private bool _bucketExistenceChecked;
private bool _bucketAccessPolicyApplied;
public AmazonStorageBackend(IAmazonS3 amazonS3, string bucketName, IOptionsMonitor<S3Settings> settings, Tracer tracer, ILogger<AmazonStorageBackend> logger, BufferedPayloadFactory payloadFactory)
{
_amazonS3 = amazonS3;
_bucketName = bucketName;
_settings = settings;
_tracer = tracer;
_logger = logger;
_payloadFactory = payloadFactory;
}
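// write path: optionally create the bucket and apply a public access block on first use, then upload the
// payload either as a single put or as a multipart upload depending on configuration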
public async Task WriteAsync(string path, Stream stream, CancellationToken cancellationToken)
{
if (_settings.CurrentValue.CreateBucketIfMissing)
{
if (!_bucketExistenceChecked)
{
bool bucketExist = await AmazonS3Util.DoesS3BucketExistV2Async(_amazonS3, _bucketName);
if (!bucketExist)
{
PutBucketRequest putBucketRequest = new PutBucketRequest
{
BucketName = _bucketName,
UseClientRegion = true
};
try
{
await _amazonS3.PutBucketAsync(putBucketRequest, cancellationToken);
}
catch (AmazonS3Exception e)
{
if (e.StatusCode == HttpStatusCode.Conflict)
{
// the bucket already exists so there is no need to create it; most likely another write operation (in this or another instance) raced us and created the bucket at the same time
// either way the bucket ends up existing, so we can just ignore the error
}
else
{
throw;
}
}
}
_bucketExistenceChecked = true;
}
}
if (_settings.CurrentValue.SetBucketPolicies && !_bucketAccessPolicyApplied)
{
// block all public access to the bucket
try
{
await _amazonS3.PutPublicAccessBlockAsync(new PutPublicAccessBlockRequest
{
BucketName = _bucketName,
PublicAccessBlockConfiguration = new PublicAccessBlockConfiguration()
{
RestrictPublicBuckets = true,
BlockPublicAcls = true,
BlockPublicPolicy = true,
IgnorePublicAcls = true,
}
}, cancellationToken);
_bucketAccessPolicyApplied = true;
}
catch (AmazonS3Exception e)
{
// if a conflicting operation is already being applied to the public access block we just ignore it; since the applied flag is not set, it will be retried on the next write
if (e.StatusCode != HttpStatusCode.Conflict)
{
throw;
}
}
}
if (_settings.CurrentValue.UseMultiPartUpload)
{
await WriteMultipartAsync(path, stream, cancellationToken);
}
else
{
PutObjectRequest request = new PutObjectRequest
{
BucketName = _bucketName,
Key = path,
InputStream = stream,
UseChunkEncoding = _settings.CurrentValue.UseChunkEncoding
};
try
{
await _amazonS3.PutObjectAsync(request, cancellationToken);
}
catch (AmazonS3Exception e)
{
// if the same object is added twice S3 will raise an error; as we are content-addressed we can accept either copy of the object, so we can ignore that error
if (e.StatusCode == HttpStatusCode.Conflict)
{
return;
}
if (e.StatusCode == HttpStatusCode.TooManyRequests)
{
throw new ResourceHasToManyRequestsException(e);
}
throw;
}
}
}
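// large streams that are not already backed by a file are buffered to a temporary file first so that
// TransferUtility can upload from disk and chunk the file into parts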
private async Task WriteMultipartAsync(string path, Stream stream, CancellationToken cancellationToken)
{
FilesystemBufferedPayload? payload = null;
try
{
string? filePath = null;
if (stream is FileStream fileStream)
{
filePath = fileStream.Name;
}
// use multipart transfers when buffer is larger than 16 MB, which is also the S3 default
else if (stream.Length > 16 * (long)Math.Pow(2, 20))
{
// will be chunked by TransferUtility
using FilesystemBufferedPayloadWriter writer = _payloadFactory.CreateFilesystemBufferedPayloadWriter("s3-upload");
{
await using Stream writableStream = writer.GetWritableStream();
await stream.CopyToAsync(writableStream, cancellationToken);
}
payload = writer.Done();
filePath = payload.TempFile.FullName;
}
using TransferUtility utility = new TransferUtility(_amazonS3);
try
{
if (filePath != null)
{
await utility.UploadAsync(filePath, _bucketName, path, cancellationToken);
}
else
{
await utility.UploadAsync(stream, _bucketName, path, cancellationToken);
}
}
catch (AmazonS3Exception e)
{
// if the same object is added twice S3 will raise an error; as we are content-addressed we can accept either copy of the object, so we can ignore that error
if (e.StatusCode == HttpStatusCode.Conflict)
{
return;
}
if (e.StatusCode == HttpStatusCode.TooManyRequests)
{
throw new ResourceHasToManyRequestsException(e);
}
throw;
}
}
finally
{
payload?.Dispose();
}
}
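// multipart-aware read: fetch part 1 first and, if the object has more parts, download the remaining parts
// in parallel and stitch them together in memory (small payloads) or in a temporary file (large payloads)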
private async Task<BlobContents?> GetMultipartAsync(string path, CancellationToken cancellationToken)
{
try
{
GetObjectRequest request = new GetObjectRequest { BucketName = _bucketName, Key = path, PartNumber = 1};
GetObjectResponse response = await _amazonS3.GetObjectAsync(request, cancellationToken);
// parts are only set for blobs that were uploaded using multipart, otherwise part 1 is the whole file
if (response.PartsCount > 1)
{
// object had parts, we download them in parallel
// array indices are offset by -1 because S3 part numbers start at 1
int partsCount = response.PartsCount.Value;
byte[][] parts = new byte[partsCount][];
long[] lengths = new long[partsCount];
try
{
{
parts[0] = ArrayPool<byte>.Shared.Rent((int)response.ContentLength);
lengths[0] = response.ContentLength;
await using MemoryStream ms = new MemoryStream(parts[0]);
await response.ResponseStream.CopyToAsync(ms, cancellationToken);
}
// multipart object, download it in parts, we already have the first part
await Parallel.ForEachAsync(Enumerable.Range(2, partsCount-1), cancellationToken, async (part, token) =>
{
GetObjectRequest partRequest = new GetObjectRequest { BucketName = _bucketName, Key = path, PartNumber = part };
GetObjectResponse partResponse = await _amazonS3.GetObjectAsync(partRequest, cancellationToken);
// the first part was retrieved in the initial request
parts[part-1] = ArrayPool<byte>.Shared.Rent((int)partResponse.ContentLength);
lengths[part-1] = partResponse.ContentLength;
await using MemoryStream ms = new MemoryStream(parts[part-1]);
await partResponse.ResponseStream.CopyToAsync(ms, cancellationToken);
});
long totalSize = lengths.Sum();
if (totalSize < _settings.CurrentValue.MultiPartMaxMemoryBufferSize)
{
Stream s = new MemoryStream(new byte[(int)totalSize]);
for (int i = 0; i < parts.Length; i++)
{
byte[] part = parts[i];
await s.WriteAsync(part, 0, (int)lengths[i], cancellationToken);
}
s.Position = 0;
return new BlobContents(s, totalSize);
}
else
{
// large payload - we have to buffer to file
using FilesystemBufferedPayloadWriter writer = _payloadFactory.CreateFilesystemBufferedPayloadWriter("s3-download");
{
await using Stream s = writer.GetWritableStream();
for (int i = 0; i < parts.Length; i++)
{
byte[] part = parts[i];
await s.WriteAsync(part, 0, (int)lengths[i], cancellationToken);
}
}
return new BlobContents(writer.Done());
}
}
finally
{
for (int i = 0; i < partsCount; i++)
{
ArrayPool<byte>.Shared.Return(parts[i]);
}
}
}
else
{
return new BlobContents(response.ResponseStream, response.ContentLength);
}
}
catch (AmazonS3Exception e)
{
if (e.ErrorCode == "NoSuchKey")
{
return null;
}
if (e.ErrorCode == "NoSuchBucket")
{
return null;
}
if (e.StatusCode == HttpStatusCode.TooManyRequests)
{
throw new ResourceHasToManyRequestsException(e);
}
throw;
}
}
public async Task<BlobContents?> TryReadAsync(string path, LastAccessTrackingFlags flags = LastAccessTrackingFlags.DoTracking, CancellationToken cancellationToken = default)
{
if (_settings.CurrentValue.UseMultiPartDownload)
{
return await GetMultipartAsync(path, cancellationToken);
}
else
{
GetObjectResponse response;
try
{
response = await _amazonS3.GetObjectAsync(_bucketName, path, cancellationToken);
}
catch (AmazonS3Exception e)
{
if (e.ErrorCode == "NoSuchKey")
{
return null;
}
if (e.ErrorCode == "NoSuchBucket")
{
return null;
}
throw;
}
return new BlobContents(response.ResponseStream, response.ContentLength);
}
}
public async Task<bool> ExistsAsync(string path, CancellationToken cancellationToken)
{
try
{
await _amazonS3.GetObjectMetadataAsync(_bucketName, path, cancellationToken);
}
catch (AmazonS3Exception e)
{
// if the object does not exist we get a not found status code
if (e.StatusCode == HttpStatusCode.NotFound)
{
return false;
}
// something else happened, let's just process it as usual
}
return true;
}
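// when PerPrefixListing is enabled every two-byte hex prefix (65536 of them, in random order) is listed
// separately, capped at PerPrefixMaxKeys keys per prefix; otherwise the whole bucket is paged through in one listing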
public async IAsyncEnumerable<(string, DateTime)> ListAsync([EnumeratorCancellation] CancellationToken cancellationToken)
{
if (_settings.CurrentValue.PerPrefixListing)
{
List<string> hashPrefixes = new List<string>(65536);
int i = 0;
for (int a = 0; a <= byte.MaxValue; a++)
{
for (int b = 0; b <= byte.MaxValue; b++)
{
hashPrefixes.Add(StringUtils.FormatAsHexString(new byte[] { (byte)a, (byte)b }));
i++;
}
}
hashPrefixes.Shuffle();
if (!await AmazonS3Util.DoesS3BucketExistV2Async(_amazonS3, _bucketName))
{
yield break;
}
foreach (string hashPrefix in hashPrefixes)
{
ListObjectsV2Request request = new ListObjectsV2Request
{
BucketName = _bucketName,
Prefix = hashPrefix,
MaxKeys = _settings.CurrentValue.PerPrefixMaxKeys
};
ListObjectsV2Response response;
do
{
response = await _amazonS3.ListObjectsV2Async(request, cancellationToken);
foreach (S3Object obj in response.S3Objects)
{
yield return (obj.Key, obj.LastModified);
}
request.ContinuationToken = response.NextContinuationToken;
} while (response.IsTruncated);
}
}
else
{
if (!await AmazonS3Util.DoesS3BucketExistV2Async(_amazonS3, _bucketName))
{
yield break;
}
ListObjectsV2Request request = new ListObjectsV2Request
{
BucketName = _bucketName
};
ListObjectsV2Response response;
do
{
response = await _amazonS3.ListObjectsV2Async(request, cancellationToken);
foreach (S3Object obj in response.S3Objects)
{
yield return (obj.Key, obj.LastModified);
}
request.ContinuationToken = response.NextContinuationToken;
} while (response.IsTruncated);
}
}
public async Task DeleteAsync(string path, CancellationToken cancellationToken)
{
await _amazonS3.DeleteObjectAsync(_bucketName, path, cancellationToken);
}
public ValueTask<Uri?> GetReadRedirectAsync(string path)
{
return new ValueTask<Uri?>(GetPresignedUrl(path, HttpVerb.GET));
}
public ValueTask<Uri?> GetWriteRedirectAsync(string path)
{
return new ValueTask<Uri?>(GetPresignedUrl(path, HttpVerb.PUT));
}
/// <summary>
/// Helper method to generate a presigned URL for a request
/// </summary>
Uri? GetPresignedUrl(string path, HttpVerb verb, int? partNumber = null, string? uploadId = null)
{
using TelemetrySpan span = _tracer.StartActiveSpan("s3.BuildPresignedUrl")
.SetAttribute("Path", path);
try
{
GetPreSignedUrlRequest signedUrlRequest = new GetPreSignedUrlRequest();
signedUrlRequest.BucketName = _bucketName;
signedUrlRequest.Key = path;
signedUrlRequest.Verb = verb;
signedUrlRequest.Protocol = _settings.CurrentValue.AssumeHttpForRedirectUri ? Protocol.HTTP : Protocol.HTTPS;
signedUrlRequest.Expires = DateTime.UtcNow.AddHours(3.0);
if (partNumber.HasValue)
{
signedUrlRequest.PartNumber = partNumber.Value;
}
if (uploadId != null)
{
signedUrlRequest.UploadId = uploadId;
}
string url = _amazonS3.GetPreSignedURL(signedUrlRequest);
return new Uri(url);
}
catch (Exception ex)
{
_logger.LogWarning(ex, "Unable to get presigned url for {Path} from S3", path);
return null;
}
}
public async Task<BlobMetadata?> GetMetadataAsync(string path)
{
try
{
GetObjectAttributesResponse? metadata = await _amazonS3.GetObjectAttributesAsync(new GetObjectAttributesRequest
{
BucketName = _bucketName, Key = path, ObjectAttributes = new List<ObjectAttributes>() { ObjectAttributes.ObjectSize }
});
if (metadata == null)
{
return null;
}
return new BlobMetadata(metadata.ObjectSize, metadata.LastModified);
}
catch (AmazonS3Exception e)
{
// if the object does not exist we get a not found status code
if (e.StatusCode == HttpStatusCode.NotFound)
{
return null;
}
throw;
}
catch (XmlException)
{
// Multipart uploaded objects can cause S3 to return invalid XML for the attributes request
return null;
}
}
public async Task<string> StartMultipartUploadAsync(string path)
{
InitiateMultipartUploadResponse response = await _amazonS3.InitiateMultipartUploadAsync(new InitiateMultipartUploadRequest {BucketName = _bucketName, Key = path});
return response.UploadId;
}
public async Task CompleteMultipartUploadAsync(string path, string uploadId, List<string> partIds)
{
Dictionary<int, PartETag> etags = new ();
int? partNumberMarker = 0;
for (int i = 0; i < 10; i++)
{
// s3 only allows you to list 1000 parts per request, but supports up to 10_000 parts so we need to potentially list this 10 times to get all parts
ListPartsRequest listPartsRequest = new() {BucketName = _bucketName, Key = path, UploadId = uploadId, PartNumberMarker = partNumberMarker.ToString()};
ListPartsResponse partList = await _amazonS3.ListPartsAsync(listPartsRequest);
foreach (PartDetail? part in partList.Parts)
{
etags.Add(part.PartNumber, new PartETag(part.PartNumber, part.ETag));
}
if (!partList.IsTruncated)
{
break;
}
partNumberMarker = partList.NextPartNumberMarker;
}
// check if any part is missing
List<string> missingPartIds = new();
foreach (string partId in partIds)
{
if (!etags.ContainsKey(int.Parse(partId)))
{
missingPartIds.Add(partId);
}
}
if (missingPartIds.Count > 0)
{
throw new MissingMultipartPartsException(missingPartIds!);
}
await _amazonS3.CompleteMultipartUploadAsync(new CompleteMultipartUploadRequest {BucketName = _bucketName, Key = path, UploadId = uploadId, PartETags = etags.Values.ToList()});
}
public async Task PutMultipartPartAsync(string path, string uploadId, string partIdentifier, byte[] data)
{
await _amazonS3.UploadPartAsync(new UploadPartRequest { BucketName = _bucketName, Key = path, UploadId = uploadId, PartNumber = int.Parse(partIdentifier), InputStream = new MemoryStream(data)});
}
public Uri? GetWriteRedirectForPart(string path, string uploadId, string partIdentifier)
{
Uri? uri = GetPresignedUrl(path, HttpVerb.PUT, partNumber: int.Parse(partIdentifier), uploadId: uploadId);
if (uri != null)
{
return uri;
}
return null;
}
public async Task RenameMultipartBlobAsync(string blobName, string targetBlobName)
{
await CopyBlobAsync(blobName, _bucketName, targetBlobName);
// delete the old multipart object
await _amazonS3.DeleteObjectAsync(_bucketName, blobName);
}
internal string GetBucketName()
{
return _bucketName;
}
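// copies a blob to another bucket/key; objects stored as a single part use a plain CopyObject, while objects
// uploaded in multiple parts are recreated with a new multipart upload that copies each part in parallel via CopyPart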
public async Task CopyBlobAsync(string path, string targetBucketName, string targetPath)
{
if (string.Equals(targetBucketName, _bucketName, StringComparison.OrdinalIgnoreCase))
{
// copy within the bucket
if (string.Equals(targetPath, path, StringComparison.OrdinalIgnoreCase))
{
// copy to the same object, this already exists so no need to do any work
return;
}
}
GetObjectMetadataResponse metadata = await _amazonS3.GetObjectMetadataAsync(new GetObjectMetadataRequest {BucketName = _bucketName, Key = path, PartNumber = 1} );
if (!metadata.PartsCount.HasValue)
{
// not a multipart object
await _amazonS3.CopyObjectAsync(new CopyObjectRequest()
{
SourceBucket = _bucketName, DestinationBucket = targetBucketName, SourceKey = path, DestinationKey = targetPath,
});
return;
}
InitiateMultipartUploadResponse startMultipartUpload = await _amazonS3.InitiateMultipartUploadAsync(_bucketName, targetPath);
string uploadId = startMultipartUpload.UploadId;
ConcurrentBag<PartETag> parts = new();
long chunkSize = metadata.ContentLength;
long objectLength = long.Parse(metadata.ContentRange.Split('/')[1]);
await Parallel.ForAsync(1, metadata.PartsCount.Value + 1, async(int i, CancellationToken token) =>
{
long startOffset = (i - 1) * chunkSize;
long endOffset = (i * chunkSize) - 1; // subtract one because the last byte is inclusive
endOffset = Math.Min(endOffset, objectLength-1);
CopyPartResponse response = await _amazonS3.CopyPartAsync(new CopyPartRequest()
{
SourceBucket = _bucketName,
DestinationBucket = targetBucketName,
PartNumber = i,
SourceKey = path,
DestinationKey = targetPath,
UploadId = uploadId,
FirstByte = startOffset,
LastByte = endOffset,
}, token);
parts.Add(new PartETag(i, response.ETag));
});
await _amazonS3.CompleteMultipartUploadAsync(new CompleteMultipartUploadRequest {BucketName = _bucketName, UploadId = uploadId, Key = targetPath, PartETags = parts.ToList()});
}
}
public static class BlobIdentifierExtensions
{
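/// <summary>
/// Formats a blob identifier as an S3 key, using the first four characters of the hash as a prefix (for example "0a1b/0a1b...").
/// </summary>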
public static string AsS3Key(this BlobId blobIdentifier)
{
string s = blobIdentifier.ToString();
string prefix = s.Substring(0, 4);
return $"{prefix}/{s}";
}
}
}