Files
UnrealEngine/Engine/Source/Programs/Horde/HordeServer/Server/ServerStatusService.cs
2025-05-18 13:04:45 +08:00

210 lines
6.6 KiB
C#

// Copyright Epic Games, Inc. All Rights Reserved.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using EpicGames.Core;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using StackExchange.Redis;
namespace HordeServer.Server;
/// <summary>
/// Health monitor that forwards every status update to a <see cref="ServerStatusService"/>
/// </summary>
/// <typeparam name="T">Type of the subsystem being monitored. Passed to the status service with each report.</typeparam>
internal class HealthMonitor<T> : IHealthMonitor<T>
{
    private readonly ServerStatusService _statusService;
    private string _name;

    /// <summary>
    /// Creates a monitor whose display name is the type name of <typeparamref name="T"/>
    /// </summary>
    /// <param name="statusService">Service receiving the status reports</param>
    public HealthMonitor(ServerStatusService statusService)
        : this(statusService, typeof(T).Name)
    {
    }

    /// <summary>
    /// Creates a monitor with an explicit display name
    /// </summary>
    /// <param name="statusService">Service receiving the status reports</param>
    /// <param name="name">Human-readable name sent along with each report</param>
    public HealthMonitor(ServerStatusService statusService, string name)
    {
        _name = name;
        _statusService = statusService;
    }

    /// <summary>
    /// Replaces the display name sent along with subsequent reports
    /// </summary>
    /// <param name="name">New human-readable name</param>
    public void SetName(string name) => _name = name;

    /// <summary>
    /// Reports a new status for the monitored subsystem to the status service
    /// </summary>
    /// <param name="result">Health status to report</param>
    /// <param name="message">Optional human-readable message</param>
    /// <param name="timestamp">Optional timestamp for the report, passed through unchanged</param>
    public async Task UpdateAsync(HealthStatus result, string? message, DateTimeOffset? timestamp)
    {
        Type subsystemType = typeof(T);
        await _statusService.ReportAsync(subsystemType, _name, result, message, timestamp);
    }
}
/// <summary>
/// Status of a single subsystem inside Horde, together with its recorded updates
/// </summary>
/// <param name="Id">Unique ID</param>
/// <param name="Name">Human-readable name</param>
/// <param name="Updates">List of updates. The first entry is the one shown as the last update by <see cref="ToString"/>.</param>
public record SubsystemStatus(string Id, string Name, List<SubsystemStatusUpdate> Updates)
{
    /// <inheritdoc/>
    public override string ToString()
    {
        string lastUpdate;
        if (Updates.Count == 0)
        {
            lastUpdate = "<no updates>";
        }
        else
        {
            lastUpdate = Updates[0].ToString();
        }
        return $"Subsystem(Id={Id} Name={Name} LastUpdate={lastUpdate})";
    }
}
/// <summary>
/// An individual status update for a subsystem
/// </summary>
/// <param name="Result">Health status reported by the update</param>
/// <param name="Message">Optional human-readable message accompanying the update</param>
/// <param name="UpdatedAt">Time associated with the update</param>
public record SubsystemStatusUpdate(HealthStatus Result, string? Message, DateTimeOffset UpdatedAt);
/// <summary>
/// Tracks health and status of the Horde server itself,
/// such as connectivity to external systems (MongoDB, Redis, Perforce etc).
/// Status history for each subsystem is stored as JSON in a single Redis hash, keyed by subsystem ID.
/// </summary>
public class ServerStatusService : IHostedService
{
    /// <summary>
    /// Max historical status updates to keep
    /// </summary>
    public const int MaxHistoryLength = 10;

    private readonly IClock _clock;
    private readonly IMongoService _mongoService;
    private readonly IRedisService _redisService;
    private readonly ILogger<ServerStatusService> _logger;
    private readonly IHealthMonitor<MongoService> _mongoDbHealth;
    private readonly ITicker _mongoDbHealthTicker;
    private readonly IHealthMonitor<RedisService> _redisHealth;
    private readonly ITicker _redisHealthTicker;

    /// <summary>
    /// Key of the Redis hash holding one JSON-serialized <see cref="SubsystemStatus"/> per subsystem ID
    /// </summary>
    private static string RedisHashKey() => "server-status";

    /// <summary>
    /// Constructor
    /// </summary>
    /// <param name="mongoService">MongoDB service whose health is checked periodically</param>
    /// <param name="redisService">Redis service whose health is checked periodically, and where statuses are stored</param>
    /// <param name="clock">Clock used to create the health check tickers and to timestamp reports</param>
    /// <param name="logger">Logger passed to the tickers and used for diagnostics from this service</param>
    public ServerStatusService(IMongoService mongoService, IRedisService redisService, IClock clock, ILogger<ServerStatusService> logger)
    {
        _mongoService = mongoService;
        _redisService = redisService;
        _clock = clock;
        _logger = logger;
        _mongoDbHealth = new HealthMonitor<MongoService>(this, "MongoDB");
        _mongoDbHealthTicker = clock.AddTicker($"{nameof(ServerStatusService)}.MongoDb", TimeSpan.FromSeconds(30.0), UpdateMongoDbHealthAsync, logger);
        _redisHealth = new HealthMonitor<RedisService>(this, "Redis");
        _redisHealthTicker = clock.AddTicker($"{nameof(ServerStatusService)}.Redis", TimeSpan.FromSeconds(30.0), UpdateRedisHealthAsync, logger);
    }

    /// <inheritdoc/>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        await _mongoDbHealthTicker.StartAsync();
        await _redisHealthTicker.StartAsync();
    }

    /// <inheritdoc/>
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        await _mongoDbHealthTicker.StopAsync();
        await _redisHealthTicker.StopAsync();
    }

    /// <summary>
    /// Checks health and connectivity to MongoDB database and reports the outcome
    /// </summary>
    /// <param name="cancellationToken">Cancellation token passed to the health check</param>
    internal async ValueTask UpdateMongoDbHealthAsync(CancellationToken cancellationToken)
    {
        HealthCheckResult result = await _mongoService.CheckHealthAsync(new HealthCheckContext(), cancellationToken);
        await _mongoDbHealth.UpdateAsync(result.Status, result.Description);
    }

    /// <summary>
    /// Checks health and connectivity to Redis database and reports the outcome
    /// </summary>
    /// <param name="cancellationToken">Cancellation token passed to the health check</param>
    internal async ValueTask UpdateRedisHealthAsync(CancellationToken cancellationToken)
    {
        HealthCheckResult result = await _redisService.CheckHealthAsync(new HealthCheckContext(), cancellationToken);
        await _redisHealth.UpdateAsync(result.Status, result.Description);
    }

    /// <summary>
    /// Report a status update for a given subsystem.
    /// The update is merged into the stored history, which is kept newest-first and capped at <see cref="MaxHistoryLength"/> entries.
    /// Nothing is stored when the Redis service is in read-only mode.
    /// </summary>
    /// <param name="type">Service type reporting health. Its type name is used as the subsystem ID.</param>
    /// <param name="name">Human-readable name. Only used when no status is stored for the subsystem yet.</param>
    /// <param name="result">Result of the update</param>
    /// <param name="message">Human-readable message</param>
    /// <param name="timestamp">Optional timestamp to be associated with the report. Defaults to the current UTC time of the clock.</param>
    public async Task ReportAsync(Type type, string name, HealthStatus result, string? message = null, DateTimeOffset? timestamp = null)
    {
        // The merged history could not be written back in read-only mode, so skip the Redis round trip entirely.
        if (_redisService.ReadOnlyMode)
        {
            return;
        }

        string id = type.Name;
        IDatabase redis = _redisService.GetDatabase();
        SubsystemStatus status = await GetSubsystemStatusFromRedisAsync(redis, id, name);

        SubsystemStatusUpdate update = new(result, message, timestamp ?? _clock.UtcNow);
        status.Updates.Add(update);

        // Newest first, so trimming the tail drops the oldest entries.
        status.Updates.Sort((a, b) => b.UpdatedAt.CompareTo(a.UpdatedAt));
        if (status.Updates.Count > MaxHistoryLength)
        {
            status.Updates.RemoveRange(MaxHistoryLength, status.Updates.Count - MaxHistoryLength);
        }

        string data = JsonSerializer.Serialize(status);
        await redis.HashSetAsync(RedisHashKey(), id, data);
    }

    /// <summary>
    /// Parses a stored status payload
    /// </summary>
    /// <param name="rawJson">JSON text read from the Redis hash</param>
    /// <returns>The parsed status, or null if the payload is a JSON null or has no list of updates</returns>
    /// <exception cref="JsonException">The payload is not valid JSON for a <see cref="SubsystemStatus"/></exception>
    private static SubsystemStatus? DeserializeStatus(string rawJson)
    {
        SubsystemStatus? status = JsonSerializer.Deserialize<SubsystemStatus>(rawJson);

        // A constructor parameter missing from the JSON is bound to its default value, so Updates may be null
        // here despite its non-nullable declaration. Treat that the same as an unparseable payload.
        return status is { Updates: not null } ? status : null;
    }

    /// <summary>
    /// Reads the stored status for a subsystem, falling back to an empty status when none can be read.
    /// This is best-effort: read or parse failures are logged and never thrown to the caller.
    /// </summary>
    /// <param name="redis">Redis database to read from</param>
    /// <param name="id">Subsystem ID, used as the hash field</param>
    /// <param name="name">Human-readable name for the fallback status. A stored status keeps the name it was stored with.</param>
    /// <returns>The stored status, or a new status with no updates</returns>
    private async Task<SubsystemStatus> GetSubsystemStatusFromRedisAsync(IDatabase redis, string id, string name)
    {
        try
        {
            string? rawJson = await redis.HashGetAsync(RedisHashKey(), id);
            if (rawJson != null)
            {
                SubsystemStatus? stored = DeserializeStatus(rawJson);
                if (stored != null)
                {
                    return stored;
                }
                _logger.LogWarning("Stored status for subsystem {SubsystemId} is incomplete; starting a new history", id);
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Unable to read stored status for subsystem {SubsystemId}; starting a new history", id);
        }
        return new SubsystemStatus(id, name, []);
    }

    /// <summary>
    /// Get a list of status and updates for each subsystem.
    /// Entries that cannot be parsed are logged and left out of the result.
    /// </summary>
    /// <returns>A list of statuses</returns>
    public async Task<IReadOnlyList<SubsystemStatus>> GetSubsystemStatusesAsync()
    {
        HashEntry[] entries = await _redisService.GetDatabase().HashGetAllAsync(RedisHashKey());
        List<SubsystemStatus> subsystems = [];
        foreach (HashEntry entry in entries)
        {
            try
            {
                SubsystemStatus status = DeserializeStatus(entry.Value.ToString()) ?? throw new JsonException("Failed parsing JSON");
                subsystems.Add(status);
            }
            catch (JsonException ex)
            {
                _logger.LogWarning(ex, "Ignoring unparseable status entry {SubsystemId}", entry.Name.ToString());
            }
        }
        return subsystems;
    }
}