// Copyright Epic Games, Inc. All Rights Reserved.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using EpicGames.Core;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using StackExchange.Redis;

namespace HordeServer.Server;

/// <summary>
/// Health monitor that forwards every update to <see cref="ServerStatusService"/>.
/// The type argument is passed along with each report and identifies the subsystem.
/// </summary>
/// <typeparam name="T">Type identifying the subsystem being monitored</typeparam>
internal class HealthMonitor<T> : IHealthMonitor<T>
{
	private readonly ServerStatusService _statusService;
	private string _name;

	/// <summary>
	/// Constructor. Uses the short name of <typeparamref name="T"/> as the display name.
	/// </summary>
	/// <param name="statusService">Service that receives the reported updates</param>
	public HealthMonitor(ServerStatusService statusService)
		: this(statusService, typeof(T).Name)
	{
	}

	/// <summary>
	/// Constructor
	/// </summary>
	/// <param name="statusService">Service that receives the reported updates</param>
	/// <param name="name">Human-readable name of the subsystem</param>
	public HealthMonitor(ServerStatusService statusService, string name)
	{
		_statusService = statusService;
		_name = name;
	}

	/// <summary>
	/// Replaces the human-readable name sent with subsequent updates
	/// </summary>
	/// <param name="name">New human-readable name</param>
	public void SetName(string name)
	{
		_name = name;
	}

	/// <summary>
	/// Reports a status update for this subsystem to the status service
	/// </summary>
	/// <param name="result">Result of the update</param>
	/// <param name="message">Human-readable message</param>
	/// <param name="timestamp">Optional timestamp to be associated with the report</param>
	public async Task UpdateAsync(HealthStatus result, string? message, DateTimeOffset? timestamp)
	{
		await _statusService.ReportAsync(typeof(T), _name, result, message, timestamp);
	}
}

/// <summary>
/// Represents status of a subsystem inside Horde
/// </summary>
/// <param name="Id">Unique ID</param>
/// <param name="Name">Human-readable name</param>
/// <param name="Updates">List of updates</param>
public record SubsystemStatus(string Id, string Name, List<SubsystemStatusUpdate> Updates)
{
	/// <inheritdoc/>
	public override string ToString()
	{
		// Only the first entry is printed; ReportAsync stores updates sorted newest-first.
		string updates = Updates.FirstOrDefault()?.ToString() ?? "";
		return $"Subsystem(Id={Id} Name={Name} LastUpdate={updates})";
	}
}

/// <summary>
/// An individual status update for a subsystem
/// </summary>
/// <param name="Result">Health status that was reported</param>
/// <param name="Message">Optional human-readable message accompanying the result</param>
/// <param name="UpdatedAt">Timestamp associated with the update</param>
public record SubsystemStatusUpdate(HealthStatus Result, string? Message, DateTimeOffset UpdatedAt);

/// <summary>
/// Tracks health and status of the Horde server itself
/// Such as connectivity to external systems (MongoDB, Redis, Perforce etc).
/// </summary>
public class ServerStatusService : IHostedService
{
	/// <summary>
	/// Max historical status updates to keep
	/// </summary>
	public const int MaxHistoryLength = 10;

	// Name of the Redis hash holding one JSON-serialized SubsystemStatus per subsystem ID.
	private const string RedisHashKey = "server-status";

	// Interval passed to the clock when registering the built-in MongoDB and Redis health tickers.
	private static readonly TimeSpan s_healthCheckInterval = TimeSpan.FromSeconds(30.0);

	private readonly IClock _clock;
	private readonly IMongoService _mongoService;
	private readonly IRedisService _redisService;
	private readonly IHealthMonitor _mongoDbHealth;
	private readonly ITicker _mongoDbHealthTicker;
	private readonly IHealthMonitor _redisHealth;
	private readonly ITicker _redisHealthTicker;

	/// <summary>
	/// Constructor
	/// </summary>
	/// <param name="mongoService">MongoDB service whose health is checked</param>
	/// <param name="redisService">Redis service whose health is checked and in which statuses are stored</param>
	/// <param name="clock">Clock used for default timestamps and for registering the health tickers</param>
	/// <param name="logger">Logger handed to the tickers</param>
	public ServerStatusService(IMongoService mongoService, IRedisService redisService, IClock clock, ILogger<ServerStatusService> logger)
	{
		_mongoService = mongoService;
		_redisService = redisService;
		_clock = clock;

		// NOTE(review): the generic arguments of the two monitors below were not recoverable from the
		// file as received. typeof(T).Name is used as the Redis hash field, so confirm these match the
		// IDs already stored before deploying.
		_mongoDbHealth = new HealthMonitor<IMongoService>(this, "MongoDB");
		_mongoDbHealthTicker = clock.AddTicker($"{nameof(ServerStatusService)}.MongoDb", s_healthCheckInterval, UpdateMongoDbHealthAsync, logger);

		_redisHealth = new HealthMonitor<IRedisService>(this, "Redis");
		_redisHealthTicker = clock.AddTicker($"{nameof(ServerStatusService)}.Redis", s_healthCheckInterval, UpdateRedisHealthAsync, logger);
	}

	/// <inheritdoc/>
	public async Task StartAsync(CancellationToken cancellationToken)
	{
		await _mongoDbHealthTicker.StartAsync();
		await _redisHealthTicker.StartAsync();
	}

	/// <inheritdoc/>
	public async Task StopAsync(CancellationToken cancellationToken)
	{
		await _mongoDbHealthTicker.StopAsync();
		await _redisHealthTicker.StopAsync();
	}

	/// <summary>
	/// Checks health and connectivity to MongoDB database
	/// </summary>
	/// <param name="cancellationToken">Cancellation token for the health check</param>
	internal async ValueTask UpdateMongoDbHealthAsync(CancellationToken cancellationToken)
	{
		HealthCheckResult result = await _mongoService.CheckHealthAsync(new HealthCheckContext(), cancellationToken);
		await _mongoDbHealth.UpdateAsync(result.Status, result.Description);
	}

	/// <summary>
	/// Checks health and connectivity to Redis database
	/// </summary>
	/// <param name="cancellationToken">Cancellation token for the health check</param>
	internal async ValueTask UpdateRedisHealthAsync(CancellationToken cancellationToken)
	{
		HealthCheckResult result = await _redisService.CheckHealthAsync(new HealthCheckContext(), cancellationToken);
		await _redisHealth.UpdateAsync(result.Status, result.Description);
	}

	/// <summary>
	/// Report a status update for a given subsystem
	/// </summary>
	/// <param name="type">Service type reporting health</param>
	/// <param name="name">Human-readable name</param>
	/// <param name="result">Result of the update</param>
	/// <param name="message">Human-readable message</param>
	/// <param name="timestamp">Optional timestamp to be associated with the report. Defaults to UtcNow</param>
	/// <exception cref="ArgumentNullException">Thrown when <paramref name="type"/> is null</exception>
	public async Task ReportAsync(Type type, string name, HealthStatus result, string? message = null, DateTimeOffset? timestamp = null)
	{
		ArgumentNullException.ThrowIfNull(type);

		// Nothing is persisted in read-only mode, so skip the Redis read and the serialization work
		// whose result would only be thrown away.
		if (_redisService.ReadOnlyMode)
		{
			return;
		}

		string id = type.Name;
		IDatabase redis = _redisService.GetDatabase();
		SubsystemStatus status = await GetSubsystemStatusFromRedisAsync(redis, id, name);

		status.Updates.Add(new SubsystemStatusUpdate(result, message, timestamp ?? _clock.UtcNow));

		// Keep the history newest-first so trimming below drops the oldest entries.
		status.Updates.Sort((a, b) => b.UpdatedAt.CompareTo(a.UpdatedAt));
		if (status.Updates.Count > MaxHistoryLength)
		{
			status.Updates.RemoveRange(MaxHistoryLength, status.Updates.Count - MaxHistoryLength);
		}

		await redis.HashSetAsync(RedisHashKey, id, JsonSerializer.Serialize(status));
	}

	/// <summary>
	/// Loads the stored status for a subsystem, or creates an empty one if nothing usable is stored
	/// </summary>
	/// <param name="redis">Redis database to read from</param>
	/// <param name="id">Unique ID of the subsystem (the hash field)</param>
	/// <param name="name">Human-readable name, used only when a new status has to be created</param>
	/// <returns>The stored status, or a new status with no updates</returns>
	private static async Task<SubsystemStatus> GetSubsystemStatusFromRedisAsync(IDatabase redis, string id, string name)
	{
		try
		{
			string? rawJson = await redis.HashGetAsync(RedisHashKey, id);
			if (rawJson is not null)
			{
				// An entry that parses to null, or that lacks its update list, is treated as missing.
				SubsystemStatus? stored = JsonSerializer.Deserialize<SubsystemStatus>(rawJson);
				if (stored is { Updates: not null })
				{
					return stored;
				}
			}
		}
		catch (Exception)
		{
			// Deliberately best-effort: an unreadable or corrupt entry must not prevent a new report.
			// The history is restarted from an empty status below.
		}

		// NOTE(review): an existing entry keeps its stored Name even when a different name is passed in.
		return new SubsystemStatus(id, name, []);
	}

	/// <summary>
	/// Get a list of status and updates for each subsystem
	/// </summary>
	/// <returns>A list of statuses</returns>
	public async Task<List<SubsystemStatus>> GetSubsystemStatusesAsync()
	{
		HashEntry[] entries = await _redisService.GetDatabase().HashGetAllAsync(RedisHashKey);

		List<SubsystemStatus> subsystems = new(entries.Length);
		foreach (HashEntry entry in entries)
		{
			try
			{
				// Entries that deserialize to null are skipped, same as entries that fail to parse.
				if (JsonSerializer.Deserialize<SubsystemStatus>(entry.Value.ToString()) is { } status)
				{
					subsystems.Add(status);
				}
			}
			catch (JsonException)
			{
				// Ignored: one malformed entry should not hide the remaining subsystems.
			}
		}
		return subsystems;
	}
}