// Copyright Epic Games, Inc. All Rights Reserved.

using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Text.Json;
using EpicGames.Core;
using EpicGames.Horde.Storage;
using Microsoft.Extensions.Logging;

namespace EpicGames.Horde.Logs
{
	/// <summary>
	/// Read-only buffer for log text, with indexed line offsets.
	/// </summary>
	[BlobConverter(typeof(LogChunkConverter))]
	public class LogChunkNode
	{
		/// <summary>
		/// Type of this blob when serialized
		/// </summary>
		public static BlobType BlobType { get; } = new BlobType("{7020B6CA-4174-0F72-AA06-AAB630EFA360}", 1);

		/// <summary>
		/// Provides access to the lines for this chunk through a list interface
		/// </summary>
		class LinesCollection : IReadOnlyList<Utf8String>
		{
			readonly LogChunkNode _owner;

			public LinesCollection(LogChunkNode owner) => _owner = owner;

			/// <inheritdoc/>
			public Utf8String this[int index] => _owner.GetLine(index);

			/// <inheritdoc/>
			public int Count => _owner.LineCount;

			/// <inheritdoc/>
			public IEnumerator<Utf8String> GetEnumerator()
			{
				for (int idx = 0; idx < Count; idx++)
				{
					yield return _owner.GetLine(idx);
				}
			}

			/// <inheritdoc/>
			IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
		}

		/// <summary>
		/// Empty log chunk. The single-entry offset array is the sentinel for zero lines.
		/// </summary>
		public static LogChunkNode Empty { get; } = new LogChunkNode(Array.Empty<byte>(), new int[1]);

		/// <summary>
		/// The raw text data. Contains a complete set of lines followed by newline characters.
		/// </summary>
		public ReadOnlyMemory<byte> Data { get; }

		/// <summary>
		/// Span for the raw text data.
		/// </summary>
		public ReadOnlySpan<byte> Span => Data.Span;

		/// <summary>
		/// Accessor for the lines in this chunk
		/// </summary>
		public IReadOnlyList<Utf8String> Lines { get; }

		/// <summary>
		/// Offsets of lines within the data object, including a sentinel for the end of the data (LineCount + 1 entries).
		/// </summary>
		public IReadOnlyList<int> LineOffsets { get; }

		/// <summary>
		/// Length of this chunk
		/// </summary>
		public int Length => Data.Length;

		/// <summary>
		/// Number of lines in the block (excluding the sentinel).
		/// </summary>
		public int LineCount => LineOffsets.Count - 1;

		/// <summary>
		/// Default constructor
		/// </summary>
		public LogChunkNode()
			: this(Empty.Data, Empty.LineOffsets)
		{
		}

		/// <summary>
		/// Constructor
		/// </summary>
		/// <param name="data">Data to construct from</param>
		public LogChunkNode(ReadOnlyMemory<byte> data)
			: this(data, FindLineOffsets(data.Span))
		{
		}

		/// <summary>
		/// Constructor
		/// </summary>
		/// <param name="data">Data to construct from</param>
		/// <param name="lineOffsets">Precomputed line offsets, including the end-of-data sentinel</param>
		internal LogChunkNode(ReadOnlyMemory<byte> data, IReadOnlyList<int> lineOffsets)
		{
			Data = data;
			Lines = new LinesCollection(this);
			LineOffsets = lineOffsets;
		}

		/// <summary>
		/// Accessor for an individual line, excluding the trailing newline character
		/// </summary>
		/// <param name="idx">Index of the line to retrieve</param>
		/// <returns>Line at the given index</returns>
		public Utf8String GetLine(int idx) => new Utf8String(Data.Slice(LineOffsets[idx], LineOffsets[idx + 1] - LineOffsets[idx] - 1));

		/// <summary>
		/// Accessor for an individual line, including the trailing newline character
		/// </summary>
		/// <param name="idx">Index of the line to retrieve</param>
		/// <returns>Line at the given index</returns>
		public Utf8String GetLineWithNewline(int idx) => new Utf8String(Data.Slice(LineOffsets[idx], LineOffsets[idx + 1] - LineOffsets[idx]));

		/// <summary>
		/// Find the line index for a particular offset
		/// </summary>
		/// <param name="offset">Offset within the text</param>
		/// <returns>The line index</returns>
		public int GetLineIndexForOffset(int offset)
		{
			int lineIdx = LineOffsets.BinarySearch(offset);
			if (lineIdx < 0)
			{
				// Not an exact line start; ~lineIdx is the first offset greater than the target,
				// so the containing line starts one entry earlier.
				lineIdx = ~lineIdx - 1;
			}
			return lineIdx;
		}

		/// <summary>
		/// Creates a new list of line offsets for the given text
		/// </summary>
		/// <param name="data">Text to scan for line endings</param>
		/// <returns>List of offsets, starting with zero and followed by the offset after each newline</returns>
		public static List<int> FindLineOffsets(ReadOnlySpan<byte> data)
		{
			List<int> lineOffsets = new List<int>();
			lineOffsets.Add(0);
			UpdateLineOffsets(data, 0, lineOffsets);
			return lineOffsets;
		}

		/// <summary>
		/// Updates the length of this chunk, computing all the newline offsets
		/// </summary>
		/// <param name="data">Text to search for line endings</param>
		/// <param name="start">Start offset within the text buffer</param>
		/// <param name="lineOffsets">Offsets of each line within the text</param>
		public static void UpdateLineOffsets(ReadOnlySpan<byte> data, int start, List<int> lineOffsets)
		{
			for (int idx = start; idx < data.Length; idx++)
			{
				if (data[idx] == '\n')
				{
					lineOffsets.Add(idx + 1);
				}
			}
		}
	}

	/// <summary>
	/// Converter from log chunks to blobs
	/// </summary>
	class LogChunkConverter : BlobConverter<LogChunkNode>
	{
		/// <inheritdoc/>
		public override LogChunkNode
Read(IBlobReader reader, BlobSerializerOptions options) { byte[] data = reader.ReadVariableLengthBytes().ToArray(); return new LogChunkNode(data); } /// public override BlobType Write(IBlobWriter writer, LogChunkNode value, BlobSerializerOptions options) { writer.WriteVariableLengthBytes(value.Data.Span); return LogChunkNode.BlobType; } } /// /// Reference to a chunk of text, with information about its placement in the larger log file /// public class LogChunkRef { /// /// First line within the file /// public int LineIndex { get; } /// /// Number of lines in this block /// public int LineCount { get; } /// /// Offset within the entire log file /// public long Offset { get; } /// /// Length of this chunk /// public int Length { get; } /// /// Handle to the target chunk /// public IHashedBlobRef Target { get; } /// /// Constructor /// /// Index of the first line within this block /// Number of lines in the chunk /// Offset within the log file /// Length of the chunk /// Referenced log text public LogChunkRef(int lineIndex, int lineCount, long offset, int length, IHashedBlobRef target) { LineIndex = lineIndex; LineCount = lineCount; Offset = offset; Length = length; Target = target; } /// /// Deserializing constructor /// /// public LogChunkRef(IBlobReader reader) { Target = reader.ReadBlobRef(); LineIndex = (int)reader.ReadUnsignedVarInt(); LineCount = (int)reader.ReadUnsignedVarInt(); Offset = (long)reader.ReadUnsignedVarInt(); Length = (int)reader.ReadUnsignedVarInt(); } /// public void Serialize(IBlobWriter writer) { writer.WriteBlobRef(Target); writer.WriteUnsignedVarInt(LineIndex); writer.WriteUnsignedVarInt(LineCount); writer.WriteUnsignedVarInt((ulong)Offset); writer.WriteUnsignedVarInt(Length); } } /// /// Builder for objects. 
/// public class LogChunkBuilder { /// /// Accessor for Data /// byte[] _data; /// /// Current used length of the buffer /// int _length; /// /// Offsets of the start of each line within the data /// readonly List _lineOffsets = new List { 0 }; /// /// Current length of the buffer /// public int Length => _length; /// /// Number of lines in this buffer /// public int LineCount => _lineOffsets.Count - 1; /// /// Capacity of the buffer /// public int Capacity => _data.Length; /// /// Constructor /// public LogChunkBuilder(int maxLength = 64 * 1024) { _data = new byte[maxLength]; } /// /// Constructor /// /// Data to initialize this chunk with. Ownership of this array is transferred to the chunk, and its length determines the chunk size. /// Number of valid bytes within the initial data array public LogChunkBuilder(byte[] data, int length) : this(data, length, LogChunkNode.FindLineOffsets(data.AsSpan(0, length))) { } /// /// Constructor /// /// /// /// private LogChunkBuilder(byte[] data, int length, List lineOffsets) { _data = data; _length = length; _lineOffsets = lineOffsets; } /// /// Clear the contents of the buffer /// public void Clear() { _length = 0; _lineOffsets.RemoveRange(1, _lineOffsets.Count - 1); } /// /// Gets a line at the given index /// /// Index of the line /// Text for the line public Utf8String GetLine(int index) => new Utf8String(_data.AsMemory(_lineOffsets[index], _lineOffsets[index + 1] - _lineOffsets[index])); /// /// Create a new chunk data object with the given data appended. The internal buffers are reused, with the assumption that /// there is no contention over writing to the same location in the chunk. 
/// /// The data to append /// New chunk data object public void Append(ReadOnlySpan textData) { CreateOutputSpace(textData.Length); textData.CopyTo(_data.AsSpan(_length, textData.Length)); int prevLength = _length; _length += textData.Length; LogChunkNode.UpdateLineOffsets(_data.AsSpan(0, _length), prevLength, _lineOffsets); } /// /// Appends JSON text from another buffer as plain text in this one /// public void AppendJsonAsPlainText(ReadOnlySpan inputLine, ILogger logger) { CreateOutputSpace(inputLine.Length); try { _length = ConvertToPlainText(inputLine, _data, _length); } catch (Exception ex) { inputLine.CopyTo(_data.AsSpan(_length)); _length += inputLine.Length; logger.LogWarning(ex, "Exception while attempting to parse log text as JSON. Line: \"{Line}\"", Encoding.UTF8.GetString(inputLine).Trim()); } _lineOffsets.Add(_length); } /// /// Appends JSON text from another buffer as plain text in this one /// public void AppendJsonAsPlainText(LogChunkNode srcText, int srcLineIndex, int srcLineCount, ILogger logger) { for (int idx = 0; idx < srcLineCount; idx++) { int lineOffset = srcText.LineOffsets[srcLineIndex + idx]; int nextLineOffset = srcText.LineOffsets[srcLineIndex + idx + 1]; ReadOnlySpan inputLine = srcText.Data.Slice(lineOffset, nextLineOffset - lineOffset).Span; AppendJsonAsPlainText(inputLine, logger); } } /// /// Ensure there is a certain amount of space in the output buffer /// /// Required space void CreateOutputSpace(int appendLength) { int requiredLength = _length + appendLength; if (_data.Length < requiredLength) { Array.Resize(ref _data, requiredLength); } } /// /// Determines if the given line is empty /// /// The input data /// True if the given text is empty static bool IsEmptyOrWhitespace(ReadOnlySpan input) { for (int idx = 0; idx < input.Length; idx++) { byte v = input[idx]; if (v != (byte)'\n' && v != '\r' && v != ' ') { return false; } } return true; } /// /// Converts a JSON log line to plain text /// /// The JSON data /// Output 
buffer for the converted line /// Offset within the buffer to write the converted data /// static int ConvertToPlainText(ReadOnlySpan input, byte[] output, int outputOffset) { if (IsEmptyOrWhitespace(input)) { output[outputOffset] = (byte)'\n'; return outputOffset + 1; } Utf8JsonReader reader = new Utf8JsonReader(input); if (reader.Read() && reader.TokenType == JsonTokenType.StartObject) { while (reader.Read() && reader.TokenType == JsonTokenType.PropertyName) { if (!reader.ValueTextEquals("message")) { reader.Skip(); continue; } if (!reader.Read() || reader.TokenType != JsonTokenType.String) { reader.Skip(); continue; } int unescapedLength = UnescapeUtf8(reader.ValueSpan, output.AsSpan(outputOffset)); outputOffset += unescapedLength; output[outputOffset] = (byte)'\n'; outputOffset++; break; } } return outputOffset; } /// /// Unescape a json utf8 string /// /// Source span of bytes /// Target span of bytes /// Length of the converted data static int UnescapeUtf8(ReadOnlySpan source, Span target) { int length = 0; for (; ; ) { // Copy up to the next backslash int backslash = source.IndexOf((byte)'\\'); if (backslash == -1) { source.CopyTo(target); length += source.Length; break; } else if (backslash > 0) { source.Slice(0, backslash).CopyTo(target); source = source.Slice(backslash); target = target.Slice(backslash); length += backslash; } // Check what the escape code is if (source[1] == 'u') { char[] chars = { (char)((StringUtils.ParseHexByte(source, 2) << 8) | StringUtils.ParseHexByte(source, 4)) }; int encodedLength = Encoding.UTF8.GetBytes(chars.AsSpan(), target); source = source.Slice(6); target = target.Slice(encodedLength); length += encodedLength; } else { target[0] = source[1] switch { (byte)'\"' => (byte)'\"', (byte)'\\' => (byte)'\\', (byte)'b' => (byte)'\b', (byte)'f' => (byte)'\f', (byte)'n' => (byte)'\n', (byte)'r' => (byte)'\r', (byte)'t' => (byte)'\t', _ => source[1] }; source = source.Slice(2); target = target.Slice(1); length++; } } return length; } 
/// /// Shrinks the data allocated by this chunk to the minimum required /// public void Shrink() { if (_data.Length > _length) { Array.Resize(ref _data, _length); } } /// /// Create an array of lines from the text /// /// Array of lines public Utf8String[] ToArray() { Utf8String[] lines = new Utf8String[LineCount]; for (int idx = 0; idx < LineCount; idx++) { lines[idx] = new Utf8String(_data, _lineOffsets[idx], _lineOffsets[idx + 1] - 1).Clone(); } return lines; } /// /// Create a object from the current state /// /// public LogChunkNode ToLogChunk() => new LogChunkNode(_data.AsMemory(0, _length).ToArray(), _lineOffsets.ToArray()); } /// /// Builds a sequence of log chunks /// class LogChunkSequenceBuilder { readonly List _chunks = new List(); readonly LogChunkBuilder _nextChunkBuilder; int _flushedLength; int _flushedLineCount; /// /// Desired size for each chunk /// public int ChunkSize { get; } /// /// The complete chunks. Note that this does not include data which has not yet been flushed. /// public IReadOnlyList Chunks => _chunks; /// /// Total length of the sequence /// public int Length => _flushedLength + _nextChunkBuilder.Length; /// /// Number of lines in this builder /// public int LineCount => _flushedLineCount + _nextChunkBuilder.LineCount; /// /// Constructor /// /// Desired size for each chunk. Each chunk will be limited to this size. 
public LogChunkSequenceBuilder(int chunkSize) { ChunkSize = chunkSize; _nextChunkBuilder = new LogChunkBuilder(chunkSize); } /// /// Clear the current contents of the buffer /// public void Clear() { _chunks.Clear(); _nextChunkBuilder.Clear(); _flushedLength = 0; _flushedLineCount = 0; } /// /// Remove a number of chunks from the start of the builder /// /// Number of chunks to remove public void Remove(int count) { for (int idx = 0; idx < count; idx++) { LogChunkNode chunk = _chunks[idx]; _flushedLength -= chunk.Length; _flushedLineCount -= chunk.LineCount; } _chunks.RemoveRange(0, count); } /// public void Append(ReadOnlySpan textData) { if (textData.Length == 0) { return; } if (textData[^1] != (byte)'\n') { throw new ArgumentException("Text data to append must end with a newline", nameof(textData)); } while (textData.Length > 0) { ReadOnlySpan lineData = textData.Slice(0, textData.IndexOf((byte)'\n') + 1); CreateOutputSpace(lineData.Length); _nextChunkBuilder.Append(lineData); textData = textData.Slice((int)lineData.Length); } } /// public void AppendJsonAsPlainText(ReadOnlySpan textData, ILogger logger) { if (textData.Length == 0) { return; } if (textData[^1] != (byte)'\n') { throw new ArgumentException("Text data to append must end with a newline", nameof(textData)); } while (textData.Length > 0) { ReadOnlySpan lineData = textData.Slice(0, textData.IndexOf((byte)'\n') + 1); CreateOutputSpace(lineData.Length); _nextChunkBuilder.AppendJsonAsPlainText(lineData, logger); textData = textData.Slice((int)lineData.Length); } } /// /// Flushes the current contents of the builder /// public void Flush() { if (_nextChunkBuilder.Length > 0) { LogChunkNode nextChunk = _nextChunkBuilder.ToLogChunk(); _chunks.Add(nextChunk); _nextChunkBuilder.Clear(); _flushedLength += nextChunk.Length; _flushedLineCount += nextChunk.LineCount; } } /// /// Enumerate lines starting at the given index /// /// Index to start from /// Sequence of lines public IEnumerable EnumerateLines(int 
startIdx = 0) { int lineIdx = startIdx; foreach (LogChunkNode chunk in _chunks) { for (; lineIdx < chunk.LineCount; lineIdx++) { yield return chunk.GetLineWithNewline(lineIdx); } lineIdx -= chunk.LineCount; } for (; lineIdx < _nextChunkBuilder.LineCount; lineIdx++) { yield return _nextChunkBuilder.GetLine(lineIdx); } } /// /// Flushes the current chunk if necessary to provide the requested space /// /// Space required in void CreateOutputSpace(int requiredSpace) { if (_nextChunkBuilder.Length + requiredSpace > ChunkSize) { Flush(); } } } /// /// Extension methods for ILogText /// public static class LogChunkExtensions { /// /// Gets the chunk index containing the given offset. /// /// The chunks to search /// The offset to search for /// The chunk index containing the given offset public static int GetChunkForOffset(this IReadOnlyList chunks, long offset) { int chunkIndex = chunks.BinarySearch(x => x.Offset, offset); if (chunkIndex < 0) { chunkIndex = ~chunkIndex - 1; } return chunkIndex; } /// /// Gets the starting chunk index for the given line /// /// The chunks to search /// Index of the line to query /// Index of the chunk to fetch public static int GetChunkForLine(this IReadOnlyList chunks, int lineIndex) { int chunkIndex = chunks.BinarySearch(x => x.LineIndex, lineIndex); if (chunkIndex < 0) { chunkIndex = ~chunkIndex - 1; } return chunkIndex; } /// /// Converts a log text instance to plain text /// /// The text to convert /// Logger for conversion warnings /// The plain text instance public static LogChunkNode ConvertJsonToPlainText(this LogChunkNode logText, ILogger logger) { LogChunkBuilder other = new LogChunkBuilder(); other.AppendJsonAsPlainText(logText, 0, logText.LineCount, logger); return other.ToLogChunk(); } } }