// Copyright Epic Games, Inc. All Rights Reserved.

using System;
using System.Collections.Generic;
using System.Text;

namespace EpicGames.Horde.Logs
{
	/// <summary>
	/// Stores cached information about a utf8 search term
	/// </summary>
	public class SearchTerm
	{
		/// <summary>
		/// The search text
		/// </summary>
		public string Text { get; }

		/// <summary>
		/// The utf-8 bytes to search for
		/// </summary>
		public ReadOnlyMemory<byte> Bytes { get; }

		/// <summary>
		/// Normalized (ASCII-lowercase) utf-8 bytes to search for
		/// </summary>
		readonly byte[] _searchBytes;

		/// <summary>
		/// Skip table for comparisons. Indexed by the byte found at the last position of the current
		/// comparison window; gives the number of bytes the window may safely be advanced after a mismatch.
		/// </summary>
		readonly byte[] _skipTable;

		/// <summary>
		/// Constructor
		/// </summary>
		/// <param name="text">The text to search for</param>
		public SearchTerm(string text)
		{
			byte[] bytes = Encoding.UTF8.GetBytes(text);
			Bytes = bytes;
			Text = text;

			// Find the byte sequence to search for, in lowercase. Only ASCII letters are folded; every
			// other byte (including the bytes of multi-byte utf-8 sequences) is compared verbatim.
			_searchBytes = new byte[bytes.Length];
			for (int idx = 0; idx < _searchBytes.Length; idx++)
			{
				_searchBytes[idx] = ToLowerAscii(bytes[idx]);
			}

			// Build a table indicating how many characters to skip before attempting the next comparison.
			// Bytes that do not occur in the term (ignoring its final byte) allow skipping the whole term.
			byte defaultSkip = ClampSkip(_searchBytes.Length);

			_skipTable = new byte[256];
			for (int idx = 0; idx < _skipTable.Length; idx++)
			{
				_skipTable[idx] = defaultSkip;
			}

			// Walk the term left to right (excluding its final byte) so that the right-most occurrence
			// of each byte wins, which yields the smallest - and therefore safe - skip distance.
			for (int idx = 0; idx < _searchBytes.Length - 1; idx++)
			{
				byte character = _searchBytes[idx];
				byte skipBytes = ClampSkip(_searchBytes.Length - 1 - idx);

				_skipTable[character] = skipBytes;

				// The buffer being searched is not normalized, so the upper-case form of a letter
				// must produce the same skip distance as its lower-case form.
				if (character >= 'a' && character <= 'z')
				{
					_skipTable['A' + (character - 'a')] = skipBytes;
				}
			}
		}

		/// <summary>
		/// Converts an ASCII upper-case letter to lower-case; any other byte is returned unchanged
		/// </summary>
		/// <param name="character">The byte to convert</param>
		/// <returns>The normalized byte</returns>
		static byte ToLowerAscii(byte character)
		{
			if (character >= 'A' && character <= 'Z')
			{
				return (byte)('a' + (character - 'A'));
			}
			return character;
		}

		/// <summary>
		/// Converts a skip distance into a value that fits a skip table entry
		/// </summary>
		/// <remarks>
		/// Advancing by less than the ideal distance can never step over a match, so clamping is safe.
		/// A plain cast would wrap distances that are multiples of 256 to zero, which stops the search
		/// from making progress for terms of 256 bytes or more.
		/// </remarks>
		/// <param name="distance">The ideal skip distance</param>
		/// <returns>The distance, limited to the range of a byte</returns>
		static byte ClampSkip(int distance)
		{
			return (byte)Math.Min(distance, byte.MaxValue);
		}

		/// <summary>
		/// Find all occurrences of the text in the given buffer. Overlapping occurrences are reported.
		/// </summary>
		/// <param name="buffer">The buffer to search</param>
		/// <param name="text">The text to search for</param>
		/// <returns>Sequence of offsets within the buffer</returns>
		public static IEnumerable<int> FindOcurrences(ReadOnlyMemory<byte> buffer, SearchTerm text)
		{
			// The loop increment moves one byte past the previous match before searching again
			for (int offset = 0; ; offset++)
			{
				offset = FindNextOcurrence(buffer.Span, offset, text);
				if (offset == -1)
				{
					break;
				}
				yield return offset;
			}
		}

		/// <summary>
		/// Perform a case insensitive search for the next occurrence of the search term in a given buffer
		/// </summary>
		/// <param name="buffer">The buffer to search</param>
		/// <param name="offset">Starting offset for the search</param>
		/// <param name="text">The text to search for</param>
		/// <returns>Offset of the next occurrence, or -1</returns>
		public static int FindNextOcurrence(ReadOnlySpan<byte> buffer, int offset, SearchTerm text)
		{
			int termLength = text._searchBytes.Length;
			while (offset + termLength <= buffer.Length)
			{
				if (Matches(buffer, offset, text))
				{
					return offset;
				}

				// A mismatch implies a non-empty term, so the window has a last byte and the skip
				// table entry for it is at least one.
				offset += text._skipTable[buffer[offset + termLength - 1]];
			}
			return -1;
		}

		/// <summary>
		/// Compare the search term against the given buffer, ignoring the case of ASCII letters
		/// </summary>
		/// <param name="buffer">The buffer to search</param>
		/// <param name="offset">Starting offset for the search. The whole term must fit in the buffer at this offset.</param>
		/// <param name="text">The text to search for</param>
		/// <returns>True if the text matches, false otherwise</returns>
		public static bool Matches(ReadOnlySpan<byte> buffer, int offset, SearchTerm text)
		{
			// Compare from the end of the term backwards, matching the byte used to index the skip table
			for (int idx = text._searchBytes.Length - 1; idx >= 0; idx--)
			{
				if (ToLowerAscii(buffer[offset + idx]) != text._searchBytes[idx])
				{
					return false;
				}
			}
			return true;
		}
	}

	/// <summary>
	/// Extension methods for searching buffers for a <see cref="SearchTerm"/>
	/// </summary>
	public static class SearchTextExtensions
	{
		/// <summary>
		/// Find all occurrences of the text in the given buffer
		/// </summary>
		/// <param name="buffer">The buffer to search</param>
		/// <param name="text">The text to search for</param>
		/// <returns>Sequence of offsets within the buffer</returns>
		public static IEnumerable<int> FindOcurrences(this ReadOnlyMemory<byte> buffer, SearchTerm text)
		{
			return SearchTerm.FindOcurrences(buffer, text);
		}

		/// <summary>
		/// Perform a case insensitive search for the next occurrence of the search term in a given buffer
		/// </summary>
		/// <param name="buffer">The buffer to search</param>
		/// <param name="offset">Starting offset for the search</param>
		/// <param name="text">The text to search for</param>
		/// <returns>Offset of the next occurrence, or -1</returns>
		public static int FindNextOcurrence(this ReadOnlySpan<byte> buffer, int offset, SearchTerm text)
		{
			return SearchTerm.FindNextOcurrence(buffer, offset, text);
		}

		/// <summary>
		/// Compare the search term against the given buffer, ignoring the case of ASCII letters
		/// </summary>
		/// <param name="buffer">The buffer to search</param>
		/// <param name="offset">Starting offset for the search</param>
		/// <param name="text">The text to search for</param>
		/// <returns>True if the text matches, false otherwise</returns>
		public static bool Matches(this ReadOnlySpan<byte> buffer, int offset, SearchTerm text)
		{
			return SearchTerm.Matches(buffer, offset, text);
		}
	}
}