// Copyright Epic Games, Inc. All Rights Reserved.

using System;
using System.Collections;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using EpicGames.Core;

namespace EpicGames.Horde.Logs
{
	/// <summary>
	/// A sparse, space-efficient set of 64-bit values. Implemented as a trie backed by a flat lookup table.
	/// 
	/// Each 64-bit value in the set is decomposed into 4-bit fragments, and each node in the trie contains a 2^4=16-bit mask
	/// indicating which child nodes exist. The array of nodes is stored in a flat buffer, with a predictable order, with
	/// the children of a particular node stored contiguously, breadth first.
	/// 
	/// Doing so allows constructing a lookup table for the first child of each parent node with a single pass of
	/// the buffer, allowing efficient traversal of the tree to satisfy queries.
	/// 
	/// In practice, only the top 32-bits of values stored in the trie are used for encoding ngram information. The
	/// bottom 32 bits are used to index a block number, allowing querying the existence of ngrams and their
	/// rough location.
	/// </summary>
	public class NgramSet : IEnumerable<ulong>
	{
		/// <summary>
		/// Stack item for traversing the tree
		/// </summary>
		struct StackItem
		{
			/// <summary>
			/// The current node index
			/// </summary>
			public int _index;

			/// <summary>
			/// Value in the current node (0-15)
			/// </summary>
			public int _value;
		}

		/// <summary>
		/// Delegate for filtering values during a tree traversal
		/// </summary>
		/// <param name="value">The current value. Only the bits covered by <paramref name="mask"/> are meaningful.</param>
		/// <param name="mask">Mask for which bits in the value are valid</param>
		/// <returns>True if values matching the given mask should be enumerated</returns>
		public delegate bool VisitorDelegate(ulong value, ulong mask);

		/// <summary>
		/// Height of the tree: one level per 4-bit fragment of a 64-bit value (16 levels).
		/// </summary>
		const int Height = sizeof(ulong) * 2;

		/// <summary>
		/// Array of bitmasks for each node in the tree
		/// </summary>
		public IReadOnlyList<ushort> NodeData => _nodeData;

		/// <summary>
		/// Array of bitmasks for each node in the tree
		/// </summary>
		readonly ushort[] _nodeData;

		/// <summary>
		/// Index of the first child for each node, as computed by <see cref="CreateChildLookup"/>. There is one entry
		/// for every node on every level, including the final level; <see cref="Contains"/> reads the entry for the
		/// final level but never uses it to index <see cref="_nodeData"/>.
		/// </summary>
		readonly int[] _firstChildIndex;

		/// <summary>
		/// Empty index definition (a single root node with no children)
		/// </summary>
		public static NgramSet Empty { get; } = new NgramSet(new ushort[1]);

		/// <summary>
		/// Constructor
		/// </summary>
		/// <param name="nodeData">Node data. A zero-length array is treated as an empty set.</param>
		/// <exception cref="ArgumentNullException">Thrown if <paramref name="nodeData"/> is null</exception>
		public NgramSet(ushort[] nodeData)
		{
			_nodeData = nodeData ?? throw new ArgumentNullException(nameof(nodeData));
			_firstChildIndex = CreateChildLookup(nodeData);
		}

		/// <summary>
		/// Tests whether the given value is in the trie
		/// </summary>
		/// <param name="value">The value to check for</param>
		/// <returns>True if the value is in the trie</returns>
		public bool Contains(ulong value)
		{
			// A set without even a root node cannot contain anything.
			if (_nodeData.Length == 0)
			{
				return false;
			}

			int index = 0;
			for (int shift = (sizeof(ulong) * 8) - 4; shift >= 0; shift -= 4)
			{
				int mask = _nodeData[index];
				int flag = 1 << (int)((value >> shift) & 15);
				if ((mask & flag) == 0)
				{
					return false;
				}

				// Start from the first child of this node, then advance once for each set bit below the one we want.
				index = _firstChildIndex[index];
				for (; ; )
				{
					// Clear the lowest set bit; once that removes our flag, every lower sibling has been skipped.
					mask &= (mask - 1);
					if ((mask & flag) == 0)
					{
						break;
					}
					index++;
				}
			}
			return true;
		}

		/// <inheritdoc/>
		public IEnumerator<ulong> GetEnumerator()
		{
			return EnumerateRange(UInt64.MinValue, UInt64.MaxValue).GetEnumerator();
		}

		/// <inheritdoc/>
		IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

		/// <summary>
		/// Enumerate all values matching a given filter
		/// </summary>
		/// <param name="predicate">Predicate for which values to include. Called for each existing branch with the value
		/// prefix accumulated so far and a mask of the bits that prefix covers.</param>
		/// <returns>Values satisfying the given predicate</returns>
		public IEnumerable<ulong> EnumerateValues(VisitorDelegate predicate)
		{
			// A set without even a root node has nothing to enumerate.
			if (_nodeData.Length == 0)
			{
				yield break;
			}

			int depth = 0;
			ulong value = 0;

			// One stack slot per level. stack[0] is the root (node index 0, value 0 by default).
			StackItem[] stack = new StackItem[Height];
			stack[1]._index = _firstChildIndex[0];

			for (; ; )
			{
				StackItem current = stack[depth];
				if (current._value >= 16)
				{
					// Move up the tree if we've enumerated all the branches at the current level
					depth--;
					if (depth < 0)
					{
						yield break;
					}
					stack[depth]._value++;

					// Increment the child index too. These are stored sequentially for a given parent. The value will be cleared when we recurse into it.
					stack[depth + 1]._index++;
				}
				else if ((_nodeData[current._index] & (1 << current._value)) == 0)
				{
					// This branch does not exist. Skip it.
					stack[depth]._value++;
				}
				else
				{
					// Get the value and mask for the current node. 'mask << 4' keeps only the fragments from
					// shallower levels, so stale bits from this level and below are discarded.
					int shift = (stack.Length - depth - 1) * 4;
					ulong mask = ~((1UL << shift) - 1);
					value = (value & (mask << 4)) | ((ulong)(uint)current._value << shift);

					if (!predicate(value, mask))
					{
						// This node is excluded, skip it. The branch exists, so its child slot must still be stepped over.
						stack[depth]._value++;
						if (depth + 1 < stack.Length)
						{
							stack[depth + 1]._index++;
						}
					}
					else if (depth + 1 < stack.Length)
					{
						// Move down the tree
						depth++;
						stack[depth]._value = 0;
						if (depth + 1 < stack.Length)
						{
							stack[depth + 1]._index = _firstChildIndex[stack[depth]._index];
						}
					}
					else
					{
						// Yield the current value
						yield return value;
						stack[depth]._value++;
					}
				}
			}
		}

		/// <summary>
		/// Enumerates all values in the trie between the given ranges
		/// </summary>
		/// <param name="minValue">Minimum value to enumerate (inclusive)</param>
		/// <param name="maxValue">Maximum value to enumerate (inclusive)</param>
		/// <returns>Sequence of values</returns>
		public IEnumerable<ulong> EnumerateRange(ulong minValue, ulong maxValue)
		{
			// Compare only the bits known so far, so that a partial prefix is kept whenever it could still lead to a value in range.
			return EnumerateValues((value, mask) => (value >= (minValue & mask) && value <= (maxValue & mask)));
		}

		/// <summary>
		/// Creates a lookup for child node offsets from raw node data
		/// </summary>
		/// <param name="nodeData">Array of masks for each node</param>
		/// <returns>Array of offsets; empty if <paramref name="nodeData"/> is empty</returns>
		static int[] CreateChildLookup(ushort[] nodeData)
		{
			List<int> childOffsets = new List<int>();
			if (nodeData.Length > 0)
			{
				// The root occupies index 0, so the first child of the root is at index 1.
				int nodeCount = 1;
				int index = 0;
				int childIndex = nodeCount;

				for (int level = 0; level < Height; level++)
				{
					int nextNodeCount = 0;
					for (int idx = 0; idx < nodeCount; idx++)
					{
						ushort node = nodeData[index++];
						int numChildren = CountBits(node);

						// Children are laid out contiguously, so each node's first child follows the previous node's last child.
						childOffsets.Add(childIndex);
						childIndex += numChildren;

						nextNodeCount += numChildren;
					}
					nodeCount = nextNodeCount;
				}
			}
			return childOffsets.ToArray();
		}

		/// <summary>
		/// Count the number of set bits in the given value
		/// </summary>
		/// <param name="value">Value to test</param>
		/// <returns>Number of set bits</returns>
		static int CountBits(ushort value)
		{
			// Sum adjacent bit groups of doubling width (1, 2, 4, then 8 bits).
			int count = value;
			count = (count & 0b0101010101010101) + ((count >> 1) & 0b0101010101010101);
			count = (count & 0b0011001100110011) + ((count >> 2) & 0b0011001100110011);
			count = (count & 0b0000111100001111) + ((count >> 4) & 0b0000111100001111);
			count = (count & 0b0000000011111111) + ((count >> 8) & 0b0000000011111111);
			return count;
		}

		/// <summary>
		/// Read a trie from the given buffer
		/// </summary>
		/// <param name="reader">Reader to read from</param>
		/// <returns>New trie</returns>
		public static NgramSet Read(IMemoryReader reader)
		{
			ReadOnlyMemory<byte> nodes = reader.ReadVariableLengthBytesWithInt32Length();

			// The bytes are reinterpreted in place as 16-bit masks; MemoryMarshal.Cast ignores a trailing odd byte.
			ushort[] nodeData = MemoryMarshal.Cast<byte, ushort>(nodes.Span).ToArray();
			return new NgramSet(nodeData);
		}

		/// <summary>
		/// Write this trie to the given buffer
		/// </summary>
		/// <param name="writer">Writer to output to</param>
		public void Write(IMemoryWriter writer)
		{
			writer.WriteVariableLengthBytesWithInt32Length(MemoryMarshal.AsBytes<ushort>(_nodeData));
		}

		/// <summary>
		/// Gets the serialized size of this trie
		/// </summary>
		/// <returns>Size in bytes: a 32-bit length prefix followed by two bytes per node</returns>
		public int GetSerializedSize()
		{
			return (sizeof(int) + _nodeData.Length * sizeof(ushort));
		}
	}

	/// <summary>
	/// Extension methods for serializing tries
	/// </summary>
	public static class NgramSetExtensions
	{
		/// <summary>
		/// Read a trie from the given buffer
		/// </summary>
		/// <param name="reader">Reader to read from</param>
		/// <returns>New trie</returns>
		public static NgramSet ReadNgramSet(this IMemoryReader reader)
		{
			return NgramSet.Read(reader);
		}

		/// <summary>
		/// Write this trie to the given buffer
		/// </summary>
		/// <param name="writer">Writer to output to</param>
		/// <param name="set">Trie to write</param>
		public static void WriteNgramSet(this IMemoryWriter writer, NgramSet set)
		{
			set.Write(writer);
		}
	}
}