// Copyright Epic Games, Inc. All Rights Reserved. using System; using System.Buffers.Binary; using System.Diagnostics; using System.IO; using System.IO.MemoryMappedFiles; using System.Runtime.InteropServices; using System.Text; using System.Threading; using System.Threading.Tasks; using EpicGames.Core; using Microsoft.Extensions.Logging; namespace EpicGames.Horde.Symbols { /// /// Provides functionality for hashing files to add to a Microsoft Symbol store. /// /// * For PE-format files (EXE, DLL), this consists of the /// * For PDB files, this consists of the PDB GUID followed by its age (the number of times it has been written). /// public static class SymStore { /// /// Gets the hash of a file. /// /// File to hash /// Logger for diagnostic messages /// Cancellation token for the operation /// Hash of the file, or null if it cannot be parsed. public static Task GetHashAsync(FileReference file, ILogger logger, CancellationToken cancellationToken = default) { try { if (file.HasExtension(".exe") || file.HasExtension(".dll")) { return GetExeHashAsync(file, cancellationToken); } else if (file.HasExtension(".pdb")) { return Task.FromResult(GetPdbHash(file)); } else { return Task.FromResult(null); } } catch (Exception ex) { logger.LogError(ex, "Unable to get hash for {File}: {Message}", file, ex.Message); return Task.FromResult(null); } } #region PE files /// /// Gets the hash for a Windows portable executable file, consisting of concatenated hex values for the timestamp and image size fields in the header. /// Structures referenced below (IMAGE_DOS_HEADER, IMAGE_NT_HEADERS) are defined in Windows headers, but are parsed via offsets into the executable /// to reduce messy marshalling in C#. /// static async Task GetExeHashAsync(FileReference file, CancellationToken cancellationToken) { using FileStream stream = FileReference.Open(file, FileMode.Open, FileAccess.Read, FileShare.Read | FileShare.Delete); // Read the IMAGE_DOS_HEADER structure. const int SizeOf_IMAGE_DOS_HEADER = 64; const int OffsetOf_IMAGE_DOS_HEADER_Magic = 0; const int OffsetOf_IMAGE_DOS_HEADER_LfaNew = 0x3c; byte[] dosHeader = new byte[SizeOf_IMAGE_DOS_HEADER]; if (await stream.ReadAsync(dosHeader, 0, dosHeader.Length, cancellationToken) != dosHeader.Length) { return null; } ushort magic = BinaryPrimitives.ReadUInt16LittleEndian(dosHeader.AsSpan(OffsetOf_IMAGE_DOS_HEADER_Magic)); if (magic != (ushort)('M' | ('Z' << 8))) { return null; } uint lfaNew = BinaryPrimitives.ReadUInt32LittleEndian(dosHeader.AsSpan(OffsetOf_IMAGE_DOS_HEADER_LfaNew)); stream.Seek(lfaNew, SeekOrigin.Begin); // Read the first part of the IMAGE_NT_HEADERS structure (just including the signature and file header) const int SizeOf_IMAGE_NT_HEADERS = 84; const int OffsetOf_IMAGE_NT_HEADERS_Signature = 0; const int OffsetOf_IMAGE_NT_HEADERS_TimeDateStamp = 8; const int OffsetOf_IMAGE_NT_HEADERS_SizeOfImage = 80; byte[] ntHeader = new byte[SizeOf_IMAGE_NT_HEADERS]; if (await stream.ReadAsync(ntHeader, 0, ntHeader.Length, cancellationToken) != ntHeader.Length) { return null; } uint signature = BinaryPrimitives.ReadUInt16LittleEndian(ntHeader.AsSpan(OffsetOf_IMAGE_NT_HEADERS_Signature)); if (signature != (uint)('P' | ('E' << 8))) { return null; } uint timeDateStamp = BinaryPrimitives.ReadUInt32LittleEndian(ntHeader.AsSpan(OffsetOf_IMAGE_NT_HEADERS_TimeDateStamp)); uint sizeOfImage = BinaryPrimitives.ReadUInt32LittleEndian(ntHeader.AsSpan(OffsetOf_IMAGE_NT_HEADERS_SizeOfImage)); // Return the hash value return $"{timeDateStamp:X}{sizeOfImage:X}"; } #endregion #region PDB files static readonly ReadOnlyMemory s_portablePdbSignature = new byte[] { 0x42, 0x53, 0x4a, 0x42 }; /// /// Parse the hash value from a PDB file, consisting of concatenated hex values for the PDB GUID and age (the number of times it has been written). /// /// While the fields to obtain are at fairly straightforward offsets in particular data structures, PDBs are internally structured as an MSF file /// consisting of multiple data streams with non-contiguous pages. There are a few references for parsing this format: /// /// * LLVM documentation: https://llvm.org/docs/PDB/MsfFile.html /// * Microsoft's PDB source code: https://github.com/microsoft/microsoft-pdb (particularly PDB/msf/msf.cpp) /// /// This function only handles the BigMSF format. /// static string? GetPdbHash(FileReference file) { using FileStream stream = FileReference.Open(file, FileMode.Open, FileAccess.Read, FileShare.Read | FileShare.Delete); using MemoryMappedFile memoryMappedFile = MemoryMappedFile.CreateFromFile(stream, null, stream.Length, MemoryMappedFileAccess.Read, HandleInheritability.None, false); using MemoryMappedView memoryMappedView = new MemoryMappedView(memoryMappedFile, 0, stream.Length, MemoryMappedFileAccess.Read); // Check if this is a portable PDB. Symstore does not support these. Span header = memoryMappedView.GetMemory(0, 4).Span.Slice(0, 4); if (header.SequenceEqual(s_portablePdbSignature.Span)) { return null; } // Create the MSF file MsfFile msfFile = new MsfFile(memoryMappedView); // Read the guid from the PDB stream const int PdbStream = 1; ReadOnlySpan firstPdbPage = msfFile.GetStreamData(PdbStream, 0); Guid guid = new Guid(firstPdbPage.Slice(12, 16)); string result = guid.ToString("N").ToUpperInvariant(); // Read the age from the DBI stream const int DbiStream = 3; ReadOnlySpan firstDbiPage = msfFile.GetStreamData(DbiStream, 0); if (firstDbiPage.Length > 0) { int age = BinaryPrimitives.ReadInt32LittleEndian(firstDbiPage.Slice(8)); result += $"{age:X}"; } return result; } class MsfFile { static readonly byte[] s_msfSignature = Encoding.ASCII.GetBytes("Microsoft C/C++ MSF 7.00\r\n\x1a\x44\x53\0\0\0"); record struct MsfStream(int Length, int DirectoryPagesIdx); readonly MemoryMappedView _memoryMappedView; readonly int _numPages; readonly int _pageSize; readonly int[] _streamDirectoryPages; readonly MsfStream[] _streams; public MsfFile(MemoryMappedView memoryMappedView) { _memoryMappedView = memoryMappedView; // Read the file header (the 'superblock' in LLVM docs, BIGMSF_HDR in Microsoft source) const int SizeOf_BIGMSF_HDR = 0x20 + (5 * 4); const int OffsetOf_BIGMSF_HDR_Magic = 0; const int OffsetOf_BIGMSF_HDR_PageSize = 32; const int OffsetOf_BIGMSF_HDR_NumPages = 40; const int OffsetOf_BIGMSF_HDR_NumDirectoryBytes = 44; Span header = memoryMappedView.GetMemory(0, SizeOf_BIGMSF_HDR).Span.Slice(0, SizeOf_BIGMSF_HDR); if (!header.Slice(OffsetOf_BIGMSF_HDR_Magic, s_msfSignature.Length).SequenceEqual(s_msfSignature)) { throw new InvalidDataException($"Invalid signature for PDB file"); } _pageSize = BinaryPrimitives.ReadInt32LittleEndian(header.Slice(OffsetOf_BIGMSF_HDR_PageSize)); _numPages = BinaryPrimitives.ReadInt32LittleEndian(header.Slice(OffsetOf_BIGMSF_HDR_NumPages)); // The stream directory is distributed across multiple pages, and the indexes for the pages of the stream directory are also split across multiple pages. // We can use the stream directory size to figure out how many pages are in each level of indirection. int streamDirectorySize = BinaryPrimitives.ReadInt32LittleEndian(header.Slice(OffsetOf_BIGMSF_HDR_NumDirectoryBytes)); // The list of stream directory macro pages is immediately after the header int numStreamDirectoryPages = GetPageCount(streamDirectorySize); int numStreamDirectoryMacroPages = GetPageCount(numStreamDirectoryPages * sizeof(int)); Span streamDirectoryMacroPages = MemoryMarshal.Cast(memoryMappedView.GetMemory(SizeOf_BIGMSF_HDR, numStreamDirectoryMacroPages * sizeof(int)).Span); // Create a flat list of all the stream directory pages to make it easier to seek within it _streamDirectoryPages = new int[numStreamDirectoryPages]; int pageIdx = 0; for (int macroPageIdx = 0; macroPageIdx < numStreamDirectoryMacroPages; macroPageIdx++) { ReadOnlySpan macroPage = GetPage(streamDirectoryMacroPages[macroPageIdx]); for (; macroPage.Length > 0 && pageIdx < _streamDirectoryPages.Length; pageIdx++) { _streamDirectoryPages[pageIdx] = BinaryPrimitives.ReadInt32LittleEndian(macroPage); macroPage = macroPage.Slice(sizeof(int)); } } // Parse the header for each stream from the stream directory int directoryPage = 0; ReadOnlySpan directoryData = GetDirectoryPage(directoryPage); int numStreams = directoryData[0]; directoryData = directoryData.Slice(1); _streams = new MsfStream[numStreams]; int nextDirectoryOffset = 1 + numStreams; for (int streamIdx = 0; streamIdx < numStreams; streamIdx++) { if (directoryData.Length == 0) { directoryPage++; directoryData = GetDirectoryPage(directoryPage); } int streamSize = directoryData[0]; _streams[streamIdx] = new MsfStream(streamSize, nextDirectoryOffset); nextDirectoryOffset += GetPageCount(streamSize); directoryData = directoryData.Slice(1); } } int GetPageCount(int numBytes) => (numBytes + (_pageSize - 1)) / _pageSize; public ReadOnlySpan GetStreamData(int streamIdx, int offset) { int streamPageIdx = offset / _pageSize; int streamPagePos = offset % _pageSize; return GetStreamPage(streamIdx, streamPageIdx).Slice(streamPagePos); } public ReadOnlySpan GetStreamDirectoryData(int offset) { int pageIdx = (int)(offset / (_pageSize / sizeof(int))); int pagePos = (int)(offset % (_pageSize / sizeof(int))); return GetDirectoryPage(pageIdx).Slice(pagePos); } public Span GetPage(int pageIdx) { Debug.Assert(pageIdx < _numPages); // Cast to long to prevent overflow on offset for large PDBs return _memoryMappedView.GetMemory((long)pageIdx * _pageSize, _pageSize).Span; } ReadOnlySpan GetStreamPage(int streamIdx, int pageIdx) { ReadOnlySpan pages = GetStreamDirectoryData(_streams[streamIdx].DirectoryPagesIdx + pageIdx); return GetPage(pages[0]); } ReadOnlySpan GetDirectoryPage(int directoryPageIdx) { int pageIdx = _streamDirectoryPages[directoryPageIdx]; return MemoryMarshal.Cast(GetPage(pageIdx)); } } #endregion } }