// Copyright 2011-2020 Molecular Matters GmbH, all rights reserved. #if LC_VERSION == 1 // BEGIN EPIC MOD //#include PCH_INCLUDE // END EPIC MOD #include "LC_Coff.h" #include "LC_CoffDetail.h" #include "LC_StringUtil.h" #include "LC_PointerUtil.h" #include "LC_Filesystem.h" #include "LC_SymbolPatterns.h" #include "LC_Symbols.h" #include "LC_MemoryStream.h" #include "LC_UniqueId.h" #include "LC_MemoryMappedFile.h" #include "LC_Assembler.h" // BEGIN EPIC MOD #include "LC_Allocators.h" #include "LC_Assert.h" #include "LC_Vfs.h" #include // END EPIC MOD namespace { static const uint32_t INVALID_CRT_SECTION = 0xFFFFFFFFu; // this is not used as a character in mangled names static const char COFF_SUFFIX = '%'; static const wchar_t COFF_SUFFIX_WIDE = L'%'; static const ImmutableString TLS_SECTION(".tls"); } namespace coff { ObjFile* OpenObj(const wchar_t* filename) { // BEGIN EPIC MOD Filesystem::Path temp; filename = Filesystem::Devirtualize(filename, temp); // END EPIC MOD ObjFile* objFile = LC_NEW(&g_objFileAllocator, ObjFile); objFile->filename = string::ToUtf8String(filename); objFile->memoryFile = Filesystem::OpenMemoryMappedFile(filename, Filesystem::OpenMode::READ, true); return objFile; } void CloseObj(ObjFile*& objFile) { Filesystem::CloseMemoryMappedFile(objFile->memoryFile); LC_DELETE(&g_objFileAllocator, objFile, sizeof(ObjFile)); objFile = nullptr; } static size_t FindSymbolShortNameLength(const char* shortName) { size_t length = 0u; while ((shortName[length] != '\0') && (length < 8u)) { // find (optional!) null-terminator ++length; } return length; } static ImmutableString GetSymbolShortName(const BYTE* shortName) { const char* str = reinterpret_cast(shortName); return ImmutableString(str, FindSymbolShortNameLength(str)); } static ImmutableString DisambiguateStaticSymbolName(const char* name, size_t nameLength, uint32_t uniqueCoffId, uint16_t uniqueCounter) { // static symbols are not necessarily unique across different translation units, hence we need something // to disambiguate them. this is done by appending the unique ID of the .obj file in which this symbol is defined. // the new name has the form "name%ID" (plus a null terminator), e.g. "g_counter2%AF20" const size_t frontLength = nameLength; const size_t uniqueIdLength = 8u; const bool needsCounter = (uniqueCounter != 0u); const size_t length = needsCounter ? frontLength + 1u + uniqueIdLength + 1u + 4u // unlikely case, add a unique counter to the name of the symbol : frontLength + 1u + uniqueIdLength + 1u; // more likely case, no unique counter needed char* temp = static_cast(_alloca(length)); char* finalName = temp; memcpy(temp, name, frontLength); if (needsCounter) { temp[frontLength + 0u] = "0123456789ABCDEF"[(uniqueCounter >> 12u) & 0x0F]; temp[frontLength + 1u] = "0123456789ABCDEF"[(uniqueCounter >> 8u) & 0x0F]; temp[frontLength + 2u] = "0123456789ABCDEF"[(uniqueCounter >> 4u) & 0x0F]; temp[frontLength + 3u] = "0123456789ABCDEF"[(uniqueCounter >> 0u) & 0x0F]; temp += 4u; } temp[frontLength] = COFF_SUFFIX; // store unique ID in hex, no leading zeros bool wasValueWritten = false; temp += frontLength + 1u; for (unsigned int i = 0u; i < 8u; ++i) { const uint32_t shift = (7u - i) * 4u; // 28, 24, 20, 16, ... const uint32_t index = (uniqueCoffId >> shift) & 0x0F; // if no value != zero has been written yet, don't advance to the next position. // this makes sure that no leading zeros are written and automatically handles the case of ID == 0, // where at least one zero has to be written. if (wasValueWritten) { ++temp; } *temp = "0123456789ABCDEF"[index]; if (index != 0u) { wasValueWritten = true; } } temp[1] = '\0'; return ImmutableString(finalName); } template static ImmutableString GetSymbolName(const char* stringTable, const T* symbol) { /* From the COFF spec: The ShortName field in a symbol table consists of 8 bytes that contain the name itself, if it is not more than 8 bytes long, or the ShortName field gives an offset into the string table. To determine whether the name itself or an offset is given, test the first 4 bytes for equality to zero. */ if (symbol->N.Name.Short != 0) { // short name return GetSymbolShortName(symbol->N.ShortName); } else { // long name, points into string table return ImmutableString(stringTable + symbol->N.Name.Long); } } template static ImmutableString GetSymbolName(const char* stringTable, const T* symbol, uint32_t uniqueId, uint16_t uniqueCounter) { /* From the COFF spec: The ShortName field in a symbol table consists of 8 bytes that contain the name itself, if it is not more than 8 bytes long, or the ShortName field gives an offset into the string table. To determine whether the name itself or an offset is given, test the first 4 bytes for equality to zero. */ // static symbols must have their name disambiguated across several translation units. // this is true even for COMDAT symbols, because COMDAT folding done by the linker depends on linker settings // and (seemingly) the type/name of the symbol, e.g. folding is done for template functions, but not for // identical static inline functions in several translation units. const bool isStatic = (symbol->StorageClass == IMAGE_SYM_CLASS_STATIC); if (symbol->N.Name.Short != 0) { // short name if (isStatic) { const char* str = reinterpret_cast(symbol->N.ShortName); return DisambiguateStaticSymbolName(str, FindSymbolShortNameLength(str), uniqueId, uniqueCounter); } else { return GetSymbolShortName(symbol->N.ShortName); } } else { // long name, points into string table const char* str = stringTable + symbol->N.Name.Long; return isStatic ? DisambiguateStaticSymbolName(str, strlen(str), uniqueId, uniqueCounter) : ImmutableString(str); } } static ImmutableString GetSectionName(const char* stringTable, const IMAGE_SECTION_HEADER* section) { /* From the COFF spec: An 8-byte, null-padded UTF-8 encoded string. If the string is exactly 8 characters long, there is no terminating null. For longer names, this field contains a slash (/) that is followed by an ASCII representation of a decimal number that is an offset into the string table. Executable images do not use a string table and do not support section names longer than 8 characters. Long names in object files are truncated if they are emitted to an executable file. */ if (section->Name[0] == '/') { // potentially a long name, but could also be a section starting with '/' unsigned int offset = 0; const int convertedCount = sscanf_s(reinterpret_cast(§ion->Name[1]), "%u", &offset); if (convertedCount > 0) { return ImmutableString(stringTable + offset); } // could not convert decimal number, hence the section short name starts with '/' } // short name return GetSymbolShortName(section->Name); } static bool SortSymbolByAscendingRVA(const Symbol* lhs, const Symbol* rhs) { return lhs->rva < rhs->rva; } template static coff::SymbolType::Enum DetermineDataSymbolType(const T* symbol) { LC_ASSERT(!coffDetail::IsFunctionSymbol(symbol), "Symbol must be a data symbol"); if (symbol->StorageClass == IMAGE_SYM_CLASS_EXTERNAL) { return coff::SymbolType::EXTERNAL_DATA; } if ((symbol->StorageClass == IMAGE_SYM_CLASS_STATIC) || (symbol->Value == 0u)) { return coff::SymbolType::STATIC_DATA; } return coff::SymbolType::UNKNOWN_DATA; } template static coff::SymbolType::Enum DetermineFunctionSymbolType(const T* symbol) { LC_ASSERT(coffDetail::IsFunctionSymbol(symbol), "Symbol must be a function symbol"); if (symbol->StorageClass == IMAGE_SYM_CLASS_EXTERNAL) { return coff::SymbolType::EXTERNAL_FUNCTION; } if (symbol->StorageClass == IMAGE_SYM_CLASS_STATIC) { return coff::SymbolType::STATIC_FUNCTION; } return coff::SymbolType::UNKNOWN_FUNCTION; } template static coff::SymbolType::Enum DetermineSymbolType(const T* symbol) { return coffDetail::IsFunctionSymbol(symbol) ? DetermineFunctionSymbolType(symbol) : DetermineDataSymbolType(symbol); } template static RawCoffType* ReadRaw(ObjFile* file, uint32_t uniqueId) { using CoffHeader = typename coffDetail::CoffType::TypeMap::HeaderType; using CoffSymbol = typename coffDetail::CoffType::TypeMap::SymbolType; using CoffAuxSymbol = typename coffDetail::CoffType::TypeMap::AuxSymbolType; RawCoffType* rawCoff = new RawCoffType; rawCoff->size = Filesystem::GetMemoryMappedSize(file->memoryFile); rawCoff->type = RawCoffType::TYPE; const void* objFileBase = Filesystem::GetMemoryMappedFileData(file->memoryFile); // section header is first const CoffHeader* rawCoffHeader = pointer::As(objFileBase); rawCoff->header = *rawCoffHeader; // section headers follow after that const IMAGE_SECTION_HEADER* rawSectionHeaders = coffDetail::GetSectionHeader(objFileBase); const uint32_t sectionCount = coffDetail::GetNumberOfSections(objFileBase); rawCoff->sections.reserve(sectionCount); for (uint32_t i = 0u; i < sectionCount; ++i) { const IMAGE_SECTION_HEADER* rawSection = rawSectionHeaders + i; RawSection section; // header { section.header = *rawSection; } // raw data if (rawSection->PointerToRawData != 0u) { // some sections like .bss don't store (uninitialized) data const DWORD rawDataSize = rawSection->SizeOfRawData; section.data = LC_ALLOC(&g_rawCoffAllocator, rawDataSize, 8u); memcpy(section.data, pointer::Offset(objFileBase, rawSection->PointerToRawData), rawDataSize); } // relocations { const IMAGE_RELOCATION* rawRelocations = pointer::Offset(objFileBase, rawSection->PointerToRelocations); const size_t count = coffDetail::GetRelocationCount(objFileBase, rawSection); // if relocation count in section has overflown, ignore the first relocation const size_t startRelocation = (count > 0xFFFFu) ? 1u : 0u; section.relocations.reserve(count - startRelocation); for (size_t j = 0u; j < count - startRelocation; ++j) { section.relocations.push_back(rawRelocations[j + startRelocation]); } } // line numbers { const IMAGE_LINENUMBER* rawLineNumbers = pointer::Offset(objFileBase, rawSection->PointerToLinenumbers); const size_t count = rawSection->NumberOfLinenumbers; section.lineNumbers.reserve(count); for (size_t j = 0u; j < count; ++j) { section.lineNumbers.push_back(rawLineNumbers[j]); } } rawCoff->sections.emplace_back(std::move(section)); } // symbol table follows after section headers, but offset is stored in COFF header directly const void* rawSymbolTable = coffDetail::GetSymbolTable(objFileBase); const uint32_t symbolCount = coffDetail::GetNumberOfSymbols(objFileBase); rawCoff->symbols.reserve(symbolCount); for (uint32_t i = 0u; i < symbolCount; ++i) { const CoffSymbol* rawSymbol = coffDetail::GetSymbol(rawSymbolTable, i); rawCoff->symbols.push_back(*rawSymbol); } // string table follows after symbol table const char* rawStringTable = coffDetail::GetStringTable(rawSymbolTable, symbolCount); // first 4 bytes contain the total size of the string table (including these 4 bytes) rawCoff->rawStringTable.size = *pointer::As(rawStringTable); rawCoff->rawStringTable.data = static_cast(LC_ALLOC(&g_rawCoffAllocator, rawCoff->rawStringTable.size, 8u)); memcpy(rawCoff->rawStringTable.data, rawStringTable, rawCoff->rawStringTable.size); // construct string table { rawCoff->stringTable.resize(symbolCount); types::StringMap uniqueStaticDataSymbols; uniqueStaticDataSymbols.reserve(16u); for (uint32_t i = 0u; i < symbolCount; ++i) { const CoffSymbol* symbol = coffDetail::GetSymbol(rawSymbolTable, i); if (coffDetail::IsAbsoluteSymbol(symbol)) { continue; } else if (coffDetail::IsDebugSymbol(symbol)) { continue; } else if (coffDetail::IsSectionSymbol(symbol)) { continue; } const SymbolType::Enum type = DetermineSymbolType(symbol); rawCoff->stringTable[i] = GetSymbolName(rawStringTable, symbol, uniqueId, 0u); const ImmutableString& name = rawCoff->stringTable[i]; if (type == SymbolType::STATIC_DATA) { // make sure this symbol is unique auto insertIterator = uniqueStaticDataSymbols.emplace(name, static_cast(0u)); if (insertIterator.second == false) { // the name of this symbol is not unique. // as a workaround, try generating a unique name for it. // this does not fix potential issues in all cases, but works successfully in cases where // this compiland is never recompiled, or where the order of variables in the file doesn't change. // increase the counter associated with this name and generate a new name from it ++insertIterator.first->second; rawCoff->stringTable[i] = GetSymbolName(rawStringTable, symbol, uniqueId, insertIterator.first->second); } } } } // construct look-up table of associated COMDAT sections { for (uint32_t i = 0u; i < symbolCount; ++i) { const CoffSymbol* symbol = coffDetail::GetSymbol(rawSymbolTable, i); // ignore absolute, debug and undefined symbols if (coffDetail::IsAbsoluteSymbol(symbol)) { } else if (coffDetail::IsDebugSymbol(symbol)) { } else if (coffDetail::IsUndefinedSymbol(symbol)) { } else if (coffDetail::IsSectionSymbol(symbol)) { // if this is a COMDAT section, grab its selection number from the auxiliary record const uint32_t symbolSectionIndex = coffDetail::GetSectionIndex(symbol); RawSection& section = rawCoff->sections[symbolSectionIndex]; if (coffDetail::IsComdatSection(§ion.header)) { // the auxiliary record holds information about the COMDAT section. according to the COFF spec 5.5.6, // a COMDAT section always has one auxiliary record which is "the COMDAT symbol". if (symbol->NumberOfAuxSymbols == 1u) { const CoffAuxSymbol* auxSymbol = coffDetail::GetSymbol(rawSymbolTable, i + 1u); const uint8_t selection = auxSymbol->Section.Selection; if (selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) { const uint32_t associatedSectionIndex = coffDetail::GetAssociatedComdatSectionIndex(auxSymbol); rawCoff->associatedComdatSections[associatedSectionIndex].push_back(symbolSectionIndex); } else if (selection == IMAGE_COMDAT_SELECT_ANY) { section.isSelectAnyComdat = true; } } } } } } return rawCoff; } RawCoff* ReadRaw(ObjFile* file, uint32_t uniqueId) { const coffDetail::CoffType::Enum type = coffDetail::GetCoffType(Filesystem::GetMemoryMappedFileData(file->memoryFile)); if (type == coffDetail::CoffType::COFF) { return ReadRaw(file, uniqueId); } else if (type == coffDetail::CoffType::BIGOBJ) { return ReadRaw(file, uniqueId); } return nullptr; } template static void WriteRaw(const wchar_t* filename, const RawCoffType* rawCoff, SymbolRemovalStrategy::Enum removalStrategy) { using CoffHeader = typename coffDetail::CoffType::TypeMap::HeaderType; using CoffSymbol = typename coffDetail::CoffType::TypeMap::SymbolType; // the file structure is as follows: // - COFF file header // - all section headers // - all symbols // - string table // - raw data for section 0 // - relocations for section 0 // - line numbers for section 0 // - ... // - raw data for section N // - relocations for section N // - line numbers for section N // keeping the section data, relocations, and line numbers at the end of the file makes it easier to keep track // of file offsets that are stored in the section headers. memoryStream::Writer outputData(static_cast(rawCoff->size)); CoffHeader coffHeader(rawCoff->header); // careful: the number of sections is of type WORD, but DWORD in files compiled with /bigobj! typedef decltype(CoffHeader::NumberOfSections) SectionCountType; coffHeader.NumberOfSections = static_cast(rawCoff->sections.size()); coffHeader.NumberOfSymbols = static_cast(rawCoff->symbols.size()); coffHeader.PointerToSymbolTable = static_cast(sizeof(CoffHeader) + rawCoff->sections.size() * sizeof(IMAGE_SECTION_HEADER)); outputData.Write(coffHeader); const size_t baseOffset = sizeof(CoffHeader) + rawCoff->sections.size() * sizeof(IMAGE_SECTION_HEADER) + rawCoff->symbols.size() * sizeof(CoffSymbol) + rawCoff->rawStringTable.size; size_t fileOffset = baseOffset; for (size_t i = 0u; i < rawCoff->sections.size(); ++i) { const RawSection& section(rawCoff->sections[i]); IMAGE_SECTION_HEADER sectionHeader(rawCoff->sections[i].header); if (sectionHeader.PointerToRawData != 0u) { // .bss and sections with uninitialized data have a size, but don't store any actual data sectionHeader.PointerToRawData = static_cast(fileOffset); fileOffset += sectionHeader.SizeOfRawData; } if (section.relocations.size() == 0u) { sectionHeader.PointerToRelocations = 0u; sectionHeader.NumberOfRelocations = 0u; } else { sectionHeader.PointerToRelocations = static_cast(fileOffset); size_t relocationCount = section.relocations.size(); if (relocationCount >= 0xFFFFu) { // relocation count would overflow, so we need to add one extra relocation that stores // the actual number of relocations. sectionHeader.NumberOfRelocations = 0xFFFFu; sectionHeader.Characteristics |= IMAGE_SCN_LNK_NRELOC_OVFL; ++relocationCount; } else { sectionHeader.NumberOfRelocations = static_cast(relocationCount); } fileOffset += relocationCount * sizeof(IMAGE_RELOCATION); } if (section.lineNumbers.size() == 0u) { sectionHeader.PointerToLinenumbers = 0u; } else { sectionHeader.PointerToLinenumbers = static_cast(fileOffset); fileOffset += section.lineNumbers.size() * sizeof(IMAGE_LINENUMBER); } sectionHeader.NumberOfLinenumbers = static_cast(section.lineNumbers.size()); outputData.Write(sectionHeader); } for (size_t i = 0u; i < rawCoff->symbols.size(); ++i) { outputData.Write(rawCoff->symbols[i]); } outputData.Write(rawCoff->rawStringTable.data, rawCoff->rawStringTable.size); for (size_t i = 0u; i < rawCoff->sections.size(); ++i) { const RawSection& section = rawCoff->sections[i]; if (section.header.PointerToRawData != 0u) { outputData.Write(section.data, section.header.SizeOfRawData); } const size_t relocationCount = section.relocations.size(); if (relocationCount >= 0xFFFFu) { // relocation count would overflow, so we need to add one extra relocation that stores // the actual number of relocations. IMAGE_RELOCATION dummyRelocation = {}; dummyRelocation.RelocCount = static_cast(relocationCount); outputData.Write(dummyRelocation); } if (removalStrategy == SymbolRemovalStrategy::Enum::MSVC_COMPATIBLE) { for (size_t j = 0u; j < relocationCount; ++j) { const IMAGE_RELOCATION& relocation = section.relocations[j]; outputData.Write(relocation); } } else if (removalStrategy == SymbolRemovalStrategy::Enum::LLD_COMPATIBLE) { // LLD does not allow SECREL relocations to absolute symbols (which are our fake stripped symbols), // unless these relocations are contained in a debug section. // debug sections can be identified by checking if the section in question is marked as being discardable. for (size_t j = 0u; j < relocationCount; ++j) { IMAGE_RELOCATION relocation = section.relocations[j]; if (relocation.Type == Relocation::Type::SECTION_RELATIVE) { // only "fix" section-relative relocations in non-debug sections if (!coffDetail::IsDiscardableSection(§ion.header)) { // only "fix" section-relative relocations pointing to a removed symbol if (IsRemovedSymbol(rawCoff, relocation.SymbolTableIndex, removalStrategy)) { // fix by whatever means necessary in order to make LLD happy. // this is a stripped symbol, so its relocation will be patched anyway. relocation.Type = Relocation::Type::RVA_32; } } } outputData.Write(relocation); } } for (size_t j = 0u; j < section.lineNumbers.size(); ++j) { outputData.Write(section.lineNumbers[j]); } } Filesystem::CreateFileWithData(filename, outputData.GetData(), outputData.GetSize()); } void WriteRaw(const wchar_t* filename, const RawCoff* rawCoff, SymbolRemovalStrategy::Enum removalStrategy) { if (rawCoff->type == RawCoffRegular::TYPE) { WriteRaw(filename, static_cast(rawCoff), removalStrategy); } else if (rawCoff->type == RawCoffBigObj::TYPE) { WriteRaw(filename, static_cast(rawCoff), removalStrategy); } } template static void DestroyRaw(RawCoffType* rawCoff) { for (size_t i = 0u; i < rawCoff->sections.size(); ++i) { const RawSection& section = rawCoff->sections[i]; LC_FREE(&g_rawCoffAllocator, section.data, section.header.SizeOfRawData); } LC_FREE(&g_rawCoffAllocator, rawCoff->rawStringTable.data, rawCoff->rawStringTable.size); delete rawCoff; } void DestroyRaw(RawCoff* rawCoff) { if (rawCoff->type == RawCoffRegular::TYPE) { DestroyRaw(static_cast(rawCoff)); } else if (rawCoff->type == RawCoffBigObj::TYPE) { DestroyRaw(static_cast(rawCoff)); } } size_t GetSymbolCount(const RawCoff* rawCoff) { if (rawCoff->type == RawCoffRegular::TYPE) { return static_cast(rawCoff)->symbols.size(); } else if (rawCoff->type == RawCoffBigObj::TYPE) { return static_cast(rawCoff)->symbols.size(); } return 0u; } size_t GetSectionCount(const RawCoff* rawCoff) { return rawCoff->sections.size(); } size_t GetAuxSymbolCount(const RawCoff* rawCoff, size_t symbolIndex) { if (rawCoff->type == RawCoffRegular::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[symbolIndex]; return symbol.NumberOfAuxSymbols; } else if (rawCoff->type == RawCoffBigObj::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[symbolIndex]; return symbol.NumberOfAuxSymbols; } return 0u; } SymbolType::Enum GetSymbolType(const RawCoff* rawCoff, size_t index) { if (rawCoff->type == RawCoffRegular::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return DetermineSymbolType(&symbol); } else if (rawCoff->type == RawCoffBigObj::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return DetermineSymbolType(&symbol); } return SymbolType::UNKNOWN_FUNCTION; } const ImmutableString& GetSymbolName(const RawCoff* rawCoff, size_t index) { return rawCoff->stringTable[index]; } ImmutableString GetSectionName(const RawCoff* rawCoff, size_t index) { const IMAGE_SECTION_HEADER* header = &rawCoff->sections[index].header; return GetSectionName(rawCoff->rawStringTable.data, header); } uint32_t GetSymbolSectionIndex(const RawCoff* rawCoff, size_t index) { if (rawCoff->type == RawCoffRegular::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return coffDetail::GetSectionIndex(&symbol); } else if (rawCoff->type == RawCoffBigObj::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return coffDetail::GetSectionIndex(&symbol); } return 0u; } bool IsAbsoluteSymbol(const RawCoff* rawCoff, size_t index) { if (rawCoff->type == RawCoffRegular::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return coffDetail::IsAbsoluteSymbol(&symbol); } else if (rawCoff->type == RawCoffBigObj::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return coffDetail::IsAbsoluteSymbol(&symbol); } return false; } bool IsDebugSymbol(const RawCoff* rawCoff, size_t index) { if (rawCoff->type == RawCoffRegular::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return coffDetail::IsDebugSymbol(&symbol); } else if (rawCoff->type == RawCoffBigObj::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return coffDetail::IsDebugSymbol(&symbol); } return false; } bool IsSectionSymbol(const RawCoff* rawCoff, size_t index) { if (rawCoff->type == RawCoffRegular::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return coffDetail::IsSectionSymbol(&symbol); } else if (rawCoff->type == RawCoffBigObj::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return coffDetail::IsSectionSymbol(&symbol); } return false; } bool IsUndefinedSymbol(const RawCoff* rawCoff, size_t index) { if (rawCoff->type == RawCoffRegular::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return coffDetail::IsUndefinedSymbol(&symbol); } else if (rawCoff->type == RawCoffBigObj::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return coffDetail::IsUndefinedSymbol(&symbol); } return false; } bool IsRemovedSymbol(const RawCoff* rawCoff, size_t index, SymbolRemovalStrategy::Enum removalStrategy) { if (rawCoff->type == RawCoffRegular::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return ((symbol.Type == IMAGE_SYM_TYPE_NULL) && (symbol.StorageClass == IMAGE_SYM_CLASS_NULL) && (symbol.SectionNumber == removalStrategy)); } else if (rawCoff->type == RawCoffBigObj::TYPE) { const auto& symbol = static_cast(rawCoff)->symbols[index]; return ((symbol.Type == IMAGE_SYM_TYPE_NULL) && (symbol.StorageClass == IMAGE_SYM_CLASS_NULL) && (symbol.SectionNumber == removalStrategy)); } return false; } bool IsSelectAnyComdatSection(const RawCoff* rawCoff, size_t sectionIndex) { return (rawCoff->sections[sectionIndex].isSelectAnyComdat); } static types::vector TokenizeLinkerDirectives(const char* sectionData, size_t size) { types::vector directives; // gather all linker commands by tokenizing raw data. // individual commands are separated by spaces. const char* start = sectionData; for (size_t i = 0u; i < size; ++i) { // look for spaces if (sectionData[i] == ' ') { if (sectionData + i > start) { std::string directive(start, sectionData + i); directives.emplace_back(directive); } start = sectionData + i + 1u; } } return directives; } template static types::vector ExtractLinkerDirectives(const ObjFile* objFile) { using CoffHeader = typename coffDetail::CoffType::TypeMap::HeaderType; const void* objFileBase = Filesystem::GetMemoryMappedFileData(objFile->memoryFile); const uint32_t sectionCount = coffDetail::GetNumberOfSections(objFileBase); const IMAGE_SECTION_HEADER* sectionHeader = coffDetail::GetSectionHeader(objFileBase); for (unsigned int i = 0u; i < sectionCount; ++i) { const IMAGE_SECTION_HEADER* section = sectionHeader + i; if (coffDetail::IsDirectiveSection(section)) { const char* rawData = pointer::Offset(objFileBase, section->PointerToRawData); return TokenizeLinkerDirectives(rawData, section->SizeOfRawData); } } return types::vector(); } types::vector ExtractLinkerDirectives(const ObjFile* file) { const coffDetail::CoffType::Enum type = coffDetail::GetCoffType(Filesystem::GetMemoryMappedFileData(file->memoryFile)); if (type == coffDetail::CoffType::COFF) { return ExtractLinkerDirectives(file); } else if (type == coffDetail::CoffType::BIGOBJ) { return ExtractLinkerDirectives(file); } return types::vector(); } types::vector ExtractLinkerDirectives(const RawCoff* rawCoff) { const size_t sectionCount = rawCoff->sections.size(); for (size_t i = 0u; i < sectionCount; ++i) { const RawSection& section = rawCoff->sections[i]; if (coffDetail::IsDirectiveSection(§ion.header)) { return TokenizeLinkerDirectives(static_cast(section.data), section.header.SizeOfRawData); } } return types::vector(); } void ReplaceLinkerDirectives(RawCoff* rawCoff, const types::vector& directives) { const size_t sectionCount = rawCoff->sections.size(); for (size_t i = 0u; i < sectionCount; ++i) { RawSection& section = rawCoff->sections[i]; if (coffDetail::IsDirectiveSection(§ion.header)) { std::string newDirectives; newDirectives.reserve(1024u); // separate directives with spaces const size_t count = directives.size(); for (size_t j = 0u; j < count; ++j) { newDirectives += directives[j]; newDirectives += ' '; } LC_FREE(&g_rawCoffAllocator, section.data, section.header.SizeOfRawData); section.header.SizeOfRawData = static_cast(newDirectives.length()); section.data = LC_ALLOC(&g_rawCoffAllocator, newDirectives.length(), 8u); memcpy(section.data, newDirectives.data(), newDirectives.length()); // we assume that there's only one directive section in a COFF file return; } } } template static void PatchFunctionSymbol(RawCoffType* rawCoff, size_t symbolIndex, uint32_t sectionIndex) { LC_LOG_DEV("Patching function symbol %d (%s)", symbolIndex, rawCoff->stringTable[symbolIndex].c_str()); coff::RawSection& section = rawCoff->sections[sectionIndex]; uint8_t* code = static_cast(section.data); code[0] = Assembler::Opcode::RET[0]; } void PatchFunctionSymbol(RawCoff* rawCoff, size_t symbolIndex, uint32_t sectionIndex) { if (rawCoff->type == RawCoffRegular::TYPE) { PatchFunctionSymbol(static_cast(rawCoff), symbolIndex, sectionIndex); } else if (rawCoff->type == RawCoffBigObj::TYPE) { PatchFunctionSymbol(static_cast(rawCoff), symbolIndex, sectionIndex); } } template static void RemoveSymbol(RawCoffType* rawCoff, size_t symbolIndex, SymbolRemovalStrategy::Enum removalStrategy) { LC_LOG_DEV("Removing symbol %d (%s)", symbolIndex, rawCoff->stringTable[symbolIndex].c_str()); auto& symbol = rawCoff->symbols[symbolIndex]; symbol.Type = IMAGE_SYM_TYPE_NULL; symbol.StorageClass = IMAGE_SYM_CLASS_NULL; // rather than removing the symbol, we fake the removed symbol by putting it in an "appropriate" section. // we need two different strategies, depending on the linker used. // MSVC can be tricked by putting the symbol into the debug section, but LLD will report // "error: relocation against symbol in discarded section" in this case. // LLD can be tricked by putting the symbol into an absolute section, but MSVC will report // "error LNK2016: absolute symbol '&' used as target of REL32 relocation in section" in this case. symbol.SectionNumber = removalStrategy; symbol.Value = 0u; // replace the symbol name with the shortest possible (illegal in C++) identifier memcpy(symbol.N.ShortName, "&", 2u); } void RemoveSymbol(RawCoff* rawCoff, size_t symbolIndex, SymbolRemovalStrategy::Enum removalStrategy) { if (rawCoff->type == RawCoffRegular::TYPE) { RemoveSymbol(static_cast(rawCoff), symbolIndex, removalStrategy); } else if (rawCoff->type == RawCoffBigObj::TYPE) { RemoveSymbol(static_cast(rawCoff), symbolIndex, removalStrategy); } } void RemoveRelocations(RawCoff* rawCoff, size_t symbolIndex) { LC_LOG_DEV("Removing relocations pointing to symbol %d (%s)", symbolIndex, rawCoff->stringTable[symbolIndex].c_str()); for (size_t i = 0u; i < rawCoff->sections.size(); ++i) { RawSection& section = rawCoff->sections[i]; for (auto it = section.relocations.begin(); it != section.relocations.end(); /*nothing*/) { const IMAGE_RELOCATION& relocation = *it; if (relocation.SymbolTableIndex == symbolIndex) { it = section.relocations.erase(it); } else { ++it; } } } } void RemoveSection(RawCoff* rawCoff, size_t sectionIndex) { LC_LOG_DEV("Removing section %d (%s)", sectionIndex, GetSectionName(rawCoff->rawStringTable.data, &rawCoff->sections[sectionIndex].header).c_str()); // rather than really removing the section, we make it zero-sized without raw data, relocations, or line numbers. // otherwise, we would have to update the section numbers of all symbols for each removed section, which could // get complicated if we want to do it efficiently. RawSection& section = rawCoff->sections[sectionIndex]; LC_FREE(&g_rawCoffAllocator, section.data, section.header.SizeOfRawData); section.header.SizeOfRawData = 0u; section.header.PointerToRawData = 0u; section.data = nullptr; section.header.NumberOfRelocations = 0u; section.header.PointerToRelocations = 0u; section.relocations.clear(); section.header.NumberOfLinenumbers = 0u; section.header.PointerToLinenumbers = 0u; section.lineNumbers.clear(); // furthermore, we also rename the section and set our own flags that tell the linker that this section // can and should be discarded. memcpy(section.header.Name, ".remove", 8u); section.header.Characteristics = IMAGE_SCN_LNK_REMOVE | IMAGE_SCN_MEM_DISCARDABLE; section.wasRemoved = true; } void RemoveAssociatedComdatSections(RawCoff* rawCoff, size_t sectionIndex) { LC_LOG_DEV("Removing COMDAT sections associated with section %d", sectionIndex); LC_LOG_INDENT_DEV; const types::vector& associatedSections = rawCoff->associatedComdatSections[static_cast(sectionIndex)]; const size_t count = associatedSections.size(); for (size_t i = 0u; i < count; ++i) { RemoveSection(rawCoff, associatedSections[i]); } } template static CoffDB* GatherDatabase(ObjFile* objFile, uint32_t uniqueId) { using CoffHeader = typename coffDetail::CoffType::TypeMap::HeaderType; using CoffSymbol = typename coffDetail::CoffType::TypeMap::SymbolType; using CoffAuxSymbol = typename coffDetail::CoffType::TypeMap::AuxSymbolType; CoffDB* coffDb = new CoffDB; coffDb->symbolAllocator = new PoolAllocator("COFF symbols", sizeof(coff::Symbol), alignof(coff::Symbol), 8192u); coffDb->relocationAllocator = new PoolAllocator("Relocations", sizeof(coff::Relocation), alignof(coff::Relocation), 8192u); const void* objFileBase = Filesystem::GetMemoryMappedFileData(objFile->memoryFile); const uint32_t sectionCount = coffDetail::GetNumberOfSections(objFileBase); const uint32_t symbolCount = coffDetail::GetNumberOfSymbols(objFileBase); const IMAGE_SECTION_HEADER* sectionHeader = coffDetail::GetSectionHeader(objFileBase); const void* symbolTable = coffDetail::GetSymbolTable(objFileBase); const char* stringTable = coffDetail::GetStringTable(symbolTable, symbolCount); // pre-allocate data structures coffDb->sections.resize(sectionCount); coffDb->stringTable.resize(symbolCount); coffDb->symbols.reserve(symbolCount); // temporarily hold symbols per section in order to make assigning relocations to symbols easier types::vector> symbolsForSection; symbolsForSection.resize(sectionCount); // grab all sections and store CRT sections in a separate data structure, because they are needed for // finding dynamic initializers. we additionally use a lookup-table to make assigning symbols to CRT // sections faster. types::vector lutSectionIndexToCrtSection; lutSectionIndexToCrtSection.resize(sectionCount, INVALID_CRT_SECTION); for (unsigned int i = 0u; i < sectionCount; ++i) { const IMAGE_SECTION_HEADER* section = sectionHeader + i; Section& s = coffDb->sections[i]; s.name = GetSectionName(stringTable, section); s.rawDataSize = section->SizeOfRawData; s.rawDataRva = section->PointerToRawData; s.characteristics = section->Characteristics; s.comdatSelection = 0u; // store CRT sections in a separate data structure, they are needed for finding dynamic initializers if (string::Contains(s.name.c_str(), ".CRT$")) { // add this section to the lookup-table lutSectionIndexToCrtSection[i] = static_cast(coffDb->crtSections.size()); const CrtSection crtSection { s.name, section->SizeOfRawData, section->PointerToRawData, {} }; coffDb->crtSections.push_back(crtSection); } // we would like to reserve space for the symbols of each section to avoid allocations where possible. // because we don't know yet how many symbols a section holds, using the number of relocations is a // good approximation. const unsigned int relocationCount = coffDetail::GetRelocationCount(objFileBase, section); symbolsForSection[i].reserve(relocationCount); } // unfortunately, some compilers such as VS 2013 and earlier do *not* generate fully unique names in COFFs // for certain symbols. // the simplest example are static (internal) data symbols with the same name in different namespaces: // namespace a // { // static int g_counter = 10; // } // namespace b // { // static int g_counter = 20; // } // the corresponding COFF file will have two symbols that are both named "g_counter", so there is no way // to distinguish them. at least the compiler only does this for data symbols, never for function symbols. // this means that we need to keep track of non-unique data symbols and try to fix them accordingly. types::StringMap uniqueStaticDataSymbols; uniqueStaticDataSymbols.reserve(16u); for (unsigned int i = 0u; i < symbolCount; ++i) { const CoffSymbol* symbol = coffDetail::GetSymbol(symbolTable, i); // ignore absolute, debug and section symbols if (coffDetail::IsAbsoluteSymbol(symbol)) { } else if (coffDetail::IsDebugSymbol(symbol)) { } else if (coffDetail::IsUndefinedSymbol(symbol)) { coffDb->stringTable[i] = GetSymbolName(stringTable, symbol, uniqueId, 0u); } else if (coffDetail::IsSectionSymbol(symbol)) { // if this is a COMDAT section, grab its selection number from the auxiliary record const uint32_t sectionIndex = coffDetail::GetSectionIndex(symbol); const IMAGE_SECTION_HEADER* section = sectionHeader + sectionIndex; if (coffDetail::IsComdatSection(section)) { // the auxiliary record holds information about the COMDAT section. according to the COFF spec 5.5.6, // a COMDAT section always has one auxiliary record which is "the COMDAT symbol". if (symbol->NumberOfAuxSymbols == 1u) { const CoffAuxSymbol* auxSymbol = coffDetail::GetSymbol(symbolTable, i + 1u); coffDb->sections[sectionIndex].comdatSelection = auxSymbol->Section.Selection; } } coffDb->stringTable[i] = GetSectionName(stringTable, section); } else { // this symbol is stored in the COFF. coffDb->stringTable[i] = GetSymbolName(stringTable, symbol, uniqueId, 0u); const ImmutableString& name = coffDb->stringTable[i]; // we are not interested in certain types of symbols. // they never store any relocations and don't convey any kind of meaningful information regarding // relocations. if (!coffDetail::IsLabelSymbol(symbol) && IsInterestingSymbol(name)) { const uint32_t sectionIndex = coffDetail::GetSectionIndex(symbol); const IMAGE_SECTION_HEADER* section = sectionHeader + sectionIndex; const uint32_t rva = section->PointerToRawData + symbol->Value; const SymbolType::Enum type = DetermineSymbolType(symbol); Symbol* newSymbol = LC_NEW(coffDb->symbolAllocator, Symbol) { i, rva, sectionIndex, type, {} }; newSymbol->relocations.reserve(32u); coffDb->symbols.push_back(newSymbol); // add the symbol to the corresponding CRT section, if any const uint32_t crtSectionIndex = lutSectionIndexToCrtSection[sectionIndex]; if (crtSectionIndex != INVALID_CRT_SECTION) { CrtSection& crtSection = coffDb->crtSections[crtSectionIndex]; crtSection.symbols.push_back(newSymbol); } if (type == SymbolType::STATIC_DATA) { // make sure this symbol is unique auto insertIterator = uniqueStaticDataSymbols.emplace(name, static_cast(0u)); if (insertIterator.second == false) { // the name of this symbol is not unique, inform the user. // when compiling with control-flow guard (CFG), the compiler will generate // non-unique __guard_fids__ symbols - ignore those. const ImmutableString& symbolName = GetSymbolName(stringTable, symbol); if (!string::Matches(symbolName.c_str(), "__guard_fids__")) { // BEGIN EPIC MOD // VS2019 seems to generate duplicate unwind and pdata block for the "dynamic atexit destructor" method (__F) under some conditions that I can't determine. // Also, do not generate warnings for symbols in COMDATs if (!coffDetail::IsComdatSection(section)) { if (!string::StartsWith(symbolName.c_str(), "$unwind$??__F") && !string::StartsWith(symbolName.c_str(), "$pdata$??__F")) { LC_WARNING_USER("Non-unique symbol %s found in COFF file %s. Do not change the order of these variables while live coding, or consider upgrading to a newer compiler (VS 2015 or later)", symbolName.c_str(), objFile->filename.c_str()); } else { LC_LOG_USER("Non-unique at-exit symbol %s found in COFF file %s. These sometimes appear in debug builds", symbolName.c_str(), objFile->filename.c_str()); } } // END EPIC MOD } // as a workaround, try generating a unique name for it. // this does not fix potential issues in all cases, but works successfully in cases where // this compiland is never recompiled, or where the order of variables in the file doesn't change. // increase the counter associated with this name and generate a new name from it ++insertIterator.first->second; coffDb->stringTable[i] = GetSymbolName(stringTable, symbol, uniqueId, insertIterator.first->second); } } symbolsForSection[sectionIndex].push_back(newSymbol); } } // skip auxiliary symbols const BYTE auxSymbolCount = symbol->NumberOfAuxSymbols; i += auxSymbolCount; } // walk through all relocations { for (unsigned int i = 0u; i < sectionCount; ++i) { const IMAGE_SECTION_HEADER* section = sectionHeader + i; // ignore relocations inside sections that will either not be part of the final image, or can be // discarded at will. those are mostly ".drectve" and ".debug" sections. if (coffDetail::IsDiscardableSection(section)) { continue; } if (!coffDetail::IsPartOfImage(section)) { continue; } const unsigned int relocationCount = coffDetail::GetRelocationCount(objFileBase, section); const IMAGE_RELOCATION* relocations = pointer::Offset(objFileBase, section->PointerToRelocations); const types::vector& symbolsForCurrentSection = symbolsForSection[i]; if (symbolsForCurrentSection.size() == 0u) { // this section does not hold any symbols to which we could assign relocations continue; } // sort symbols in this section by RVA in order to make associating relocations with symbols much easier std::sort(symbolsForSection[i].begin(), symbolsForSection[i].end(), &SortSymbolByAscendingRVA); unsigned int currentSymbolIndex = 0u; // if relocation count in section has overflown, ignore the first relocation const unsigned int startRelocation = (relocationCount > 0xFFFFu) ? 1u : 0u; for (unsigned int j = 0u; j < relocationCount - startRelocation; ++j) { const IMAGE_RELOCATION* relocation = relocations + j + startRelocation; // ignore debug relocations if (coffDetail::IsDebugRelocation(relocation)) { continue; } const CoffSymbol* symbol = coffDetail::GetSymbol(symbolTable, relocation->SymbolTableIndex); const ImmutableString& dstSymbolName = coffDb->stringTable[relocation->SymbolTableIndex]; // ignore relocations to line numbers and string literals if (!IsInterestingSymbol(dstSymbolName)) { continue; } else if (symbols::IsRttiObjectLocator(dstSymbolName)) { // RTTI Complete Object Locators are a strange thing. // they are always located before the first entry in the vtable, but they cannot be found using the vtable // symbol because that symbol starts at "section + 4", "skipping" the object locator. // example: // symbol "const SFV_Base::`vftable'" (??_7SFV_Base@@6B@)" has relocations to: // 00000000 ??_R4SFV_Base@@6B@ (const SFV_Base::`RTTI Complete Object Locator') // 00000004 ??_ESFV_Base@@UAEPAXI@Z (public: virtual void * __thiscall SFV_Base::`vector deleting destructor'(unsigned int)) // 00000008 ?TestFunction@SFV_Base@@UAEXXZ (public: virtual void __thiscall SFV_Base::TestFunction(void)) // according to the symbol table, the vtable symbol sits in section .rdata, #1E, but at offset 4: // 00000004 SECT1E External ??_7SFV_Base@@6B@ (const SFV_Base::`vftable') // this means that we don't have a symbol for which we can store this relocation. // it would be vtable - 4, but we don't handle that. it doesn't matter because the object locators // are public symbols anyway. continue; } const DWORD relocationRva = section->PointerToRawData + relocation->VirtualAddress; // find the symbol that contains this relocation and determine the RVA relative to the start // of the data or function. note that walking through the symbols of this section like this only // works because both the relocations as well as the symbols are sorted by their RVA. while (currentSymbolIndex < symbolsForCurrentSection.size() - 1u) { const uint32_t nextIndex = currentSymbolIndex + 1u; const uint32_t nextSymbolRva = symbolsForCurrentSection[nextIndex]->rva; if (relocationRva < nextSymbolRva) { // found symbol that holds this relocation break; } // relocation does not belong to this symbol, but possibly to the next currentSymbolIndex = nextIndex; } Symbol* srcSymbol = symbolsForCurrentSection[currentSymbolIndex]; if (relocationRva < srcSymbol->rva) { LC_ERROR_DEV("Cannot find symbol that contains relocation at 0x%X for destination symbol %s in file %s", relocationRva, dstSymbolName.c_str(), objFile->filename.c_str()); continue; } // RVA relative to the RVA of the symbol that holds the relocation const uint32_t srcRva = relocationRva - srcSymbol->rva; const int32_t sectionIndex = symbol->SectionNumber - 1; switch (relocation->Type) { case Relocation::Type::SECTION_RELATIVE: case Relocation::Type::RELATIVE: case Relocation::Type::VA_32: case Relocation::Type::RVA_32: #if LC_64_BIT case Relocation::Type::RELATIVE_OFFSET_1: case Relocation::Type::RELATIVE_OFFSET_2: case Relocation::Type::RELATIVE_OFFSET_3: case Relocation::Type::RELATIVE_OFFSET_4: case Relocation::Type::RELATIVE_OFFSET_5: #endif { const uint32_t* relocationAddress = pointer::Offset(objFileBase, relocationRva); const uint32_t destinationOffset = *relocationAddress; Relocation* newRelocation = LC_NEW(coffDb->relocationAllocator, Relocation) { relocation->SymbolTableIndex, srcRva, destinationOffset, sectionIndex, static_cast(relocation->Type), srcSymbol->type, DetermineSymbolType(symbol), coffDetail::IsSectionSymbol(symbol) }; srcSymbol->relocations.emplace_back(newRelocation); } break; #if LC_64_BIT case Relocation::Type::VA_64: { const uint64_t* relocationAddress = pointer::Offset(objFileBase, relocationRva); // read the destination offset as 64-bit, but convert it into a 32-bit offset. // no symbol can ever be larger than 4 GB. const uint64_t destinationOffset = *relocationAddress; Relocation* newRelocation = LC_NEW(coffDb->relocationAllocator, Relocation) { relocation->SymbolTableIndex, srcRva, static_cast(destinationOffset), sectionIndex, static_cast(relocation->Type), srcSymbol->type, DetermineSymbolType(symbol), coffDetail::IsSectionSymbol(symbol) }; srcSymbol->relocations.emplace_back(newRelocation); } break; #endif default: LC_ERROR_DEV("Unknown relocation %d", relocation->Type); break; }; } } } // minimize amount of memory needed and generate lookup table coffDb->symbols.shrink_to_fit(); { coffDb->indexToSymbol.resize(symbolCount); const size_t count = coffDb->symbols.size(); for (size_t i = 0u; i < count; ++i) { Symbol* symbol = coffDb->symbols[i]; symbol->relocations.shrink_to_fit(); coffDb->indexToSymbol[symbol->nameIndex] = symbol; } } // sort symbols in CRT sections by RVA { const size_t count = coffDb->crtSections.size(); for (size_t i = 0u; i < count; ++i) { std::sort(coffDb->crtSections[i].symbols.begin(), coffDb->crtSections[i].symbols.end(), &SortSymbolByAscendingRVA); } } return coffDb; } CoffDB* GatherDatabase(ObjFile* file, uint32_t uniqueId) { const coffDetail::CoffType::Enum type = coffDetail::GetCoffType(Filesystem::GetMemoryMappedFileData(file->memoryFile)); if (type == coffDetail::CoffType::COFF) { return GatherDatabase(file, uniqueId); } else if (type == coffDetail::CoffType::BIGOBJ) { return GatherDatabase(file, uniqueId); } return nullptr; } void DestroyDatabase(CoffDB* db) { const size_t count = db->symbols.size(); for (size_t i = 0u; i < count; ++i) { Symbol* symbol = db->symbols[i]; const size_t relocationCount = symbol->relocations.size(); for (size_t j = 0u; j < relocationCount; ++j) { Relocation* relocation = symbol->relocations[j]; LC_DELETE(db->relocationAllocator, relocation, sizeof(Relocation)); } LC_DELETE(db->symbolAllocator, symbol, sizeof(Symbol)); } delete db->symbolAllocator; delete db->relocationAllocator; delete db; } template static UnresolvedSymbolDB* GatherUnresolvedSymbolDatabase(ObjFile* objFile, uint32_t uniqueId) { using CoffHeader = typename coffDetail::CoffType::TypeMap::HeaderType; using CoffSymbol = typename coffDetail::CoffType::TypeMap::SymbolType; UnresolvedSymbolDB* symbolDb = new UnresolvedSymbolDB; const void* objFileBase = Filesystem::GetMemoryMappedFileData(objFile->memoryFile); const uint32_t symbolCount = coffDetail::GetNumberOfSymbols(objFileBase); const void* symbolTable = coffDetail::GetSymbolTable(objFileBase); const char* stringTable = coffDetail::GetStringTable(symbolTable, symbolCount); // pre-allocate data structures symbolDb->symbols.reserve(symbolCount); symbolDb->symbolIndex.reserve(symbolCount); for (uint32_t i = 0u; i < symbolCount; ++i) { const CoffSymbol* symbol = coffDetail::GetSymbol(symbolTable, i); // ignore absolute, debug and section symbols if (coffDetail::IsAbsoluteSymbol(symbol)) { } else if (coffDetail::IsDebugSymbol(symbol)) { } else if (coffDetail::IsUndefinedSymbol(symbol)) { ImmutableString name = GetSymbolName(stringTable, symbol, uniqueId, 0u); symbolDb->symbols.emplace_back(std::move(name)); symbolDb->symbolIndex.emplace_back(i); } else if (coffDetail::IsSectionSymbol(symbol)) { } // skip auxiliary symbols const BYTE auxSymbolCount = symbol->NumberOfAuxSymbols; i += auxSymbolCount; } return symbolDb; } UnresolvedSymbolDB* GatherUnresolvedSymbolDatabase(ObjFile* file, uint32_t uniqueId) { const coffDetail::CoffType::Enum type = coffDetail::GetCoffType(Filesystem::GetMemoryMappedFileData(file->memoryFile)); if (type == coffDetail::CoffType::COFF) { return GatherUnresolvedSymbolDatabase(file, uniqueId); } else if (type == coffDetail::CoffType::BIGOBJ) { return GatherUnresolvedSymbolDatabase(file, uniqueId); } return nullptr; } void DestroyDatabase(UnresolvedSymbolDB* db) { delete db; } template static ExternalSymbolDB* GatherExternalSymbolDatabase(ObjFile* objFile, uint32_t uniqueId) { using CoffHeader = typename coffDetail::CoffType::TypeMap::HeaderType; using CoffSymbol = typename coffDetail::CoffType::TypeMap::SymbolType; ExternalSymbolDB* symbolDb = new ExternalSymbolDB; const void* objFileBase = Filesystem::GetMemoryMappedFileData(objFile->memoryFile); const uint32_t symbolCount = coffDetail::GetNumberOfSymbols(objFileBase); const void* symbolTable = coffDetail::GetSymbolTable(objFileBase); const char* stringTable = coffDetail::GetStringTable(symbolTable, symbolCount); // pre-allocate data structures symbolDb->symbols.reserve(symbolCount); symbolDb->types.reserve(symbolCount); for (uint32_t i = 0u; i < symbolCount; ++i) { const CoffSymbol* symbol = coffDetail::GetSymbol(symbolTable, i); // ignore absolute, debug, section and undefined symbols if (coffDetail::IsAbsoluteSymbol(symbol)) { } else if (coffDetail::IsDebugSymbol(symbol)) { } else if (coffDetail::IsUndefinedSymbol(symbol)) { } else if (coffDetail::IsSectionSymbol(symbol)) { } else { const SymbolType::Enum type = DetermineSymbolType(symbol); if ((type == SymbolType::EXTERNAL_DATA) || (type == SymbolType::EXTERNAL_FUNCTION)) { // this is an external symbol stored in the COFF ImmutableString name = GetSymbolName(stringTable, symbol, uniqueId, 0u); symbolDb->symbols.emplace_back(std::move(name)); symbolDb->types.emplace_back(type); } } // skip auxiliary symbols const BYTE auxSymbolCount = symbol->NumberOfAuxSymbols; i += auxSymbolCount; } return symbolDb; } ExternalSymbolDB* GatherExternalSymbolDatabase(ObjFile* file, uint32_t uniqueId) { const coffDetail::CoffType::Enum type = coffDetail::GetCoffType(Filesystem::GetMemoryMappedFileData(file->memoryFile)); if (type == coffDetail::CoffType::COFF) { return GatherExternalSymbolDatabase(file, uniqueId); } else if (type == coffDetail::CoffType::BIGOBJ) { return GatherExternalSymbolDatabase(file, uniqueId); } return nullptr; } void DestroyDatabase(ExternalSymbolDB* db) { delete db; } size_t GetIndexCount(const CoffDB* coffDb) { return coffDb->indexToSymbol.size(); } const Symbol* GetSymbolByIndex(const CoffDB* coffDb, size_t index) { return coffDb->indexToSymbol[index]; } const ImmutableString& GetSymbolName(const CoffDB* coffDb, const Symbol* symbol) { return coffDb->stringTable[symbol->nameIndex]; } const ImmutableString& GetRelocationDstSymbolName(const CoffDB* coffDb, const Relocation* relocation) { return coffDb->stringTable[relocation->dstSymbolNameIndex]; } const ImmutableString& GetUnresolvedSymbolName(const CoffDB* coffDb, size_t unresolvedSymbolIndex) { return coffDb->stringTable[unresolvedSymbolIndex]; } SymbolType::Enum GetRelocationSrcSymbolType(const Relocation* relocation) { return relocation->srcSymbolType; } SymbolType::Enum GetRelocationDstSymbolType(const Relocation* relocation) { return relocation->dstSymbolType; } const CrtSection* FindCrtSection(const CoffDB* coffDb, const ImmutableString& sectionName, uint32_t sectionSize) { const CrtSection* foundSection = nullptr; const size_t count = coffDb->crtSections.size(); for (size_t i = 0u; i < count; ++i) { const CrtSection* section = &coffDb->crtSections[i]; if (section->rawDataSize != sectionSize) { continue; } else if (section->name != sectionName) { continue; } else if (foundSection) { // matching section has been found more than once return nullptr; } foundSection = section; } return foundSection; } std::vector FindMatchingCrtSections(const CoffDB* coffDb, const ImmutableString& sectionName, uint32_t sectionSize) { std::vector result; result.reserve(4u); const size_t count = coffDb->crtSections.size(); for (size_t i = 0u; i < count; ++i) { const CrtSection* section = &coffDb->crtSections[i]; if (section->rawDataSize != sectionSize) { continue; } else if (section->name != sectionName) { continue; } result.push_back(section); } return result; } uint32_t FindCoffSuffix(const ImmutableString& symbolName) { return symbolName.Find(COFF_SUFFIX); } uint32_t GetRelocationDestinationSectionCharacteristics(const coff::CoffDB* coffDb, const coff::Relocation* relocation) { if (relocation->dstSectionIndex < 0) { return 0u; } const uint32_t index = static_cast(relocation->dstSectionIndex); const coff::Section& section = coffDb->sections[index]; return section.characteristics; } const void* GetBaseAddress(const ObjFile* file) { return Filesystem::GetMemoryMappedFileData(file->memoryFile); } bool IsFunctionSymbol(SymbolType::Enum type) { switch (type) { case SymbolType::EXTERNAL_DATA: return false; case SymbolType::EXTERNAL_FUNCTION: return true; case SymbolType::STATIC_DATA: return false; case SymbolType::STATIC_FUNCTION: return true; case SymbolType::UNKNOWN_DATA: return false; case SymbolType::UNKNOWN_FUNCTION: return true; default: LC_ERROR_DEV("Unexpected SymbolType value %d", type); return false; } } char GetCoffSuffix(void) { return COFF_SUFFIX; } wchar_t GetWideCoffSuffix(void) { return COFF_SUFFIX_WIDE; } const ImmutableString& GetTlsSectionName(void) { return TLS_SECTION; } bool IsInterestingSymbol(const ImmutableString& name) { if (symbols::IsStringLiteral(name)) { return false; } else if (symbols::IsFloatingPointSseAvxConstant(name)) { return false; } else if (symbols::IsLineNumber(name)) { return false; } return true; } bool IsMSVCJustMyCodeSection(const char* sectionName) { return string::Matches(sectionName, ".msvcjmc"); } } #endif