926 lines
34 KiB
C++
926 lines
34 KiB
C++
// Copyright 2011-2020 Molecular Matters GmbH, all rights reserved.
|
|
|
|
#if LC_VERSION == 1
|
|
|
|
// BEGIN EPIC MOD
|
|
//#include PCH_INCLUDE
|
|
// END EPIC MOD
|
|
#include "LC_SymbolReconstruction.h"
|
|
|
|
#include "LC_StringUtil.h"
|
|
#include "LC_PointerUtil.h"
|
|
#include "LC_NameMangling.h"
|
|
#include "LC_DiaUtil.h"
|
|
|
|
|
|
namespace
|
|
{
|
|
static inline bool HasLowerRVA(const symbols::Contribution* lhs, uint32_t rva)
|
|
{
|
|
return lhs->rva < rva;
|
|
}
|
|
}
|
|
|
|
|
|
void symbols::ReconstructFromExecutableCoff
|
|
(
|
|
const symbols::Provider* provider,
|
|
const executable::Image* image,
|
|
const executable::ImageSectionDB* imageSections,
|
|
const coff::CoffDB* coffDb,
|
|
const types::StringSet& strippedSymbols,
|
|
const symbols::ObjPath& objPath,
|
|
const symbols::ContributionDB* contributionDb,
|
|
const symbols::ThunkDB* thunkDb,
|
|
const symbols::ImageSectionDB* imageSectionDb,
|
|
symbols::SymbolDB* symbolDB
|
|
)
|
|
{
|
|
const executable::PreferredBase imageBase = executable::GetPreferredBase(image);
|
|
const uint32_t imageSize = executable::GetSize(image);
|
|
|
|
LC_LOG_DEV("Gathering symbols from COFF file %s", objPath.c_str());
|
|
LC_LOG_INDENT_DEV;
|
|
|
|
LC_LOG_DEV("Symbols in COFF: %d", coffDb->symbols.size());
|
|
LC_LOG_DEV("Symbols stripped: %d", strippedSymbols.size());
|
|
|
|
// gather symbols by following relocation "paths", backtracking from the location in the executable
|
|
// to the symbol's origin RVA. our starting entry paths are the functions and data of which we already
|
|
// know the name and RVA.
|
|
size_t unknownSymbolsToFind = 0u;
|
|
types::vector<const coff::Symbol*> openSymbols;
|
|
openSymbols.reserve(coffDb->symbols.size());
|
|
{
|
|
const size_t count = coff::GetIndexCount(coffDb);
|
|
for (size_t i = 0u; i < count; ++i)
|
|
{
|
|
// do we have a symbol at that index?
|
|
const coff::Symbol* symbol = coff::GetSymbolByIndex(coffDb, i);
|
|
if (symbol)
|
|
{
|
|
// yes, so check whether this symbol is known already
|
|
const ImmutableString& symbolName = coff::GetSymbolName(coffDb, symbol);
|
|
const symbols::Symbol* srcSymbol = symbols::FindSymbolByName(symbolDB, symbolName);
|
|
if (srcSymbol)
|
|
{
|
|
LC_LOG_DEV("Known symbol %s at 0x%X", symbolName.c_str(), srcSymbol->rva);
|
|
openSymbols.push_back(symbol);
|
|
}
|
|
else if (strippedSymbols.find(symbolName) != strippedSymbols.end())
|
|
{
|
|
LC_LOG_DEV("Stripped symbol %s", symbolName.c_str());
|
|
}
|
|
else
|
|
{
|
|
const coff::Section& coffSection = coffDb->sections[symbol->sectionIndex];
|
|
if (coff::IsMSVCJustMyCodeSection(coffSection.name.c_str()))
|
|
{
|
|
LC_LOG_DEV("JustMyCode symbol %s", symbolName.c_str());
|
|
}
|
|
else
|
|
{
|
|
LC_LOG_DEV("Unknown symbol %s", symbolName.c_str());
|
|
++unknownSymbolsToFind;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// we do not have a symbol stored in the COFF, because it might be external/unresolved.
|
|
// if so, chances are very high that this symbol is already known publicly.
|
|
const ImmutableString& symbolName = coff::GetUnresolvedSymbolName(coffDb, i);
|
|
if (symbolName.GetLength() == 0u)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
if (symbols::FindSymbolByName(symbolDB, symbolName))
|
|
{
|
|
LC_LOG_DEV("Publicly known symbol %s", symbolName.c_str());
|
|
}
|
|
else if (!coff::IsInterestingSymbol(symbolName))
|
|
{
|
|
// relocations to those symbols are not stored in the COFF, hence we
|
|
// can not reconstruct these anyway
|
|
LC_LOG_DEV("Non-interesting symbol %s", symbolName.c_str());
|
|
}
|
|
else if (symbols::IsImageBaseRelatedSymbol(symbolName))
|
|
{
|
|
LC_LOG_DEV("Linker-generated image base symbol %s", symbolName.c_str());
|
|
}
|
|
else if (symbols::IsTlsArrayRelatedSymbol(symbolName))
|
|
{
|
|
LC_LOG_DEV("Compiler-generated symbol %s", symbolName.c_str());
|
|
}
|
|
else if (symbols::IsSectionSymbol(symbolName))
|
|
{
|
|
LC_LOG_DEV("Section symbol %s", symbolName.c_str());
|
|
}
|
|
else if (strippedSymbols.find(symbolName) != strippedSymbols.end())
|
|
{
|
|
LC_LOG_DEV("Stripped symbol %s", symbolName.c_str());
|
|
}
|
|
else if (symbolDB->symbolsWithoutRva.find(symbolName) != symbolDB->symbolsWithoutRva.end())
|
|
{
|
|
// ignore symbols without an RVA. those are often generated by the compiler or linker,
|
|
// are being relocated to, but store absolute values encoded in their offset in the PDB.
|
|
LC_LOG_DEV("Compiler- or linker-generated symbol %s without an RVA", symbolName.c_str());
|
|
}
|
|
else if (string::Contains(symbolName.c_str(), "$__resumable"))
|
|
{
|
|
LC_LOG_DEV("Coroutine symbol %s", symbolName.c_str());
|
|
}
|
|
else
|
|
{
|
|
LC_LOG_DEV("Unknown unresolved symbol %s", symbolName.c_str());
|
|
++unknownSymbolsToFind;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
LC_LOG_DEV("Unknown symbols left to find: %d", unknownSymbolsToFind);
|
|
|
|
// do we already know all symbols?
|
|
if (unknownSymbolsToFind == 0u)
|
|
{
|
|
LC_LOG_DEV("Know all symbols already, nothing to do");
|
|
return;
|
|
}
|
|
|
|
// keep walking relocations of all open symbols to determine the RVA of symbols contained in this .obj
|
|
types::unordered_set<const coff::Symbol*> walkedAlready;
|
|
types::unordered_set<const coff::Symbol*> triedReconstructingAlready;
|
|
|
|
unsigned int pass = 0u;
|
|
|
|
walkOpenSymbols:
|
|
while (openSymbols.size() > 0u)
|
|
{
|
|
const coff::Symbol* symbol = openSymbols.back();
|
|
openSymbols.pop_back();
|
|
|
|
// check whether we walked this symbol already
|
|
const auto it = walkedAlready.find(symbol);
|
|
if (it != walkedAlready.end())
|
|
{
|
|
// handled already, nothing more to do
|
|
continue;
|
|
}
|
|
|
|
// check whether the symbol is actually the one that contributed its code.
|
|
// in case of COMDATs available in both executable and static libraries, this might not
|
|
// be true and would lead to completely wrong symbols being reconstructed.
|
|
const ImmutableString& srcSymbolName = coff::GetSymbolName(coffDb, symbol);
|
|
const symbols::Symbol* srcSymbol = symbols::FindSymbolByName(symbolDB, srcSymbolName);
|
|
if (srcSymbol)
|
|
{
|
|
const symbols::Contribution* symbolContribution = symbols::FindContributionByRVA(contributionDb, srcSymbol->rva);
|
|
if (symbolContribution)
|
|
{
|
|
const ImmutableString& contributingCompiland = symbols::GetContributionCompilandName(contributionDb, symbolContribution);
|
|
if (contributingCompiland != objPath)
|
|
{
|
|
LC_LOG_DEV("Not walking symbol %s from contribution in different file %s", srcSymbolName.c_str(), contributingCompiland.c_str());
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
LC_LOG_DEV("Walking relocations of symbol %s", srcSymbolName.c_str());
|
|
LC_LOG_INDENT_DEV;
|
|
|
|
const size_t relocationCount = symbol->relocations.size();
|
|
for (size_t i = 0u; i < relocationCount; ++i)
|
|
{
|
|
const coff::Relocation* relocation = symbol->relocations[i];
|
|
|
|
// ignore relocations to symbols in .msvcjmc (MSVC JustMyCode) sections
|
|
if (relocation->dstSectionIndex >= 0)
|
|
{
|
|
const uint32_t index = static_cast<uint32_t>(relocation->dstSectionIndex);
|
|
const coff::Section& section = coffDb->sections[index];
|
|
if (coff::IsMSVCJustMyCodeSection(section.name.c_str()))
|
|
{
|
|
LC_LOG_DEV("Ignoring relocation to symbol in section %s", section.name.c_str());
|
|
continue;
|
|
}
|
|
}
|
|
|
|
const ImmutableString& dstSymbolName = coff::GetRelocationDstSymbolName(coffDb, relocation);
|
|
|
|
// the symbol we are looking for might already be in the database because of the public symbols gathered from the PDB
|
|
if (symbols::FindSymbolByName(symbolDB, dstSymbolName))
|
|
{
|
|
LC_LOG_DEV("Publicly known symbol %s", dstSymbolName.c_str());
|
|
|
|
// we know this symbol already, but we might not have walked its relocations yet.
|
|
// add it to the list and continue.
|
|
const coff::Symbol* nextSymbol = coff::GetSymbolByIndex(coffDb, relocation->dstSymbolNameIndex);
|
|
if (nextSymbol)
|
|
{
|
|
openSymbols.push_back(nextSymbol);
|
|
}
|
|
|
|
continue;
|
|
}
|
|
else if (strippedSymbols.find(dstSymbolName) != strippedSymbols.end())
|
|
{
|
|
// the relocation points to a symbol we should ignore
|
|
LC_LOG_DEV("Ignoring stripped symbol \"%s\"", dstSymbolName.c_str());
|
|
continue;
|
|
}
|
|
else if (symbols::IsImageBaseRelatedSymbol(dstSymbolName))
|
|
{
|
|
// the linker-generated __ImageBase always sits at RVA zero, and relocations should never be patched
|
|
LC_LOG_DEV("Ignoring destination symbol \"%s\"", dstSymbolName.c_str());
|
|
continue;
|
|
}
|
|
else if (symbols::IsTlsArrayRelatedSymbol(dstSymbolName))
|
|
{
|
|
// compiler-generated symbols such as __tls_array don't have any RVA, because they always reside at
|
|
// the same address, e.g. relative to a segment register.
|
|
// one such example would be how thread-local storage variables are accessed:
|
|
// the generated code always fetches the flat address of the thread-local storage array from the TEB (https://en.wikipedia.org/wiki/Win32_Thread_Information_Block).
|
|
// the TEB itself can be accessed using segment register FS on x86, and GS on x64, so one of the first instructions of thread-local storage access is always going to
|
|
// access the member at 0x2C/0x58 relative to FS/GS, e.g.:
|
|
// mov eax, dword ptr fs:0x2C (x86)
|
|
// mov rax, qword ptr gs:0x58 (x64)
|
|
// therefore, the "RVA" of __tls_array is 0x2C (x86) or 0x58 (x64).
|
|
// see http://www.nynaeve.net/?p=180 for more in-depth information about thread-local storage on Windows.
|
|
// NOTE: we do need the RVA of __tls_index because that is used to set the data segment register to the
|
|
// table used for accessing TLS variables.
|
|
LC_LOG_DEV("Ignoring destination symbol \"%s\"", dstSymbolName.c_str());
|
|
continue;
|
|
}
|
|
else if (symbols::IsSectionSymbol(dstSymbolName))
|
|
{
|
|
LC_LOG_DEV("Ignoring section symbol \"%s\"", dstSymbolName.c_str());
|
|
continue;
|
|
}
|
|
else if (string::Contains(dstSymbolName.c_str(), "$__resumable"))
|
|
{
|
|
LC_LOG_DEV("Ignoring Coroutine symbol %s", dstSymbolName.c_str());
|
|
continue;
|
|
}
|
|
|
|
if (!srcSymbol)
|
|
{
|
|
LC_ERROR_DEV("Cannot find source symbol %s (%s)",
|
|
srcSymbolName.c_str(),
|
|
nameMangling::UndecorateSymbol(srcSymbolName.c_str(), 0u).c_str());
|
|
continue;
|
|
}
|
|
|
|
const coff::Relocation::Type::Enum type = relocation->type;
|
|
|
|
// the relocation's RVA is relative to the start of the function, and the executable already has all relocations
|
|
// resolved. hence we can backtrack the RVA of the destination symbol by peeking into the executable's code
|
|
// at the address of the relocation.
|
|
const uint32_t relocationRva = srcSymbol->rva + relocation->srcRva;
|
|
|
|
// check for invalid RVAs before trying to reconstruct the symbol.
|
|
// these can occur when a COMDAT gets stripped in an .obj, but is needed by an .obj coming from a library.
|
|
// the COMDAT will then be stripped from the executable, so we shouldn't try reconstructing it.
|
|
{
|
|
#if LC_64_BIT
|
|
if (type == coff::Relocation::Type::VA_64)
|
|
{
|
|
const uint64_t rvaInCode = executable::ReadFromImage<uint64_t>(image, imageSections, relocationRva);
|
|
if (rvaInCode == 0u)
|
|
{
|
|
continue;
|
|
}
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
const uint32_t rvaInCode = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva);
|
|
if (rvaInCode == 0u)
|
|
{
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
// even though the final RVA can only be 32-bit because no image can ever be larger than 4GB, intermediate results
|
|
// can point to addresses in the full 64-bit address space.
|
|
|
|
#if LC_64_BIT
|
|
uint64_t dstRva = 0u;
|
|
#else
|
|
uint32_t dstRva = 0u;
|
|
#endif
|
|
|
|
// backtrack to the real RVA of the destination symbol depending on the type of relocation.
|
|
// 32-BIT NOTE: relative addresses are signed 32-bit offsets, but addressing performed by the CPU
|
|
// works modulo 2^32. this means that it doesn't matter whether we go forward 3GB, or back 1GB -
|
|
// the resulting address will be the same.
|
|
// we therefore carry out all calculations using *unsigned* 32-bit integers, because they have
|
|
// natural overflow/underflow behaviour, and do *not* invoke undefined behaviour like signed integers.
|
|
switch (type)
|
|
{
|
|
case coff::Relocation::Type::RELATIVE:
|
|
|
|
#if LC_64_BIT
|
|
case coff::Relocation::Type::RELATIVE_OFFSET_1:
|
|
case coff::Relocation::Type::RELATIVE_OFFSET_2:
|
|
case coff::Relocation::Type::RELATIVE_OFFSET_3:
|
|
case coff::Relocation::Type::RELATIVE_OFFSET_4:
|
|
case coff::Relocation::Type::RELATIVE_OFFSET_5:
|
|
#endif
|
|
{
|
|
// relative relocations are used for e.g. JMP and CALL instructions and are relative to the address
|
|
// of the next instruction.
|
|
// example:
|
|
// 00015DAA E8 1E B8 FF FF call _printf(0115CDh)
|
|
// the CALL instruction sits at 0x00015DAA and calls printf at 0x0115CD, but this is *not* the address
|
|
// encoded in the CALL instruction. the encoded relative address is 0xFFFFB81E, which is -18402.
|
|
// adding 0xFFFFB81E to 0x00015DAA + 5 (the address of the next instruction!) yields 0x0115CD.
|
|
// NOTE: the relocation points to the address of the *relocation*, not the beginning of
|
|
// the *instruction* (hence we add 4, not 5).
|
|
|
|
const uint32_t rva = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva);
|
|
dstRva = relocationRva + rva + 4ull + coff::Relocation::Type::GetByteDistance(type);
|
|
}
|
|
break;
|
|
|
|
case coff::Relocation::Type::SECTION_RELATIVE:
|
|
{
|
|
// section-relative relocations are used for thread-local storage, e.g. accessing __declspec(thread)
|
|
// variables.
|
|
// example:
|
|
// 00016845 A1 14 35 02 00 mov eax, dword ptr[_tls_index(023514h)]
|
|
// 0001684A 64 8B 0D 2C 00 00 00 mov ecx, dword ptr fs:[2Ch]
|
|
// 00016851 8B 14 81 mov edx, dword ptr[ecx + eax*4]
|
|
// 00016854 8B 82 04 01 00 00 mov eax, dword ptr[edx + 104h]
|
|
// the code accesses a global variable in thread-local storage, which happens relative to the
|
|
// .tls section. the section-relative offset of the variable in question is 0x104, and the relocation
|
|
// directly stores this offset (0x00000104 in the last line).
|
|
|
|
// grab RVA of the symbol's section
|
|
const ImmutableString& sectionName = coff::GetTlsSectionName();
|
|
const symbols::ImageSection* section = symbols::FindImageSectionByName(imageSectionDb, sectionName);
|
|
if (!section)
|
|
{
|
|
LC_ERROR_DEV("Cannot find section %s in image", sectionName.c_str());
|
|
continue;
|
|
}
|
|
|
|
// the relocation itself is 32-bit, always positive
|
|
dstRva = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva) + section->rva;
|
|
}
|
|
break;
|
|
|
|
case coff::Relocation::Type::VA_32:
|
|
{
|
|
#if LC_64_BIT
|
|
// an absolute 32-bit virtual address cannot exist in a 64-bit image, otherwise the .exe/.dll could
|
|
// not be loaded into the upper 32-bits of the address space.
|
|
LC_ERROR_DEV("Ignoring relocation of type %s (%d)", coff::Relocation::Type::ToString(type), type);
|
|
continue;
|
|
#else
|
|
// direct virtual addresses are used for accessing e.g. global symbols, string literals.
|
|
// the instruction directly stores the absolute address of the symbol in question.
|
|
// example:
|
|
// 00015DA5 68 9C 11 02 00 push 2119Ch
|
|
// this pushes the absolute address of a string literal to the stack. the address encoded
|
|
// in the opcode is 0x0002119C, which is the direct address of the string literal in memory.
|
|
dstRva = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva) - imageBase;
|
|
#endif
|
|
}
|
|
break;
|
|
|
|
case coff::Relocation::Type::RVA_32:
|
|
{
|
|
// in 32-bit, this type of relocation is only used for .debug and .rsrc (resource) sections.
|
|
// the latter are only needed by the linker in order to know where to place resources in the executable.
|
|
|
|
// in 64-bit, this type of relocation is used for addressing exception-relevant functions and data,
|
|
// and seldomly for accessing data at an absolute offset to the image base, e.g.
|
|
// mov rcx,qword ptr [r8+rcx*8+1771060h]
|
|
// r8 stores the image base, 1771060h is the value of the RVA_32 relocation.
|
|
dstRva = executable::ReadFromImage<uint32_t>(image, imageSections, relocationRva);
|
|
}
|
|
break;
|
|
|
|
#if LC_64_BIT
|
|
case coff::Relocation::Type::VA_64:
|
|
{
|
|
// direct virtual addresses are used for accessing e.g. global symbols, same as on 32-bit
|
|
dstRva = executable::ReadFromImage<uint64_t>(image, imageSections, relocationRva) - imageBase;
|
|
}
|
|
break;
|
|
#endif
|
|
|
|
case coff::Relocation::Type::UNKNOWN:
|
|
default:
|
|
LC_ERROR_DEV("Unknown relocation type %s (%d)", coff::Relocation::Type::ToString(type), type);
|
|
break;
|
|
}
|
|
|
|
// the original relocation might have been applied to the symbol at a certain offset.
|
|
// subtract that offset (if any) to arrive at the symbol's original RVA.
|
|
dstRva -= relocation->dstOffset;
|
|
|
|
if (dstRva == 0u)
|
|
{
|
|
// this was reconstructed from a stripped COMDAT symbol that is referenced by an .obj where it
|
|
// wasn't stripped (e.g. an .obj contained in a .lib).
|
|
continue;
|
|
}
|
|
|
|
if (dstRva > imageSize)
|
|
{
|
|
// the RVA underflowed somewhere (the unsigned int would then surely be larger than 2 GB),
|
|
// or the RVA lies outside the module.
|
|
LC_ERROR_DEV("Detected wrong RVA 0x%X: Relocation %s (%d) from %s to %s in file %s",
|
|
dstRva,
|
|
coff::Relocation::Type::ToString(type), type,
|
|
srcSymbolName.c_str(), dstSymbolName.c_str(), objPath.c_str());
|
|
LC_ERROR_DEV("Source symbol at 0x%X", srcSymbol->rva);
|
|
LC_ERROR_DEV("Relocation srcRva: 0x%X, dstOffset: 0x%X", relocation->srcRva, relocation->dstOffset);
|
|
continue;
|
|
}
|
|
|
|
// at this point, the RVA itself must fit into 32-bit, even in 64-bit
|
|
uint32_t dstRva32 = static_cast<uint32_t>(dstRva);
|
|
|
|
// when incremental linking is enabled, the linker links function calls against "@ILT+offset" thunks rather
|
|
// than the real function address. we can follow these thunks and get the function's real RVA.
|
|
const uint32_t thunkTarget = symbols::FindThunkTargetByRVA(thunkDb, dstRva32);
|
|
if (thunkTarget != 0u)
|
|
{
|
|
// the real destination RVA is at the thunk's target
|
|
dstRva32 = thunkTarget;
|
|
}
|
|
|
|
// we found a new symbol, add it to the database
|
|
LC_LOG_DEV("Found new symbol %s at RVA 0x%X", dstSymbolName.c_str(), dstRva32);
|
|
symbols::CreateNewSymbol(dstSymbolName, dstRva32, symbolDB);
|
|
|
|
// walk the relocations of the new symbol as well
|
|
const coff::Symbol* nextSymbol = coff::GetSymbolByIndex(coffDb, relocation->dstSymbolNameIndex);
|
|
if (nextSymbol)
|
|
{
|
|
openSymbols.push_back(nextSymbol);
|
|
}
|
|
|
|
--unknownSymbolsToFind;
|
|
|
|
// did we already find all symbols?
|
|
if (unknownSymbolsToFind == 0u)
|
|
{
|
|
LC_LOG_DEV("All symbols known, exiting");
|
|
return;
|
|
}
|
|
}
|
|
|
|
walkedAlready.insert(symbol);
|
|
}
|
|
|
|
// there are no more symbols to walk, but we haven't found all of them yet.
|
|
|
|
// we can try finding the remaining symbols by matching their sections to sections in the PE image.
|
|
// sections with the same name across several .obj files get merged into one section in the image, which makes it
|
|
// a bit harder to find the address of an .obj's section in the image.
|
|
// in order to do this, we find the section in question inside the image, and gather all different contributions
|
|
// to this section. for each contribution, we then check whether its size matches the one in the .obj, and whether it
|
|
// originated from the .obj in question.
|
|
// if both match, we can finally check the symbol's names to ensure that we found the correct contribution.
|
|
// from there, we can calculate the symbol's section-relative offset and reconstruct its RVA.
|
|
|
|
// start by gathering all static functions and symbols which haven't been found already
|
|
LC_LOG_DEV("Reconstructing symbol RVAs from executable contributions");
|
|
LC_LOG_INDENT_DEV;
|
|
|
|
// fetch all contributions for the .obj we're trying to reconstruct
|
|
const symbols::ContributionDB::ContributionsPerCompiland* contributionsForThisCompiland = symbols::GetContributionsForCompilandName(contributionDb, objPath);
|
|
if (!contributionsForThisCompiland)
|
|
{
|
|
LC_ERROR_DEV("Cannot find contributions for compiland %s", objPath.c_str());
|
|
return;
|
|
}
|
|
|
|
types::vector<const coff::Symbol*> missingSymbols;
|
|
missingSymbols.reserve(unknownSymbolsToFind);
|
|
{
|
|
const size_t count = coffDb->symbols.size();
|
|
for (size_t i = 0u; i < count; ++i)
|
|
{
|
|
const coff::Symbol* symbol = coffDb->symbols[i];
|
|
|
|
// if we are in our second pass (or later), check whether we tried reconstructing this symbol already
|
|
if (pass > 0u)
|
|
{
|
|
const auto it = triedReconstructingAlready.find(symbol);
|
|
if (it != triedReconstructingAlready.end())
|
|
{
|
|
// tried already
|
|
continue;
|
|
}
|
|
}
|
|
triedReconstructingAlready.insert(symbol);
|
|
|
|
const ImmutableString& symbolName = coff::GetSymbolName(coffDb, symbol);
|
|
if (strippedSymbols.find(symbolName) != strippedSymbols.end())
|
|
{
|
|
// the missing symbol is one we stripped
|
|
continue;
|
|
}
|
|
|
|
// only static symbols can be missing, all others need to be known already
|
|
if ((symbol->type == coff::SymbolType::STATIC_FUNCTION) ||
|
|
(symbol->type == coff::SymbolType::STATIC_DATA))
|
|
{
|
|
const symbols::Symbol* srcSymbol = symbols::FindSymbolByName(symbolDB, symbolName);
|
|
if (srcSymbol)
|
|
{
|
|
// found already, nothing more to do
|
|
LC_LOG_DEV("Ignoring known symbol \"%s\"", symbolName.c_str());
|
|
continue;
|
|
}
|
|
else if (symbols::IsRuntimeCheckRelatedSymbol(symbolName))
|
|
{
|
|
// code for runtime checks is always compiled into an .obj and doesn't need to be patched, and therefore
|
|
// there's no need to find all the symbols
|
|
LC_LOG_DEV("Ignoring runtime-check-related symbol \"%s\"", symbolName.c_str());
|
|
continue;
|
|
}
|
|
else if (symbols::IsControlFlowGuardRelatedSymbol(symbolName))
|
|
{
|
|
// control flow guard stores function identifiers in separate symbols in .gfids$y section, which is not
|
|
// an explicit section in the executable, and therefore cannot be found.
|
|
// this is of no interest to us anyway, because we disable CFG.
|
|
LC_LOG_DEV("Ignoring control flow guard-related symbol \"%s\"", symbolName.c_str());
|
|
continue;
|
|
}
|
|
else if (symbols::IsExceptionRelatedSymbol(symbolName))
|
|
{
|
|
// even though exception-related symbols such as unwind tables and handlers are never patched or relocated
|
|
// by us, catch clauses will refer to function and data symbols, and some of them could be stripped by us.
|
|
// we therefore need to reconstruct these symbols as well.
|
|
// we could also try reconstructing all exception-related symbols, but that has a serious impact on
|
|
// performance!
|
|
if (!symbols::IsExceptionClauseSymbol(symbolName))
|
|
{
|
|
// no exception clause, hence we're really not interested
|
|
continue;
|
|
}
|
|
}
|
|
|
|
missingSymbols.push_back(symbol);
|
|
}
|
|
else
|
|
{
|
|
// externally visible COMDAT symbols might not be known at this point, but will be found in one of
|
|
// the OBJ files eventually. this is not an error.
|
|
// ??$__vcrt_va_start_verify_argument_type@QBD@@YAXXZ is probably the most prominent example of where
|
|
// this happens all the time.
|
|
}
|
|
}
|
|
}
|
|
|
|
// next try finding the missing symbols.
|
|
// NOTE: this is carefully constructed to only run into O(N^2) in rare edge cases, because the original O(N^2) algorithm
|
|
// caused a 25-30s slowdown for some users.
|
|
const size_t missingSymbolCount = missingSymbols.size();
|
|
|
|
// TODO: we use uint64_t to store the RVA and whether the missing symbol is an exception clause.
|
|
// once we have our own PDB loading in place, we don't need this anymore and can use a set of uint32_t.
|
|
types::unordered_set<uint64_t> potentialContributionRVAsAcrossAllMissingSymbols;
|
|
potentialContributionRVAsAcrossAllMissingSymbols.reserve(contributionsForThisCompiland->size());
|
|
|
|
for (size_t i = 0u; i < missingSymbolCount; ++i)
|
|
{
|
|
const coff::Symbol* symbol = missingSymbols[i];
|
|
|
|
const ImmutableString& missingSymbolName = coff::GetSymbolName(coffDb, symbol);
|
|
const uint64_t isExceptionClauseSymbol = symbols::IsExceptionClauseSymbol(missingSymbolName) ? (1ull << 32ull) : 0ull;
|
|
|
|
const coff::Section& coffSection = coffDb->sections[symbol->sectionIndex];
|
|
if (coff::IsMSVCJustMyCodeSection(coffSection.name.c_str()))
|
|
{
|
|
LC_LOG_DEV("Ignoring JustMyCode symbol %s in section %s", missingSymbolName.c_str(), coffSection.name.c_str());
|
|
continue;
|
|
}
|
|
|
|
LC_LOG_DEV("Trying to find RVA for static symbol %s in section %s", missingSymbolName.c_str(), coffSection.name.c_str());
|
|
LC_LOG_INDENT_DEV;
|
|
|
|
// the address of the symbol relative to the COFF section it's defined in, e.g.:
|
|
// .bss at COFF RVA 1000
|
|
// symbol0 at COFF RVA 1000, at section relative addr. 0
|
|
// symbol1 at COFF RVA 1004, at section relative addr. 4
|
|
// symbol2 at COFF RVA 1008, at section relative addr. 8
|
|
const uint32_t sectionRelativeAddress = symbol->rva - coffSection.rawDataRva;
|
|
|
|
// find this section in the image
|
|
const symbols::ImageSection* imageSection = symbols::FindImageSectionByName(imageSectionDb, coffSection.name);
|
|
if (!imageSection)
|
|
{
|
|
LC_ERROR_DEV("Cannot find image section %s", coffSection.name.c_str());
|
|
continue;
|
|
}
|
|
|
|
const uint32_t startOfImageSection = imageSection->rva;
|
|
const uint32_t endOfImageSection = startOfImageSection + imageSection->size;
|
|
|
|
// walk all contributions that are part of the image section and discard the ones that cannot match the symbol in question
|
|
auto contributionIt = std::lower_bound(contributionsForThisCompiland->begin(), contributionsForThisCompiland->end(), startOfImageSection, &HasLowerRVA);
|
|
while (contributionIt != contributionsForThisCompiland->end())
|
|
{
|
|
const symbols::Contribution* contribution = *contributionIt;
|
|
++contributionIt;
|
|
|
|
if (contribution->rva >= endOfImageSection)
|
|
{
|
|
// no more contributions that belong to this section
|
|
break;
|
|
}
|
|
|
|
if (contribution->size != coffSection.rawDataSize)
|
|
{
|
|
// section size does not match
|
|
continue;
|
|
}
|
|
else if (sectionRelativeAddress >= contribution->size)
|
|
{
|
|
// the symbol cannot be part of this contributing section because it is not large enough
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
// this is a potential contribution, store it for now
|
|
const uint32_t rva = contribution->rva + sectionRelativeAddress;
|
|
potentialContributionRVAsAcrossAllMissingSymbols.insert(isExceptionClauseSymbol | rva);
|
|
}
|
|
}
|
|
}
|
|
|
|
// populate a cache of all DIA names for all potential contributions once
|
|
// BEGIN EPIC MOD
|
|
struct DiaRvaData
|
|
{
|
|
uint32_t rva;
|
|
bool valid;
|
|
};
|
|
types::StringMap<DiaRvaData> diaNameToRva;
|
|
// END EPIC MOD
|
|
diaNameToRva.reserve(potentialContributionRVAsAcrossAllMissingSymbols.size());
|
|
|
|
types::unordered_map<uint32_t, IDiaSymbol*> rvaToDiaSymbol;
|
|
rvaToDiaSymbol.reserve(potentialContributionRVAsAcrossAllMissingSymbols.size());
|
|
|
|
for (auto potentialContributionsIt : potentialContributionRVAsAcrossAllMissingSymbols)
|
|
{
|
|
const uint64_t setData = potentialContributionsIt;
|
|
const uint32_t rva = setData & 0x00000000FFFFFFFFull;
|
|
const bool isExceptionClauseSymbol = (setData & 0xFFFFFFFF00000000ull) != 0ull;
|
|
|
|
// TODO: no longer needs to be special-cased once our own loading of PDB files is in place
|
|
// exception clauses are labels stored as children of functions, so they need to be special-cased
|
|
IDiaSymbol* diaSymbol = isExceptionClauseSymbol
|
|
? dia::FindLabelByRva(provider->diaSession, rva)
|
|
: dia::FindSymbolByRVA(provider->diaSession, rva);
|
|
|
|
if (diaSymbol)
|
|
{
|
|
const std::wstring& diaName = dia::GetSymbolName(diaSymbol).GetString();
|
|
const ImmutableString& name = string::ToUtf8String(diaName);
|
|
|
|
// BEGIN EPIC MOD
|
|
auto ixb = diaNameToRva.insert(std::make_pair(name, DiaRvaData{ rva, true }));
|
|
if (!ixb.second && ixb.first->second.rva != rva && ixb.first->second.valid)
|
|
{
|
|
LC_LOG_DEV("Ignoring Dia symbol %s for fast path because multiple RVAs claim the symbol", name.c_str());
|
|
ixb.first->second.valid = false;
|
|
}
|
|
// END EPIC MOD
|
|
rvaToDiaSymbol.insert(std::make_pair(rva, diaSymbol));
|
|
}
|
|
}
|
|
|
|
// perform the actual lookup using the cache we just built
|
|
for (size_t i = 0u; i < missingSymbolCount; ++i)
|
|
{
|
|
const coff::Symbol* symbol = missingSymbols[i];
|
|
|
|
const ImmutableString& missingSymbolName = coff::GetSymbolName(coffDb, symbol);
|
|
const coff::Section& coffSection = coffDb->sections[symbol->sectionIndex];
|
|
if (coff::IsMSVCJustMyCodeSection(coffSection.name.c_str()))
|
|
{
|
|
LC_LOG_DEV("Ignoring JustMyCode symbol %s in section %s", missingSymbolName.c_str(), coffSection.name.c_str());
|
|
continue;
|
|
}
|
|
|
|
const std::string& coffUndecoratedName = symbols::UndecorateSymbolName(missingSymbolName);
|
|
|
|
auto diaNameIt = diaNameToRva.find(ImmutableString(coffUndecoratedName.c_str()));
|
|
// BEGIN EPIC MOD
|
|
if (diaNameIt != diaNameToRva.end() && diaNameIt->second.valid)
|
|
// END EPIC MOD
|
|
{
|
|
// fast path.
|
|
// there is a symbol that matches the exact name of the symbol in the .obj file
|
|
// BEGIN EPIC MOD
|
|
const uint32_t rva = diaNameIt->second.rva;
|
|
// END EPIC MOD
|
|
|
|
LC_LOG_DEV("Fast path, found symbol %s at 0x%X", missingSymbolName.c_str(), rva);
|
|
|
|
symbols::CreateNewSymbol(missingSymbolName, rva, symbolDB);
|
|
|
|
openSymbols.push_back(symbol);
|
|
|
|
--unknownSymbolsToFind;
|
|
|
|
// did we already find all symbols?
|
|
if (unknownSymbolsToFind == 0u)
|
|
{
|
|
LC_LOG_DEV("All symbols known, exiting");
|
|
return;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// slow path.
|
|
// unfortunately, there is no exact match, but there might be several symbols/contributions with
|
|
// a name that partially matches that of the symbol in the .obj file.
|
|
// in that case, we check all contributions for this symbol, check whether its name is contained in that of
|
|
// the .obj file, and check all its parents and their names as well.
|
|
// if we find a symbol that matches all of the above, we have a worthy candidate. we can only accept this
|
|
// symbol if it's the *only* candidate though. in case of several ambiguous contributions, we'd rather not
|
|
// make a wrong guess.
|
|
const std::wstring& wideCoffUndecoratedName = string::ToWideString(coffUndecoratedName);
|
|
|
|
types::unordered_set<const symbols::Contribution*> potentialContributions;
|
|
potentialContributions.reserve(contributionsForThisCompiland->size());
|
|
|
|
const uint32_t sectionRelativeAddress = symbol->rva - coffSection.rawDataRva;
|
|
|
|
// find this section in the image
|
|
const symbols::ImageSection* imageSection = symbols::FindImageSectionByName(imageSectionDb, coffSection.name);
|
|
if (!imageSection)
|
|
{
|
|
// BEGIN EPIC MOD
|
|
if (diaNameIt != diaNameToRva.end())
|
|
{
|
|
// fast path.
|
|
// there is a symbol that matches the exact name of the symbol in the .obj file
|
|
const uint32_t rva = diaNameIt->second.rva;
|
|
|
|
LC_LOG_DEV("Fast path, found symbol %s at 0x%X", missingSymbolName.c_str(), rva);
|
|
|
|
symbols::CreateNewSymbol(missingSymbolName, rva, symbolDB);
|
|
|
|
openSymbols.push_back(symbol);
|
|
|
|
--unknownSymbolsToFind;
|
|
|
|
// did we already find all symbols?
|
|
if (unknownSymbolsToFind == 0u)
|
|
{
|
|
LC_LOG_DEV("All symbols known, exiting");
|
|
return;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
LC_ERROR_DEV("Cannot find image section %s", coffSection.name.c_str());
|
|
}
|
|
// END EPIC MOD
|
|
continue;
|
|
}
|
|
|
|
const uint32_t startOfImageSection = imageSection->rva;
|
|
const uint32_t endOfImageSection = startOfImageSection + imageSection->size;
|
|
|
|
// walk all contributions that are part of the image section and discard the ones that cannot match the symbol in question
|
|
auto contributionIt = std::lower_bound(contributionsForThisCompiland->begin(), contributionsForThisCompiland->end(), startOfImageSection, &HasLowerRVA);
|
|
while (contributionIt != contributionsForThisCompiland->end())
|
|
{
|
|
const symbols::Contribution* contribution = *contributionIt;
|
|
++contributionIt;
|
|
|
|
if (contribution->rva >= endOfImageSection)
|
|
{
|
|
// no more contributions that belong to this section
|
|
break;
|
|
}
|
|
|
|
if (contribution->size != coffSection.rawDataSize)
|
|
{
|
|
// section size does not match
|
|
continue;
|
|
}
|
|
else if (sectionRelativeAddress >= contribution->size)
|
|
{
|
|
// the symbol cannot be part of this contributing section because it is not large enough
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
// this is a potential contribution, store it for now
|
|
potentialContributions.emplace(contribution);
|
|
}
|
|
}
|
|
|
|
types::unordered_set<uint32_t> worthyCandidates;
|
|
worthyCandidates.reserve(potentialContributions.size());
|
|
|
|
for (auto it = potentialContributions.begin(); it != potentialContributions.end(); ++it)
|
|
{
|
|
const symbols::Contribution* contribution = *it;
|
|
const uint32_t rva = contribution->rva + sectionRelativeAddress;
|
|
|
|
// get the symbol name at the potential RVA from the DIA cache
|
|
{
|
|
auto cacheIt = rvaToDiaSymbol.find(rva);
|
|
if (cacheIt != rvaToDiaSymbol.end())
|
|
{
|
|
IDiaSymbol* diaSymbol = cacheIt->second;
|
|
const std::wstring& diaName = dia::GetSymbolName(diaSymbol).GetString();
|
|
|
|
if (string::Contains(wideCoffUndecoratedName.c_str(), diaName.c_str()))
|
|
{
|
|
// the name partially matches, now check all its parents
|
|
bool doAllParentsMatch = true;
|
|
IDiaSymbol* parent = dia::GetParent(diaSymbol);
|
|
while (parent)
|
|
{
|
|
// we are only interested in parents which are functions
|
|
if (!dia::IsFunction(parent))
|
|
{
|
|
break;
|
|
}
|
|
|
|
const std::wstring& parentName = dia::GetSymbolName(parent).GetString();
|
|
if (string::Contains(wideCoffUndecoratedName.c_str(), parentName.c_str()))
|
|
{
|
|
parent = dia::GetParent(parent);
|
|
}
|
|
else
|
|
{
|
|
doAllParentsMatch = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (doAllParentsMatch)
|
|
{
|
|
worthyCandidates.emplace(rva);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (worthyCandidates.size() == 1u)
|
|
{
|
|
// there was only one worthy candidate
|
|
const uint32_t rva = *worthyCandidates.begin();
|
|
|
|
LC_LOG_DEV("Slow path, found symbol %s at 0x%X", missingSymbolName.c_str(), rva);
|
|
|
|
CreateNewSymbol(missingSymbolName, rva, symbolDB);
|
|
|
|
openSymbols.push_back(symbol);
|
|
|
|
--unknownSymbolsToFind;
|
|
|
|
// did we already find all symbols?
|
|
if (unknownSymbolsToFind == 0u)
|
|
{
|
|
LC_LOG_DEV("All symbols known, exiting");
|
|
return;
|
|
}
|
|
}
|
|
else if (worthyCandidates.size() == 0u)
|
|
{
|
|
// if we had potential candidates but could not find a symbol, there is still a possibility that the
|
|
// symbol has been stripped by the linker due to the /Gw option that puts data symbols into separate
|
|
// sections. this happens in ComplexClassGlobal.cpp in our test cases as well.
|
|
LC_WARNING_DEV("Could not find symbol %s in compiland %s, possibly stripped by linker",
|
|
coff::GetSymbolName(coffDb, symbol).c_str(),
|
|
objPath.c_str());
|
|
}
|
|
else
|
|
{
|
|
LC_ERROR_DEV("Contributions for symbol %s are ambiguous", missingSymbolName.c_str());
|
|
}
|
|
}
|
|
}
|
|
|
|
if (openSymbols.size() != 0u)
|
|
{
|
|
// we found new symbols to walk, so do another pass
|
|
LC_LOG_DEV("Doing another pass");
|
|
++pass;
|
|
goto walkOpenSymbols;
|
|
}
|
|
}
|
|
|
|
|
|
#endif |