Files
UnrealEngine/Engine/Source/Programs/UnrealBuildAccelerator/Common/Private/UbaThread.cpp
2025-05-18 13:04:45 +08:00

500 lines
13 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "UbaThread.h"
#include "UbaPlatform.h"
#if PLATFORM_WINDOWS
#include <tlhelp32.h>
#elif PLATFORM_LINUX
#include <dirent.h>
#include <execinfo.h>
#include <sys/user.h>
#include <sys/uio.h>
#define REG_SP regs.rsp
#else
#include <mach/mach.h>
#include <mach/thread_act.h>
#endif
#define UBA_TRACK_THREADS 0
namespace uba
{
// Spreads threads across Windows processor groups in round-robin order so
// that work is not pinned to the process' default group on >64-core machines.
// On non-Windows platforms this is a no-op that reports success.
bool AlternateThreadGroupAffinity(void* nativeThreadHandle)
{
#if PLATFORM_WINDOWS
	const int groupCount = GetProcessorGroupCount();
	if (groupCount <= 1)
		return true; // Single group - nothing to alternate between

	// Round-robin counter shared by every caller in the process
	static Atomic<int> s_nextGroup;
	const u16 group = u16(s_nextGroup++ % groupCount);

	GROUP_AFFINITY affinity = {};
	affinity.Group = group;
	// Mask with one bit set for each active processor in the chosen group
	affinity.Mask = ~0ull >> (int)(64 - ::GetActiveProcessorCount(group));
	return ::SetThreadGroupAffinity(nativeThreadHandle, &affinity, NULL);
#else
	(void)nativeThreadHandle;
	return true;
#endif
}
// Applies a previously captured group affinity (see Thread::GetGroupAffinity)
// to a native thread handle. Returns false on non-Windows platforms, where
// processor groups do not exist.
bool SetThreadGroupAffinity(void* nativeThreadHandle, const GroupAffinity& affinity)
{
#if PLATFORM_WINDOWS
	// With a single processor group there is nothing to restore
	if (GetProcessorGroupCount() <= 1)
		return true;

	GROUP_AFFINITY ga = {};
	ga.Group = affinity.group;
	ga.Mask = affinity.mask;
	return ::SetThreadGroupAffinity(nativeThreadHandle, &ga, NULL);
#else
	(void)nativeThreadHandle;
	(void)affinity;
	return false;
#endif
}
// Intrusive doubly-linked list of all live Thread objects, guarded by
// g_allThreadsLock. Maintained by Thread::Start/Thread::Wait and walked by
// TraverseAllThreads, but only when UBA_TRACK_THREADS is enabled.
Futex g_allThreadsLock;
Thread* g_firstThread;
// Default constructor; creates an unstarted thread. Call Start() to run it.
Thread::Thread()
{
}
// Constructs and immediately starts a thread running func.
// description is an optional name surfaced to debuggers/profilers (see Start).
Thread::Thread(Function<u32()>&& func, const tchar* description)
{
Start(std::move(func), description);
}
// Destructor blocks until the thread function has finished and the native
// handle has been cleaned up (see Wait).
Thread::~Thread()
{
Wait();
}
// Starts the thread running f. description, when provided, names the thread
// for debugging tools. On Windows the new thread is spread across processor
// groups via AlternateThreadGroupAffinity. On POSIX the thread entry blocks
// SIGINT and signals m_finished when f returns so Wait() can time out without
// pthread_join.
void Thread::Start(Function<u32()>&& f, const tchar* description)
{
m_func = std::move(f);
#if PLATFORM_WINDOWS
m_handle = CreateThread(NULL, 0, [](LPVOID p) -> DWORD { return ((Thread*)p)->m_func(); }, this, 0, NULL);
UBA_ASSERT(m_handle);
if (!m_handle)
return;
if (description)
SetThreadDescription(m_handle, description);
AlternateThreadGroupAffinity(m_handle);
#else
int err = 0;
// Event set by the thread entry just before it exits; Wait() blocks on this
// (with a timeout) instead of joining directly
m_finished.Create(true);
static_assert(sizeof(pthread_t) <= sizeof(m_handle), "");
auto& pth = *(pthread_t*)&m_handle;
pthread_attr_t tattr;
// initialized with default attributes
err = pthread_attr_init(&tattr);
// TODO: Need to figure out a better value, or decrease stack usage
// without this though we get a bus error on Intel Macs
#if !defined(__arm__) && !defined(__arm64__)
size_t size = PTHREAD_STACK_MIN * 500;
err = pthread_attr_setstacksize(&tattr, size);
#endif
err = pthread_create(&pth, &tattr, [](void* p) -> void*
{
// Ignore sigint. Uba is designed for someone else to cancel it
#if !PLATFORM_WINDOWS
sigset_t set;
sigemptyset(&set);
sigaddset(&set, SIGINT);
pthread_sigmask(SIG_BLOCK, &set, NULL);
#endif
auto& t = *(Thread*)p;
int res = t.m_func();
// Signal completion before returning so Wait() knows it is safe to join
t.m_finished.Set();
return (void*)(uintptr_t)res;
}, this);
UBA_ASSERT(err == 0); (void)err;
if (!description)
description = "UbaUnknown";
#if PLATFORM_MACOS
// NOTE(review): on macOS pthread_setname_np takes no thread argument and
// names the *calling* thread - this runs on the creator, so it likely names
// the wrong thread. Consider naming from inside the thread entry; verify.
pthread_setname_np(description);
#else
pthread_setname_np(pth, description);
#endif
err = pthread_attr_destroy(&tattr);
UBA_ASSERT(err == 0); (void)err;
#endif
#if UBA_TRACK_THREADS
// Link this thread into the global list so TraverseAllThreads can find it
SCOPED_FUTEX(g_allThreadsLock, lock);
printf("Adding THREAD %llx\n", (u64)*(pthread_t*)&m_handle);
m_next = g_firstThread;
if (m_next)
m_next->m_prev = this;
g_firstThread = this;
#endif
}
// Waits up to milliseconds for the thread to finish (or, on Windows, until
// wakeupEvent is signaled). Returns false on timeout/wakeup, true once the
// thread has completed (or was never started). On completion the thread is
// joined/closed, unlinked from the tracking list and m_func is released.
bool Thread::Wait(u32 milliseconds, Event* wakeupEvent)
{
SCOPED_READ_LOCK(m_funcLock, readLock);
if (!m_handle)
return true;
// Unlinks this thread from the global tracking list (no-op unless
// UBA_TRACK_THREADS is enabled)
auto removeThread = [this]()
{
#if UBA_TRACK_THREADS
SCOPED_FUTEX(g_allThreadsLock, lock);
printf("REMOVING THREAD %llx\n", (u64)*(pthread_t*)&m_handle);
if (m_next)
m_next->m_prev = m_prev;
if (m_prev)
m_prev->m_next = m_next;
else if (g_firstThread == this)
g_firstThread = m_next;
m_next = nullptr;
m_prev = nullptr;
#endif
};
#if PLATFORM_WINDOWS // Optimization, not needed in initial implementation
if (wakeupEvent)
{
// Wait on both the thread and the wakeup event; either the event firing
// or a timeout reports "not finished" to the caller.
// NOTE(review): WAIT_FAILED falls through to the cleanup path below - confirm intended.
HANDLE h[] = { m_handle, wakeupEvent->GetHandle() };
DWORD res = WaitForMultipleObjects(2, h, false, milliseconds);
if (res == WAIT_OBJECT_0 + 1 || res == WAIT_TIMEOUT)
return false;
}
else
{
if (WaitForSingleObject(m_handle, milliseconds) == WAIT_TIMEOUT)
return false;
}
removeThread();
#else
// m_finished is set by the thread entry right before it returns (see Start),
// so once it is set pthread_join below will not block for long
if (!m_finished.IsSet(milliseconds))
return false;
removeThread();
int* ptr = 0;
int res = pthread_join(*(pthread_t*)&m_handle, (void**)&ptr);
UBA_ASSERT(res == 0);
#endif
// Upgrade to a write lock for the handle/function teardown; re-check
// m_handle since another waiter may have finished cleanup in between
readLock.Leave();
SCOPED_WRITE_LOCK(m_funcLock, lock);
if (!m_handle)
return true;
#if PLATFORM_WINDOWS
CloseHandle(m_handle);
#endif
m_func = {};
m_handle = nullptr;
return true;
}
// Retrieves this thread's processor-group affinity into out.
// Returns false on non-Windows platforms or if the underlying query fails.
// NOTE(review): with a single processor group this returns true WITHOUT
// writing to out, leaving it untouched - confirm callers handle that case.
bool Thread::GetGroupAffinity(GroupAffinity& out)
{
#if PLATFORM_WINDOWS
if (GetProcessorGroupCount() <= 1)
return true;
GROUP_AFFINITY aff;
if (!::GetThreadGroupAffinity(m_handle, &aff))
return false;
out.mask = aff.Mask;
out.group = aff.Group;
return true;
#else
return false;
#endif
}
bool TraverseAllThreads(const TraverseThreadFunc& func, const TraverseThreadErrorFunc& errorFunc)
{
#if PLATFORM_WINDOWS
StringBuffer<256> error;
auto reportError = [&](HANDLE hThread, const tchar* call) { errorFunc(error.Clear().Appendf(TC("%s failed for thread %llu (%s)"), call, u64(hThread), LastErrorToText().data)); };
#if UBA_TRACK_THREADS
DWORD currentThreadId = GetCurrentThreadId();
SCOPED_FUTEX(g_allThreadsLock, lock);
for (Thread* t=g_firstThread; t; t=t->m_next)
{
HANDLE hThread = t->m_handle;
if (currentThreadId == GetThreadId(hThread))
continue;
if (SuspendThread(hThread) == -1)
{
reportError(hThread, TC("SuspendThread"));
continue;
}
auto rtg = MakeGuard([&]() { ResumeThread(hThread); });
CONTEXT ctx;
memset(&ctx, 0, sizeof(CONTEXT));
ctx.ContextFlags = CONTEXT_FULL;
if (!GetThreadContext(hThread, &ctx))
{
reportError(hThread, TC("GetThreadContext"));
continue;
}
func(0, &ctx);
}
#else
DWORD pid = GetCurrentProcessId();
DWORD tid = GetCurrentThreadId();
HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
if (hSnapshot == INVALID_HANDLE_VALUE)
return false;
auto sg = MakeGuard([&]() { CloseHandle(hSnapshot); });
UnorderedSet<CasKey> handledCallstacks;
THREADENTRY32 te32 = { sizeof(THREADENTRY32) };
if (!Thread32First(hSnapshot, &te32))
return false;
do
{
if (te32.th32OwnerProcessID != pid || te32.th32ThreadID == tid || te32.th32ThreadID == 0)
continue;
HANDLE hThread = OpenThread(THREAD_GET_CONTEXT | THREAD_SET_CONTEXT | THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION, FALSE, te32.th32ThreadID);
if (!hThread)
{
reportError(hThread, TC("OpenThread"));
continue;
}
auto tg = MakeGuard([&]() { CloseHandle(hThread); });
if (SuspendThread(hThread) == -1)
{
reportError(hThread, TC("SuspendThread"));
continue;
}
auto rtg = MakeGuard([&]() { ResumeThread(hThread); });
PWSTR threadDesc = nullptr;
GetThreadDescription(hThread, &threadDesc);
auto tdg = MakeGuard([&]() { LocalFree(threadDesc); });
CONTEXT ctx = {};
ctx.ContextFlags = CONTEXT_ALL;
if (!GetThreadContext(hThread, &ctx))
{
reportError(hThread, TC("GetThreadContext"));
continue;
}
void* callstack[100];
u32 callstackCount = GetCallstack(callstack, 100, 1, &ctx);
func(te32.th32ThreadID, callstack, callstackCount, threadDesc);
}
while (Thread32Next(hSnapshot, &te32));
#endif
return true;
#elif PLATFORM_LINUX
// TODO: None of these approaches work.
// signal path will break if thread is in certain system calls.. and ptrace does not work either :-/
static Event s_ev(false);
static const TraverseThreadFunc* s_func;
s_func = &func;
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction = [](int sig, siginfo_t* info, void* context)
{
void* callstack[100];
u32 callstackCount = GetCallstack(callstack, 100, 1, context);
(*s_func)(syscall(SYS_gettid), callstack, callstackCount, nullptr);
s_ev.Set();
};
sigaction(SIGUSR1, &sa, NULL);
DIR* dir = opendir("/proc/self/task");
if (!dir)
return false;
pid_t currentTid = syscall(SYS_gettid);
struct dirent *entry;
while ((entry = readdir(dir)) != NULL)
{
pid_t tid = (pid_t)atoi(entry->d_name);
if (tid <= 0 || tid == currentTid)
continue;
kill(tid, SIGUSR1);
s_ev.IsSet();
}
#if 0
pid_t parentPid = getpid();
char message[100] = "Original message in parent.";
// We need to fork to be able to use ptrace on threads
pid_t child = fork();
if (child < 0)
{
perror("fork");
return false;
}
if (child == 0) // The child
{
StringBuffer<128>().Appendf("/proc/%u/task", parentPid);
auto g = MakeGuard([] { exit(EXIT_SUCCESS); });
DIR* dir = opendir(StringBuffer<128>().Appendf("/proc/%u/task", parentPid).data);
if (!dir)
return false;
struct dirent *entry;
while ((entry = readdir(dir)) != NULL)
{
pid_t tid = (pid_t)atoi(entry->d_name);
if (tid <= 0)
continue;
if (ptrace(PTRACE_ATTACH, tid, NULL, NULL) == -1)
{
perror("ptrace attach");
continue;
}
int status;
waitpid(tid, &status, 0); // Wait until the thread stops.
struct user_regs_struct regs;
if (ptrace(PTRACE_GETREGS, tid, NULL, &regs) == -1)
{
perror("ptrace getregs");
return false;
}
printf("Callstack for thread %d:\n", tid);
printf("RIP: %llx\n", regs.rip);
// Start from the current base pointer.
unsigned long long fp = regs.rbp;
int frame = 0;
#define MAX_FRAMES 64
while (fp && frame < MAX_FRAMES) {
// Read the saved frame pointer (first word) and return address (second word)
unsigned long long next_fp, ret_addr;
errno = 0;
next_fp = ptrace(PTRACE_PEEKDATA, tid, (void *)fp, NULL);
if (errno != 0) break;
ret_addr = ptrace(PTRACE_PEEKDATA, tid, (void *)(fp + sizeof(unsigned long long)), NULL);
if (errno != 0) break;
printf(" Frame %d: ret_addr = %llx (fp = %llx)\n", frame, ret_addr, fp);
fp = next_fp;
frame++;
}
printf("\n");
if (ptrace(PTRACE_DETACH, tid, NULL, NULL) == -1) {
perror("ptrace detach");
return false;
}
const char* new_msg = "Hello from child!";
struct iovec local[1];
local[0].iov_base = (void *)new_msg;
local[0].iov_len = strlen(new_msg) + 1; // Include the null terminator
// Set up the remote iovec.
// The parent's view of the address is the same as the child's copy after fork.
struct iovec remote[1];
remote[0].iov_base = message;
remote[0].iov_len = strlen(new_msg) + 1;
// Use process_vm_writev to write the new message into the parent's memory.
ssize_t nwritten = process_vm_writev(getppid(), local, 1, remote, 1, 0);
}
closedir(dir);
}
else // The parent
{
while (message[0] == 'O')
{
Sleep(500);
}
}
#endif
#else
task_t task;
kern_return_t kr = task_for_pid(mach_task_self(), getpid(), &task);
if (kr != KERN_SUCCESS)
return false;
thread_act_array_t threads;
mach_msg_type_number_t thread_count;
kr = task_threads(task, &threads, &thread_count);
if (kr != KERN_SUCCESS)
return false;
auto tsg = MakeGuard([&] { vm_deallocate(mach_task_self(), (vm_address_t)threads, thread_count * sizeof(thread_t)); });
for (int i = 0; i < thread_count; i++)
{
if (threads[i] == mach_thread_self())
continue;
auto thread = threads[i];
thread_suspend(thread);
auto g = MakeGuard([&] { thread_resume(thread); });
kern_return_t kr;
uint64_t pc = 0, fp = 0;
#if defined(__x86_64__)
x86_thread_state64_t state;
mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
kr = thread_get_state(thread, x86_THREAD_STATE64, (thread_state_t)&state, &count);
if (kr != KERN_SUCCESS)
return false;
pc = state.__rip;
fp = state.__rbp;
#elif defined(__arm64__)
arm_thread_state64_t state;
mach_msg_type_number_t count = ARM_THREAD_STATE64_COUNT;
kr = thread_get_state(thread, ARM_THREAD_STATE64, (thread_state_t)&state, &count);
if (kr != KERN_SUCCESS)
return false;
pc = state.__pc;
fp = state.__fp;
#else
#error "Unsupported architecture"
#endif
void* callstack[100];
u32 callstackCount = 0;
for (int i = 0; i < 32 && fp; i++) {
uint64_t *stack = (uint64_t *)fp;
uint64_t return_addr = stack[1];
fp = stack[0];
if (!fp)
break;
if (i > 0)
callstack[callstackCount++] = (void*)return_addr;
}
g.Execute();
func(thread, callstack, callstackCount, nullptr);
}
#endif
return true;
}
}