Skip to content

Commit d0c9b93

Browse files
committed
Update crash handler capabilities and, building on that, add temp alloc instrumentation to OOM crash hotspots (to help identify leaks and suboptimal procedures)
1 parent 6fec5d5 commit d0c9b93

File tree

11 files changed

+996
-14
lines changed

11 files changed

+996
-14
lines changed

Client/core/CCrashDumpWriter.cpp

Lines changed: 374 additions & 2 deletions
Large diffs are not rendered by default.

Client/core/CrashHandler.cpp

Lines changed: 127 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,13 +19,15 @@
1919
#include <SharedUtil.Detours.h>
2020
#include <SharedUtil.Misc.h>
2121
#include <SharedUtil.Memory.h>
22+
#include "../Shared/sdk/CrashTelemetry.h"
2223

2324
#include <algorithm>
2425
#include <array>
2526
#include <atomic>
2627
#include <chrono>
2728
#include <cctype>
2829
#include <cstdint>
30+
#include <cstring>
2931
#include <exception>
3032
#include <errno.h>
3133
#include <intrin.h>
@@ -40,6 +42,9 @@
4042
#include <string_view>
4143
#include <variant>
4244
#include <vector>
45+
#if defined(_MSC_VER)
46+
#include <new.h>
47+
#endif
4348

4449
#if defined(_MSC_VER)
4550
#include <corecrt.h>
@@ -90,6 +95,16 @@ inline std::atomic<DWORD> g_initializationPhase{INIT_PHASE_MINIMAL};
9095

9196
using CrashHandlerResult = std::variant<std::monostate, std::string, DWORD, std::exception_ptr>;
9297

98+
#if defined(__cplusplus)
99+
[[noreturn]] void __cdecl CppNewHandler() noexcept;
100+
#endif
101+
102+
#if defined(_MSC_VER)
103+
static int __cdecl CppNewHandlerBridge(size_t size) noexcept;
104+
#else
105+
static void CppNewHandlerBridge() noexcept;
106+
#endif
107+
93108
[[nodiscard]] BOOL BUGSUTIL_DLLINTERFACE __stdcall IsFatalException(DWORD exceptionCode) noexcept
94109
{
95110
switch (exceptionCode)
@@ -175,6 +190,11 @@ static void StoreBasicExceptionInfo(_EXCEPTION_POINTERS* pException) noexcept
175190
info.timestamp = std::chrono::system_clock::now();
176191
info.threadId = GetCurrentThreadId();
177192
info.processId = GetCurrentProcessId();
193+
const auto telemetryNote = CrashTelemetry::BuildAllocationTelemetryNote();
194+
if (!telemetryNote.empty())
195+
{
196+
info.additionalInfo = telemetryNote;
197+
}
178198
g_lastExceptionInfo = info;
179199
}
180200
catch (...)
@@ -491,6 +511,12 @@ static void LogEnhancedExceptionInfo(_EXCEPTION_POINTERS* pException) noexcept
491511
case EXCEPTION_GUARD_PAGE:
492512
info.exceptionType = "SEH:GuardPage";
493513
break;
514+
case CUSTOM_EXCEPTION_CODE_OOM:
515+
info.exceptionType = "User:OutOfMemory";
516+
break;
517+
case CUSTOM_EXCEPTION_CODE_WATCHDOG_TIMEOUT:
518+
info.exceptionType = "User:WatchdogTimeout";
519+
break;
494520
default:
495521
if (info.exceptionCode >= 0xC0000000 && info.exceptionCode <= 0xCFFFFFFF)
496522
{
@@ -509,6 +535,15 @@ static void LogEnhancedExceptionInfo(_EXCEPTION_POINTERS* pException) noexcept
509535

510536
info.exceptionDescription = GetExceptionCodeDescription(info.exceptionCode);
511537

538+
if (auto telemetryNote = CrashTelemetry::BuildAllocationTelemetryNote(); !telemetryNote.empty())
539+
{
540+
if (!info.additionalInfo.empty())
541+
{
542+
info.additionalInfo.push_back('\n');
543+
}
544+
info.additionalInfo += telemetryNote;
545+
}
546+
512547
g_lastExceptionInfo = info;
513548
}
514549
catch (...)
@@ -518,6 +553,15 @@ static void LogEnhancedExceptionInfo(_EXCEPTION_POINTERS* pException) noexcept
518553
}
519554
}
520555

556+
// Records crash information for an exception record that a handler has
// synthesized itself (the visible callers are the abort-signal, pure-call and
// C++ exception handlers, which pass a locally built EXCEPTION_POINTERS).
//
// pException: exception pointers to record. If it is null, or its
//             ExceptionRecord is null, the function returns without doing
//             anything.
//
// The work is delegated to StoreBasicExceptionInfo() followed by
// LogEnhancedExceptionInfo(). In the hunks shown for those two functions, each
// one adds CrashTelemetry::BuildAllocationTelemetryNote() to
// info.additionalInfo when the note is non-empty.
// NOTE(review): because both callees add the same note, it may be recorded
// twice if LogEnhancedExceptionInfo starts from the info stored by
// StoreBasicExceptionInfo - confirm against the full function bodies.
static void CaptureAllocationTelemetry(_EXCEPTION_POINTERS* pException) noexcept
557+
{
558+
// Nothing to record without an exception record.
if (pException == nullptr || pException->ExceptionRecord == nullptr)
559+
return;
560+
561+
StoreBasicExceptionInfo(pException);
562+
LogEnhancedExceptionInfo(pException);
563+
}
564+
521565
static std::variant<DWORD, std::string> HandleExceptionModern(_EXCEPTION_POINTERS* pException) noexcept
522566
{
523567
if (pException == nullptr || pException->ExceptionRecord == nullptr)
@@ -621,7 +665,12 @@ static std::mutex g_handlerStateMutex;
621665
static std::atomic<PFNCHFILTFN> g_pfnCrashCallback{nullptr};
622666
static std::atomic<LPTOP_LEVEL_EXCEPTION_FILTER> g_pfnOrigFilt{nullptr};
623667
static std::atomic<std::terminate_handler> g_pfnOrigTerminate{nullptr};
668+
#if defined(_MSC_VER)
669+
using CrtNewHandler = int(__cdecl*)(size_t);
670+
static std::atomic<CrtNewHandler> g_pfnOrigNewHandler{nullptr};
671+
#else
624672
static std::atomic<std::new_handler> g_pfnOrigNewHandler{nullptr};
673+
#endif
625674
static std::atomic<decltype(&SetUnhandledExceptionFilter)> g_pfnKernelSetUnhandledExceptionFilter{nullptr};
626675
static decltype(&SetUnhandledExceptionFilter) g_kernelSetUnhandledExceptionFilterTrampoline = nullptr;
627676

@@ -633,10 +682,45 @@ static std::atomic<bool> g_bInPureCallHandler{false};
633682
static std::atomic<bool> g_bInTerminateHandler{false};
634683
static std::atomic<bool> g_bInNewHandler{false};
635684

685+
// CppNewHandlerBridge: the function this commit registers as the allocation
// failure handler (via _set_new_handler on MSVC and std::set_new_handler
// otherwise - see InstallCppHandlers). It makes sure an allocation telemetry
// context exists before handing control on.
//
// Both variants follow the same steps:
//   1. Read the current telemetry context with CrashTelemetry::CaptureContext().
//   2. If no context is recorded, record one tagged "operator new" /
//      "std::new_handler".
//   3. If a previous handler was saved in g_pfnOrigNewHandler, forward to it.
//   4. Otherwise call CppNewHandler(), which is declared [[noreturn]].
//
// NOTE(review): both variants are noexcept yet forward to an external handler.
// A standard new-handler is allowed to throw std::bad_alloc; if the previous
// handler throws, std::terminate is called here - confirm this is intended.
#if defined(_MSC_VER)
686+
// MSVC variant: the CRT passes the size of the failed request.
// Returns whatever the previous handler returns when one is installed.
static int __cdecl CppNewHandlerBridge(size_t size) noexcept
687+
{
688+
const auto telemetry = CrashTelemetry::CaptureContext();
689+
// Only fill in a context when none exists (or it carries no size) and the
// CRT actually reported a non-zero size, so an existing scope is not replaced.
if ((!telemetry.hasData || telemetry.requestedSize == 0) && size > 0)
690+
{
691+
CrashTelemetry::SetAllocationContext(size, nullptr, "operator new", "std::new_handler");
692+
}
693+
694+
if (auto previous = g_pfnOrigNewHandler.load(std::memory_order_acquire))
695+
{
696+
return previous(size);
697+
}
698+
699+
CppNewHandler();
700+
// Not expected to execute: CppNewHandler() is declared [[noreturn]].
return 0;
701+
}
702+
#else
703+
// Non-MSVC variant: std::new_handler takes no arguments, so the failed size is
// unknown and 0 is recorded as the requested size.
static void CppNewHandlerBridge() noexcept
704+
{
705+
const auto telemetry = CrashTelemetry::CaptureContext();
706+
if (!telemetry.hasData)
707+
{
708+
CrashTelemetry::SetAllocationContext(0, nullptr, "operator new", "std::new_handler");
709+
}
710+
711+
if (auto previous = g_pfnOrigNewHandler.load(std::memory_order_acquire))
712+
{
713+
previous();
714+
return;
715+
}
716+
717+
CppNewHandler();
718+
}
719+
#endif
720+
636721
LONG __stdcall CrashHandlerExceptionFilter(EXCEPTION_POINTERS* pExPtrs);
637722

638723
[[noreturn]] void __cdecl CppTerminateHandler() noexcept;
639-
[[noreturn]] void __cdecl CppNewHandler() noexcept;
640724
void __cdecl AbortSignalHandler(int signal) noexcept;
641725
[[noreturn]] void __cdecl PureCallHandler() noexcept;
642726

@@ -758,6 +842,8 @@ void __cdecl AbortSignalHandler([[maybe_unused]] int signal) noexcept
758842
exPtrs.ExceptionRecord = &exRecord;
759843
exPtrs.ContextRecord = &ctx;
760844

845+
CaptureAllocationTelemetry(&exPtrs);
846+
761847
PFNCHFILTFN callback = g_pfnCrashCallback.load(std::memory_order_acquire);
762848

763849
if (callback != nullptr)
@@ -796,13 +882,19 @@ void __cdecl AbortSignalHandler([[maybe_unused]] int signal) noexcept
796882
LogHandlerEvent(DEBUG_PREFIX_PURECALL.data(), "Pure virtual function call detected");
797883
SafeDebugOutput(DEBUG_SEPARATOR);
798884

799-
EXCEPTION_RECORD* pExRecord{nullptr};
800-
CONTEXT* pCtx{nullptr};
885+
EXCEPTION_RECORD* pExRecord{nullptr};
886+
CONTEXT* pCtx{nullptr};
801887
EXCEPTION_POINTERS exPtrs{};
802888

889+
const bool haveContext = BuildExceptionContext(exPtrs, pExRecord, pCtx, EXCEPTION_NONCONTINUABLE_EXCEPTION);
890+
if (haveContext)
891+
{
892+
CaptureAllocationTelemetry(&exPtrs);
893+
}
894+
803895
PFNCHFILTFN callback = g_pfnCrashCallback.load(std::memory_order_acquire);
804896

805-
if (callback != nullptr && BuildExceptionContext(exPtrs, pExRecord, pCtx, EXCEPTION_NONCONTINUABLE_EXCEPTION))
897+
if (callback != nullptr && haveContext)
806898
{
807899
LogHandlerEvent(DEBUG_PREFIX_PURECALL.data(), "Calling crash handler callback");
808900

@@ -850,7 +942,11 @@ class CleanUpCrashHandler
850942

851943
if (auto newHandler = g_pfnOrigNewHandler.exchange(nullptr, std::memory_order_seq_cst); newHandler != nullptr)
852944
{
945+
#if defined(_MSC_VER)
946+
_set_new_handler(newHandler);
947+
#else
853948
std::set_new_handler(newHandler);
949+
#endif
854950
}
855951

856952
if (auto abortHandler = g_pfnOrigAbortHandler.exchange(nullptr, std::memory_order_seq_cst); abortHandler != nullptr)
@@ -985,7 +1081,11 @@ static void InstallCppHandlers() noexcept
9851081

9861082
if (g_pfnOrigNewHandler.load(std::memory_order_acquire) == nullptr)
9871083
{
988-
std::new_handler previous = std::set_new_handler(CppNewHandler);
1084+
#if defined(_MSC_VER)
1085+
CrtNewHandler previous = _set_new_handler(CppNewHandlerBridge);
1086+
#else
1087+
std::new_handler previous = std::set_new_handler(CppNewHandlerBridge);
1088+
#endif
9891089
g_pfnOrigNewHandler.store(previous, std::memory_order_release);
9901090
SafeDebugOutput("CrashHandler: C++ new handler installed\n");
9911091
}
@@ -1468,7 +1568,11 @@ static void UninstallCrashHandlers() noexcept
14681568

14691569
if (auto newHandler = g_pfnOrigNewHandler.exchange(nullptr, std::memory_order_acq_rel); newHandler != nullptr)
14701570
{
1571+
#if defined(_MSC_VER)
1572+
_set_new_handler(newHandler);
1573+
#else
14711574
std::set_new_handler(newHandler);
1575+
#endif
14721576
}
14731577

14741578
if (auto abortHandler = g_pfnOrigAbortHandler.exchange(nullptr, std::memory_order_acq_rel); abortHandler != nullptr)
@@ -1569,13 +1673,24 @@ static bool BuildExceptionContext(EXCEPTION_POINTERS& outExPtrs, EXCEPTION_RECOR
15691673

15701674
SafeDebugOutput(DEBUG_SEPARATOR);
15711675

1676+
if (auto telemetryNote = CrashTelemetry::BuildAllocationTelemetryNote(); !telemetryNote.empty())
1677+
{
1678+
SafeDebugPrintPrefixed(DEBUG_PREFIX_CPP, "%s\n", telemetryNote.c_str());
1679+
}
1680+
15721681
EXCEPTION_RECORD* pExRecord = nullptr;
15731682
CONTEXT* pCtx = nullptr;
15741683
EXCEPTION_POINTERS exPtrs{};
15751684

1685+
const bool haveContext = BuildExceptionContext(exPtrs, pExRecord, pCtx, CPP_EXCEPTION_CODE);
1686+
if (haveContext)
1687+
{
1688+
CaptureAllocationTelemetry(&exPtrs);
1689+
}
1690+
15761691
PFNCHFILTFN callback = g_pfnCrashCallback.load(std::memory_order_acquire);
15771692

1578-
if (callback != nullptr && BuildExceptionContext(exPtrs, pExRecord, pCtx, CPP_EXCEPTION_CODE))
1693+
if (callback != nullptr && haveContext)
15791694
{
15801695
SafeDebugPrintPrefixed(DEBUG_PREFIX_CPP, "Calling crash handler callback\n");
15811696

@@ -1647,6 +1762,12 @@ static void ReportCurrentCppException() noexcept
16471762
SafeDebugOutput("C++ NEW HANDLER: Memory allocation failed\n");
16481763
SafeDebugOutput(DEBUG_SEPARATOR);
16491764

1765+
if (auto telemetryNote = CrashTelemetry::BuildAllocationTelemetryNote(); !telemetryNote.empty())
1766+
{
1767+
SafeDebugOutput(telemetryNote.c_str());
1768+
SafeDebugOutput("\n");
1769+
}
1770+
16501771
std::terminate();
16511772
}
16521773

Client/core/Graphics/CPixelsManager.cpp

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
#include "StdInc.h"
1212
#include "CFileFormat.h"
1313
#include "CPixelsManager.h"
14+
#include <CrashTelemetry.h>
1415

1516
///////////////////////////////////////////////////////////////
1617
// Object creation
@@ -750,6 +751,22 @@ bool CPixelsManager::ChangePixelsFormat(const CPixels& oldPixels, CPixels& newPi
750751
if (oldFormat == EPixelsFormat::UNKNOWN || newFormat == EPixelsFormat::UNKNOWN)
751752
return false;
752753

754+
const uint sourceBytes = oldPixels.GetSize();
755+
uint width = 0;
756+
uint height = 0;
757+
GetPixelsSize(oldPixels, width, height);
758+
759+
SString telemetryDetail;
760+
telemetryDetail.Format("%s->%s q=%d bytes=%u dims=%ux%u",
761+
EnumToString(oldFormat).c_str(),
762+
EnumToString(newFormat).c_str(),
763+
uiQuality,
764+
sourceBytes,
765+
width,
766+
height);
767+
// Tag conversions here so crashes show which pixel formats/dimensions were being processed.
768+
CrashTelemetry::Scope conversionScope(sourceBytes, oldPixels.GetData(), "Pixels::ChangeFormat", telemetryDetail.c_str());
769+
753770
if (oldFormat == newFormat)
754771
{
755772
// No change

Client/mods/deathmatch/StdInc.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
#define MTA_CLIENT
55
#define SHARED_UTIL_WITH_FAST_HASH_MAP
66
#include "SharedUtil.h"
7+
#include <CrashTelemetry.h>
78

89
#include <string.h>
910
#include <stdio.h>

Client/mods/deathmatch/logic/CClientEntity.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -747,6 +747,13 @@ bool CClientEntity::CallEvent(const char* szName, const CLuaArguments& Arguments
747747
if (!g_pClientGame->GetDebugHookManager()->OnPreEvent(szName, Arguments, this, NULL))
748748
return false;
749749

750+
const char* thisTypeName = GetTypeName() ? GetTypeName() : "<unknown>";
751+
const ElementID thisId = GetID();
752+
SString telemetryDetail;
753+
telemetryDetail.Format("%s %s(%u)", szName, thisTypeName, static_cast<unsigned int>(thisId));
754+
// Capture the element+event context so any crash (even core.dll faults) reports the last event being dispatched.
755+
CrashTelemetry::Scope entityScope(0, this, "Entity::CallEvent", telemetryDetail.c_str());
756+
750757
TIMEUS startTime = GetTimeUs();
751758

752759
CEvents* pEvents = g_pClientGame->GetEvents();

Client/mods/deathmatch/logic/CMapEventManager.cpp

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -187,6 +187,30 @@ bool CMapEventManager::Call(const char* szName, const CLuaArguments& Arguments,
187187
if (!g_pClientGame->GetDebugHookManager()->OnPreEventFunction(szName, Arguments, pSource, nullptr, pMapEvent))
188188
continue;
189189

190+
const char* scriptName = pMapEvent->GetVM()->GetScriptName();
191+
const CResource* pEventResource = pMapEvent->GetVM()->GetResource();
192+
const char* resourceName = pEventResource ? pEventResource->GetName().c_str() : "<no-resource>";
193+
194+
SString sourceTag = "<null>";
195+
if (pSource)
196+
{
197+
const char* sourceTypeName = pSource->GetTypeName() ? pSource->GetTypeName() : "<unknown>";
198+
sourceTag.Format("%s(%u)", sourceTypeName, static_cast<unsigned int>(pSource->GetID()));
199+
}
200+
201+
SString thisTag = "<null>";
202+
if (pThis)
203+
{
204+
const char* thisTypeName = pThis->GetTypeName() ? pThis->GetTypeName() : "<unknown>";
205+
thisTag.Format("%s(%u)", thisTypeName, static_cast<unsigned int>(pThis->GetID()));
206+
}
207+
208+
SString telemetryDetail;
209+
telemetryDetail.Format("%s::%s src=%s this=%s", scriptName, resourceName, sourceTag.c_str(), thisTag.c_str());
210+
// Tag each Lua event dispatch so crash dumps show which
211+
// script/resource and source/this elements executed last
212+
CrashTelemetry::Scope eventScope(0, pThis, "LuaEvent::Call", telemetryDetail.c_str());
213+
190214
// Store the current values of the globals
191215
lua_getglobal(pState, "source");
192216
CLuaArgument OldSource(pState, -1);

Client/mods/deathmatch/logic/lua/CLuaArguments.cpp

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -202,6 +202,9 @@ bool CLuaArguments::Call(CLuaMain* pLuaMain, const CLuaFunctionRef& iLuaFunction
202202
assert(pLuaMain);
203203
TIMEUS startTime = GetTimeUs();
204204

205+
const SString& functionTag = pLuaMain->GetFunctionTag(iLuaFunction.ToInt());
206+
CrashTelemetry::Scope telemetryScope(0, pLuaMain, "Lua::Call", functionTag.c_str());
207+
205208
// Add the function name to the stack and get the event from the table
206209
lua_State* luaVM = pLuaMain->GetVirtualMachine();
207210
assert(luaVM);
@@ -244,7 +247,7 @@ bool CLuaArguments::Call(CLuaMain* pLuaMain, const CLuaFunctionRef& iLuaFunction
244247
lua_pop(luaVM, 1);
245248
}
246249

247-
CClientPerfStatLuaTiming::GetSingleton()->UpdateLuaTiming(pLuaMain, pLuaMain->GetFunctionTag(iLuaFunction.ToInt()), GetTimeUs() - startTime);
250+
CClientPerfStatLuaTiming::GetSingleton()->UpdateLuaTiming(pLuaMain, functionTag, GetTimeUs() - startTime);
248251
return true;
249252
}
250253

@@ -254,6 +257,8 @@ bool CLuaArguments::CallGlobal(CLuaMain* pLuaMain, const char* szFunction, CLuaA
254257
assert(szFunction);
255258
TIMEUS startTime = GetTimeUs();
256259

260+
CrashTelemetry::Scope telemetryScope(0, pLuaMain, "Lua::CallGlobal", szFunction);
261+
257262
// Add the function name to the stack and get the event from the table
258263
lua_State* luaVM = pLuaMain->GetVirtualMachine();
259264
assert(luaVM);

0 commit comments

Comments
 (0)