Skip to content

Commit d662368

Browse files
jbachorikclaude
andcommitted
fix: close query-only sigaction loop and fix debug-mode SIGABRT
Extend sigaction interception to cover query-only calls (sigaction(SIGSEGV/SIGBUS, nullptr, &oldact)): return the saved JVM handler instead of ours, so callers that store oldact and later chain to it don't loop back into our handler. Fix intermittent SIGABRT in debug builds on aarch64 GraalVM: extend crashProtectionActive() with a VMThread::isExceptionActive() fallback so the cast_to() assert no longer fires for threads without ProfiledThread TLS when setjmp crash protection is already active in walkVM. Add OS::resetSignalHandlersForTesting() to prevent static state from leaking between sigaction interception unit tests. Add ASCII flow diagram to sigaction_hook documenting the full handler chain and interception cases. Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
1 parent 11061e5 commit d662368

8 files changed

Lines changed: 133 additions & 24 deletions

File tree

build-logic/conventions/src/main/kotlin/com/datadoghq/profiler/ProfilerTestPlugin.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,7 @@ class ProfilerTestPlugin : Plugin<Project> {
569569
val profilerLibProject = project.rootProject.findProject(profilerLibProjectPath)
570570

571571
if (profilerLibProject != null && testTask != null) {
572-
// Wire test -> gtest dependency (C++ unit tests run before Java integration tests)
572+
// gtest runs before test (C++ unit tests run before Java integration tests)
573573
val gtestTaskName = "gtest${capitalizedCfgName}"
574574
try {
575575
val gtestTask = profilerLibProject.tasks.named(gtestTaskName)

ddprof-lib/src/main/cpp/os.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ class OS {
157157
static SigAction getSegvChainTarget();
158158
static SigAction getBusChainTarget();
159159
static void* getSigactionHook();
160+
static void resetSignalHandlersForTesting();
160161

161162
static int getMaxThreadId(int floor) {
162163
int maxThreadId = getMaxThreadId();

ddprof-lib/src/main/cpp/os_linux.cpp

Lines changed: 95 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -756,10 +756,10 @@ void OS::protectSignalHandlers(SigAction segvHandler, SigAction busHandler) {
756756
// Save the current (JVM's) signal handlers BEFORE we install ours.
757757
// These will be returned as oldact when we intercept other libraries' sigaction calls,
758758
// so they chain to JVM instead of back to us (which would cause infinite loops).
759-
if (!_orig_handlers_saved && _real_sigaction != nullptr) {
759+
if (!__atomic_load_n(&_orig_handlers_saved, __ATOMIC_ACQUIRE) && _real_sigaction != nullptr) {
760760
_real_sigaction(SIGSEGV, nullptr, &_orig_segv_sigaction);
761761
_real_sigaction(SIGBUS, nullptr, &_orig_bus_sigaction);
762-
_orig_handlers_saved = true;
762+
__atomic_store_n(&_orig_handlers_saved, true, __ATOMIC_RELEASE);
763763
}
764764
_protected_segv_handler = segvHandler;
765765
_protected_bus_handler = busHandler;
@@ -773,7 +773,67 @@ SigAction OS::getBusChainTarget() {
773773
return __atomic_load_n(&_bus_chain_target, __ATOMIC_ACQUIRE);
774774
}
775775

776-
// sigaction hook - called via GOT patching to intercept sigaction calls
776+
// sigaction_hook - intercepts sigaction(2) calls from any library via GOT patching.
777+
//
778+
// PROBLEM SOLVED
779+
// ==============
780+
// Without interception, a library (e.g. wasmtime) can overwrite our SIGSEGV handler:
781+
//
782+
// Before: kernel --> our_handler --> JVM_handler
783+
// After lib calls sigaction(SIGSEGV, lib_handler, &oldact):
784+
// kernel --> lib_handler
785+
// lib_handler stores oldact = our_handler as its chain target
786+
// => when lib chains on unhandled fault: lib_handler --> our_handler --> lib_handler --> ...
787+
// INFINITE LOOP
788+
//
789+
// HANDLER CHAIN AFTER SETUP
790+
// ==========================
791+
//
792+
// protectSignalHandlers() replaceSigsegvHandler() LibraryPatcher::patch_sigaction()
793+
// | | |
794+
// v v v
795+
// save JVM handler install our_handler GOT-patch sigaction
796+
// into _orig_segv_sigaction as real OS handler => all future sigaction()
797+
// calls go through us
798+
//
799+
// Signal delivery chain:
800+
//
801+
// kernel
802+
// |
803+
// v
804+
// our_handler (installed via replaceSigsegvHandler, never displaced)
805+
// |
806+
// +-- handled by us? --> done
807+
// |
808+
// v (not handled)
809+
// _segv_chain_target (lib_handler, set when we intercepted lib's sigaction call)
810+
// |
811+
// +-- handled by lib? --> done
812+
// |
813+
// v (lib chains to its saved oldact)
814+
// _orig_segv_sigaction (JVM's original handler, what we returned as oldact to lib)
815+
// |
816+
// v
817+
// JVM handles or terminates
818+
//
819+
// INTERCEPTION LOGIC (this function)
820+
// ===================================
821+
// Case 1 - Install call [act != nullptr, SA_SIGINFO]:
822+
// - Save lib's handler as _segv_chain_target (we'll call it if we can't handle)
823+
// - Return _orig_segv_sigaction as oldact (NOT our handler, to break the loop)
824+
// - Do NOT actually install lib's handler (keep ours on top)
825+
//
826+
// Case 2 - Query-only call [act == nullptr, oldact != nullptr]:
827+
// - Return _orig_segv_sigaction as oldact (same reason: lib must not see our handler)
828+
// - A lib that queries, stores the result, then uses it as a chain target would
829+
// loop if we returned our handler here.
830+
//
831+
// Case 3 - 1-arg handler [act != nullptr, no SA_SIGINFO]:
832+
// - Pass through: we cannot safely chain 1-arg handlers (different calling convention)
833+
//
834+
// Case 4 - Any other signal, or protection not yet active:
835+
// - Pass through to real sigaction unchanged.
836+
//
777837
static int sigaction_hook(int signum, const struct sigaction* act, struct sigaction* oldact) {
778838
// _real_sigaction must be resolved before any GOT patching happens
779839
if (_real_sigaction == nullptr) {
@@ -782,20 +842,23 @@ static int sigaction_hook(int signum, const struct sigaction* act, struct sigact
782842
}
783843

784844
// If this is SIGSEGV or SIGBUS and we have protected handlers installed,
785-
// intercept the call to keep our handler on top
786-
if (act != nullptr) {
787-
if (signum == SIGSEGV && _protected_segv_handler != nullptr) {
788-
// Only intercept SA_SIGINFO handlers (3-arg form) for safe chaining
845+
// intercept the call to keep our handler on top.
846+
// We intercept both install calls (act != nullptr) and query-only calls (act == nullptr)
847+
// to ensure callers always see the JVM's original handler, never ours.
848+
// A caller that gets our handler as oldact and later chains to it would cause an
849+
// infinite loop: us -> them -> us -> ...
850+
if (signum == SIGSEGV && _protected_segv_handler != nullptr) {
851+
if (act != nullptr) {
852+
// Install call: only intercept SA_SIGINFO handlers (3-arg form) for safe chaining
789853
if (act->sa_flags & SA_SIGINFO) {
790854
SigAction new_handler = act->sa_sigaction;
791855
// Don't intercept if it's our own handler being installed
792856
if (new_handler != _protected_segv_handler) {
793857
// Save their handler as our chain target
794858
__atomic_exchange_n(&_segv_chain_target, new_handler, __ATOMIC_ACQ_REL);
795859
if (oldact != nullptr) {
796-
// Return the original (JVM's) handler, not our handler.
797-
// This way, when the intercepted library chains to "previous",
798-
// it goes to JVM, not back to us (which would cause infinite loops).
860+
// Return the original (JVM's) handler, not ours, to prevent
861+
// the caller from chaining back to us.
799862
*oldact = _orig_segv_sigaction;
800863
}
801864
Counters::increment(SIGACTION_INTERCEPTED);
@@ -804,19 +867,28 @@ static int sigaction_hook(int signum, const struct sigaction* act, struct sigact
804867
}
805868
}
806869
// Let 1-arg handlers (without SA_SIGINFO) pass through - we can't safely chain them
807-
} else if (signum == SIGBUS && _protected_bus_handler != nullptr) {
870+
} else if (oldact != nullptr) {
871+
// Query-only call: return the JVM's original handler, not ours.
872+
// Same reason: a caller that stores our handler and later chains to it causes loops.
873+
*oldact = _orig_segv_sigaction;
874+
return 0;
875+
}
876+
} else if (signum == SIGBUS && _protected_bus_handler != nullptr) {
877+
if (act != nullptr) {
808878
if (act->sa_flags & SA_SIGINFO) {
809879
SigAction new_handler = act->sa_sigaction;
810880
if (new_handler != _protected_bus_handler) {
811881
__atomic_exchange_n(&_bus_chain_target, new_handler, __ATOMIC_ACQ_REL);
812882
if (oldact != nullptr) {
813-
// Return the original (JVM's) handler, not our handler.
814883
*oldact = _orig_bus_sigaction;
815884
}
816885
Counters::increment(SIGACTION_INTERCEPTED);
817886
return 0;
818887
}
819888
}
889+
} else if (oldact != nullptr) {
890+
*oldact = _orig_bus_sigaction;
891+
return 0;
820892
}
821893
}
822894

@@ -828,4 +900,15 @@ void* OS::getSigactionHook() {
828900
return (void*)sigaction_hook;
829901
}
830902

903+
void OS::resetSignalHandlersForTesting() {
904+
__atomic_store_n(&_orig_handlers_saved, false, __ATOMIC_RELEASE);
905+
memset(&_orig_segv_sigaction, 0, sizeof(_orig_segv_sigaction));
906+
memset(&_orig_bus_sigaction, 0, sizeof(_orig_bus_sigaction));
907+
_protected_segv_handler = nullptr;
908+
_protected_bus_handler = nullptr;
909+
__atomic_store_n(&_segv_chain_target, (SigAction)nullptr, __ATOMIC_RELEASE);
910+
__atomic_store_n(&_bus_chain_target, (SigAction)nullptr, __ATOMIC_RELEASE);
911+
// _real_sigaction is intentionally not reset: safe to reuse across tests
912+
}
913+
831914
#endif // __linux__

ddprof-lib/src/main/cpp/os_macos.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,4 +500,8 @@ void* OS::getSigactionHook() {
500500
return nullptr; // No sigaction interception on macOS
501501
}
502502

503+
void OS::resetSignalHandlersForTesting() {
504+
// No-op: no sigaction interception state on macOS
505+
}
506+
503507
#endif // __APPLE__

ddprof-lib/src/main/cpp/profiler.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ static void (*orig_trapHandler)(int signo, siginfo_t *siginfo, void *ucontext);
5151
static void (*orig_segvHandler)(int signo, siginfo_t *siginfo, void *ucontext);
5252
static void (*orig_busHandler)(int signo, siginfo_t *siginfo, void *ucontext);
5353

54-
5554
static Engine noop_engine;
5655
static PerfEvents perf_events;
5756
static WallClockASGCT wall_asgct_engine;
@@ -1143,11 +1142,6 @@ int Profiler::crashHandlerInternal(int signo, siginfo_t *siginfo, void *ucontext
11431142
return 0; // not handled, safe to chain
11441143
}
11451144

1146-
// Legacy wrapper for external callers (if any)
1147-
bool Profiler::crashHandler(int signo, siginfo_t *siginfo, void *ucontext) {
1148-
return crashHandlerInternal(signo, siginfo, ucontext) > 0;
1149-
}
1150-
11511145
void Profiler::setupSignalHandlers() {
11521146
// Do not re-run the signal setup (run only when VM has not been loaded yet)
11531147
if (__sync_bool_compare_and_swap(&_signals_initialized, false, true)) {

ddprof-lib/src/main/cpp/profiler.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,6 @@ class alignas(alignof(SpinLock)) Profiler {
193193
void lockAll();
194194
void unlockAll();
195195

196-
static bool crashHandler(int signo, siginfo_t *siginfo, void *ucontext);
197196
static int crashHandlerInternal(int signo, siginfo_t *siginfo, void *ucontext);
198197
static void check_JDK_8313796_workaround();
199198

ddprof-lib/src/main/cpp/vmStructs.h

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,11 @@ class VMNMethod;
3131
// sending SIGABRT which is uncatchable by crash protection.
3232
// When crash protection is active the assert is redundant — any bad read will
3333
// be caught by the SIGSEGV handler and recovered via longjmp — so we skip it.
34-
inline bool crashProtectionActive() {
35-
ProfiledThread* pt = ProfiledThread::currentSignalSafe();
36-
return pt != nullptr && pt->isCrashProtectionActive();
37-
}
34+
//
35+
// Defined at the bottom of this file after VMThread is declared so that the
36+
// VMThread fallback path (isExceptionActive) is accessible without forward-
37+
// declaring the full class.
38+
inline bool crashProtectionActive();
3839

3940
template <typename T>
4041
inline T* cast_to(const void* ptr) {
@@ -788,6 +789,17 @@ DECLARE(VMThread)
788789
return *(void**) at(_thread_exception_offset);
789790
}
790791

792+
// Returns true if setjmp crash protection is currently active for this thread.
793+
// Reads the exception field via direct pointer arithmetic, deliberately bypassing
794+
// at() and its crashProtectionActive() assertion to avoid infinite recursion.
795+
// Safe because 'this' is the current live thread (we are in its signal handler).
796+
static bool isExceptionActive() {
797+
if (_thread_exception_offset < 0) return false;
798+
VMThread* vt = current();
799+
if (vt == nullptr) return false;
800+
return *(const void* const*)((const char*)vt + _thread_exception_offset) != nullptr;
801+
}
802+
791803
NOADDRSANITIZE VMJavaFrameAnchor* anchor() {
792804
if (!cachedIsJavaThread()) return NULL;
793805
assert(_thread_anchor_offset >= 0);
@@ -1123,4 +1135,18 @@ class InterpreterFrame : VMStructs {
11231135
}
11241136
};
11251137

1138+
// Defined here (after VMThread) so the VMThread::isExceptionActive() fallback
1139+
// is accessible. The forward declaration at the top of this file allows cast_to()
1140+
// to reference it before VMThread is declared.
1141+
inline bool crashProtectionActive() {
1142+
ProfiledThread* pt = ProfiledThread::currentSignalSafe();
1143+
if (pt != nullptr && pt->isCrashProtectionActive()) return true;
1144+
// Fallback for threads without ProfiledThread TLS (e.g. JVM internal threads):
1145+
// if walkVM has set up setjmp protection via vm_thread->exception(), the assert
1146+
// is equally redundant — any bad read will be caught by the SIGSEGV handler.
1147+
// Uses VMThread::isExceptionActive() which reads the field directly without
1148+
// going through at() to avoid recursive assertion.
1149+
return VMThread::key() >= 0 && VMThread::isExceptionActive();
1150+
}
1151+
11261152
#endif // _VMSTRUCTS_H

ddprof-lib/src/test/cpp/sigaction_interception_ut.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ class SigactionInterceptionTest : public ::testing::Test {
123123
void TearDown() override {
124124
// Restore original SIGSEGV handler
125125
sigaction(SIGSEGV, &saved_segv_action, nullptr);
126+
// Reset static interception state so tests don't bleed into each other
127+
OS::resetSignalHandlersForTesting();
126128
}
127129
};
128130

0 commit comments

Comments
 (0)