Skip to content

Commit 21731bd

Browse files
jbachorik and claude committed
Add likely/unlikely hints across hot-path subsystems
Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
1 parent 7f03ffc commit 21731bd

7 files changed

Lines changed: 37 additions & 37 deletions

File tree

ddprof-lib/src/main/cpp/buffers.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class Buffer {
4242
virtual int limit() const { return _limit; }
4343

4444
bool flushIfNeeded(FlushCallback callback, int limit = BUFFER_LIMIT) {
45-
if (_offset > limit) {
45+
if (unlikely(_offset > limit)) {
4646
if (callback(_data, _offset) == _offset) {
4747
reset();
4848
return true;

ddprof-lib/src/main/cpp/codeCache.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@ char *NativeFunc::create(const char *name, short lib_index) {
2525
void NativeFunc::destroy(char *name) { free(from(name)); }
2626

2727
char NativeFunc::read_mark(const char* name) {
28-
if (name == nullptr) {
28+
if (unlikely(name == nullptr)) {
2929
return 0;
3030
}
3131
NativeFunc* func = from(name);
32-
if (!is_aligned(func, sizeof(func))) {
32+
if (unlikely(!is_aligned(func, sizeof(func)))) {
3333
return 0;
3434
}
3535
// Use SafeAccess to read the mark field in signal handler context
@@ -394,7 +394,7 @@ void CodeCache::setDwarfTable(FrameDesc *table, int length) {
394394
}
395395

396396
FrameDesc CodeCache::findFrameDesc(const void *pc) {
397-
if (_dwarf_table == NULL || _dwarf_table_length == 0) {
397+
if (unlikely(_dwarf_table == NULL || _dwarf_table_length == 0)) {
398398
// No DWARF data available - use default frame pointer unwinding
399399
// This handles OpenJ9 and other VMs that don't provide DWARF info
400400
return FrameDesc::default_frame;
@@ -415,7 +415,7 @@ FrameDesc CodeCache::findFrameDesc(const void *pc) {
415415
}
416416
}
417417

418-
if (low > 0) {
418+
if (likely(low > 0)) {
419419
return _dwarf_table[low - 1];
420420
} else if (target_loc - _plt_offset < _plt_size) {
421421
return FrameDesc::empty_frame;

ddprof-lib/src/main/cpp/context.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ Context& Contexts::initializeContextTls() {
4040

4141
Context& Contexts::get() {
4242
ProfiledThread* thrd = ProfiledThread::currentSignalSafe();
43-
if (thrd == nullptr || !thrd->isContextTlsInitialized()) {
43+
if (unlikely(thrd == nullptr || !thrd->isContextTlsInitialized())) {
4444
return DD_EMPTY_CONTEXT;
4545
}
4646
// Return via stored pointer - never access context_tls_v1 from signal handler

ddprof-lib/src/main/cpp/flightRecorder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -794,7 +794,7 @@ void Recording::flush(Buffer *buf) {
794794
}
795795

796796
void Recording::flushIfNeeded(Buffer *buf, int limit) {
797-
if (buf->offset() >= limit) {
797+
if (unlikely(buf->offset() >= limit)) {
798798
flush(buf);
799799
}
800800
}

ddprof-lib/src/main/cpp/linearAllocator.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ void *LinearAllocator::alloc(size_t size) {
138138

139139
// CRITICAL FIX: After detachChunks() fails, _tail may be nullptr.
140140
// We must handle this gracefully to prevent crash.
141-
if (chunk == nullptr) {
141+
if (unlikely(chunk == nullptr)) {
142142
return nullptr;
143143
}
144144

@@ -157,7 +157,7 @@ void *LinearAllocator::alloc(size_t size) {
157157
ASAN_UNPOISON_MEMORY_REGION(allocated_ptr, size);
158158
#endif
159159

160-
if (_chunk_size / 2 - offs < size) {
160+
if (unlikely(_chunk_size / 2 - offs < size)) {
161161
// Stepped over a middle of the chunk - it's time to prepare a new one
162162
reserveChunk(chunk);
163163
}
@@ -171,7 +171,7 @@ void *LinearAllocator::alloc(size_t size) {
171171

172172
Chunk *LinearAllocator::allocateChunk(Chunk *current) {
173173
Chunk *chunk = (Chunk *)OS::safeAlloc(_chunk_size);
174-
if (chunk != NULL) {
174+
if (likely(chunk != NULL)) {
175175
chunk->prev = current;
176176
chunk->offs = sizeof(Chunk);
177177

@@ -208,18 +208,18 @@ void LinearAllocator::reserveChunk(Chunk *current) {
208208
Chunk *LinearAllocator::getNextChunk(Chunk *current) {
209209
Chunk *reserve = _reserve;
210210

211-
if (reserve == current) {
211+
if (unlikely(reserve == current)) {
212212
// Unlikely case: no reserve yet.
213213
// It's probably being allocated right now, so let's compete
214214
reserve = allocateChunk(current);
215-
if (reserve == NULL) {
215+
if (unlikely(reserve == NULL)) {
216216
// Not enough memory
217217
return NULL;
218218
}
219219

220220
Chunk *prev_reserve =
221221
__sync_val_compare_and_swap(&_reserve, current, reserve);
222-
if (prev_reserve != current) {
222+
if (unlikely(prev_reserve != current)) {
223223
freeChunk(reserve);
224224
reserve = prev_reserve;
225225
}

ddprof-lib/src/main/cpp/profiler.cpp

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -762,9 +762,9 @@ u64 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event
762762
atomicIncRelaxed(_total_samples);
763763

764764
u32 lock_index = getLockIndex(tid);
765-
if (!_locks[lock_index].tryLock() &&
765+
if (unlikely(!_locks[lock_index].tryLock() &&
766766
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
767-
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
767+
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock())) {
768768
// Too many concurrent signals already
769769
atomicIncRelaxed(_failures[-ticks_skipped]);
770770

@@ -809,9 +809,9 @@ void Profiler::recordDeferredSample(int tid, u64 call_trace_id, jint event_type,
809809
atomicIncRelaxed(_total_samples);
810810

811811
u32 lock_index = getLockIndex(tid);
812-
if (!_locks[lock_index].tryLock() &&
812+
if (unlikely(!_locks[lock_index].tryLock() &&
813813
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
814-
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
814+
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock())) {
815815
// Too many concurrent signals already
816816
atomicIncRelaxed(_failures[-ticks_skipped]);
817817
return;
@@ -827,9 +827,9 @@ void Profiler::recordSample(void *ucontext, u64 counter, int tid,
827827
atomicIncRelaxed(_total_samples);
828828

829829
u32 lock_index = getLockIndex(tid);
830-
if (!_locks[lock_index].tryLock() &&
830+
if (unlikely(!_locks[lock_index].tryLock() &&
831831
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
832-
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
832+
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock())) {
833833
// Too many concurrent signals already
834834
atomicIncRelaxed(_failures[-ticks_skipped]);
835835

@@ -847,7 +847,7 @@ void Profiler::recordSample(void *ucontext, u64 counter, int tid,
847847
// record a null stacktrace we can skip the unwind if we've got a
848848
// call_trace_id determined to be reusable at a higher level
849849

850-
if (!_omit_stacktraces && call_trace_id == 0) {
850+
if (likely(!_omit_stacktraces && call_trace_id == 0)) {
851851
u64 startTime = TSC::ticks();
852852
ASGCT_CallFrame *frames = _calltrace_buffer[lock_index]->_asgct_frames;
853853

@@ -857,7 +857,7 @@ void Profiler::recordSample(void *ucontext, u64 counter, int tid,
857857
ASGCT_CallFrame *native_stop = frames + num_frames;
858858
num_frames += getNativeTrace(ucontext, native_stop, event_type, tid,
859859
&java_ctx, &truncated, lock_index);
860-
if (num_frames < _max_stack_depth) {
860+
if (likely(num_frames < _max_stack_depth)) {
861861
int max_remaining = _max_stack_depth - num_frames;
862862
if (_features.mixed) {
863863
int vm_start = num_frames;
@@ -885,14 +885,14 @@ void Profiler::recordSample(void *ucontext, u64 counter, int tid,
885885
}
886886
}
887887
}
888-
if (num_frames == 0) {
888+
if (unlikely(num_frames == 0)) {
889889
num_frames += makeFrame(frames + num_frames, BCI_ERROR, "no_Java_frame");
890890
}
891891

892892
call_trace_id =
893893
_call_trace_storage.put(num_frames, frames, truncated, counter);
894894
ProfiledThread *thread = ProfiledThread::currentSignalSafe();
895-
if (thread != nullptr) {
895+
if (likely(thread != nullptr)) {
896896
thread->recordCallTraceId(call_trace_id);
897897
}
898898
u64 duration = TSC::ticks() - startTime;
@@ -907,9 +907,9 @@ void Profiler::recordSample(void *ucontext, u64 counter, int tid,
907907

908908
void Profiler::recordWallClockEpoch(int tid, WallClockEpochEvent *event) {
909909
u32 lock_index = getLockIndex(tid);
910-
if (!_locks[lock_index].tryLock() &&
910+
if (unlikely(!_locks[lock_index].tryLock() &&
911911
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
912-
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
912+
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock())) {
913913
return;
914914
}
915915
_jfr.wallClockEpoch(lock_index, event);
@@ -918,9 +918,9 @@ void Profiler::recordWallClockEpoch(int tid, WallClockEpochEvent *event) {
918918

919919
void Profiler::recordTraceRoot(int tid, TraceRootEvent *event) {
920920
u32 lock_index = getLockIndex(tid);
921-
if (!_locks[lock_index].tryLock() &&
921+
if (unlikely(!_locks[lock_index].tryLock() &&
922922
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
923-
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
923+
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock())) {
924924
return;
925925
}
926926
_jfr.recordTraceRoot(lock_index, tid, event);
@@ -929,9 +929,9 @@ void Profiler::recordTraceRoot(int tid, TraceRootEvent *event) {
929929

930930
void Profiler::recordQueueTime(int tid, QueueTimeEvent *event) {
931931
u32 lock_index = getLockIndex(tid);
932-
if (!_locks[lock_index].tryLock() &&
932+
if (unlikely(!_locks[lock_index].tryLock() &&
933933
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
934-
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
934+
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock())) {
935935
return;
936936
}
937937
_jfr.recordQueueTime(lock_index, tid, event);
@@ -955,9 +955,9 @@ void Profiler::recordExternalSample(u64 weight, int tid, int num_frames,
955955

956956
u64 call_trace_id =
957957
_call_trace_storage.put(num_frames, extended_frames, truncated, weight);
958-
if (!_locks[lock_index].tryLock() &&
958+
if (unlikely(!_locks[lock_index].tryLock() &&
959959
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
960-
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
960+
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock())) {
961961
// Too many concurrent signals already
962962
atomicIncRelaxed(_failures[-ticks_skipped]);
963963
return;
@@ -980,9 +980,9 @@ void Profiler::writeDatadogProfilerSetting(int tid, int length,
980980
const char *name, const char *value,
981981
const char *unit) {
982982
u32 lock_index = getLockIndex(tid);
983-
if (!_locks[lock_index].tryLock() &&
983+
if (unlikely(!_locks[lock_index].tryLock() &&
984984
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
985-
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
985+
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock())) {
986986
return;
987987
}
988988
_jfr.recordDatadogSetting(lock_index, length, name, value, unit);
@@ -995,9 +995,9 @@ void Profiler::writeHeapUsage(long value, bool live) {
995995
return;
996996
}
997997
u32 lock_index = getLockIndex(tid);
998-
if (!_locks[lock_index].tryLock() &&
998+
if (unlikely(!_locks[lock_index].tryLock() &&
999999
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
1000-
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
1000+
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock())) {
10011001
return;
10021002
}
10031003
_jfr.recordHeapUsage(lock_index, value, live);

ddprof-lib/src/main/cpp/threadFilter.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ ThreadFilter::SlotID ThreadFilter::registerThread() {
104104

105105
// Allocate a new slot
106106
SlotID index = _next_index.fetch_add(1, std::memory_order_relaxed);
107-
if (index >= kMaxThreads) {
107+
if (unlikely(index >= kMaxThreads)) {
108108
// Revert the increment and return failure
109109
_next_index.fetch_sub(1, std::memory_order_relaxed);
110110
return -1;
@@ -113,7 +113,7 @@ ThreadFilter::SlotID ThreadFilter::registerThread() {
113113
const int chunk_idx = index >> kChunkShift;
114114

115115
// Ensure the chunk is initialized (lock-free)
116-
if (chunk_idx >= _num_chunks.load(std::memory_order_acquire)) {
116+
if (unlikely(chunk_idx >= _num_chunks.load(std::memory_order_acquire))) {
117117
// Update the chunk count atomically
118118
int expected_chunks = chunk_idx;
119119
int desired_chunks = chunk_idx + 1;

0 commit comments

Comments (0)