@@ -69,7 +69,7 @@ For benchmark data, see
6969│ │ recordBuffer.putLong(spanIdOffset, reverseBytes(spanId)) │ │
7070│ │ 3. sidecar[0..9] ← 0 │ │
7171│ │ attrs_data_size ← LRS_ENTRY_SIZE (keeps fixed LRS at [0]) │ │
72- │ │ 4. writeOrderedLong( sidecarBuffer, lrsSidecarOffset, lrs) │ │
72+ │ │ 4. sidecarBuffer.putLong( lrsSidecarOffset, lrs) │ │
7373│ │ writeLrsHex(lrs) — update fixed LRS entry in attrs_data │ │
7474│ │ 5. attach() — storeFence, valid ← 1 │ │
7575│ └───────────────────────────────────────────────────────────────┘ │
@@ -85,8 +85,8 @@ For benchmark data, see
8585│ │ │ attrs_data_size(u16) │ │ ▲ (DD signal handler) │
8686│ │ │ attrs_data[612] │ │ ┌───────────────────────────┐ │
8787│ │ └──────────────────────┘ │ │ TLS pointer (8B) │ │
88- │ └────────────────────────────────┘ │ &custom_labels_current_ │ │
89- │ ▲ ▲ │ set_v2 │ │
88+ │ └────────────────────────────────┘ │ otel_thread_ctx_v1 │ │
89+ │ ▲ ▲ │ (thread_local, DLLEXPORT) │ │
9090│ │ │ └───────────────────────────┘ │
9191│ DD signal handler External OTEP │
9292│ reads span_id profiler reads │
@@ -112,15 +112,15 @@ For benchmark data, see
112112│ ├─ sidecarBuffer (DirectByteBuffer → tag encodings + LRS) │
113113│ ├─ put(lrs, spanId, trHi, trLo) → setContextDirect() │
114114│ ├─ setContextAttribute(keyIdx, value) → setContextAttributeDirect │
115- │ └─ Process-wide caches: │
116- │ └─ attrCache[256 ]: String → {int encoding, byte[] utf8} │
115+ │ └─ Per-thread caches: │
116+ │ └─ attrCache[CACHE_SIZE ]: String → {int encoding, byte[] utf8}│
117117│ │
118118│ BufferWriter (memory ordering abstraction) │
119119│ ├─ BufferWriter8 (Java 8: Unsafe) │
120- │ │ ├─ putOrderedLong / putLongVolatile │
120+ │ │ ├─ putOrderedLong / putOrderedInt │
121121│ │ └─ storeFence → Unsafe.storeFence() │
122122│ └─ BufferWriter9 (Java 9+: VarHandle) │
123- │ ├─ setRelease / setVolatile │
123+ │ ├─ setRelease │
124124│ └─ storeFence → VarHandle.storeStoreFence() │
125125└──────────────────────────────────────────────────────────────────────┘
126126
@@ -134,15 +134,12 @@ For benchmark data, see
134134│ ├─ u64 _otel_local_root_span_id │
135135│ └─ bool _otel_ctx_initialized │
136136│ │
137- │ OtelContexts (otel_context.cpp) │
138- │ └─ getSpanId(record, &spanId) — acquire load of valid flag │
139- │ │
140- │ otel_thread_ctx_v1 (thread_local, DLLEXPORT) │
137+ │ otel_thread_ctx_v1 (thread_local, DLLEXPORT) │
141138│ └─ OTEP #4947 TLS pointer for external profiler discovery │
142139│ │
143140│ Recording::writeCurrentContext(Buffer*) (signal handler) │
144141│ ├─ ContextApi::get(spanId, rootSpanId) │
145- │ │ └─ OtelContexts::getSpanId() with acquire fence │
142+ │ │ └─ acquire load of valid flag, big-endian decode of span_id │
146143│ └─ thrd->getOtelTagEncoding(i) for each attribute │
147144└─────────────────────────────────────────────────────────────────────┘
148145```
@@ -227,7 +224,7 @@ Time 1: Mutate record fields
227224 recordBuffer.putLong(spanIdOffset, ...)
228225 sidecar[0..9] ← 0 ← zero tag encodings
229226 attrs_data_size ← LRS_ENTRY_SIZE ← keep only fixed LRS entry at attrs_data[0]
230- writeOrderedLong( sidecarBuffer, lrsSidecarOffset, lrs) ← update sidecar LRS
227+ sidecarBuffer.putLong(lrsSidecarOffset, lrs) ← update sidecar LRS
231228 writeLrsHex(lrs) ← update LRS in attrs_data
232229
233230 ⚡ SIGPROF may arrive here — handler sees valid=0, skips record
@@ -248,21 +245,24 @@ void Recording::writeCurrentContext(Buffer *buf) {
248245 buf->putVar64(spanId);
249246 buf->putVar64(rootSpanId);
250247
248+ size_t numAttrs = Profiler::instance()->numContextAttributes();
251249 ProfiledThread* thrd = hasContext ? ProfiledThread::currentSignalSafe() : nullptr;
252250 for (size_t i = 0; i < numAttrs; i++) {
253251 buf->putVar32(thrd != nullptr ? thrd->getOtelTagEncoding(i) : 0);
254252 }
255253}
256254```
257255
258- `ContextApi::get()` calls `OtelContexts::getSpanId()` which performs :
256+ `ContextApi::get()` (context_api.cpp) performs:
259257
260258```cpp
261- // otel_context.cpp
259+ OtelThreadContextRecord* record = thrd->getOtelContextRecord();
262260if (__atomic_load_n(&record->valid, __ATOMIC_ACQUIRE) != 1) {
263261 return false; // record is being mutated — emit zeros
264262}
265- span_id = bytesToU64(record->span_id);
263+ u64 val = 0;
264+ for (int i = 0; i < 8; i++) { val = (val << 8) | record->span_id[i]; }
265+ span_id = val;
266266```
267267
268268The acquire load pairs with the writer's `storeFence` + `valid=1`
@@ -376,8 +376,7 @@ encoding = registerConstant0(value); // JNI → Dictionary lookup
376376utf8 = value.getBytes(UTF_8); // one allocation, cached
377377attrCacheEncodings[slot] = encoding;
378378attrCacheBytes[slot] = utf8;
379- BUFFER_WRITER . storeFence(); // publish data before key
380- attrCacheKeys[slot] = value; // commit
379+ attrCacheKeys[slot] = value; // cache is per-thread; no fence needed
381380```
382381
383382`registerConstant0` crosses JNI once to register the value in the
0 commit comments