Skip to content

Commit 7a4328e

Browse files
authored
chore: Add more benchmarks (#146)
1 parent e0780bf commit 7a4328e

5 files changed

Lines changed: 507 additions & 0 deletions

File tree

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""Benchmarks for ID generation.
2+
3+
get_span_id and get_trace_id are called on every span creation, so their
4+
cost accumulates in high-throughput tracing workloads. This module
5+
compares the two generators: UUIDGenerator (default) and OTELIDGenerator
6+
(enabled via BRAINTRUST_OTEL_COMPAT=true).
7+
"""
8+
9+
import pathlib
10+
import sys
11+
12+
import pyperf
13+
14+
15+
if __package__ in (None, ""):
16+
sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[2]))
17+
18+
from braintrust.id_gen import OTELIDGenerator, UUIDGenerator
19+
20+
from benchmarks._utils import disable_pyperf_psutil
21+
22+
23+
def main(runner: pyperf.Runner | None = None) -> None:
    """Register span-ID / trace-ID generation benchmarks.

    Creates a fresh ``pyperf.Runner`` (with psutil integration disabled)
    when *runner* is not supplied, then benchmarks both generator classes.
    """
    if runner is None:
        disable_pyperf_psutil()
        runner = pyperf.Runner()

    # Same registration order as before: uuid first, then otel.
    generators = (
        ("uuid", UUIDGenerator()),
        ("otel", OTELIDGenerator()),
    )
    for label, gen in generators:
        runner.bench_func(f"id_gen.{label}.span_id", gen.get_span_id)
        runner.bench_func(f"id_gen.{label}.trace_id", gen.get_trace_id)
35+
36+
37+
# Allow running this benchmark file directly as a script.
if __name__ == "__main__":
    main()
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
"""Benchmarks for merge_dicts.
2+
3+
merge_dicts is called on every span log update and during row merging,
4+
making it one of the most frequently executed SDK functions.
5+
6+
Note: merge_dicts mutates merge_into, so each benchmark wrapper creates a
7+
fresh copy of the target dict before calling. This means each bench_func
8+
measures a shallow/deep copy plus the merge itself — the copy cost is
9+
intentionally kept proportional to the input size so relative comparisons
10+
remain valid.
11+
"""
12+
13+
import copy
14+
import pathlib
15+
import sys
16+
from typing import Any
17+
18+
import pyperf
19+
20+
21+
if __package__ in (None, ""):
22+
sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[2]))
23+
24+
from braintrust.util import merge_dicts
25+
26+
from benchmarks._utils import disable_pyperf_psutil
27+
from benchmarks.fixtures import make_large_payload, make_medium_payload, make_small_payload
28+
29+
30+
# Updates are pre-built once; only merge_into is copied per iteration.
# (Assumes merge_dicts leaves its update argument untouched — the module
# docstring only documents mutation of merge_into; verify if that changes.)
_SMALL_UPDATE: dict[str, Any] = {
    "metadata": {"extra_key": "extra_value"},
    "scores": {"relevance": 0.8},
    "tags": ["new_tag"],
}

_MEDIUM_UPDATE: dict[str, Any] = {
    "metadata": {"workspace_id": "workspace-789", "new_flag": True},
    "metrics": {"cached_tokens": 64},
    "tags": ["updated", "benchmark"],
}

_LARGE_UPDATE: dict[str, Any] = {
    "metadata": {"routing": {"tier": "standard"}, "extra": "value"},
    "metrics": {"cached_tokens": 512},
    "tags": ["updated"],
    "output": {"summary": "revised"},
}

# Pre-built base payloads (copied per iteration, not mutated at module level).
_SMALL_BASE = make_small_payload()
_MEDIUM_BASE = make_medium_payload()
_LARGE_BASE = make_large_payload()

# Deeply nested base/update pair used to exercise the recursive merge path.
_NESTED_BASE: dict[str, Any] = {
    "a": {"b": {"c": {"d": 1, "e": 2}, "f": 3}, "g": 4},
    "h": {"i": {"j": {"k": 5}}},
}
_NESTED_UPDATE: dict[str, Any] = {
    "a": {"b": {"c": {"d": 99}, "new": "value"}, "g": 99},
    "h": {"i": {"j": {"new_key": "hello"}}},
}

# Tags set-union: top-level "tags" field uses set-union semantics in merge_dicts.
_TAGS_UPDATE: dict[str, Any] = {"tags": ["c", "d", "e"]}
66+
67+
68+
def _bench_small() -> None:
    """Merge a small update into a shallow copy of the small base payload."""
    target = dict(_SMALL_BASE)
    merge_dicts(target, _SMALL_UPDATE)


def _bench_medium() -> None:
    """Merge into the medium payload.

    A shallow copy suffices: _MEDIUM_UPDATE only touches top-level dict values.
    """
    target = dict(_MEDIUM_BASE)
    merge_dicts(target, _MEDIUM_UPDATE)


def _bench_large() -> None:
    """Merge a multi-field update into a shallow copy of the large payload."""
    target = dict(_LARGE_BASE)
    merge_dicts(target, _LARGE_UPDATE)


def _bench_nested() -> None:
    """Merge into a deep copy of the nested base.

    A deep copy is required because the update recurses into nested dicts.
    """
    target = copy.deepcopy(_NESTED_BASE)
    merge_dicts(target, _NESTED_UPDATE)


def _bench_tags_union() -> None:
    """Exercise top-level "tags" set-union; the target literal is rebuilt
    every call because merge_dicts grows the tags list in place."""
    merge_dicts({"tags": ["a", "b"], "value": 1}, _TAGS_UPDATE)
89+
90+
91+
def main(runner: pyperf.Runner | None = None) -> None:
    """Register all merge_dicts benchmarks on *runner* (created if absent)."""
    if runner is None:
        disable_pyperf_psutil()
        runner = pyperf.Runner()

    # Table-driven registration; order matches the reported benchmark names.
    benches = (
        ("merge_dicts[small]", _bench_small),
        ("merge_dicts[medium]", _bench_medium),
        ("merge_dicts[large]", _bench_large),
        ("merge_dicts[nested-deep]", _bench_nested),
        ("merge_dicts[tags-union]", _bench_tags_union),
    )
    for label, fn in benches:
        runner.bench_func(label, fn)
101+
102+
103+
# Allow running this benchmark file directly as a script.
if __name__ == "__main__":
    main()
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
"""Benchmarks for merge_row_batch and batch_items.
2+
3+
merge_row_batch is called before every flush to the Braintrust API to
4+
de-duplicate and merge rows in a pending batch. batch_items is used to
5+
split the resulting rows into API-request-sized chunks.
6+
7+
Both functions mutate their inputs, so each benchmark wrapper builds fresh
8+
row lists per iteration.
9+
"""
10+
11+
import pathlib
12+
import sys
13+
14+
import pyperf
15+
16+
17+
if __package__ in (None, ""):
18+
sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[2]))
19+
20+
from braintrust.db_fields import IS_MERGE_FIELD
21+
from braintrust.merge_row_batch import batch_items, merge_row_batch
22+
23+
from benchmarks._utils import disable_pyperf_psutil
24+
25+
26+
# ---------------------------------------------------------------------------
27+
# Row factories — called inside each benchmark wrapper to get fresh dicts.
28+
# ---------------------------------------------------------------------------
29+
30+
31+
def _unique_rows(n: int) -> list[dict]:
    """Build *n* rows with all-distinct IDs, so no merging is needed."""
    rows = []
    for i in range(n):
        rows.append({"id": f"row-{i}", "project_id": "proj-1", "value": i})
    return rows
34+
35+
36+
def _merge_rows(n: int) -> list[dict]:
    """Build n rows forming n//2 pairs: a base row followed by an IS_MERGE update."""
    rows: list[dict] = []
    for i in range(n // 2):
        row_id = f"row-{i}"
        rows.append({"id": row_id, "project_id": "proj-1", "payload": {"a": i}})
        rows.append(
            {
                "id": row_id,
                "project_id": "proj-1",
                "payload": {"b": i + 100},
                IS_MERGE_FIELD: True,
            }
        )
    return rows
50+
51+
52+
def _mixed_rows(n: int) -> list[dict]:
    """Build a mix of merge pairs and unique rows (roughly half each)."""
    rows: list[dict] = []
    for i in range(n // 4):
        # Pair sharing an ID: base row, then an IS_MERGE update for it.
        pair_id = f"merge-{i}"
        rows.append({"id": pair_id, "project_id": "proj-1", "payload": {"a": i}})
        rows.append(
            {
                "id": pair_id,
                "project_id": "proj-1",
                "payload": {"b": i + 100},
                IS_MERGE_FIELD: True,
            }
        )
    # Remaining rows are unique — no merging needed for these.
    rows.extend(
        {"id": f"unique-{i}", "project_id": "proj-1", "value": i}
        for i in range(n // 2)
    )
    return rows
69+
70+
71+
# ---------------------------------------------------------------------------
72+
# Benchmark wrappers
73+
# ---------------------------------------------------------------------------
74+
75+
# Batch sizes exercised by the merge_row_batch wrappers below.
_SMALL_N = 10
_MEDIUM_N = 50
_LARGE_N = 200


def _bench_no_conflict_small() -> None:
    """Merge a small batch where every row ID is unique."""
    batch = _unique_rows(_SMALL_N)
    merge_row_batch(batch)


def _bench_no_conflict_medium() -> None:
    """Merge a medium batch where every row ID is unique."""
    batch = _unique_rows(_MEDIUM_N)
    merge_row_batch(batch)


def _bench_no_conflict_large() -> None:
    """Merge a large batch where every row ID is unique."""
    batch = _unique_rows(_LARGE_N)
    merge_row_batch(batch)


def _bench_all_merge_small() -> None:
    """Merge a small batch built entirely of base/update pairs."""
    batch = _merge_rows(_SMALL_N)
    merge_row_batch(batch)


def _bench_all_merge_medium() -> None:
    """Merge a medium batch built entirely of base/update pairs."""
    batch = _merge_rows(_MEDIUM_N)
    merge_row_batch(batch)


def _bench_mixed_medium() -> None:
    """Merge a medium batch mixing unique rows and merge pairs."""
    batch = _mixed_rows(_MEDIUM_N)
    merge_row_batch(batch)
102+
103+
104+
# batch_items: split a list of strings by item-count and byte-count limits.
_BATCH_STRINGS = [f"item-payload-{i:04d}" * 4 for i in range(200)]
_ITEM_SIZE = len(_BATCH_STRINGS[0].encode())


def _utf8_len(s: str) -> int:
    """Byte size of *s* once UTF-8 encoded (passed as get_byte_size)."""
    return len(s.encode())


def _bench_batch_items_count_limit() -> None:
    """Split by item count only."""
    batch_items(_BATCH_STRINGS, batch_max_num_items=20)


def _bench_batch_items_byte_limit() -> None:
    """Split by byte budget only (~15 items per batch)."""
    batch_items(
        _BATCH_STRINGS,
        batch_max_num_bytes=_ITEM_SIZE * 15,
        get_byte_size=_utf8_len,
    )


def _bench_batch_items_both_limits() -> None:
    """Split with both the item-count and byte-budget limits active."""
    batch_items(
        _BATCH_STRINGS,
        batch_max_num_items=20,
        batch_max_num_bytes=_ITEM_SIZE * 15,
        get_byte_size=_utf8_len,
    )
128+
129+
130+
def main(runner: pyperf.Runner | None = None) -> None:
    """Register merge_row_batch and batch_items benchmarks on *runner*."""
    if runner is None:
        disable_pyperf_psutil()
        runner = pyperf.Runner()

    # Table-driven registration; order matches the reported benchmark names.
    benches = (
        ("merge_row_batch[no-conflict-small]", _bench_no_conflict_small),
        ("merge_row_batch[no-conflict-medium]", _bench_no_conflict_medium),
        ("merge_row_batch[no-conflict-large]", _bench_no_conflict_large),
        ("merge_row_batch[all-merge-small]", _bench_all_merge_small),
        ("merge_row_batch[all-merge-medium]", _bench_all_merge_medium),
        ("merge_row_batch[mixed-medium]", _bench_mixed_medium),
        ("batch_items[count-limit]", _bench_batch_items_count_limit),
        ("batch_items[byte-limit]", _bench_batch_items_byte_limit),
        ("batch_items[both-limits]", _bench_batch_items_both_limits),
    )
    for label, fn in benches:
        runner.bench_func(label, fn)
145+
146+
147+
# Allow running this benchmark file directly as a script.
if __name__ == "__main__":
    main()
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
"""Benchmarks for SpanComponentsV3 and SpanComponentsV4 encode/decode.
2+
3+
These are on the hot path: every span serializes/deserializes parent context.
4+
"""
5+
6+
import pathlib
7+
import secrets
8+
import sys
9+
import uuid
10+
11+
import pyperf
12+
13+
14+
if __package__ in (None, ""):
15+
sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[2]))
16+
17+
from braintrust.span_identifier_v3 import SpanComponentsV3, SpanObjectTypeV3
18+
from braintrust.span_identifier_v4 import SpanComponentsV4
19+
20+
from benchmarks._utils import disable_pyperf_psutil
21+
22+
23+
def main(runner: pyperf.Runner | None = None) -> None:
    """Benchmark encode (to_str) and decode (from_str) for V3 and V4 components."""
    if runner is None:
        disable_pyperf_psutil()
        runner = pyperf.Runner()

    # V3 — UUID-based IDs
    minimal_v3 = SpanComponentsV3(
        object_type=SpanObjectTypeV3.PROJECT_LOGS,
        object_id=str(uuid.uuid4()),
    )
    full_v3 = SpanComponentsV3(
        object_type=SpanObjectTypeV3.EXPERIMENT,
        object_id=str(uuid.uuid4()),
        row_id=str(uuid.uuid4()),
        span_id=str(uuid.uuid4()),
        root_span_id=str(uuid.uuid4()),
    )
    # Encode once up front so the from_str benchmarks measure decode only.
    minimal_v3_encoded = minimal_v3.to_str()
    full_v3_encoded = full_v3.to_str()

    runner.bench_func("span_components.v3.to_str[object-only]", minimal_v3.to_str)
    runner.bench_func("span_components.v3.to_str[full-uuid]", full_v3.to_str)
    runner.bench_func("span_components.v3.from_str[object-only]", SpanComponentsV3.from_str, minimal_v3_encoded)
    runner.bench_func("span_components.v3.from_str[full-uuid]", SpanComponentsV3.from_str, full_v3_encoded)

    # V4 — OTEL hex IDs for span_id (8-byte) and root_span_id (16-byte)
    minimal_v4 = SpanComponentsV4(
        object_type=SpanObjectTypeV3.PROJECT_LOGS,
        object_id=str(uuid.uuid4()),
    )
    full_v4 = SpanComponentsV4(
        object_type=SpanObjectTypeV3.EXPERIMENT,
        object_id=str(uuid.uuid4()),
        row_id=str(uuid.uuid4()),
        span_id=secrets.token_hex(8),
        root_span_id=secrets.token_hex(16),
    )
    minimal_v4_encoded = minimal_v4.to_str()
    full_v4_encoded = full_v4.to_str()

    runner.bench_func("span_components.v4.to_str[object-only]", minimal_v4.to_str)
    runner.bench_func("span_components.v4.to_str[full-otel]", full_v4.to_str)
    runner.bench_func("span_components.v4.from_str[object-only]", SpanComponentsV4.from_str, minimal_v4_encoded)
    runner.bench_func("span_components.v4.from_str[full-otel]", SpanComponentsV4.from_str, full_v4_encoded)

    # Cross-version: V4 decoder reading a V3-encoded string (backwards-compat path)
    runner.bench_func("span_components.v4.from_str[v3-encoded]", SpanComponentsV4.from_str, full_v3_encoded)
70+
71+
72+
# Allow running this benchmark file directly as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)