Skip to content

Commit 5d76276

Browse files
committed
feat: add optimization for duration
1 parent 288336e commit 5d76276

3 files changed

Lines changed: 731 additions & 0 deletions

File tree

packages/optimization/src/ldai_optimization/client.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
OptimizationResultPayload,
3434
)
3535
from ldai_optimization.prompts import (
36+
_acceptance_criteria_implies_duration_optimization,
3637
build_message_history_text,
3738
build_new_variation_prompt,
3839
build_reasoning_history,
@@ -80,6 +81,12 @@ def _compute_validation_count(pool_size: int) -> int:
8081
# the variation step is treated as a failure.
8182
_MAX_VARIATION_RETRIES = 3
8283

84+
# Duration gate: a candidate's duration must fall below this fraction of the baseline
85+
# (history[0].duration_ms) to pass the duration check when acceptance criteria
86+
# imply a latency optimization goal. 0.80 means the candidate must clock in at
87+
# under 80% of the baseline — i.e. at least 20% improvement.
88+
_DURATION_TOLERANCE = 0.80
89+
8390
# Maps SDK status strings to the API status/activity values expected by
8491
# agent_optimization_result records. Defined at module level to avoid
8592
# allocating the dict on every on_status_update invocation.
@@ -328,6 +335,7 @@ async def _call_judges(
328335
variables: Optional[Dict[str, Any]] = None,
329336
agent_tools: Optional[List[ToolDefinition]] = None,
330337
expected_response: Optional[str] = None,
338+
agent_duration_ms: Optional[float] = None,
331339
) -> Dict[str, JudgeResult]:
332340
"""
333341
Call all judges in parallel (auto-path).
@@ -344,6 +352,9 @@ async def _call_judges(
344352
:param agent_tools: Normalised list of tool dicts that were available to the agent
345353
:param expected_response: Optional ground truth expected response. When provided,
346354
judges are instructed to factor it into their scoring alongside acceptance criteria.
355+
:param agent_duration_ms: Wall-clock duration of the agent call in milliseconds.
356+
Forwarded to acceptance judges whose statement implies a latency goal so they
357+
can mention the duration change in their rationale.
347358
:return: Dictionary of judge results (score and rationale)
348359
"""
349360
if not self._options.judges:
@@ -396,6 +407,7 @@ async def _call_judges(
396407
variables=resolved_variables,
397408
agent_tools=resolved_agent_tools,
398409
expected_response=expected_response,
410+
agent_duration_ms=agent_duration_ms,
399411
)
400412
judge_results[judge_key] = result
401413

@@ -613,6 +625,7 @@ async def _evaluate_acceptance_judge(
613625
variables: Optional[Dict[str, Any]] = None,
614626
agent_tools: Optional[List[ToolDefinition]] = None,
615627
expected_response: Optional[str] = None,
628+
agent_duration_ms: Optional[float] = None,
616629
) -> JudgeResult:
617630
"""
618631
Evaluate using an acceptance statement judge.
@@ -627,6 +640,9 @@ async def _evaluate_acceptance_judge(
627640
:param agent_tools: Normalised list of tool dicts that were available to the agent
628641
:param expected_response: Optional ground truth expected response. When provided,
629642
injected into instructions and judge message so the judge can score actual vs. expected.
643+
:param agent_duration_ms: Wall-clock duration of the agent call in milliseconds.
644+
When the acceptance statement implies a latency goal, the judge is instructed
645+
to mention the duration change in its rationale.
630646
:return: The judge result with score and rationale
631647
"""
632648
if not optimization_judge.acceptance_statement:
@@ -662,6 +678,32 @@ async def _evaluate_acceptance_judge(
662678
'Example: {"score": 0.8, "rationale": "The response matches the acceptance statement well."}'
663679
)
664680

681+
if (
682+
agent_duration_ms is not None
683+
and _acceptance_criteria_implies_duration_optimization(
684+
{judge_key: optimization_judge}
685+
)
686+
):
687+
baseline_ms = (
688+
self._history[0].duration_ms
689+
if self._history and self._history[0].duration_ms is not None
690+
else None
691+
)
692+
instructions += (
693+
f"\n\nThe acceptance criteria for this judge includes a latency/duration goal. "
694+
f"The agent's response took {agent_duration_ms:.0f}ms to generate. "
695+
)
696+
if baseline_ms is not None:
697+
delta_ms = agent_duration_ms - baseline_ms
698+
direction = "faster" if delta_ms < 0 else "slower"
699+
instructions += (
700+
f"The baseline duration (first iteration) was {baseline_ms:.0f}ms. "
701+
f"This response was {abs(delta_ms):.0f}ms {direction} than the baseline. "
702+
)
703+
instructions += (
704+
"Please mention the duration and any change from baseline in your rationale."
705+
)
706+
665707
if resolved_variables:
666708
instructions += f"\n\nThe following variables were available to the agent: {json.dumps(resolved_variables)}"
667709

@@ -911,6 +953,11 @@ async def _run_ground_truth_optimization(
911953
else:
912954
sample_passed = self._evaluate_response(optimize_context)
913955

956+
if sample_passed and _acceptance_criteria_implies_duration_optimization(
957+
self._options.judges
958+
):
959+
sample_passed = self._evaluate_duration(optimize_context)
960+
914961
if not sample_passed:
915962
logger.info(
916963
"[GT Attempt %d] -> Sample %d/%d FAILED",
@@ -1147,6 +1194,9 @@ async def _generate_new_variation(
11471194
)
11481195
self._safe_status_update("generating variation", status_ctx, iteration)
11491196

1197+
optimize_for_duration = _acceptance_criteria_implies_duration_optimization(
1198+
self._options.judges
1199+
)
11501200
instructions = build_new_variation_prompt(
11511201
self._history,
11521202
self._options.judges,
@@ -1156,6 +1206,7 @@ async def _generate_new_variation(
11561206
self._options.model_choices,
11571207
self._options.variable_choices,
11581208
self._initial_instructions,
1209+
optimize_for_duration=optimize_for_duration,
11591210
)
11601211

11611212
# Create a flat history list (without nested history) to avoid exponential growth
@@ -1486,6 +1537,7 @@ async def _execute_agent_turn(
14861537
variables=optimize_context.current_variables,
14871538
agent_tools=agent_tools,
14881539
expected_response=expected_response,
1540+
agent_duration_ms=agent_duration_ms,
14891541
)
14901542

14911543
return dataclasses.replace(
@@ -1523,6 +1575,38 @@ def _evaluate_response(self, optimize_context: OptimizationContext) -> bool:
15231575

15241576
return True
15251577

1578+
def _evaluate_duration(self, optimize_context: OptimizationContext) -> bool:
    """
    Check whether the candidate's duration meets the improvement target vs. the baseline.

    The baseline is history[0].duration_ms — the first completed iteration,
    i.e. the original unoptimized configuration's latency. To pass, the
    candidate's duration must be below baseline * _DURATION_TOLERANCE
    (0.80 → at least a 20% improvement).

    Missing timing data never blocks a candidate: when there is no history,
    the baseline duration was not recorded, or the candidate's own
    duration_ms was not captured, the check passes trivially.

    :param optimize_context: The completed turn context containing duration_ms
    :return: True if the duration requirement is met or cannot be checked
    """
    # Trivially pass when either side of the comparison is unavailable.
    if not self._history:
        return True
    baseline_ms = self._history[0].duration_ms
    candidate_ms = optimize_context.duration_ms
    if baseline_ms is None or candidate_ms is None:
        return True

    threshold_ms = baseline_ms * _DURATION_TOLERANCE
    if candidate_ms < threshold_ms:
        return True

    logger.warning(
        "[Iteration %d] -> Duration check failed: %.0fms >= baseline %.0fms * %.0f%% (%.0fms)",
        optimize_context.iteration,
        candidate_ms,
        baseline_ms,
        _DURATION_TOLERANCE * 100,
        threshold_ms,
    )
    return False
1609+
15261610
def _handle_success(
15271611
self, optimize_context: OptimizationContext, iteration: int
15281612
) -> Any:
@@ -1691,6 +1775,11 @@ async def _run_validation_phase(
16911775
else:
16921776
sample_passed = self._evaluate_response(val_ctx)
16931777

1778+
if sample_passed and _acceptance_criteria_implies_duration_optimization(
1779+
self._options.judges
1780+
):
1781+
sample_passed = self._evaluate_duration(val_ctx)
1782+
16941783
last_ctx = val_ctx
16951784

16961785
if not sample_passed:
@@ -1798,6 +1887,11 @@ async def _run_optimization(
17981887
iteration,
17991888
)
18001889

1890+
if initial_passed and _acceptance_criteria_implies_duration_optimization(
1891+
self._options.judges
1892+
):
1893+
initial_passed = self._evaluate_duration(optimize_context)
1894+
18011895
if initial_passed:
18021896
all_valid, last_ctx = await self._run_validation_phase(
18031897
optimize_context, iteration

packages/optimization/src/ldai_optimization/prompts.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,42 @@
11
"""Prompt-building functions for LaunchDarkly AI optimization."""
22

3+
import re
34
from typing import Any, Dict, List, Optional
45

56
from ldai_optimization.dataclasses import (
67
OptimizationContext,
78
OptimizationJudge,
89
)
910

11+
_DURATION_KEYWORDS = re.compile(
    r"\b(fast|faster|quickly|quick|latency|low-latency|duration|response\s+time|"
    r"time\s+to\s+respond|milliseconds|performant|snappy|efficient|seconds)\b|"
    r"(?<![a-zA-Z])ms\b",
    re.IGNORECASE,
)


def _acceptance_criteria_implies_duration_optimization(
    judges: Optional[Dict[str, OptimizationJudge]],
) -> bool:
    """Return True if any judge acceptance statement implies a latency optimization goal.

    Each judge's acceptance_statement is scanned (case-insensitively) for
    latency-related keywords. When judges is None/empty, or no judge carries
    an acceptance statement with such a keyword, the result is False.

    :param judges: Judge configuration dict from OptimizationOptions, or None.
    :return: True if duration optimization should be applied.
    """
    if not judges:
        return False
    statements = (judge.acceptance_statement for judge in judges.values())
    return any(text and _DURATION_KEYWORDS.search(text) for text in statements)
39+
1040

1141
def build_message_history_text(
1242
history: List[OptimizationContext],
@@ -82,6 +112,7 @@ def build_new_variation_prompt(
82112
model_choices: List[str],
83113
variable_choices: List[Dict[str, Any]],
84114
initial_instructions: str,
115+
optimize_for_duration: bool = False,
85116
) -> str:
86117
"""
87118
Build the LLM prompt for generating an improved agent configuration.
@@ -99,6 +130,8 @@ def build_new_variation_prompt(
99130
:param model_choices: List of model IDs the LLM may select from
100131
:param variable_choices: List of variable dicts (used to derive placeholder names)
101132
:param initial_instructions: The original unmodified instructions template
133+
:param optimize_for_duration: When True, appends a duration optimization section
134+
instructing the LLM to prefer faster models and simpler instructions.
102135
:return: The assembled prompt string
103136
"""
104137
sections = [
@@ -112,6 +145,7 @@ def build_new_variation_prompt(
112145
variation_prompt_improvement_instructions(
113146
history, model_choices, variable_choices, initial_instructions
114147
),
148+
variation_prompt_duration_optimization(model_choices) if optimize_for_duration else "",
115149
]
116150

117151
return "\n\n".join(s for s in sections if s)
@@ -211,6 +245,8 @@ def variation_prompt_configuration(
211245
if previous_ctx.user_input:
212246
lines.append(f"User question: {previous_ctx.user_input}")
213247
lines.append(f"Agent response: {previous_ctx.completion_response}")
248+
if previous_ctx.duration_ms is not None:
249+
lines.append(f"Agent duration: {previous_ctx.duration_ms:.0f}ms")
214250
return "\n".join(lines)
215251
else:
216252
return "\n".join(
@@ -262,6 +298,8 @@ def variation_prompt_feedback(
262298
if result.rationale:
263299
feedback_line += f"\n Reasoning: {result.rationale}"
264300
lines.append(feedback_line)
301+
if ctx.duration_ms is not None:
302+
lines.append(f"Agent duration: {ctx.duration_ms:.0f}ms")
265303
return "\n".join(lines)
266304

267305

@@ -487,3 +525,33 @@ def variation_prompt_improvement_instructions(
487525
parameters_instructions,
488526
]
489527
)
528+
529+
530+
def variation_prompt_duration_optimization(model_choices: List[str]) -> str:
    """
    Duration optimization section of the variation prompt.

    Emitted only when the acceptance criteria imply a latency reduction goal.
    Frames response speed as a secondary objective — quality criteria still
    come first — and gives the LLM concrete levers: pick a faster model,
    trim instructions, and avoid generation-lengthening parameters.

    :param model_choices: List of model IDs the LLM may select from, so it can
        apply its own knowledge of which models tend to be faster.
    :return: The duration optimization prompt block.
    """
    block_lines = (
        "## Duration Optimization:",
        "The acceptance criteria for this optimization implies that response latency should be reduced.",
        "In addition to improving quality, generate a variation that aims to reduce the agent's response time.",
        "You may:",
        "- Select a faster model from the available choices if quality requirements can still be met.",
        f"  Available models: {model_choices}",
        "  Use your knowledge of these models to prefer those that are known to respond more quickly.",
        "- Simplify or shorten the instructions where this does not compromise the acceptance criteria.",
        "  Shorter prompts reduce input token counts and typically yield faster responses.",
        "- Avoid increasing max_tokens or other parameters that extend generation time.",
        "Quality criteria remain the primary objective — do not sacrifice passing scores to achieve lower latency.",
    )
    return "\n".join(block_lines)

0 commit comments

Comments
 (0)