Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
d052af7
FE-864: Plan orchestrator brownfield enhancements
kostandinang Jun 15, 2026
a5b1240
FE-864: Add agent-extension-host contract (dual-mode pi harness)
kostandinang Jun 15, 2026
bc0bf39
FE-864: Base the Arc-1 linear stack on agent-extension-host
kostandinang Jun 15, 2026
dc0a67f
FE-864: Reconcile brunch-detect + harness-dep-install scope with code
kostandinang Jun 15, 2026
7c07076
FE-864: Correct CLI surface — commands are plan/cook/serve, brigade n…
kostandinang Jun 16, 2026
7e4879a
FE-879: lazy per-slice cook worktrees + shared node_modules
kostandinang Jun 16, 2026
688a4ba
FE-879: register cook-worktree-laziness in PLAN.md
kostandinang Jun 16, 2026
f30ae2e
FE-879: fail loudly when a slice id collides with a parent entry
kostandinang Jun 17, 2026
e48f383
FE-867: agent-extension-host mode-neutral contract (slice 1)
kostandinang Jun 15, 2026
7f88926
FE-867: tighten agent-extension-host neutrality & witness proofs
kostandinang Jun 15, 2026
7d156f6
FE-871: brunch toolchain detection — detectProfile (slice 1)
kostandinang Jun 15, 2026
de58a46
FE-871: wire brownfield toolchain detection into plan emission
kostandinang Jun 15, 2026
4b89fec
FE-864: frame app-runtime-probe + integration-oracle as harness-owned…
kostandinang Jun 15, 2026
c0ac108
FE-871: fail loud on ambiguous evidence; drop per-stack detection bra…
kostandinang Jun 16, 2026
61deca9
FE-871: co-locate generated tests in the repo's own test dir (slice 3)
kostandinang Jun 16, 2026
bbf3411
FE-871: monorepo-robust test-dir + workspace runner detection (slice 4)
kostandinang Jun 16, 2026
68cf310
FE-871: detect root-level test layouts
kostandinang Jun 17, 2026
7c08f72
FE-872: classify test-run failures as infra vs test (slice 1)
kostandinang Jun 15, 2026
0457242
FE-872: name the toolchain cause in infra-failure halt reason (slice 2)
kostandinang Jun 15, 2026
2b14efc
FE-872: pin greenfield dep manifest/lockfile capture on promotion (sl…
kostandinang Jun 15, 2026
6ec39d7
FE-864: record dogfood-spike verdict (brownfield cook end-to-end)
kostandinang Jun 16, 2026
09aebfb
FE-872: unify test execution on one runner + verification seam (slice 4)
kostandinang Jun 16, 2026
abba593
FE-872: classify only missing runner spawn errors as infra
kostandinang Jun 16, 2026
e2e2800
FE-872: avoid overclaiming infra halt details
kostandinang Jun 16, 2026
6df14ba
FE-875: app runtime probe — boot + HTTP probe + reachability classifi…
kostandinang Jun 16, 2026
1e6535f
FE-875: harness-owned ProbeSpec resolution — buildProbeSpec allocates…
kostandinang Jun 16, 2026
8ced0ac
FE-875: bound the app probe's HTTP calls so a hung app can't hang the…
kostandinang Jun 16, 2026
4706dd8
FE-875: keep app probe deadlines strict
kostandinang Jun 16, 2026
2310eac
FE-876: integration oracle Half A — fold runProbe reachability into t…
kostandinang Jun 16, 2026
07c2419
FE-876: integration oracle Half B seam — reachability intent + inject…
kostandinang Jun 16, 2026
acc5610
FE-877: brownfield promotion — commit the cook result onto cook/<runI…
kostandinang Jun 16, 2026
1b4b694
FE-878: brunch serve — one-shot plan-then-cook capstone (closes Arc 1)
kostandinang Jun 16, 2026
98933bf
FE-878: thread launch cwd into brunch serve's cook stage
kostandinang Jun 16, 2026
d7041e6
FE-864: reconcile SPEC invariant drift from the Arc-1 stack
kostandinang Jun 16, 2026
c396280
FE-878: extract shared completed-spec gate for plan/serve CLI
kostandinang Jun 16, 2026
b11b140
FE-878: extract CLI presentation seam; migrate plan surface (slice 1a)
kostandinang Jun 16, 2026
4085b84
FE-878: migrate cook surface to the presentation seam (slice 1b)
kostandinang Jun 16, 2026
f2bcb7e
FE-878: Ink TUI presenter — egg logo + brigade tracker (slice 2a)
kostandinang Jun 16, 2026
161e20e
FE-878: live waiting-state — pending panel + wait brackets (slice 2b)
kostandinang Jun 16, 2026
20be019
FE-878: own the bus lifecycle so the TUI tears down (ln-review #1)
kostandinang Jun 16, 2026
65cfe3e
FE-878: golden-test the cook banner + summary line strings (ln-review…
kostandinang Jun 16, 2026
d508316
FE-878: brunch wordmark header in brand gradient; revert brigade to m…
kostandinang Jun 16, 2026
6e86878
FE-878: calm the pending-panel timer to whole seconds
kostandinang Jun 16, 2026
50d8b5f
FE-878: align serve flag handling with cook
kostandinang Jun 16, 2026
b44eaa1
FE-878: resolve brownfield orchestration review comments
kostandinang Jun 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 183 additions & 2 deletions memory/PLAN.md

Large diffs are not rendered by default.

15 changes: 11 additions & 4 deletions memory/SPEC.md

Large diffs are not rendered by default.

537 changes: 534 additions & 3 deletions package-lock.json

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
"drizzle-orm": "^0.45.2",
"embla-carousel-react": "^8.6.0",
"express": "^5.2.1",
"ink": "^7.0.6",
"lucide-react": "^1.8.0",
"md-pen": "^1.2.0",
"motion": "^12.38.0",
Expand Down Expand Up @@ -118,6 +119,7 @@
"code-inspector-plugin": "^1.5.1",
"drizzle-kit": "^0.31.10",
"happy-dom": "^20.8.9",
"ink-testing-library": "^4.0.0",
"oxfmt": "^0.43.0",
"oxlint": "^1.58.0",
"oxlint-tsgolint": "^0.19.0",
Expand Down
144 changes: 144 additions & 0 deletions src/agent-extension-host.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import { readFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';

import { describe, expect, it } from 'vitest';

import { type AgentExtensionConsumerWitness, flattenCapabilityIds } from './agent-extension-host.js';
import { createPiActions } from './orchestrator/src/pi-actions.js';
import type { InterviewerTools } from './server/interview.js';
import { createExplorationTools } from './server/tools/index.js';

const here = dirname(fileURLToPath(import.meta.url));

// The cook (`execute`) consumer, described as host plugins — one cook action per
// capability. Proven below against the real `createPiActions()` surface: the
// flattened capability ids must equal the keys that `createPiActions()` returns.
//
// Every plugin repeats the witness-level `mode` ('execute'); the per-mode
// registration test below asserts the two agree. `handler` is `null` throughout
// because the contract type declares it as `null` (metadata only).
const cookWitness = {
consumerId: 'cook',
mode: 'execute',
plugins: [
{
id: 'execute.evaluate-done',
mode: 'execute',
capabilities: [
{
id: 'evaluate-done',
summary: 'Decide a slice is done by running its verification targets.',
handler: null,
},
],
},
{
id: 'execute.write-tests',
mode: 'execute',
capabilities: [{ id: 'write-tests', summary: 'Write failing tests for a slice.', handler: null }],
},
{
id: 'execute.write-code',
mode: 'execute',
capabilities: [{ id: 'write-code', summary: 'Write code to make a slice pass.', handler: null }],
},
{
id: 'execute.assess-semantic',
mode: 'execute',
capabilities: [
{ id: 'assess-semantic', summary: 'Assess semantic satisfaction of a slice.', handler: null },
],
},
{
id: 'execute.verify-epic',
mode: 'execute',
capabilities: [{ id: 'verify-epic', summary: 'Write + run an epic integration test.', handler: null }],
},
],
// `satisfies` checks the literal against the contract shape without changing
// the literal's own `as const` type.
} as const satisfies AgentExtensionConsumerWitness;

// The interview (`elicit`) consumer as the neutrality WITNESS. The interview keeps
// its own runtime (Vercel AI SDK); this only proves its capability surface fits
// the same host contract. `as const` preserves the literal ids for the type-level
// coverage proof below.
//
// The first three plugins carry the native interviewer tool ids; the last one
// (`elicit.workspace-exploration`) is the plugin the exploration-tools test
// looks up by id and compares against `createExplorationTools`.
const interviewWitness = {
consumerId: 'interview',
mode: 'elicit',
plugins: [
{
id: 'elicit.ask-question',
mode: 'elicit',
capabilities: [{ id: 'ask_question', summary: 'Ask the user a structured question.', handler: null }],
},
{
id: 'elicit.preface',
mode: 'elicit',
capabilities: [
{ id: 'present_preface', summary: 'Present a provisional context preface.', handler: null },
],
},
{
id: 'elicit.phase-closure',
mode: 'elicit',
capabilities: [
{ id: 'propose_phase_closure', summary: 'Propose closing the current phase.', handler: null },
],
},
{
// Looked up by this exact id in the exploration-tools test below.
id: 'elicit.workspace-exploration',
mode: 'elicit',
capabilities: [
{ id: 'read_file', summary: 'Read a workspace file.', handler: null },
{ id: 'grep', summary: 'Search workspace file contents.', handler: null },
{ id: 'find_files', summary: 'Find workspace files.', handler: null },
{ id: 'list_directory', summary: 'List a workspace directory.', handler: null },
],
},
],
} as const satisfies AgentExtensionConsumerWitness;

describe('agent-extension-host contract is a mode-neutral core', () => {
  it('the contract module is dependency-free, which is what keeps it mode-neutral', () => {
    const src = readFileSync(join(here, 'agent-extension-host.ts'), 'utf8');
    // No imports is the load-bearing guarantee: a module that imports nothing
    // cannot reference an `execute`-only type (Slice/Epic/Plan/Toolchain/worktree…)
    // or an SDK type. That makes neutrality structural rather than a denylist of
    // names we have to remember to update.
    //
    // "Imports nothing" has to cover every syntax that brings in another module,
    // not only `import` declarations — otherwise the guarantee can be bypassed
    // without tripping this test.

    // 1. Import declarations, including the space-less side-effect form `import'x'`.
    expect(src).not.toMatch(/^\s*import[\s{*'"]/m);
    // 2. Re-exports (`export * from`, `export * as ns from`, `export { a } from`,
    //    `export type { A } from`) depend on another module with no `import` keyword.
    expect(src).not.toMatch(/^\s*export\s+(?:type\s+)?(?:\*(?:\s+as\s+\w+)?|\{[^}]*\})\s*from\s*['"]/m);
    // 3. Inline `import('…')` — a TypeScript import type or a dynamic import —
    //    can name a foreign type from anywhere in a line.
    expect(src).not.toMatch(/\bimport\s*\(/);
  });

  it('a consumer witness only loads plugins of its own mode (per-mode registration)', () => {
    // Every plugin a witness lists must declare the same mode as the witness itself.
    for (const witness of [cookWitness, interviewWitness]) {
      for (const plugin of witness.plugins) {
        expect(plugin.mode).toBe(witness.mode);
      }
    }
  });
});

// Checks each witness against the real consumer surface it claims to describe.
describe('two-consumer proof — both real surfaces fit the host contract', () => {
it('the cook execute surface matches the registered capabilities exactly', () => {
// Both sides are compared as Sets, so ordering is irrelevant and a capability
// missing from either side fails the test.
const registered = new Set(flattenCapabilityIds(cookWitness));
const actual = new Set(Object.keys(createPiActions()));
expect(registered).toEqual(actual);
});

it('the interview exploration plugin matches the real tool surface exactly', () => {
// `createExplorationTools` is DB-free, so this family is proven bidirectionally
// against live code: the witness may neither omit a real tool nor invent a
// phantom one. The three native interviewer tools (ask_question /
// present_preface / propose_phase_closure) can't be checked this way —
// constructing them needs a live DB — so their coverage is type-level only
// (the `keyof InterviewerTools` assertion below), which is superset-only: it
// proves the witness omits no real tool, not that it invents none.
const explorationPlugin = interviewWitness.plugins.find((p) => p.id === 'elicit.workspace-exploration');
// If the plugin id were not found, `witnessed` would be an empty Set and the
// comparison below would fail.
const witnessed = new Set(explorationPlugin?.capabilities.map((c) => c.id));
const actual = new Set(Object.keys(createExplorationTools(here)));
expect(witnessed).toEqual(actual);
});

it('the interview witness covers every interviewer tool id (type-enforced under lint --type-check)', () => {
// Union of every capability id literal declared anywhere in the interview witness.
type ElicitCapabilityId = (typeof interviewWitness.plugins)[number]['capabilities'][number]['id'];
// If the interview adds a tool not represented in the witness, `Covered`
// becomes `false` and this assignment fails the type-aware lint gate.
type Covered = keyof InterviewerTools extends ElicitCapabilityId ? true : false;
const covered: Covered = true;
// The runtime assertion is a placeholder; the real check is the typed
// assignment above.
expect(covered).toBe(true);
});
});
58 changes: 58 additions & 0 deletions src/agent-extension-host.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Agent extension host — the mode-neutral contract (FE-867).
//
// The pi harness is reused across two jobs: driving specification (`elicit`)
// and driving cook (`execute`). Rather than two harnesses, treat it as one
// dual-mode *agent-extension host*: a mode-agnostic core that consumers extend
// by registering capabilities as per-mode plugins. Modes differ only by which
// plugins they load.
//
// This module is the serialization point with the parallel pi-harness work that
// owns the core *implementation*. It deliberately defines only transport-safe
// contract metadata — no session lifecycle, no stream/dispatch runtime, no SDK
// types — so it stays neutral across both consumers (cook via the pi SDK, the
// interview via the Vercel AI SDK) and across whichever runtime lands later.
//
// Invariant (checkable): this file has no imports and names no `execute`-only
// concept (slice / epic / plan / worktree / test-runner / toolchain). If it did,
// it would no longer be a mode-neutral core. See agent-extension-host.test.ts.

/**
 * The two ways the shared agent-extension host is driven: `elicit` drives
 * specification (the interview) and `execute` drives cook.
 */
export type AgentExtensionMode = 'elicit' | 'execute';

/**
 * Transport-safe descriptor of one capability a consumer registers against the
 * host. Mirrors `capability-registry.ts`: metadata only — the executable handler
 * lives behind the host's dispatch, so this contract never owns runtime semantics.
 */
export interface AgentExtensionCapabilityContract {
// Capability identifier; `flattenCapabilityIds` collects these as the host's dispatch keys.
id: string;
// Short description of what the capability does.
summary: string;
// Always `null` here: the contract carries metadata only, never the executable handler.
handler: null;
}

/**
 * A plugin is the unit of per-mode registration: a named bundle of capabilities
 * loaded into one mode. "Modes differ only by which plugins they load" is exactly
 * this — `execute` loads the cook plugins, `elicit` loads the interview plugins.
 */
export interface AgentExtensionPluginContract {
// Name of the plugin bundle.
id: string;
// The single mode this plugin is loaded into.
mode: AgentExtensionMode;
// The capabilities bundled by this plugin; read-only.
capabilities: readonly AgentExtensionCapabilityContract[];
}

/**
 * A consumer (e.g. cook, the interview) described as the set of plugins it loads
 * into a single mode. Used to prove a real consumer fits the host contract
 * without migrating its runtime — the "witness" of mode-neutrality.
 */
export interface AgentExtensionConsumerWitness {
// Name of the consumer being described (e.g. cook, the interview).
consumerId: string;
// The single mode this consumer loads its plugins into.
mode: AgentExtensionMode;
// The plugins this consumer loads; read-only.
plugins: readonly AgentExtensionPluginContract[];
}

/**
 * List every capability id a consumer registers, in plugin order and then in
 * capability order within each plugin. These ids are the host's dispatch keys.
 */
export function flattenCapabilityIds(witness: AgentExtensionConsumerWitness): string[] {
  const ids: string[] = [];
  for (const { capabilities } of witness.plugins) {
    for (const { id } of capabilities) {
      ids.push(id);
    }
  }
  return ids;
}
170 changes: 170 additions & 0 deletions src/orchestrator/src/app-probe.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
// The probe boots a *real* app process in a tmp worktree and exercises it over
// the wire — no mocks — so these tests pin the actual boot/ready/probe/teardown
// behavior the orphan check depends on. Apps are zero-dep `node:http` scripts.

import { mkdtempSync, rmSync, writeFileSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';

import { afterEach, describe, expect, it } from 'vitest';

import { buildProbeSpec, runProbe } from './app-probe.js';
import type { ProbeSpec } from './types.js';

const dirs: string[] = [];

// After every test, drain the list of sandbox directories and delete each one.
// `splice(0)` empties `dirs` in place, so nothing is cleaned up twice.
afterEach(() => {
  const created = dirs.splice(0);
  created.forEach((path) => {
    rmSync(path, { recursive: true, force: true });
  });
});

/**
 * Create a fresh temp directory containing `server.js` with the given source,
 * register it for cleanup after the test, and return its path.
 */
function sandbox(serverSource: string): string {
  const root = mkdtempSync(join(tmpdir(), 'app-probe-'));
  dirs.push(root);
  const entryFile = join(root, 'server.js');
  writeFileSync(entryFile, serverSource);
  return root;
}

/**
 * Source for a zero-dependency `node:http` app: each path in `routes` answers
 * with its mapped status, and any other path answers 404. The app listens on
 * 127.0.0.1 at the port given by the `PORT` environment variable.
 */
const appServing = (routes: Record<string, number>): string => {
  const routeTable = JSON.stringify(routes);
  const scriptLines = [
    "const http = require('node:http');",
    `const routes = ${routeTable};`,
    'http.createServer((req, res) => {',
    ' const status = routes[req.url] ?? 404;',
    ' res.writeHead(status); res.end(String(status));',
    "}).listen(Number(process.env.PORT), '127.0.0.1');",
  ];
  // Every line, including the last, is newline-terminated.
  return scriptLines.map((line) => `${line}\n`).join('');
};

// Dogfoods the harness-owned spec builder: the test hands over only the boot
// argv and the two paths, and `buildProbeSpec` returns the runnable spec. The
// spec is resolved first; the sandbox with the app script is created afterwards.
async function specFor(routes: Record<string, number>): Promise<{ spec: ProbeSpec; dir: string }> {
  const spec = await buildProbeSpec({
    boot: ['node', 'server.js'],
    readyPath: '/health',
    featurePath: '/feature',
  });
  const dir = sandbox(appServing(routes));
  return { dir, spec };
}

// Covers the result kinds asserted here: 'reachable', 'not-reachable' and 'infra'.
describe('runProbe classifies real app reachability', () => {
it('an app whose feature endpoint answers 2xx → reachable', async () => {
const { spec, dir } = await specFor({ '/health': 200, '/feature': 200 });
const result = await runProbe(spec, dir);
expect(result.kind).toBe('reachable');
expect(result.reachable).toBe(true);
expect(result.status).toBe(200);
});

it('an app that boots but 404s the feature endpoint → not-reachable (the orphan)', async () => {
// Feature module present-but-unwired replays as: server up, route absent.
const { spec, dir } = await specFor({ '/health': 200 });
const result = await runProbe(spec, dir);
expect(result.kind).toBe('not-reachable');
expect(result.reachable).toBe(false);
expect(result.status).toBe(404);
});

it('a boot command that exits immediately → infra (distinct from not-reachable)', async () => {
// The spec is written by hand rather than built with `buildProbeSpec`: the
// script exits with code 1 straight away, so no app ever serves the URLs.
const dir = sandbox('process.exit(1);\n');
const result = await runProbe(
{ boot: ['node', 'server.js'], readyUrl: 'http://127.0.0.1:1/x', featureUrl: 'http://127.0.0.1:1/x' },
dir,
);
expect(result.kind).toBe('infra');
expect(result.reachable).toBe(false);
});

it('a missing boot binary → infra, not a crash', async () => {
const dir = sandbox(appServing({ '/health': 200 }));
const started = Date.now();
const result = await runProbe(
{
boot: ['definitely-not-a-real-binary-xyz'],
readyUrl: 'http://127.0.0.1:1/x',
featureUrl: 'http://127.0.0.1:1/x',
},
dir,
);
expect(result.kind).toBe('infra');
// The probe must also return in under one second of wall-clock time.
expect(Date.now() - started).toBeLessThan(1_000);
});
});

describe('runProbe bounds its HTTP calls so a hung app cannot hang the probe', () => {
// A server that accepts connections (and the HTTP request) but never sends a
// response — the case the wall-clock deadline alone can't catch, because a
// bare `await fetch` would block forever between deadline checks.
// Paths listed in `readyRoutes` are the exception: they answer with the mapped
// status and the body 'ok'.
const neverResponds = (readyRoutes: Record<string, number> = {}): string =>
`const http = require('node:http');\n` +
`const ready = ${JSON.stringify(readyRoutes)};\n` +
`http.createServer((req, res) => {\n` +
` if (ready[req.url] !== undefined) { res.writeHead(ready[req.url]); res.end('ok'); return; }\n` +
` /* otherwise: never respond */\n` +
`}).listen(Number(process.env.PORT), '127.0.0.1');\n`;

it('a ready path that accepts connections but never responds → infra within the deadline', async () => {
const spec = await buildProbeSpec({
boot: ['node', 'server.js'],
readyPath: '/health',
featurePath: '/feature',
});
const dir = sandbox(neverResponds());
const started = Date.now();
// The per-attempt budget (2000 ms) is deliberately larger than the overall
// ready timeout (600 ms); the elapsed-time assertion below requires the probe
// to return in under 1200 ms regardless.
const result = await runProbe(spec, dir, { readyTimeoutMs: 600, readyAttemptMs: 2_000 });
expect(result.kind).toBe('infra');
expect(Date.now() - started).toBeLessThan(1_200);
});

it('a booted app whose feature endpoint never responds → infra, not a hang', async () => {
const spec = await buildProbeSpec({
boot: ['node', 'server.js'],
readyPath: '/health',
featurePath: '/feature',
});
// `/health` answers 200 so readiness succeeds; `/feature` is left unanswered.
const dir = sandbox(neverResponds({ '/health': 200 }));
const result = await runProbe(spec, dir, { requestTimeoutMs: 300 });
expect(result.kind).toBe('infra');
expect(result.output).toMatch(/feature probe request failed/);
});
});

// Teardown check: once `runProbe` has returned, a request to the app's feature
// URL must reject.
describe('runProbe tears the boot process down', () => {
it('the booted app is no longer listening after the probe returns', async () => {
const { spec, dir } = await specFor({ '/health': 200, '/feature': 200 });
await runProbe(spec, dir);
// The port the app bound should be free again — nothing left listening.
await expect(fetch(spec.featureUrl)).rejects.toThrow();
});
});

// Pins the shape of the spec that `buildProbeSpec` returns: the allocated port
// in `env.PORT`, URLs assembled on 127.0.0.1, and the boot argv passed through.
describe('buildProbeSpec resolves a target into a runnable spec', () => {
it('allocates a port and assembles ready/feature URLs from the paths', async () => {
const spec = await buildProbeSpec({
boot: ['node', 'server.js'],
readyPath: '/health',
featurePath: '/feature',
});
// The port is read back from the spec's env and must appear in both URLs.
const port = Number(spec.env?.PORT);
expect(port).toBeGreaterThan(0);
expect(spec.readyUrl).toBe(`http://127.0.0.1:${port}/health`);
expect(spec.featureUrl).toBe(`http://127.0.0.1:${port}/feature`);
expect(spec.boot).toEqual(['node', 'server.js']);
});

it('layers caller env under the allocated PORT so PORT always wins', async () => {
const spec = await buildProbeSpec({
boot: ['node', 'server.js'],
readyPath: '/',
featurePath: '/',
env: { NODE_ENV: 'test', PORT: '1' },
});
// Other caller env survives; the caller's PORT of '1' must have been replaced.
expect(spec.env?.NODE_ENV).toBe('test');
expect(Number(spec.env?.PORT)).toBeGreaterThan(1);
});

it('hands out distinct ports across concurrent allocations', async () => {
// Eight allocations are started together; the Set size check fails on any duplicate.
const specs = await Promise.all(
Array.from({ length: 8 }, () => buildProbeSpec({ boot: ['x'], readyPath: '/', featurePath: '/' })),
);
const ports = specs.map((s) => Number(s.env?.PORT));
expect(new Set(ports).size).toBe(ports.length);
});
});
Loading
Loading