From b26ad4b0cb7c2a8ab21a3afd81e1387b37d94111 Mon Sep 17 00:00:00 2001 From: Logan Nguyen Date: Wed, 1 Jul 2026 17:48:50 -0500 Subject: [PATCH] fix: cancel in-flight generation when starting a new session Signed-off-by: Logan Nguyen --- src/hooks/__tests__/useModel.test.tsx | 79 +++++++++++++++++++++++++++ src/hooks/useModel.ts | 64 +++++++++++++++------- 2 files changed, 123 insertions(+), 20 deletions(-) diff --git a/src/hooks/__tests__/useModel.test.tsx b/src/hooks/__tests__/useModel.test.tsx index 78767644..a673a6c1 100644 --- a/src/hooks/__tests__/useModel.test.tsx +++ b/src/hooks/__tests__/useModel.test.tsx @@ -901,6 +901,51 @@ describe('useModel', () => { expect.objectContaining({ reason: 'user_reset' }), ); }); + + it('cancels the in-flight backend generation when starting a new session', async () => { + // Stall ask_model so the generation stays active while we reset. + let resolveInvoke!: () => void; + invoke.mockImplementationOnce( + async (_cmd: string, args?: Record) => { + if (args && 'onEvent' in args) { + return new Promise((res) => { + resolveInvoke = res; + }); + } + }, + ); + + const { result } = renderHook(() => useModel('')); + + act(() => { + void result.current.ask('hello'); + }); + expect(result.current.isGenerating).toBe(true); + + await act(async () => { + result.current.reset(); + await Promise.resolve(); + }); + + // A new session must stop the backend stream, not just the frontend + // view - otherwise the old generation holds the engine's single slot + // and the next turn queues behind it. + expect(invoke).toHaveBeenCalledWith('cancel_generation'); + + act(() => { + resolveInvoke?.(); + }); + }); + + it('does not call cancel_generation when reset runs with no active generation', () => { + const { result } = renderHook(() => useModel('')); + + act(() => { + result.current.reset(); + }); + + expect(invoke).not.toHaveBeenCalledWith('cancel_generation'); + }); }); // ─── onTurnComplete callback ───────────────────────────────────────────────── @@ -1125,6 +1170,40 @@ describe('useModel', () => { expect.objectContaining({ reason: 'history_load' }), ); }); + + it('cancels the in-flight backend generation when loading another conversation', async () => { + // Stall ask_model so the generation stays active while we load. + let resolveInvoke!: () => void; + invoke.mockImplementationOnce( + async (_cmd: string, args?: Record) => { + if (args && 'onEvent' in args) { + return new Promise((res) => { + resolveInvoke = res; + }); + } + }, + ); + + const { result } = renderHook(() => useModel('')); + + act(() => { + void result.current.ask('original'); + }); + expect(result.current.isGenerating).toBe(true); + + await act(async () => { + result.current.loadMessages([ + { id: 'l1', role: 'user', content: 'loaded' }, + ]); + await Promise.resolve(); + }); + + expect(invoke).toHaveBeenCalledWith('cancel_generation'); + + act(() => { + resolveInvoke?.(); + }); + }); }); // ─── ThinkingToken handling ────────────────────────────────────────────────── diff --git a/src/hooks/useModel.ts b/src/hooks/useModel.ts index 06ccf2ef..7c96b5fd 100644 --- a/src/hooks/useModel.ts +++ b/src/hooks/useModel.ts @@ -255,6 +255,30 @@ export function useModel( return true; }, []); + /** + * Signals the backend to stop the active generation and tracks the + * in-flight cancel in `pendingCancelRef` so the next `ask()` / + * `askSearch()` awaits the round-trip before starting a new turn. That + * wait is what stops a fresh request from racing the outgoing one onto + * the engine's single decode slot. Idempotent while a cancel is already + * pending, so overlapping callers (double cancel, cancel-then-reset) only + * fire `cancel_generation` once. Returns the pending-cancel promise. + */ + const requestBackendCancel = useCallback((): Promise => { + if (!pendingCancelRef.current) { + pendingCancelRef.current = (async () => { + try { + await invoke('cancel_generation'); + } catch { + // Local hard-abort already reset the UI; backend best-effort only. + } finally { + pendingCancelRef.current = null; + } + })(); + } + return pendingCancelRef.current; + }, []); + /** * Submits a message to the Ollama backend and starts the streaming response. * @@ -717,22 +741,8 @@ export function useModel( } abortActiveGeneration(); - - if (!pendingCancelRef.current) { - const cancelPromise = (async () => { - try { - await invoke('cancel_generation'); - } catch { - // Local hard-abort already reset the UI; backend best-effort only. - } finally { - pendingCancelRef.current = null; - } - })(); - pendingCancelRef.current = cancelPromise; - } - - await pendingCancelRef.current; - }, [abortActiveGeneration, isGenerating]); + await requestBackendCancel(); + }, [abortActiveGeneration, isGenerating, requestBackendCancel]); /** Resets all conversation state for a fresh session. * @@ -746,7 +756,15 @@ export function useModel( * user-visible reset. */ const reset = useCallback(() => { - abortActiveGeneration(); + const hadActiveGeneration = abortActiveGeneration(); + // Starting a fresh session must also stop any in-flight backend stream, + // not just the frontend view abort above. Otherwise the outgoing + // generation keeps the engine's single decode slot and the next turn + // queues behind it. Routed through the same `pendingCancelRef` plumbing + // `cancel()` uses so the next `ask()` awaits the cancel round-trip. + if (hadActiveGeneration) { + void requestBackendCancel(); + } setMessages([]); const outgoingId = traceConversationIdRef.current; if (outgoingId !== null && !isFirstTurnRef.current) { @@ -758,7 +776,7 @@ export function useModel( traceConversationIdRef.current = null; isFirstTurnRef.current = true; void invoke('reset_conversation'); - }, [abortActiveGeneration]); + }, [abortActiveGeneration, requestBackendCancel]); /** Replaces the current message list with a previously loaded set of messages. * @@ -770,7 +788,13 @@ export function useModel( */ const loadMessages = useCallback( (msgs: Message[]) => { - abortActiveGeneration(); + const hadActiveGeneration = abortActiveGeneration(); + // Loading another conversation is a session boundary too: stop the + // in-flight backend stream so it does not hold the engine slot and + // stall the loaded conversation's next turn. Same plumbing as reset(). + if (hadActiveGeneration) { + void requestBackendCancel(); + } const outgoingId = traceConversationIdRef.current; if (outgoingId !== null && !isFirstTurnRef.current) { void invoke('record_conversation_end', { @@ -782,7 +806,7 @@ export function useModel( isFirstTurnRef.current = true; setMessages(msgs); }, - [abortActiveGeneration], + [abortActiveGeneration, requestBackendCancel], ); /**