mirror of
https://github.com/justLV/onju-v2
synced 2026-04-21 07:37:34 +00:00
Only persist assistant history after successful TTS delivery
Move the conversational backend's _finalize() call out of stream()'s finally block and expose it as a public commit(text) method instead. The previous behavior persisted whatever was yielded even when the stream errored, the user interrupted, or a TTS send failed — so saved history diverged from what the user actually heard, and the next turn replayed phantom context to the LLM. main.py now calls backend.commit(response_text) only after a turn successfully completes and produced content. Agentic backend gets a no-op commit() since history lives on the remote service.
This commit is contained in:
parent
0bc3ae209f
commit
44c7be03b8
4 changed files with 21 additions and 11 deletions
|
|
@ -68,6 +68,9 @@ class AgenticBackend:
|
|||
if delta:
|
||||
yield delta
|
||||
|
||||
def commit(self, text: str) -> None:
    """No-op: the remote agentic service is the owner of conversation
    history, so there is nothing to persist on this side."""
    pass
|
||||
|
||||
def reset(self) -> None:
    """No-op: resetting the session would need an API call to the remote
    service, if it supports one at all."""
    pass
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,11 @@ class ConversationBackend(Protocol):
|
|||
"""Send a user message, yield assistant text deltas as they arrive."""
|
||||
...
|
||||
|
||||
def commit(self, text: str) -> None:
    """Persist *text* as the assistant response after successful delivery.

    Invoke only once the turn actually played out; backends whose history
    is managed remotely may implement this as a no-op.
    """
    ...
|
||||
|
||||
def reset(self) -> None:
    """Discard accumulated conversation history / begin a new session."""
    ...
|
||||
|
|
|
|||
|
|
@ -75,17 +75,17 @@ class ConversationalBackend:
|
|||
kwargs["stream"] = True
|
||||
stream = await self.client.chat.completions.create(**kwargs)
|
||||
|
||||
parts: list[str] = []
|
||||
try:
|
||||
async for chunk in stream:
|
||||
if not chunk.choices:
|
||||
continue
|
||||
delta = chunk.choices[0].delta.content or ""
|
||||
if delta:
|
||||
parts.append(delta)
|
||||
yield delta
|
||||
finally:
|
||||
self._finalize("".join(parts))
|
||||
async for chunk in stream:
|
||||
if not chunk.choices:
|
||||
continue
|
||||
delta = chunk.choices[0].delta.content or ""
|
||||
if delta:
|
||||
yield delta
|
||||
|
||||
def commit(self, text: str) -> None:
    """Record *text* in the stored history as the assistant's reply.

    The caller passes the joined text that was actually delivered to TTS,
    so the persisted history matches what the user really heard.
    """
    self._finalize(text)
|
||||
|
||||
def reset(self) -> None:
    """Drop all turns, keeping only the configured system prompt."""
    system_message = {"role": "system", "content": self.cfg["system_prompt"]}
    self.messages = [system_message]
|
||||
|
|
|
|||
|
|
@ -389,6 +389,8 @@ async def process_utterances(config: dict, manager: DeviceManager, utterance_que
|
|||
volume=0, fade=0)
|
||||
|
||||
response_text = " ".join(full_response)
|
||||
if response_text:
|
||||
device.conversation.commit(response_text)
|
||||
device.last_response = response_text
|
||||
elapsed = time.monotonic() - turn_t0
|
||||
ttfs = f"{first_sentence_at - turn_t0:.2f}s" if first_sentence_at else "—"
|
||||
|
|
|
|||
Loading…
Reference in a new issue