Only persist assistant history after successful TTS delivery

Move the conversational backend's _finalize() call out of stream()'s
finally block and expose it as a public commit(text) method instead.
The previous behavior persisted whatever was yielded even when the
stream errored, the user interrupted, or a TTS send failed — so saved
history diverged from what the user actually heard, and the next turn
replayed phantom context to the LLM.

main.py now calls backend.commit(response_text) only after a turn
completes successfully and produces content. The agentic backend gets a
no-op commit() since history lives on the remote service.
This commit is contained in:
justLV 2026-04-12 19:11:09 -07:00
parent 0bc3ae209f
commit 44c7be03b8
4 changed files with 21 additions and 11 deletions

View file

@ -68,6 +68,9 @@ class AgenticBackend:
if delta:
yield delta
def commit(self, text: str) -> None:
    """Do nothing: the remote service owns the conversation history."""
    return None
def reset(self) -> None:
    """Do nothing for now.

    Resetting the session would require an API call, if the service
    supports one.
    """
    return None

View file

@ -11,6 +11,11 @@ class ConversationBackend(Protocol):
"""Send a user message, yield assistant text deltas as they arrive."""
...
def commit(self, text: str) -> None:
    """Persist the assistant response once it has been delivered.

    Call this only after the turn actually played out. Managed backends
    implement it as a no-op.
    """
    ...
def reset(self) -> None:
    """Clear the conversation history or start a new session."""
    ...

View file

@ -75,17 +75,17 @@ class ConversationalBackend:
kwargs["stream"] = True
stream = await self.client.chat.completions.create(**kwargs)
parts: list[str] = []
try:
async for chunk in stream:
if not chunk.choices:
continue
delta = chunk.choices[0].delta.content or ""
if delta:
parts.append(delta)
yield delta
finally:
self._finalize("".join(parts))
async for chunk in stream:
if not chunk.choices:
continue
delta = chunk.choices[0].delta.content or ""
if delta:
yield delta
def commit(self, text: str) -> None:
    """Record *text* in history as the assistant's response.

    Meant to be called after successful delivery; the caller passes the
    joined text of whatever was actually sent to TTS.
    """
    self._finalize(text)
def reset(self) -> None:
    """Replace the message list with just the configured system prompt."""
    system_turn = {"role": "system", "content": self.cfg["system_prompt"]}
    self.messages = [system_turn]

View file

@ -389,6 +389,8 @@ async def process_utterances(config: dict, manager: DeviceManager, utterance_que
volume=0, fade=0)
response_text = " ".join(full_response)
if response_text:
device.conversation.commit(response_text)
device.last_response = response_text
elapsed = time.monotonic() - turn_t0
ttfs = f"{first_sentence_at - turn_t0:.2f}s" if first_sentence_at else ""