lab-04-production-assistant
Production Assistant
README
# lab-04-production-assistant — capstone arc

Wire the boring adult stuff together: **routing**, **retrieval**, a **human approval** path for spicy intents, a **trace log** you can actually read, and a **token budget** cap that prevents runaway loops from impersonating "just one more retrieval."

All local. All deterministic. Still more honest than half the "production" demos you'll read this week.

## Run

```bash
cd labs/langchain/lab-04-production-assistant
uv run python src/main.py
```

## Test

```bash
uv run pytest tests/
```

## Stretch goals

- Add a "fast path" cache keyed by normalized query.
- Emit a JSON trace file and diff it in tests.
pyproject.toml
[project]
name = "lab-04-production-assistant"
version = "0.1.0"
requires-python = ">=3.11"
dependencies = []

[project.optional-dependencies]
dev = ["pytest>=8.0"]

[tool.uv]
dev-dependencies = []
Starter Python
"""Deterministic local assistant: route, RAG, HITL gate, trace, budget."""
from __future__ import annotations
import re
from dataclasses import dataclass, field
from production_rag import answer_with_citations
def tokenize(s: str) -> set[str]:
    """Return the distinct lowercase alphanumeric tokens of *s*.

    The input is lowercased, split into maximal runs of ``a-z`` / ``0-9``,
    and single-character runs are discarded.
    """
    lowered = s.lower()
    runs = re.findall(r"[a-z0-9]+", lowered)
    # Keep only runs of two or more characters; the set removes duplicates.
    return set(filter(lambda run: len(run) >= 2, runs))
# Lowercase substrings that mark a message as sensitive. They are matched
# against the lowercased message text, so "passwords" also hits "password".
SENSITIVE_MARKERS = (
    "delete",
    "wipe",
    "exfil",
    "password",
    "dump",
)
@dataclass
class CapstoneAssistant:
    """Deterministic assistant that routes, gates, traces, and meters each turn.

    Attributes:
        budget: Remaining budget units; decremented by every successful spend.
        trace: Append-only event log. It is instance state and is never
            cleared here, so it accumulates across ``run_turn`` calls.
    """

    budget: int = 100
    trace: list[str] = field(default_factory=list)

    def _spend(self, cost: int, note: str) -> bool:
        """Deduct *cost* from the budget if affordable and log the outcome.

        Returns:
            ``True`` when the cost was deducted, ``False`` when the remaining
            budget is smaller than *cost* (nothing is deducted in that case).
        """
        if self.budget < cost:
            self.trace.append(f"budget:blocked:{note}")
            return False
        self.budget -= cost
        self.trace.append(f"budget:spent:{cost}:{note}")
        return True

    def _needs_hitl(self, message: str) -> bool:
        """Return whether *message* contains any sensitive marker substring."""
        lowered = message.lower()
        return any(marker in lowered for marker in SENSITIVE_MARKERS)

    def _route(self, message: str) -> str:
        """Pick the handling route: ``"hitl"``, ``"rag"``, or ``"direct"``.

        The sensitive-marker check runs before the retrieval keywords so that
        a message mixing both (e.g. "delete the widget sku") cannot skip the
        human-approval gate by also mentioning a catalogue term.
        """
        if self._needs_hitl(message):
            return "hitl"
        if tokenize(message) & {"widget", "sku", "titanium"}:
            return "rag"
        return "direct"

    def _result(self, status: str, **payload: object) -> dict:
        """Build a response dict with a trace snapshot and remaining budget.

        Key order is ``status``, the payload keys in call order, ``trace``,
        ``budget_remaining``.
        """
        return {
            "status": status,
            **payload,
            # Copy so later turns do not mutate a response already returned.
            "trace": list(self.trace),
            "budget_remaining": self.budget,
        }

    def _turn_hitl(self, human_approved: bool | None) -> dict:
        """Handle a sensitive message according to the approval decision."""
        if human_approved is None:
            self.trace.append("hitl:pending")
            return self._result(
                "need_human",
                summary="Sensitive verbs detected — approval required.",
            )
        if not human_approved:
            self.trace.append("hitl:rejected")
            return self._result(
                "blocked",
                answer="Not running that without explicit approval.",
            )
        # approved: fall through to answer with extra spend
        if not self._spend(30, "hitl_exec"):
            return self._result("budget_exhausted", answer="")
        self.trace.append("hitl:executed")
        return self._result(
            "ok",
            answer="Sensitive action executed under explicit human approval (offline simulation).",
        )

    def _turn_rag(self, message: str) -> dict:
        """Handle a retrieval message: pay to retrieve, then pay to synthesize."""
        if not self._spend(20, "retrieve"):
            return self._result("budget_exhausted", answer="")
        # answer_with_citations is used here as returning a mapping with
        # "answer" and "citations" (a sequence of mappings that carry "id").
        rag = answer_with_citations(message)
        citations = rag["citations"]
        # An empty citation list must not crash the turn; log a placeholder.
        first_id = citations[0]["id"] if citations else "none"
        self.trace.append(f"rag:{first_id}")
        if not self._spend(25, "synthesize"):
            # Retrieval was already paid for, so its answer is still returned.
            return self._result("budget_exhausted", answer=rag["answer"])
        return self._result("ok", answer=rag["answer"], citations=citations)

    def _turn_direct(self, message: str) -> dict:
        """Handle a message that needs neither retrieval nor approval."""
        if not self._spend(15, "direct"):
            return self._result("budget_exhausted", answer="")
        reply = f"Direct channel: you asked {message!r} — default safe response."
        self.trace.append("direct:replied")
        return self._result("ok", answer=reply)

    def run_turn(self, message: str, human_approved: bool | None) -> dict:
        """Process one user message and return a response dict.

        Args:
            message: The user message to route and answer.
            human_approved: Approval decision for sensitive messages:
                ``None`` means not yet decided, a falsy value rejects, a
                truthy value approves. Ignored for non-sensitive routes.

        Returns:
            A dict that always has ``status``, ``trace`` (a copy of the log so
            far) and ``budget_remaining``. ``status`` is one of
            ``"need_human"``, ``"blocked"``, ``"budget_exhausted"`` or
            ``"ok"``. ``"need_human"`` carries ``summary``; every other status
            carries ``answer``; a successful retrieval also carries
            ``citations``.
        """
        route = self._route(message)
        self.trace.append(f"route:{route}")
        if route == "hitl":
            return self._turn_hitl(human_approved)
        if route == "rag":
            return self._turn_rag(message)
        return self._turn_direct(message)