lab-04-production-assistant

Production Assistant

README

# lab-04-production-assistant — capstone arc

Wire the boring adult stuff together: **routing**, **retrieval**, a **human approval** path for spicy intents, a **trace log** you can actually read, and a **token budget** cap that prevents runaway loops from impersonating "just one more retrieval."

All local. All deterministic. Still more honest than half the "production" demos you'll read this week.

## Run

```bash
cd labs/langchain/lab-04-production-assistant
uv run python src/main.py
```

## Test

```bash
uv run --extra dev pytest tests/
```

## Stretch goals

- Add a "fast path" cache keyed by normalized query.
- Emit a JSON trace file and diff it in tests.

pyproject.toml

[project]
name = "lab-04-production-assistant"
version = "0.1.0"
requires-python = ">=3.11"
dependencies = []

[project.optional-dependencies]
dev = ["pytest>=8.0"]

[tool.uv]
dev-dependencies = []

Starter Python

"""Deterministic local assistant: route, RAG, HITL gate, trace, budget."""

from __future__ import annotations

import re
from dataclasses import dataclass, field

from production_rag import answer_with_citations


def tokenize(s: str) -> set[str]:
    """Return the distinct lowercase alphanumeric tokens of *s*.

    The text is lowercased, split into runs of ``a-z`` / ``0-9``, and
    single-character runs are discarded.
    """
    runs = re.findall(r"[a-z0-9]+", s.lower())
    return set(filter(lambda run: len(run) > 1, runs))


# Substrings that flag a message as sensitive. They are matched against the
# lowercased message text, so matching is case-insensitive and also fires on
# longer words that contain a marker.
SENSITIVE_MARKERS = (
    "delete",
    "wipe",
    "exfil",
    "password",
    "dump",
)


@dataclass
class CapstoneAssistant:
    budget: int = 100
    trace: list[str] = field(default_factory=list)

    def _spend(self, cost: int, note: str) -> bool:
        if self.budget < cost:
            self.trace.append(f"budget:blocked:{note}")
            return False
        self.budget -= cost
        self.trace.append(f"budget:spent:{cost}:{note}")
        return True

    def _needs_hitl(self, message: str) -> bool:
        t = message.lower()
        return any(m in t for m in SENSITIVE_MARKERS)

    def _route(self, message: str) -> str:
        toks = tokenize(message)
        if "widget" in toks or "sku" in toks or "titanium" in toks:
            return "rag"
        if self._needs_hitl(message):
            return "hitl"
        return "direct"

    def run_turn(self, message: str, human_approved: bool | None) -> dict:
        route = self._route(message)
        self.trace.append(f"route:{route}")

        if route == "hitl":
            if human_approved is None:
                self.trace.append("hitl:pending")
                return {
                    "status": "need_human",
                    "summary": "Sensitive verbs detected — approval required.",
                    "trace": list(self.trace),
                    "budget_remaining": self.budget,
                }
            if not human_approved:
                self.trace.append("hitl:rejected")
                return {
                    "status": "blocked",
                    "answer": "Not running that without explicit approval.",
                    "trace": list(self.trace),
                    "budget_remaining": self.budget,
                }
            # approved: fall through to answer with extra spend
            if not self._spend(30, "hitl_exec"):
                return {
                    "status": "budget_exhausted",
                    "answer": "",
                    "trace": list(self.trace),
                    "budget_remaining": self.budget,
                }
            self.trace.append("hitl:executed")
            return {
                "status": "ok",
                "answer": "Sensitive action executed under explicit human approval (offline simulation).",
                "trace": list(self.trace),
                "budget_remaining": self.budget,
            }

        if route == "rag":
            if not self._spend(20, "retrieve"):
                return {
                    "status": "budget_exhausted",
                    "answer": "",
                    "trace": list(self.trace),
                    "budget_remaining": self.budget,
                }
            rag = answer_with_citations(message)
            self.trace.append(f"rag:{rag['citations'][0]['id']}")
            if not self._spend(25, "synthesize"):
                return {
                    "status": "budget_exhausted",
                    "answer": rag["answer"],
                    "trace": list(self.trace),
                    "budget_remaining": self.budget,
                }
            return {
                "status": "ok",
                "answer": rag["answer"],
                "citations": rag["citations"],
                "trace": list(self.trace),
                "budget_remaining": self.budget,
            }

        # direct
        if not self._spend(15, "direct"):
            return {
                "status": "budget_exhausted",
                "answer": "",
                "trace": list(self.trace),
                "budget_remaining": self.budget,
            }
        ans = f"Direct channel: you asked {message!r} — default safe response."
        self.trace.append("direct:replied")
        return {
            "status": "ok",
            "answer": ans,
            "trace": list(self.trace),
            "budget_remaining": self.budget,
        }