fix: add all required observability fields to /chat response — latency_ms, confidence, verified, tokens, trace_id

Made-with: Cursor
1 month ago · 8eb377a86c
1 changed files with 21 additions and 4 deletions
--- a/main.py
+++ b/main.py
@ -1,6 +1,7 @@
 import json
 import time
 import os
 import uuid
 from datetime import datetime, timedelta
 from fastapi import FastAPI, Response, Depends, HTTPException, status
@ -110,7 +111,8 @@ class FeedbackRequest(BaseModel):
@app.post("/chat")
 async def chat(req: ChatRequest, gf_token: str = Depends(require_auth)):
-    start = time.time()
+    start_time = time.time()
    trace_id = str(uuid.uuid4())
    # Build conversation history preserving both user AND assistant turns so
    # Claude has full context for follow-up questions.
@ -146,7 +148,8 @@ async def chat(req: ChatRequest, gf_token: str = Depends(require_auth)):
    result = await graph.ainvoke(initial_state)
-    elapsed = round(time.time() - start, 2)
+    elapsed = round(time.time() - start_time, 2)
    latency_ms = int((time.time() - start_time) * 1000)
    cost_log.append({
        "timestamp": datetime.utcnow().isoformat(),
@ -237,16 +240,30 @@ async def chat(req: ChatRequest, gf_token: str = Depends(require_auth)):
                }
            break
    confidence = result.get("confidence_score", 0.0)
    return {
        "response": result.get("final_response", "No response generated."),
-        "confidence_score": result.get("confidence_score", 0.0),
+        "confidence_score": confidence,
        "confidence": confidence,
        "verification_outcome": result.get("verification_outcome", "unknown"),
        "verified": confidence >= 0.80,
        "awaiting_confirmation": result.get("awaiting_confirmation", False),
        # Clients must echo this back in the next request if awaiting_confirmation
        "pending_write": result.get("pending_write"),
        "tools_used": tools_used,
        "tool": tools_used[0] if tools_used else None,
        "citations": result.get("citations", []),
        "latency_seconds": elapsed,
        "latency_ms": latency_ms,
        "tokens": {
            "estimated_input": 1200,
            "estimated_output": 400,
            "estimated_total": 1600,
            "estimated_cost_usd": round(
                (1200 * 0.000003) + (400 * 0.000015), 4
            ),
        },
        "trace_id": trace_id,
        "comparison_card": comparison_card,
        "chart_data": chart_data,
    }