From 0a1062e49bef0f95a32457d7a50d92095b60ff7b Mon Sep 17 00:00:00 2001
From: Max P
Date: Tue, 24 Feb 2026 11:23:02 -0500
Subject: [PATCH] feat(ai): trace provider llm invocations in langsmith

---
 Tasks.md                                            |   2 +
 .../ai/ai-observability.service.spec.ts             |  46 +++++++
 .../endpoints/ai/ai-observability.service.ts        | 121 ++++++++++++++++++
 .../app/endpoints/ai/ai-performance.spec.ts         |   1 +
 .../src/app/endpoints/ai/ai.service.spec.ts         |  24 ++++
 apps/api/src/app/endpoints/ai/ai.service.ts         |  96 +++++++++++---
 .../ai/evals/ai-live-latency.spec.ts                |   1 +
 .../ai/evals/ai-quality-eval.spec.ts                |   1 +
 .../ai/evals/mvp-eval.runner.spec.ts                |   1 +
 tasks/tasks.md                                      |  13 ++
 10 files changed, 290 insertions(+), 16 deletions(-)

diff --git a/Tasks.md b/Tasks.md
index 2352ccace..b91ed32a3 100644
--- a/Tasks.md
+++ b/Tasks.md
@@ -16,6 +16,7 @@ Last updated: 2026-02-24
 | T-008 | Deployment and submission bundle | Complete | `npm run test:ai` + Railway healthcheck + submission docs checklist | `2b6506de8` |
 | T-009 | Open source eval framework contribution | In Review | `@ghostfolio/finance-agent-evals` package scaffold + dataset export + smoke/pack checks | openai/evals PR #1625 + langchain PR #35421 |
 | T-010 | Chat history persistence and simple direct-query handling | Complete | `apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.spec.ts`, `apps/api/src/app/endpoints/ai/ai-agent.utils.spec.ts`, `apps/api/src/app/endpoints/ai/ai.service.spec.ts` | Local implementation |
+| T-011 | Per-LLM LangSmith invocation tracing + production tracing env enablement | Complete | `apps/api/src/app/endpoints/ai/ai-observability.service.spec.ts`, `apps/api/src/app/endpoints/ai/ai.service.spec.ts`, `apps/api/src/app/endpoints/ai/ai-performance.spec.ts`, `apps/api/src/app/endpoints/ai/evals/mvp-eval.runner.spec.ts`, `apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts` | Local implementation + Railway variable update |
 
 ## Notes
 
@@ -35,3 +36,4 @@ Last updated: 2026-02-24
 - Railway crash recovery (2026-02-23): `railway.toml` start command corrected to `node dist/apps/api/main.js`, deployed to Railway (`4f26063a-97e5-43dd-b2dd-360e9e12a951`), and validated with production health check.
 - Tool gating hardening (2026-02-24): planner unknown-intent fallback changed to no-tools, executor policy gate added (`direct|tools|clarify`), and policy metrics emitted via verification and observability logs.
 - Chat persistence + simple direct-query handling (2026-02-24): client chat panel now restores/persists session + bounded message history via localStorage and policy no-tool prompts now return assistant capability guidance for queries like "Who are you?".
+- Per-LLM LangSmith invocation tracing (2026-02-24): each provider call now records an explicit LangSmith `llm` run (provider/model/query/session/response metadata), and production Railway env now has tracing variables enabled.
diff --git a/apps/api/src/app/endpoints/ai/ai-observability.service.spec.ts b/apps/api/src/app/endpoints/ai/ai-observability.service.spec.ts
index 3c83e5c5d..2090a0fdc 100644
--- a/apps/api/src/app/endpoints/ai/ai-observability.service.spec.ts
+++ b/apps/api/src/app/endpoints/ai/ai-observability.service.spec.ts
@@ -134,4 +134,50 @@ describe('AiObservabilityService', () => {
     expect(mockClientConstructor).toHaveBeenCalledTimes(1);
     expect(mockRunTreeConstructor).toHaveBeenCalledTimes(1);
   });
+
+  it('captures explicit llm invocation traces when tracing is enabled', async () => {
+    process.env.LANGSMITH_TRACING = 'true';
+    process.env.LANGSMITH_API_KEY = 'lsv2_test_key';
+
+    const runTree = {
+      end: jest.fn(),
+      patchRun: jest.fn().mockResolvedValue(undefined),
+      postRun: jest.fn().mockResolvedValue(undefined)
+    };
+    mockRunTreeConstructor.mockReturnValue(runTree);
+
+    const subject = new AiObservabilityService();
+
+    await subject.recordLlmInvocation({
+      durationInMs: 23,
+      model: 'openrouter/auto',
+      prompt: 'Query: who are you?',
+      provider: 'openrouter',
+      query: 'who are you?',
+      responseText: 'I am your Ghostfolio assistant.',
+      sessionId: 'session-3',
+      userId: 'user-3'
+    });
+    await new Promise((resolve) => setTimeout(resolve, 10));
+
+    expect(mockClientConstructor).toHaveBeenCalledTimes(1);
+    expect(mockRunTreeConstructor).toHaveBeenCalledWith(
+      expect.objectContaining({
+        name: 'ghostfolio_ai_llm_openrouter',
+        run_type: 'llm'
+      })
+    );
+    expect(runTree.postRun).toHaveBeenCalledTimes(1);
+    expect(runTree.end).toHaveBeenCalledWith(
+      expect.objectContaining({
+        outputs: expect.objectContaining({
+          model: 'openrouter/auto',
+          provider: 'openrouter',
+          responseText: 'I am your Ghostfolio assistant.',
+          status: 'success'
+        })
+      })
+    );
+    expect(runTree.patchRun).toHaveBeenCalledTimes(1);
+  });
 });
diff --git a/apps/api/src/app/endpoints/ai/ai-observability.service.ts b/apps/api/src/app/endpoints/ai/ai-observability.service.ts
index 736e46aef..7f9def901 100644
--- a/apps/api/src/app/endpoints/ai/ai-observability.service.ts
+++ b/apps/api/src/app/endpoints/ai/ai-observability.service.ts
@@ -20,6 +20,19 @@ interface AiAgentPolicySnapshot {
   toolsToExecute: string[];
 }
 
+interface AiLlmInvocationSnapshot {
+  durationInMs: number;
+  errorMessage?: string;
+  model: string;
+  prompt: string;
+  provider: string;
+  query?: string;
+  responseText?: string;
+  sessionId?: string;
+  traceId: string;
+  userId?: string;
+}
+
 @Injectable()
 export class AiObservabilityService {
   private readonly logger = new Logger(OBSERVABILITY_LOG_LABEL);
@@ -321,6 +334,56 @@ export class AiObservabilityService {
     await this.runSafely(async () => runTree.patchRun());
   }
 
+  private async captureLlmInvocationTrace({
+    durationInMs,
+    errorMessage,
+    model,
+    prompt,
+    provider,
+    query,
+    responseText,
+    sessionId,
+    traceId,
+    userId
+  }: AiLlmInvocationSnapshot) {
+    const client = this.getLangSmithClient();
+
+    if (!client) {
+      return;
+    }
+
+    const runTree = new RunTree({
+      client,
+      inputs: {
+        model,
+        prompt,
+        provider,
+        query,
+        sessionId,
+        userId
+      },
+      name: `ghostfolio_ai_llm_${provider}`,
+      project_name: this.langSmithProjectName,
+      run_type: 'llm'
+    });
+
+    await this.runSafely(async () => runTree.postRun());
+    await this.runSafely(async () =>
+      runTree.end({
+        outputs: {
+          durationInMs,
+          error: errorMessage,
+          model,
+          provider,
+          responseText,
+          status: errorMessage ? 'failed' : 'success',
+          traceId
+        }
+      })
+    );
+    await this.runSafely(async () => runTree.patchRun());
+  }
+
   public async captureChatFailure({
     durationInMs,
     error,
@@ -460,4 +523,62 @@ export class AiObservabilityService {
       userId
     }).catch(() => undefined);
   }
+
+  public async recordLlmInvocation({
+    durationInMs,
+    error,
+    model,
+    prompt,
+    provider,
+    query,
+    responseText,
+    sessionId,
+    userId
+  }: {
+    durationInMs: number;
+    error?: unknown;
+    model: string;
+    prompt: string;
+    provider: string;
+    query?: string;
+    responseText?: string;
+    sessionId?: string;
+    userId?: string;
+  }) {
+    const traceId = randomUUID();
+    const errorMessage = error instanceof Error ? error.message : undefined;
+
+    this.logger.log(
+      JSON.stringify({
+        durationInMs,
+        error: errorMessage,
+        event: 'ai_llm_invocation',
+        model,
+        promptLength: prompt.length,
+        provider,
+        queryLength: query?.length ?? 0,
+        responseLength: responseText?.length ?? 0,
+        sessionId,
+        traceId,
+        userId
+      })
+    );
+
+    if (!this.isLangSmithEnabled) {
+      return;
+    }
+
+    void this.captureLlmInvocationTrace({
+      durationInMs,
+      errorMessage,
+      model,
+      prompt,
+      provider,
+      query,
+      responseText,
+      sessionId,
+      traceId,
+      userId
+    }).catch(() => undefined);
+  }
 }
diff --git a/apps/api/src/app/endpoints/ai/ai-performance.spec.ts b/apps/api/src/app/endpoints/ai/ai-performance.spec.ts
index 0a872d568..fae298f75 100644
--- a/apps/api/src/app/endpoints/ai/ai-performance.spec.ts
+++ b/apps/api/src/app/endpoints/ai/ai-performance.spec.ts
@@ -85,6 +85,7 @@ function createAiServiceForPerformanceTests() {
       tokenEstimate: { input: 10, output: 10, total: 20 },
       traceId: 'perf-trace'
     }),
+    recordLlmInvocation: jest.fn().mockResolvedValue(undefined),
     recordFeedback: jest.fn().mockResolvedValue(undefined)
   };
 
diff --git a/apps/api/src/app/endpoints/ai/ai.service.spec.ts b/apps/api/src/app/endpoints/ai/ai.service.spec.ts
index 51843b735..b194650ce 100644
--- a/apps/api/src/app/endpoints/ai/ai.service.spec.ts
+++ b/apps/api/src/app/endpoints/ai/ai.service.spec.ts
@@ -10,6 +10,7 @@ describe('AiService', () => {
   let aiObservabilityService: {
     captureChatFailure: jest.Mock;
     captureChatSuccess: jest.Mock;
+    recordLlmInvocation: jest.Mock;
     recordFeedback: jest.Mock;
   };
   let subject: AiService;
@@ -50,6 +51,7 @@
       },
       traceId: 'trace-1'
     }),
+    recordLlmInvocation: jest.fn().mockResolvedValue(undefined),
     recordFeedback: jest.fn()
   };
 
@@ -458,6 +460,13 @@
     expect(result).toEqual({
       text: 'zai-response'
     });
+    expect(aiObservabilityService.recordLlmInvocation).toHaveBeenCalledWith(
+      expect.objectContaining({
+        model: 'glm-5',
+        provider: 'z_ai_glm',
+        responseText: 'zai-response'
+      })
+    );
     expect(propertyService.getByKey).not.toHaveBeenCalled();
   });
 
@@ -497,6 +506,21 @@
     expect(result).toEqual({
       text: 'minimax-response'
     });
+    expect(aiObservabilityService.recordLlmInvocation).toHaveBeenNthCalledWith(
+      1,
+      expect.objectContaining({
+        model: 'glm-5',
+        provider: 'z_ai_glm'
+      })
+    );
+    expect(aiObservabilityService.recordLlmInvocation).toHaveBeenNthCalledWith(
+      2,
+      expect.objectContaining({
+        model: 'MiniMax-M2.5',
+        provider: 'minimax',
+        responseText: 'minimax-response'
+      })
+    );
   });
 
   it('captures observability failure events when chat throws', async () => {
diff --git a/apps/api/src/app/endpoints/ai/ai.service.ts b/apps/api/src/app/endpoints/ai/ai.service.ts
index f4d40d1ba..3e411fae0 100644
--- a/apps/api/src/app/endpoints/ai/ai.service.ts
+++ b/apps/api/src/app/endpoints/ai/ai.service.ts
@@ -58,10 +58,16 @@ export class AiService {
   ) {}
 
   public async generateText({
     prompt,
-    signal
+    signal,
+    traceContext
   }: {
     prompt: string;
     signal?: AbortSignal;
+    traceContext?: {
+      query?: string;
+      sessionId?: string;
+      userId?: string;
+    };
   }) {
     const zAiGlmApiKey =
       process.env.z_ai_glm_api_key ?? process.env.Z_AI_GLM_API_KEY;
@@ -70,14 +76,54 @@
       process.env.minimax_api_key ?? process.env.MINIMAX_API_KEY;
     const minimaxModel = process.env.minimax_model ?? process.env.MINIMAX_MODEL;
     const providerErrors: string[] = [];
+    const invokeProviderWithTracing = async ({
+      model,
+      provider,
+      run
+    }: {
+      model: string;
+      provider: string;
+      run: () => Promise<{ text?: string }>;
+    }) => {
+      const startedAt = Date.now();
+      let invocationError: unknown;
+      let responseText: string | undefined;
 
-    if (zAiGlmApiKey) {
       try {
-        return await generateTextWithZAiGlm({
-          apiKey: zAiGlmApiKey,
-          model: zAiGlmModel,
+        const response = await run();
+        responseText = response?.text;
+
+        return response;
+      } catch (error) {
+        invocationError = error;
+        throw error;
+      } finally {
+        void this.aiObservabilityService.recordLlmInvocation({
+          durationInMs: Date.now() - startedAt,
+          error: invocationError,
+          model,
           prompt,
-          signal
+          provider,
+          query: traceContext?.query,
+          responseText,
+          sessionId: traceContext?.sessionId,
+          userId: traceContext?.userId
+        });
+      }
+    };
+
+    if (zAiGlmApiKey) {
+      try {
+        return await invokeProviderWithTracing({
+          model: zAiGlmModel ?? 'glm-5',
+          provider: 'z_ai_glm',
+          run: () =>
+            generateTextWithZAiGlm({
+              apiKey: zAiGlmApiKey,
+              model: zAiGlmModel,
+              prompt,
+              signal
+            })
         });
       } catch (error) {
         providerErrors.push(
@@ -88,11 +134,16 @@
 
     if (minimaxApiKey) {
       try {
-        return await generateTextWithMinimax({
-          apiKey: minimaxApiKey,
-          model: minimaxModel,
-          prompt,
-          signal
+        return await invokeProviderWithTracing({
+          model: minimaxModel ?? 'MiniMax-M2.5',
+          provider: 'minimax',
+          run: () =>
+            generateTextWithMinimax({
+              apiKey: minimaxApiKey,
+              model: minimaxModel,
+              prompt,
+              signal
+            })
         });
       } catch (error) {
         providerErrors.push(
@@ -118,10 +169,15 @@
     const openRouterService = createOpenRouter({ apiKey: openRouterApiKey });
 
-    return generateText({
-      prompt,
-      abortSignal: signal,
-      model: openRouterService.chat(openRouterModel)
+    return invokeProviderWithTracing({
+      model: openRouterModel,
+      provider: 'openrouter',
+      run: () =>
+        generateText({
+          prompt,
+          abortSignal: signal,
+          model: openRouterService.chat(openRouterModel)
+        })
     });
   }
 
@@ -343,7 +399,15 @@
     if (policyDecision.route === 'tools') {
       const llmGenerationStartedAt = Date.now();
       answer = await buildAnswer({
-        generateText: (options) => this.generateText(options),
+        generateText: (options) =>
+          this.generateText({
+            ...options,
+            traceContext: {
+              query: normalizedQuery,
+              sessionId: resolvedSessionId,
+              userId
+            }
+          }),
         languageCode,
         marketData,
         memory,
diff --git a/apps/api/src/app/endpoints/ai/evals/ai-live-latency.spec.ts b/apps/api/src/app/endpoints/ai/evals/ai-live-latency.spec.ts
index 68bb5ce12..76e780a20 100644
--- a/apps/api/src/app/endpoints/ai/evals/ai-live-latency.spec.ts
+++ b/apps/api/src/app/endpoints/ai/evals/ai-live-latency.spec.ts
@@ -107,6 +107,7 @@ function createLiveBenchmarkSubject() {
       },
       traceId: 'live-benchmark'
     }),
+    recordLlmInvocation: jest.fn().mockResolvedValue(undefined),
     recordFeedback: jest.fn().mockResolvedValue(undefined)
   };
 
diff --git a/apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts b/apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts
index 273a3814c..f9df25123 100644
--- a/apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts
+++ b/apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts
@@ -71,6 +71,7 @@ function createSubject({
       },
       traceId: 'quality-eval-trace'
     }),
+    recordLlmInvocation: jest.fn().mockResolvedValue(undefined),
     recordFeedback: jest.fn().mockResolvedValue(undefined)
   };
 
diff --git a/apps/api/src/app/endpoints/ai/evals/mvp-eval.runner.spec.ts b/apps/api/src/app/endpoints/ai/evals/mvp-eval.runner.spec.ts
index 037aa0d29..19374f1b5 100644
--- a/apps/api/src/app/endpoints/ai/evals/mvp-eval.runner.spec.ts
+++ b/apps/api/src/app/endpoints/ai/evals/mvp-eval.runner.spec.ts
@@ -30,6 +30,7 @@ function createAiServiceForCase(evalCase: AiAgentMvpEvalCase) {
       tokenEstimate: { input: 1, output: 1, total: 2 },
       traceId: 'eval-trace'
     }),
+    recordLlmInvocation: jest.fn().mockResolvedValue(undefined),
     recordFeedback: jest.fn().mockResolvedValue(undefined)
   };
 
diff --git a/tasks/tasks.md b/tasks/tasks.md
index 768dea6c9..2af180500 100644
--- a/tasks/tasks.md
+++ b/tasks/tasks.md
@@ -201,6 +201,14 @@ Last updated: 2026-02-24
 - [x] Add or update unit tests for chat persistence and policy simple-query routing.
 - [x] Run focused verification on touched frontend/backend AI suites and update task tracking artifacts.
 
+## Session Plan (2026-02-24, Per-LLM LangSmith Invocation Tracing)
+
+- [x] Audit current AI provider call path and verify where LangSmith/LangChain tracing is missing.
+- [x] Add explicit per-provider LLM invocation tracing hooks before/after each `generateText` provider call.
+- [x] Thread query/session/user context into LLM invocation tracing payloads for easier LangSmith filtering.
+- [x] Update AI and observability unit tests to assert LLM invocation trace behavior and keep provider fallback behavior stable.
+- [x] Run focused verification for touched AI suites and update task tracking notes.
+
 ## Verification Notes
 
 - `nx run api:lint` completed successfully (existing workspace warnings only).
@@ -269,3 +277,8 @@ Last updated: 2026-02-24
 - `npx jest apps/api/src/app/endpoints/ai/ai-agent.utils.spec.ts apps/api/src/app/endpoints/ai/ai.service.spec.ts --config apps/api/jest.config.ts` (31/31 tests passed)
 - `npx nx run api:lint` (passes with existing workspace warnings)
 - `npx nx run client:lint` (passes with existing workspace warnings)
+- Per-LLM LangSmith invocation tracing verification (local + deploy config, 2026-02-24):
+  - `npx jest apps/api/src/app/endpoints/ai/ai-observability.service.spec.ts apps/api/src/app/endpoints/ai/ai.service.spec.ts apps/api/src/app/endpoints/ai/evals/mvp-eval.runner.spec.ts apps/api/src/app/endpoints/ai/ai-performance.spec.ts apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts apps/api/src/app/endpoints/ai/evals/ai-live-latency.spec.ts --config apps/api/jest.config.ts` (5/5 suites passed, live-latency suite skipped by env gate)
+  - `npx nx run api:lint` (passes with existing workspace warnings)
+  - `railway variable set -s ghostfolio-api --skip-deploys LANGCHAIN_API_KEY=... LANGSMITH_API_KEY=... LANGCHAIN_TRACING_V2=true LANGSMITH_TRACING=true LANGSMITH_PROJECT=ghostfolio-ai-agent`
+  - `railway variable list -s ghostfolio-api --kv` confirms: `LANGCHAIN_API_KEY`, `LANGSMITH_API_KEY`, `LANGCHAIN_TRACING_V2`, `LANGSMITH_TRACING`, `LANGSMITH_PROJECT`
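
For review context, the heart of the change in `AiService.generateText` is a wrapper that times each provider call and always emits exactly one `recordLlmInvocation` event from a `finally` block, rethrowing errors so the existing provider-fallback chain is untouched. A condensed, self-contained TypeScript sketch of that pattern follows; the `Observability` interface and `callProvider` parameter are illustrative stand-ins, not identifiers from the codebase.

// Hypothetical stand-ins for illustration: `Observability` mirrors the
// slice of AiObservabilityService used here; `callProvider` is any
// provider SDK call that resolves to a { text } response.
interface Observability {
  recordLlmInvocation(snapshot: {
    durationInMs: number;
    error?: unknown;
    model: string;
    prompt: string;
    provider: string;
    responseText?: string;
  }): Promise<void>;
}

async function invokeWithTracing(
  observability: Observability,
  model: string,
  provider: string,
  prompt: string,
  callProvider: () => Promise<{ text?: string }>
) {
  const startedAt = Date.now();
  let invocationError: unknown;
  let responseText: string | undefined;

  try {
    const response = await callProvider();
    responseText = response?.text;

    return response;
  } catch (error) {
    invocationError = error;
    throw error; // rethrow so the caller's provider-fallback chain still runs
  } finally {
    // Fire-and-forget: tracing must never block or fail the chat path.
    void observability.recordLlmInvocation({
      durationInMs: Date.now() - startedAt,
      error: invocationError,
      model,
      prompt,
      provider,
      responseText
    });
  }
}

Because the event fires in `finally`, a chat turn that falls through all three providers (z_ai_glm -> minimax -> openrouter) yields one `llm` run per attempt, each tagged with its own provider, model, and error message.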
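On the LangSmith side, `captureLlmInvocationTrace` follows the SDK's post/end/patch run lifecycle. Below is a minimal sketch using the `langsmith` JS package, assuming `LANGSMITH_API_KEY` and `LANGSMITH_TRACING` are set as in the Railway commands above; the `end({ outputs: ... })` argument shape mirrors this patch's usage, and the run/project names are the ones the service derives.

import { Client, RunTree } from 'langsmith';

async function emitExampleLlmRun() {
  // Client picks up LANGSMITH_API_KEY / LANGSMITH_ENDPOINT from the environment.
  const client = new Client();

  const runTree = new RunTree({
    client,
    inputs: { prompt: 'Query: who are you?', provider: 'openrouter' },
    name: 'ghostfolio_ai_llm_openrouter',
    project_name: 'ghostfolio-ai-agent',
    run_type: 'llm'
  });

  await runTree.postRun(); // create the run in LangSmith
  await runTree.end({
    outputs: { responseText: 'I am your Ghostfolio assistant.', status: 'success' }
  }); // record outputs and end time on the local run object
  await runTree.patchRun(); // push the outputs/end-time update to LangSmith
}

The service additionally wraps each lifecycle step in `runSafely` and fire-and-forgets the whole trace, so a LangSmith outage degrades to the structured `ai_llm_invocation` log line rather than failing the request.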