
feat(ai): trace provider llm invocations in langsmith

pull/6395/head
Max P, 1 month ago
parent commit 0a1062e49b
  1. Tasks.md (2)
  2. apps/api/src/app/endpoints/ai/ai-observability.service.spec.ts (46)
  3. apps/api/src/app/endpoints/ai/ai-observability.service.ts (121)
  4. apps/api/src/app/endpoints/ai/ai-performance.spec.ts (1)
  5. apps/api/src/app/endpoints/ai/ai.service.spec.ts (24)
  6. apps/api/src/app/endpoints/ai/ai.service.ts (74)
  7. apps/api/src/app/endpoints/ai/evals/ai-live-latency.spec.ts (1)
  8. apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts (1)
  9. apps/api/src/app/endpoints/ai/evals/mvp-eval.runner.spec.ts (1)
  10. tasks/tasks.md (13)

Tasks.md (2)

@@ -16,6 +16,7 @@ Last updated: 2026-02-24
| T-008 | Deployment and submission bundle | Complete | `npm run test:ai` + Railway healthcheck + submission docs checklist | `2b6506de8` |
| T-009 | Open source eval framework contribution | In Review | `@ghostfolio/finance-agent-evals` package scaffold + dataset export + smoke/pack checks | openai/evals PR #1625 + langchain PR #35421 |
| T-010 | Chat history persistence and simple direct-query handling | Complete | `apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.spec.ts`, `apps/api/src/app/endpoints/ai/ai-agent.utils.spec.ts`, `apps/api/src/app/endpoints/ai/ai.service.spec.ts` | Local implementation |
| T-011 | Per-LLM LangSmith invocation tracing + production tracing env enablement | Complete | `apps/api/src/app/endpoints/ai/ai-observability.service.spec.ts`, `apps/api/src/app/endpoints/ai/ai.service.spec.ts`, `apps/api/src/app/endpoints/ai/ai-performance.spec.ts`, `apps/api/src/app/endpoints/ai/evals/mvp-eval.runner.spec.ts`, `apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts` | Local implementation + Railway variable update |
## Notes
@@ -35,3 +36,4 @@ Last updated: 2026-02-24
- Railway crash recovery (2026-02-23): `railway.toml` start command corrected to `node dist/apps/api/main.js`, deployed to Railway (`4f26063a-97e5-43dd-b2dd-360e9e12a951`), and validated with production health check.
- Tool gating hardening (2026-02-24): planner unknown-intent fallback changed to no-tools, executor policy gate added (`direct|tools|clarify`), and policy metrics emitted via verification and observability logs.
- Chat persistence + simple direct-query handling (2026-02-24): client chat panel now restores/persists session + bounded message history via localStorage and policy no-tool prompts now return assistant capability guidance for queries like "Who are you?".
- Per-LLM LangSmith invocation tracing (2026-02-24): each provider call now records an explicit LangSmith `llm` run (provider/model/query/session/response metadata), and production Railway env now has tracing variables enabled.
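The note above summarizes the mechanism this commit adds. As a rough standalone sketch (names simplified, stand-in recorder instead of the real `AiObservabilityService`), the per-provider wrapper times each call, captures success or failure, and hands the result to the recorder without letting recording affect the caller:

```typescript
// Sketch of the per-provider tracing wrapper pattern; the real version
// lives in apps/api/src/app/endpoints/ai/ai.service.ts.
interface LlmInvocationRecord {
  durationInMs: number;
  error?: unknown;
  model: string;
  provider: string;
  responseText?: string;
}

const records: LlmInvocationRecord[] = [];

// Stand-in for AiObservabilityService.recordLlmInvocation().
async function recordLlmInvocation(record: LlmInvocationRecord) {
  records.push(record);
}

async function invokeProviderWithTracing({
  model,
  provider,
  run
}: {
  model: string;
  provider: string;
  run: () => Promise<{ text?: string }>;
}) {
  const startedAt = Date.now();
  let invocationError: unknown;
  let responseText: string | undefined;
  try {
    const response = await run();
    responseText = response?.text;
    return response;
  } catch (error) {
    invocationError = error;
    throw error; // provider-fallback logic still sees the failure
  } finally {
    // Fire-and-forget: recording must never delay or break the request path.
    void recordLlmInvocation({
      durationInMs: Date.now() - startedAt,
      error: invocationError,
      model,
      provider,
      responseText
    }).catch(() => undefined);
  }
}

async function demo() {
  await invokeProviderWithTracing({
    model: 'glm-5',
    provider: 'z_ai_glm',
    run: async () => ({ text: 'hello' })
  });
  await invokeProviderWithTracing({
    model: 'MiniMax-M2.5',
    provider: 'minimax',
    run: async () => {
      throw new Error('upstream timeout');
    }
  }).catch(() => undefined);
  return records;
}
```

Both the successful and the failing invocation end up recorded, which is what makes every provider call visible in LangSmith regardless of the fallback chain's outcome.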

apps/api/src/app/endpoints/ai/ai-observability.service.spec.ts (46)

@@ -134,4 +134,50 @@ describe('AiObservabilityService', () => {
expect(mockClientConstructor).toHaveBeenCalledTimes(1);
expect(mockRunTreeConstructor).toHaveBeenCalledTimes(1);
});
it('captures explicit llm invocation traces when tracing is enabled', async () => {
process.env.LANGSMITH_TRACING = 'true';
process.env.LANGSMITH_API_KEY = 'lsv2_test_key';
const runTree = {
end: jest.fn(),
patchRun: jest.fn().mockResolvedValue(undefined),
postRun: jest.fn().mockResolvedValue(undefined)
};
mockRunTreeConstructor.mockReturnValue(runTree);
const subject = new AiObservabilityService();
await subject.recordLlmInvocation({
durationInMs: 23,
model: 'openrouter/auto',
prompt: 'Query: who are you?',
provider: 'openrouter',
query: 'who are you?',
responseText: 'I am your Ghostfolio assistant.',
sessionId: 'session-3',
userId: 'user-3'
});
await new Promise((resolve) => setTimeout(resolve, 10));
expect(mockClientConstructor).toHaveBeenCalledTimes(1);
expect(mockRunTreeConstructor).toHaveBeenCalledWith(
expect.objectContaining({
name: 'ghostfolio_ai_llm_openrouter',
run_type: 'llm'
})
);
expect(runTree.postRun).toHaveBeenCalledTimes(1);
expect(runTree.end).toHaveBeenCalledWith(
expect.objectContaining({
outputs: expect.objectContaining({
model: 'openrouter/auto',
provider: 'openrouter',
responseText: 'I am your Ghostfolio assistant.',
status: 'success'
})
})
);
expect(runTree.patchRun).toHaveBeenCalledTimes(1);
});
});
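The spec above waits (`await new Promise((resolve) => setTimeout(resolve, 10))`) before asserting because `recordLlmInvocation` posts the LangSmith run as a detached promise. A minimal, framework-free illustration of that flush-then-assert pattern (all names here are hypothetical; the real spec uses Jest mocks):

```typescript
// Asserting on fire-and-forget async work by letting its timer fire first.
const posted: string[] = [];

function fireAndForget(name: string) {
  // Detached promise: the caller never awaits it, mirroring the service's
  // `void capture...().catch(() => undefined)` style.
  void new Promise<void>((resolve) => setTimeout(resolve, 5)).then(() => {
    posted.push(name);
  });
}

async function runScenario() {
  fireAndForget('ghostfolio_ai_llm_openrouter');
  const before = posted.length; // still 0: the work is queued, not done
  await new Promise((resolve) => setTimeout(resolve, 10)); // flush the timer
  return { after: posted.length, before };
}
```

Without the flush, the assertions would race the detached promise and fail intermittently.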

apps/api/src/app/endpoints/ai/ai-observability.service.ts (121)

@@ -20,6 +20,19 @@ interface AiAgentPolicySnapshot {
toolsToExecute: string[];
}
interface AiLlmInvocationSnapshot {
durationInMs: number;
errorMessage?: string;
model: string;
prompt: string;
provider: string;
query?: string;
responseText?: string;
sessionId?: string;
traceId: string;
userId?: string;
}
@Injectable()
export class AiObservabilityService {
private readonly logger = new Logger(OBSERVABILITY_LOG_LABEL);
@@ -321,6 +334,56 @@ export class AiObservabilityService {
await this.runSafely(async () => runTree.patchRun());
}
private async captureLlmInvocationTrace({
durationInMs,
errorMessage,
model,
prompt,
provider,
query,
responseText,
sessionId,
traceId,
userId
}: AiLlmInvocationSnapshot) {
const client = this.getLangSmithClient();
if (!client) {
return;
}
const runTree = new RunTree({
client,
inputs: {
model,
prompt,
provider,
query,
sessionId,
userId
},
name: `ghostfolio_ai_llm_${provider}`,
project_name: this.langSmithProjectName,
run_type: 'llm'
});
await this.runSafely(async () => runTree.postRun());
await this.runSafely(async () =>
runTree.end({
outputs: {
durationInMs,
error: errorMessage,
model,
provider,
responseText,
status: errorMessage ? 'failed' : 'success',
traceId
}
})
);
await this.runSafely(async () => runTree.patchRun());
}
public async captureChatFailure({
durationInMs,
error,
@@ -460,4 +523,62 @@ export class AiObservabilityService {
userId
}).catch(() => undefined);
}
public async recordLlmInvocation({
durationInMs,
error,
model,
prompt,
provider,
query,
responseText,
sessionId,
userId
}: {
durationInMs: number;
error?: unknown;
model: string;
prompt: string;
provider: string;
query?: string;
responseText?: string;
sessionId?: string;
userId?: string;
}) {
const traceId = randomUUID();
const errorMessage = error instanceof Error ? error.message : undefined;
this.logger.log(
JSON.stringify({
durationInMs,
error: errorMessage,
event: 'ai_llm_invocation',
model,
promptLength: prompt.length,
provider,
queryLength: query?.length ?? 0,
responseLength: responseText?.length ?? 0,
sessionId,
traceId,
userId
})
);
if (!this.isLangSmithEnabled) {
return;
}
void this.captureLlmInvocationTrace({
durationInMs,
errorMessage,
model,
prompt,
provider,
query,
responseText,
sessionId,
traceId,
userId
}).catch(() => undefined);
}
}
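Every LangSmith call in `captureLlmInvocationTrace` is funneled through `runSafely`, so a tracing outage can never surface as a chat error. The helper's actual body is outside this diff; a plausible shape, offered purely as an assumption, is:

```typescript
// Hypothetical runSafely: swallow tracing errors instead of throwing.
// The real helper exists in AiObservabilityService but is not shown in
// this diff, so this is an illustrative guess at its contract.
async function runSafely<T>(
  operation: () => Promise<T>,
  onError: (error: unknown) => void = () => undefined
): Promise<T | undefined> {
  try {
    return await operation();
  } catch (error) {
    onError(error); // e.g. a logger.warn(...) in a real service
    return undefined;
  }
}

async function demoRunSafely() {
  const ok = await runSafely(async () => 'posted');
  const failed = await runSafely(async () => {
    throw new Error('LangSmith unreachable');
  });
  return { failed, ok };
}
```

The design choice matters: `postRun`, `end`, and `patchRun` each get their own guarded call, so a failure in one step does not skip the cleanup steps after it.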

apps/api/src/app/endpoints/ai/ai-performance.spec.ts (1)

@@ -85,6 +85,7 @@ function createAiServiceForPerformanceTests() {
tokenEstimate: { input: 10, output: 10, total: 20 },
traceId: 'perf-trace'
}),
recordLlmInvocation: jest.fn().mockResolvedValue(undefined),
recordFeedback: jest.fn().mockResolvedValue(undefined)
};

apps/api/src/app/endpoints/ai/ai.service.spec.ts (24)

@@ -10,6 +10,7 @@ describe('AiService', () => {
let aiObservabilityService: {
captureChatFailure: jest.Mock;
captureChatSuccess: jest.Mock;
recordLlmInvocation: jest.Mock;
recordFeedback: jest.Mock;
};
let subject: AiService;
@@ -50,6 +51,7 @@ describe('AiService', () => {
},
traceId: 'trace-1'
}),
recordLlmInvocation: jest.fn().mockResolvedValue(undefined),
recordFeedback: jest.fn()
};
@@ -458,6 +460,13 @@ describe('AiService', () => {
expect(result).toEqual({
text: 'zai-response'
});
expect(aiObservabilityService.recordLlmInvocation).toHaveBeenCalledWith(
expect.objectContaining({
model: 'glm-5',
provider: 'z_ai_glm',
responseText: 'zai-response'
})
);
expect(propertyService.getByKey).not.toHaveBeenCalled();
});
@@ -497,6 +506,21 @@ describe('AiService', () => {
expect(result).toEqual({
text: 'minimax-response'
});
expect(aiObservabilityService.recordLlmInvocation).toHaveBeenNthCalledWith(
1,
expect.objectContaining({
model: 'glm-5',
provider: 'z_ai_glm'
})
);
expect(aiObservabilityService.recordLlmInvocation).toHaveBeenNthCalledWith(
2,
expect.objectContaining({
model: 'MiniMax-M2.5',
provider: 'minimax',
responseText: 'minimax-response'
})
);
});
it('captures observability failure events when chat throws', async () => {

apps/api/src/app/endpoints/ai/ai.service.ts (74)

@@ -58,10 +58,16 @@ export class AiService {
) {}
public async generateText({
prompt,
signal signal,
traceContext
}: {
prompt: string;
signal?: AbortSignal;
traceContext?: {
query?: string;
sessionId?: string;
userId?: string;
};
}) {
const zAiGlmApiKey =
process.env.z_ai_glm_api_key ?? process.env.Z_AI_GLM_API_KEY;
@@ -70,14 +76,54 @@ export class AiService {
process.env.minimax_api_key ?? process.env.MINIMAX_API_KEY;
const minimaxModel = process.env.minimax_model ?? process.env.MINIMAX_MODEL;
const providerErrors: string[] = [];
const invokeProviderWithTracing = async ({
model,
provider,
run
}: {
model: string;
provider: string;
run: () => Promise<{ text?: string }>;
}) => {
const startedAt = Date.now();
let invocationError: unknown;
let responseText: string | undefined;
try {
const response = await run();
responseText = response?.text;
return response;
} catch (error) {
invocationError = error;
throw error;
} finally {
void this.aiObservabilityService.recordLlmInvocation({
durationInMs: Date.now() - startedAt,
error: invocationError,
model,
prompt,
provider,
query: traceContext?.query,
responseText,
sessionId: traceContext?.sessionId,
userId: traceContext?.userId
});
}
};
if (zAiGlmApiKey) {
try {
return await invokeProviderWithTracing({
model: zAiGlmModel ?? 'glm-5',
provider: 'z_ai_glm',
run: () =>
generateTextWithZAiGlm({
apiKey: zAiGlmApiKey,
model: zAiGlmModel,
prompt,
signal
})
}); });
} catch (error) {
providerErrors.push(
@@ -88,11 +134,16 @@ export class AiService {
if (minimaxApiKey) {
try {
return await invokeProviderWithTracing({
model: minimaxModel ?? 'MiniMax-M2.5',
provider: 'minimax',
run: () =>
generateTextWithMinimax({
apiKey: minimaxApiKey,
model: minimaxModel,
prompt,
signal
})
}); });
} catch (error) {
providerErrors.push(
@@ -118,10 +169,15 @@ export class AiService {
const openRouterService = createOpenRouter({
apiKey: openRouterApiKey
});
return invokeProviderWithTracing({
model: openRouterModel,
provider: 'openrouter',
run: () =>
generateText({
prompt,
abortSignal: signal,
model: openRouterService.chat(openRouterModel)
})
});
}
@@ -343,7 +399,15 @@ export class AiService {
if (policyDecision.route === 'tools') {
const llmGenerationStartedAt = Date.now();
answer = await buildAnswer({
generateText: (options) =>
this.generateText({
...options,
traceContext: {
query: normalizedQuery,
sessionId: resolvedSessionId,
userId
}
}),
languageCode,
marketData,
memory,
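The last hunk threads request context into `generateText` by spreading the builder's options and attaching a `traceContext`, so the answer builder never learns about tracing. Sketched in isolation (simplified types, hypothetical stand-in functions):

```typescript
// Sketch of threading trace context through a generateText-style callback.
// All functions here are simplified stand-ins, not the Ghostfolio originals.
interface TraceContext {
  query?: string;
  sessionId?: string;
  userId?: string;
}

interface GenerateTextOptions {
  prompt: string;
  traceContext?: TraceContext;
}

// Stand-in for AiService.generateText(): echoes what it received.
async function generateText(options: GenerateTextOptions) {
  return {
    receivedContext: options.traceContext,
    text: `echo:${options.prompt}`
  };
}

// Stand-in for buildAnswer(): only knows about the prompt, not the context.
async function buildAnswer(
  generate: (options: { prompt: string }) => ReturnType<typeof generateText>
) {
  return generate({ prompt: 'summarize portfolio' });
}

async function demoTraceContext() {
  // The service layer closes over the request's identifiers, so the answer
  // builder stays unaware of tracing concerns.
  return buildAnswer((options) =>
    generateText({
      ...options,
      traceContext: { query: 'who are you?', sessionId: 's-1', userId: 'u-1' }
    })
  );
}
```

Closing over the identifiers at the service boundary keeps `buildAnswer`'s signature stable while still letting every downstream provider call be filtered by session and user in LangSmith.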

apps/api/src/app/endpoints/ai/evals/ai-live-latency.spec.ts (1)

@@ -107,6 +107,7 @@ function createLiveBenchmarkSubject() {
},
traceId: 'live-benchmark'
}),
recordLlmInvocation: jest.fn().mockResolvedValue(undefined),
recordFeedback: jest.fn().mockResolvedValue(undefined)
};

apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts (1)

@@ -71,6 +71,7 @@ function createSubject({
},
traceId: 'quality-eval-trace'
}),
recordLlmInvocation: jest.fn().mockResolvedValue(undefined),
recordFeedback: jest.fn().mockResolvedValue(undefined)
};

apps/api/src/app/endpoints/ai/evals/mvp-eval.runner.spec.ts (1)

@@ -30,6 +30,7 @@ function createAiServiceForCase(evalCase: AiAgentMvpEvalCase) {
tokenEstimate: { input: 1, output: 1, total: 2 },
traceId: 'eval-trace'
}),
recordLlmInvocation: jest.fn().mockResolvedValue(undefined),
recordFeedback: jest.fn().mockResolvedValue(undefined)
};

tasks/tasks.md (13)

@@ -201,6 +201,14 @@ Last updated: 2026-02-24
- [x] Add or update unit tests for chat persistence and policy simple-query routing.
- [x] Run focused verification on touched frontend/backend AI suites and update task tracking artifacts.
## Session Plan (2026-02-24, Per-LLM LangSmith Invocation Tracing)
- [x] Audit current AI provider call path and verify where LangSmith/LangChain tracing is missing.
- [x] Add explicit per-provider LLM invocation tracing hooks before/after each `generateText` provider call.
- [x] Thread query/session/user context into LLM invocation tracing payloads for easier LangSmith filtering.
- [x] Update AI and observability unit tests to assert LLM invocation trace behavior and keep provider fallback behavior stable.
- [x] Run focused verification for touched AI suites and update task tracking notes.
## Verification Notes
- `nx run api:lint` completed successfully (existing workspace warnings only).
@@ -269,3 +277,8 @@ Last updated: 2026-02-24
- `npx jest apps/api/src/app/endpoints/ai/ai-agent.utils.spec.ts apps/api/src/app/endpoints/ai/ai.service.spec.ts --config apps/api/jest.config.ts` (31/31 tests passed)
- `npx nx run api:lint` (passes with existing workspace warnings)
- `npx nx run client:lint` (passes with existing workspace warnings)
- Per-LLM LangSmith invocation tracing verification (local + deploy config, 2026-02-24):
- `npx jest apps/api/src/app/endpoints/ai/ai-observability.service.spec.ts apps/api/src/app/endpoints/ai/ai.service.spec.ts apps/api/src/app/endpoints/ai/evals/mvp-eval.runner.spec.ts apps/api/src/app/endpoints/ai/ai-performance.spec.ts apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts apps/api/src/app/endpoints/ai/evals/ai-live-latency.spec.ts --config apps/api/jest.config.ts` (5/5 suites passed, live-latency suite skipped by env gate)
- `npx nx run api:lint` (passes with existing workspace warnings)
- `railway variable set -s ghostfolio-api --skip-deploys LANGCHAIN_API_KEY=... LANGSMITH_API_KEY=... LANGCHAIN_TRACING_V2=true LANGSMITH_TRACING=true LANGSMITH_PROJECT=ghostfolio-ai-agent`
- `railway variable list -s ghostfolio-api --kv` confirms: `LANGCHAIN_API_KEY`, `LANGSMITH_API_KEY`, `LANGCHAIN_TRACING_V2`, `LANGSMITH_TRACING`, `LANGSMITH_PROJECT`
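With those variables set, tracing is gated at runtime. The observability spec enables it by setting `LANGSMITH_TRACING` to the literal string `'true'` together with an API key; a minimal sketch of such a gate follows (the gate's exact logic is not shown in this diff, so this is inferred from the spec setup and the env variables listed above, not the actual implementation):

```typescript
// Hypothetical env gate, inferred from the spec's setup
// (LANGSMITH_TRACING = 'true' plus LANGSMITH_API_KEY).
function isLangSmithEnabled(
  env: Record<string, string | undefined>
): boolean {
  // Either tracing flag may be set; both are configured on Railway above.
  const tracingFlag = env.LANGSMITH_TRACING ?? env.LANGCHAIN_TRACING_V2;
  return tracingFlag === 'true' && Boolean(env.LANGSMITH_API_KEY);
}
```

Gating on the flag and the key together means a half-configured environment degrades to plain JSON logging instead of failing requests.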
