You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

170 lines
4.4 KiB

const { DataSource } = require('@prisma/client');
const {
AiService
} = require('../../apps/api/src/app/endpoints/ai/ai.service.ts');
const {
AI_AGENT_MVP_EVAL_DATASET
} = require('../../apps/api/src/app/endpoints/ai/evals/mvp-eval.dataset.ts');
const {
runMvpEvalSuite
} = require('../../apps/api/src/app/endpoints/ai/evals/mvp-eval.runner.ts');
function createAiServiceForCase(evalCase) {
const dataProviderService = {
getQuotes: async ({ items }) => {
if (evalCase.setup.marketDataErrorMessage) {
throw new Error(evalCase.setup.marketDataErrorMessage);
}
const quotesBySymbol = evalCase.setup.quotesBySymbol ?? {};
return items.reduce((result, { symbol }) => {
if (quotesBySymbol[symbol]) {
result[symbol] = quotesBySymbol[symbol];
}
return result;
}, {});
}
};
const portfolioService = {
getDetails: async () => ({
holdings:
evalCase.setup.holdings ??
{
CASH: {
allocationInPercentage: 1,
dataSource: DataSource.MANUAL,
symbol: 'CASH',
valueInBaseCurrency: 1000
}
}
})
};
const propertyService = {
getByKey: async () => undefined
};
const redisCacheService = {
get: async () => {
if (evalCase.setup.storedMemoryTurns) {
return JSON.stringify({
turns: evalCase.setup.storedMemoryTurns
});
}
return undefined;
},
set: async () => undefined
};
const aiObservabilityService = {
captureChatFailure: async () => undefined,
captureChatSuccess: async () => ({
latencyInMs: 10,
tokenEstimate: { input: 1, output: 1, total: 2 },
traceId: 'langsmith-eval-trace'
}),
recordFeedback: async () => undefined
};
const aiService = new AiService(
dataProviderService,
portfolioService,
propertyService,
redisCacheService,
aiObservabilityService
);
if (evalCase.setup.llmThrows) {
aiService.generateText = async () => {
throw new Error('offline');
};
} else {
aiService.generateText = async () => ({
text: evalCase.setup.llmText ?? `Eval response for ${evalCase.id}`
});
}
return aiService;
}
function printSummary({ failedRows, label, passed, total }) {
const passRate = total > 0 ? (passed / total) * 100 : 0;
const header = `${label}: ${passed}/${total} passed (${passRate.toFixed(1)}%)`;
console.log(header);
if (failedRows.length > 0) {
console.log(`${label} failures:`);
for (const row of failedRows) {
console.log(`- ${row}`);
}
}
}
async function main() {
const investmentCases = AI_AGENT_MVP_EVAL_DATASET.filter(({ input }) => {
const query = input.query.toLowerCase();
return (
query.includes('invest') ||
query.includes('allocat') ||
query.includes('rebalanc') ||
query.includes('buy') ||
query.includes('trim')
);
});
const suiteResult = await runMvpEvalSuite({
aiServiceFactory: (evalCase) => createAiServiceForCase(evalCase),
cases: AI_AGENT_MVP_EVAL_DATASET
});
const investmentResults = suiteResult.results.filter(({ id }) => {
return investmentCases.some((evalCase) => evalCase.id === id);
});
const investmentPassed = investmentResults.filter(({ passed }) => passed).length;
const investmentFailedRows = investmentResults
.filter(({ passed }) => !passed)
.map(({ failures, id }) => `${id}: ${failures.join(' | ')}`);
const overallFailedRows = suiteResult.results
.filter(({ passed }) => !passed)
.map(({ failures, id }) => `${id}: ${failures.join(' | ')}`);
printSummary({
failedRows: overallFailedRows,
label: 'Overall suite',
passed: suiteResult.passed,
total: suiteResult.total
});
printSummary({
failedRows: investmentFailedRows,
label: 'Investment relevance subset',
passed: investmentPassed,
total: investmentResults.length
});
const keyDetected =
process.env.LANGSMITH_API_KEY || process.env.LANGCHAIN_API_KEY;
const tracingEnabled =
process.env.LANGSMITH_TRACING === 'true' ||
process.env.LANGCHAIN_TRACING_V2 === 'true';
console.log(
`LangSmith capture: key=${keyDetected ? 'set' : 'empty'}, tracing=${tracingEnabled ? 'enabled' : 'disabled'}`
);
if (overallFailedRows.length > 0) {
process.exitCode = 1;
}
}
main().catch((error) => {
console.error(error instanceof Error ? error.message : error);
process.exitCode = 1;
});