Browse Source

fix(ai-chat): hide memory line and enforce richer recommendation replies

pull/6395/head
Max P 1 month ago
parent
commit
fd4c8b2921
  1. 415
      apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.spec.ts
  2. 65
      apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.ts
  3. 272
      apps/api/src/app/endpoints/ai/ai-agent.policy.utils.spec.ts
  4. 52
      apps/api/src/app/endpoints/ai/ai-agent.policy.utils.ts
  5. 292
      apps/api/src/app/endpoints/ai/ai-agent.utils.spec.ts
  6. 79
      apps/api/src/app/endpoints/ai/ai.service.spec.ts
  7. 5
      apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts
  8. 4
      apps/api/src/app/endpoints/ai/evals/dataset/edge-case.dataset.ts
  9. 2
      apps/api/src/app/endpoints/ai/evals/dataset/multi-step.dataset.ts
  10. 5
      apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.html
  11. 14
      apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.scss
  12. 1
      apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.spec.ts
  13. 18
      tasks/lessons.md
  14. 69
      tasks/tasks.md

415
apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.spec.ts

@ -66,7 +66,7 @@ describe('AiAgentChatHelpers', () => {
}),
languageCode: 'en',
memory: { turns: [] },
query: 'How should I rebalance and invest next?',
query: 'Summarize my concentration risk and next move.',
userCurrency: 'USD'
});
@ -103,8 +103,55 @@ describe('AiAgentChatHelpers', () => {
userCurrency: 'USD'
});
expect(answer).toContain('Next-step allocation:');
expect(answer).toContain('AAPL');
expect(answer).toContain('Option 1 (new money first):');
expect(answer).toContain('Option 2 (sell and rebalance):');
expect(answer).toContain('Assumptions:');
expect(answer).toContain('Next questions:');
});
it('falls back to recommendation options when generated recommendation lacks option structure', async () => {
const generatedText =
'Risk concentration is high and your top position remains elevated. Redirect contributions and gradually reduce concentration while monitoring drift every month.';
const answer = await buildAnswer({
generateText: jest.fn().mockResolvedValue({
text: generatedText
}),
languageCode: 'en',
memory: { turns: [] },
portfolioAnalysis: {
allocationSum: 1,
holdings: [
{
allocationInPercentage: 0.7,
dataSource: DataSource.YAHOO,
symbol: 'AAPL',
valueInBaseCurrency: 7000
},
{
allocationInPercentage: 0.3,
dataSource: DataSource.YAHOO,
symbol: 'MSFT',
valueInBaseCurrency: 3000
}
],
holdingsCount: 2,
totalValueInBaseCurrency: 10000
},
query: 'help me diversify',
riskAssessment: {
concentrationBand: 'high',
hhi: 0.58,
topHoldingAllocation: 0.7
},
userCurrency: 'USD'
});
expect(answer).not.toBe(generatedText);
expect(answer).toContain('Option 1 (new money first):');
expect(answer).toContain('Option 2 (sell and rebalance):');
expect(answer).toContain('Risk notes:');
});
it('uses recommendation-composer prompt structure for action-intent queries', async () => {
@ -186,4 +233,368 @@ describe('AiAgentChatHelpers', () => {
expect(result).toEqual({});
});
it.each([
'What do you remember about me?',
'show my preferences',
'Show preferences',
'What are my preferences?',
'which preferences do you remember',
'which preferences did you save'
])('matches preference recall pattern for "%s"', (query) => {
expect(isPreferenceRecallQuery(query)).toBe(true);
});
it.each([
'Show my portfolio risk',
'Rebalance my holdings',
'hello',
'help me diversify'
])('does not match preference recall pattern for "%s"', (query) => {
expect(isPreferenceRecallQuery(query)).toBe(false);
});
it.each([
'keep answers concise',
'answer briefly',
'responses concise please',
'keep replies short'
])('detects concise preference phrase "%s"', (query) => {
const result = resolvePreferenceUpdate({
query,
userPreferences: {}
});
expect(result.shouldPersist).toBe(true);
expect(result.userPreferences.responseStyle).toBe('concise');
expect(result.acknowledgement).toContain('Saved preference');
});
it.each([
'keep responses detailed',
'answer in detail',
'more detail please',
'responses verbose'
])('detects detailed preference phrase "%s"', (query) => {
const result = resolvePreferenceUpdate({
query,
userPreferences: {}
});
expect(result.shouldPersist).toBe(true);
expect(result.userPreferences.responseStyle).toBe('detailed');
expect(result.acknowledgement).toContain('Saved preference');
});
it('returns no-op when preference query is ambiguous', () => {
const result = resolvePreferenceUpdate({
query: 'keep responses concise and add more detail',
userPreferences: {
responseStyle: 'concise',
updatedAt: '2026-02-24T10:00:00.000Z'
}
});
expect(result.shouldPersist).toBe(false);
expect(result.userPreferences.responseStyle).toBe('concise');
expect(result.acknowledgement).toBeUndefined();
});
it('returns already-saved acknowledgement when style does not change', () => {
const result = resolvePreferenceUpdate({
query: 'keep answers concise',
userPreferences: {
responseStyle: 'concise',
updatedAt: '2026-02-24T10:00:00.000Z'
}
});
expect(result.shouldPersist).toBe(false);
expect(result.acknowledgement).toContain('Preference already saved');
});
it('clears stored preferences when clear command is issued', () => {
const result = resolvePreferenceUpdate({
query: 'clear my saved preferences',
userPreferences: {
responseStyle: 'detailed',
updatedAt: '2026-02-24T10:00:00.000Z'
}
});
expect(result.shouldPersist).toBe(true);
expect(result.userPreferences).toEqual({});
expect(result.acknowledgement).toContain('Cleared');
});
it('returns no-op clear acknowledgement when no preferences exist', () => {
const result = resolvePreferenceUpdate({
query: 'reset preferences',
userPreferences: {}
});
expect(result.shouldPersist).toBe(false);
expect(result.userPreferences).toEqual({});
expect(result.acknowledgement).toContain('No saved cross-session preferences');
});
it('returns deterministic summary for empty preference state', () => {
expect(
createPreferenceSummaryResponse({
userPreferences: {}
})
).toBe('I have no saved cross-session preferences yet.');
});
it('builds fallback with market snapshot when llm output is unavailable', async () => {
const answer = await buildAnswer({
generateText: jest.fn().mockRejectedValue(new Error('offline')),
languageCode: 'en',
marketData: {
quotes: [
{
currency: 'USD',
marketPrice: 210.12,
marketState: 'REGULAR',
symbol: 'AAPL'
}
],
symbolsRequested: ['AAPL']
},
memory: { turns: [] },
query: 'show market quote',
userCurrency: 'USD'
});
expect(answer).toContain('Market snapshot: AAPL: 210.12 USD');
});
it('builds fallback with limited-coverage message when quotes are missing', async () => {
const answer = await buildAnswer({
generateText: jest.fn().mockRejectedValue(new Error('offline')),
languageCode: 'en',
marketData: {
quotes: [],
symbolsRequested: ['AAPL', 'TSLA']
},
memory: { turns: [] },
query: 'show market quote',
userCurrency: 'USD'
});
expect(answer).toContain(
'Market data request completed with limited quote coverage for: AAPL, TSLA.'
);
});
it('limits fallback output to two lines when concise preference is saved', async () => {
const answer = await buildAnswer({
generateText: jest.fn().mockRejectedValue(new Error('offline')),
languageCode: 'en',
memory: {
turns: [
{
answer: 'prior answer',
query: 'prior query',
timestamp: '2026-02-24T10:00:00.000Z',
toolCalls: [{ status: 'success', tool: 'portfolio_analysis' }]
}
]
},
portfolioAnalysis: {
allocationSum: 1,
holdings: [
{
allocationInPercentage: 0.6,
dataSource: DataSource.YAHOO,
symbol: 'AAPL',
valueInBaseCurrency: 6000
},
{
allocationInPercentage: 0.4,
dataSource: DataSource.YAHOO,
symbol: 'MSFT',
valueInBaseCurrency: 4000
}
],
holdingsCount: 2,
totalValueInBaseCurrency: 10000
},
query: 'show allocation overview',
userCurrency: 'USD',
userPreferences: {
responseStyle: 'concise',
updatedAt: '2026-02-24T10:00:00.000Z'
}
});
expect(answer.split('\n').length).toBeLessThanOrEqual(2);
});
it('keeps fallback user-facing by omitting session-memory status lines', async () => {
const answer = await buildAnswer({
generateText: jest.fn().mockRejectedValue(new Error('offline')),
languageCode: 'en',
memory: {
turns: [
{
answer: 'prior answer',
query: 'prior query',
timestamp: '2026-02-24T10:00:00.000Z',
toolCalls: [{ status: 'success', tool: 'portfolio_analysis' }]
},
{
answer: 'second answer',
query: 'second query',
timestamp: '2026-02-24T10:01:00.000Z',
toolCalls: [{ status: 'success', tool: 'risk_assessment' }]
}
]
},
portfolioAnalysis: {
allocationSum: 1,
holdings: [
{
allocationInPercentage: 0.6,
dataSource: DataSource.YAHOO,
symbol: 'AAPL',
valueInBaseCurrency: 6000
},
{
allocationInPercentage: 0.4,
dataSource: DataSource.YAHOO,
symbol: 'MSFT',
valueInBaseCurrency: 4000
}
],
holdingsCount: 2,
totalValueInBaseCurrency: 10000
},
query: 'show allocation overview',
userCurrency: 'USD'
});
expect(answer).toContain('Largest long allocations:');
expect(answer).not.toContain('Session memory applied');
});
it('includes recommendation fallback options when recommendation query is unreliable', async () => {
const answer = await buildAnswer({
generateText: jest.fn().mockResolvedValue({
text: 'Diversify.'
}),
languageCode: 'en',
memory: { turns: [] },
portfolioAnalysis: {
allocationSum: 1,
holdings: [
{
allocationInPercentage: 0.7,
dataSource: DataSource.YAHOO,
symbol: 'AAPL',
valueInBaseCurrency: 7000
},
{
allocationInPercentage: 0.3,
dataSource: DataSource.YAHOO,
symbol: 'VTI',
valueInBaseCurrency: 3000
}
],
holdingsCount: 2,
totalValueInBaseCurrency: 10000
},
query: 'what should i do to diversify',
riskAssessment: {
concentrationBand: 'high',
hhi: 0.58,
topHoldingAllocation: 0.7
},
userCurrency: 'USD'
});
expect(answer).toContain('Option 1 (new money first)');
expect(answer).toContain('Option 2 (sell and rebalance)');
expect(answer).toContain('Next questions:');
});
it('includes stress and rebalance fallback sections when llm fails', async () => {
const answer = await buildAnswer({
generateText: jest.fn().mockRejectedValue(new Error('offline')),
languageCode: 'en',
memory: { turns: [] },
query: 'run rebalance and stress test',
rebalancePlan: {
maxAllocationTarget: 0.35,
overweightHoldings: [
{
currentAllocation: 0.55,
reductionNeeded: 0.2,
symbol: 'AAPL'
}
],
underweightHoldings: [
{
currentAllocation: 0.12,
symbol: 'VTI'
}
]
},
stressTest: {
estimatedDrawdownInBaseCurrency: 3200,
estimatedPortfolioValueAfterShock: 12800,
longExposureInBaseCurrency: 16000,
shockPercentage: 0.2
},
userCurrency: 'USD'
});
expect(answer).toContain('Rebalance priority');
expect(answer).toContain('Stress test (20% downside)');
});
it('falls back to guidance prompt when no context sections exist', async () => {
const answer = await buildAnswer({
generateText: jest.fn().mockRejectedValue(new Error('offline')),
languageCode: 'en',
memory: { turns: [] },
query: 'anything else?',
userCurrency: 'USD'
});
expect(answer).toContain(
'Portfolio context is available. Ask about holdings, risk concentration, or symbol prices for deeper analysis.'
);
});
it('sanitizes malformed user preference payload fields', async () => {
const redisCacheService = {
get: jest.fn().mockResolvedValue(
JSON.stringify({
responseStyle: 'unsupported',
updatedAt: 12345
})
)
};
const result = await getUserPreferences({
redisCacheService: redisCacheService as never,
userId: 'user-1'
});
expect(result).toEqual({});
});
it('returns empty preferences when cache lookup is empty', async () => {
const redisCacheService = {
get: jest.fn().mockResolvedValue(undefined)
};
const result = await getUserPreferences({
redisCacheService: redisCacheService as never,
userId: 'user-1'
});
expect(result).toEqual({});
});
});

65
apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.ts

@ -32,6 +32,17 @@ const PREFERENCE_RECALL_PATTERN =
/\b(?:what do you remember about me|show (?:my )?preferences?|what are my preferences?|which preferences (?:do|did) you (?:remember|save))\b/i;
/**
 * Matches queries that ask for an actionable recommendation (for example
 * "help me diversify" or "what should i do").
 *
 * `diversif` and `deconcentrat` are word stems, so they are followed by `\w*`.
 * Without it the closing `\b` would demand a word boundary right after the
 * stem, and "diversify", "diversification" or "deconcentrate" could never
 * match. The pattern has no `g` flag, so `.test()` keeps no state between calls.
 */
const RECOMMENDATION_INTENT_PATTERN =
  /\b(?:how do i|what should i do|help me|fix|reduce|diversif\w*|deconcentrat\w*|rebalance|recommend|what can i do)\b/i;
/** Option headings that a generated recommendation must all contain. */
const RECOMMENDATION_REQUIRED_SECTIONS = [/option 1/i, /option 2/i];

/**
 * Section headings that add supporting detail to a recommendation; the
 * detail check requires at least two of them to be present.
 */
const RECOMMENDATION_SUPPORTING_SECTIONS = [
  /summary:/i,
  /assumptions:/i,
  /risk notes:/i,
  /next questions:/i
];

/** Minimum whitespace-separated word count for a detailed recommendation. */
const MINIMUM_RECOMMENDATION_WORDS = 45;

// NOTE(review): presumably the cap on stored session-memory turns; the
// consumer is outside this view, so confirm before relying on it.
export const AI_AGENT_MEMORY_MAX_TURNS = 10;
@ -80,7 +91,7 @@ function getResponseInstruction({
return `Write a concise response with actionable insight and avoid speculation.`;
}
function isRecommendationIntentQuery(query: string) {
/**
 * Reports whether a user query reads like a request for an actionable
 * recommendation.
 *
 * @param query Raw user query; it is trimmed and lower-cased before matching.
 * @returns `true` when the normalized query matches
 * `RECOMMENDATION_INTENT_PATTERN`.
 */
export function isRecommendationIntentQuery(query: string) {
  const normalizedQuery = query.trim().toLowerCase();

  return RECOMMENDATION_INTENT_PATTERN.test(normalizedQuery);
}
@ -155,11 +166,9 @@ function buildRecommendationContext({
}
function buildRecommendationFallback({
memory,
portfolioAnalysis,
riskAssessment
}: {
memory: AiAgentMemoryState;
portfolioAnalysis?: PortfolioAnalysisResult;
riskAssessment?: RiskAssessmentResult;
}) {
@ -200,17 +209,13 @@ function buildRecommendationFallback({
.join(', ');
const recommendationSections: string[] = [];
if (memory.turns.length > 0) {
recommendationSections.push(
`Session memory applied from ${memory.turns.length} prior turn(s).`
);
}
recommendationSections.push(
`Summary: concentration is ${riskAssessment?.concentrationBand ?? 'elevated'} with ${topHolding.symbol} at ${currentTopPct}% of long exposure.`,
`Largest long allocations: ${topAllocationsSummary}.`,
`Option 1 (new money first): Next-step allocation: direct 80-100% of new contributions to positions outside ${topHolding.symbol} until the top holding approaches 35%.`,
`Option 2 (sell and rebalance): Next-step allocation: trim ${topHolding.symbol} by about ${reallocationGapPct} percentage points in staged rebalances and rotate into underweight diversified exposures.`,
`Option 1 (new money first): direct 80-100% of new contributions away from ${topHolding.symbol} until top concentration approaches 35%; a neutral split can start at 50-60% broad equity, 20-30% international equity, and 20-25% defensive exposure.`,
`Option 2 (sell and rebalance): trim ${topHolding.symbol} by about ${reallocationGapPct} percentage points in 2-3 staged rebalances and rotate proceeds into underweight diversified sleeves to reduce single-name dependence.`,
`Option 3 (risk-managed path): keep core holdings, reduce incremental exposure to ${topHolding.symbol}, and add defensive or uncorrelated assets while monitoring monthly drift back toward target concentration.`,
'Risk notes: taxable accounts can trigger realized gains when trimming; include fees, spread, and currency exposure checks before execution.',
'Assumptions: taxable status, account type, and product universe were not provided.',
'Next questions: account type (taxable vs tax-advantaged), tax sensitivity (low/medium/high), and whether new-money-only rebalancing is preferred.'
);
@ -218,6 +223,28 @@ function buildRecommendationFallback({
return recommendationSections.join('\n');
}
/**
 * Checks whether a generated recommendation is structured and long enough to
 * be returned as-is.
 *
 * An answer qualifies when, after trimming, it
 * - has at least `MINIMUM_RECOMMENDATION_WORDS` whitespace-separated words,
 * - matches every pattern in `RECOMMENDATION_REQUIRED_SECTIONS`, and
 * - matches at least two patterns in `RECOMMENDATION_SUPPORTING_SECTIONS`.
 *
 * @param answer Generated answer text.
 * @returns `true` when all three conditions hold; `false` otherwise, including
 * for empty or whitespace-only input.
 */
function isDetailedRecommendationAnswer(answer: string) {
  const trimmedAnswer = answer.trim();

  if (trimmedAnswer.length === 0) {
    return false;
  }

  const wordCount = trimmedAnswer.split(/\s+/).filter(Boolean).length;

  if (wordCount < MINIMUM_RECOMMENDATION_WORDS) {
    return false;
  }

  for (const requiredSection of RECOMMENDATION_REQUIRED_SECTIONS) {
    if (!requiredSection.test(trimmedAnswer)) {
      return false;
    }
  }

  let supportingSectionCount = 0;

  for (const supportingSection of RECOMMENDATION_SUPPORTING_SECTIONS) {
    if (supportingSection.test(trimmedAnswer)) {
      supportingSectionCount += 1;
    }
  }

  return supportingSectionCount >= 2;
}
/**
 * Reports whether a user query asks the assistant to recall saved preferences.
 *
 * @param query Raw user query; it is trimmed and lower-cased before matching.
 * @returns `true` when the normalized query matches `PREFERENCE_RECALL_PATTERN`.
 */
export function isPreferenceRecallQuery(query: string) {
  const normalizedQuery = query.trim().toLowerCase();

  return PREFERENCE_RECALL_PATTERN.test(normalizedQuery);
}
@ -346,12 +373,6 @@ export async function buildAnswer({
});
const hasRecommendationIntent = isRecommendationIntentQuery(query);
if (memory.turns.length > 0) {
fallbackSections.push(
`Session memory applied from ${memory.turns.length} prior turn(s).`
);
}
if (riskAssessment) {
fallbackSections.push(
`Risk concentration is ${riskAssessment.concentrationBand}. Top holding allocation is ${(riskAssessment.topHoldingAllocation * 100).toFixed(2)}% with HHI ${riskAssessment.hhi.toFixed(3)}.`
@ -454,12 +475,16 @@ export async function buildAnswer({
`User currency: ${userCurrency}`,
`Language code: ${languageCode}`,
`Query: ${query}`,
`Session turns available: ${memory.turns.length}`,
`Recommendation context (JSON):`,
JSON.stringify(recommendationContext),
`Context summary:`,
fallbackAnswer,
`Task: provide 2-3 policy-bounded options to improve diversification with concrete allocation targets or percentage ranges.`,
`Output sections: Summary, Assumptions, Option 1 (new money first), Option 2 (sell and rebalance), Risk notes, Next questions (max 3).`,
`Each option must include concrete percentage ranges or target bands derived from the recommendation context.`,
`If constraints are missing, provide conditioned pathways for taxable vs tax-advantaged accounts.`,
`Use at least 120 words unless the user explicitly asked for concise responses.`,
`Do not rely on a single hardcoded ETF unless the user explicitly requests a product. Ask for missing constraints when needed.`,
getResponseInstruction({ userPreferences })
].join('\n')
@ -468,6 +493,7 @@ export async function buildAnswer({
`User currency: ${userCurrency}`,
`Language code: ${languageCode}`,
`Query: ${query}`,
`Session turns available: ${memory.turns.length}`,
`Context summary:`,
fallbackAnswer,
getResponseInstruction({ userPreferences })
@ -500,7 +526,9 @@ export async function buildAnswer({
query
})
) {
return generatedAnswer;
if (!hasRecommendationIntent || isDetailedRecommendationAnswer(generatedAnswer)) {
return generatedAnswer;
}
}
} catch {}
finally {
@ -511,7 +539,6 @@ export async function buildAnswer({
if (hasRecommendationIntent) {
const recommendationFallback = buildRecommendationFallback({
memory,
portfolioAnalysis,
riskAssessment
});

272
apps/api/src/app/endpoints/ai/ai-agent.policy.utils.spec.ts

@ -0,0 +1,272 @@
import { AiAgentToolName } from './ai-agent.interfaces';
import {
applyToolExecutionPolicy,
createPolicyRouteResponse,
formatPolicyVerificationDetails
} from './ai-agent.policy.utils';
/**
 * Unit tests for the tool-execution policy helpers:
 * `applyToolExecutionPolicy` (route and tool gating),
 * `createPolicyRouteResponse` (direct and clarify replies) and
 * `formatPolicyVerificationDetails` (verification summary string).
 */
describe('AiAgentPolicyUtils', () => {
  // Greetings must be answered directly even when the planner proposed a tool.
  it.each([
    'hi',
    'hello',
    'hey',
    'thanks',
    'thank you',
    'good morning',
    'good afternoon',
    'good evening'
  ])('routes greeting-like query "%s" to direct no-tool', (query) => {
    const decision = applyToolExecutionPolicy({
      plannedTools: ['portfolio_analysis'],
      query
    });

    expect(decision.route).toBe('direct');
    expect(decision.blockReason).toBe('no_tool_query');
    expect(decision.toolsToExecute).toEqual([]);
  });

  // Identity, usage and capability questions get a direct reply that names the assistant.
  it.each([
    'who are you',
    'what are you',
    'what can you do',
    'how do you work',
    'how can i use this',
    'help',
    'assist me',
    'what can you help with'
  ])('routes assistant capability query "%s" to direct no-tool', (query) => {
    const decision = applyToolExecutionPolicy({
      plannedTools: [],
      query
    });

    expect(decision.route).toBe('direct');
    expect(decision.blockReason).toBe('no_tool_query');
    expect(
      createPolicyRouteResponse({ policyDecision: decision, query })
    ).toContain('Ghostfolio AI');
  });

  // Each row is [query, expected direct response].
  it.each<[string, string]>([
    ['2+2', '2+2 = 4'],
    ['what is 5 * 3', '5 * 3 = 15'],
    ['(2+3)*4', '(2+3)*4 = 20'],
    ['10 / 4', '10 / 4 = 2.5'],
    ['7 - 10', '7 - 10 = -3'],
    ['3.5 + 1.25', '3.5 + 1.25 = 4.75'],
    ['(8 - 2) / 3', '(8 - 2) / 3 = 2'],
    ['what is 3*(2+4)?', '3*(2+4) = 18'],
    ['2 + (3 * (4 - 1))', '2 + (3 * (4 - 1)) = 11'],
    ['10-3-2', '10-3-2 = 5']
  ])('returns arithmetic direct response for "%s"', (query, expected) => {
    const decision = applyToolExecutionPolicy({
      plannedTools: [],
      query
    });

    expect(decision.route).toBe('direct');
    expect(
      createPolicyRouteResponse({
        policyDecision: decision,
        query
      })
    ).toBe(expected);
  });

  // Division by zero, malformed input and unsupported operators get the generic reply.
  it.each(['1/0', '2+*2', '5 % 2'])(
    'falls back to capability response for unsupported arithmetic expression "%s"',
    (query) => {
      const decision = applyToolExecutionPolicy({
        plannedTools: [],
        query
      });

      expect(decision.route).toBe('direct');
      expect(
        createPolicyRouteResponse({
          policyDecision: decision,
          query
        })
      ).toContain('portfolio analysis');
    }
  );

  it('returns distinct direct no-tool responses for identity and capability prompts', () => {
    const identityDecision = applyToolExecutionPolicy({
      plannedTools: [],
      query: 'who are you?'
    });
    const capabilityDecision = applyToolExecutionPolicy({
      plannedTools: [],
      query: 'what can you do?'
    });

    const identityResponse = createPolicyRouteResponse({
      policyDecision: identityDecision,
      query: 'who are you?'
    });
    const capabilityResponse = createPolicyRouteResponse({
      policyDecision: capabilityDecision,
      query: 'what can you do?'
    });

    expect(identityResponse).toContain('portfolio copilot');
    expect(capabilityResponse).toContain('three modes');
    expect(identityResponse).not.toBe(capabilityResponse);
  });

  it('routes finance read intent with empty planner output to clarify', () => {
    const decision = applyToolExecutionPolicy({
      plannedTools: [],
      query: 'Show portfolio risk and allocation'
    });

    expect(decision.route).toBe('clarify');
    expect(decision.blockReason).toBe('unknown');
    expect(createPolicyRouteResponse({ policyDecision: decision })).toContain(
      'Which one should I run next?'
    );
  });

  it('routes non-finance empty planner output to direct no-tool', () => {
    const decision = applyToolExecutionPolicy({
      plannedTools: [],
      query: 'Tell me a joke'
    });

    expect(decision.route).toBe('direct');
    expect(decision.blockReason).toBe('no_tool_query');
  });

  it('deduplicates planned tools while preserving route decisions', () => {
    const plannedTools: AiAgentToolName[] = [
      'portfolio_analysis',
      'portfolio_analysis',
      'risk_assessment'
    ];

    const decision = applyToolExecutionPolicy({
      plannedTools,
      query: 'analyze concentration risk'
    });

    expect(decision.plannedTools).toEqual([
      'portfolio_analysis',
      'risk_assessment'
    ]);
    expect(decision.toolsToExecute).toEqual([
      'portfolio_analysis',
      'risk_assessment'
    ]);
    expect(decision.route).toBe('tools');
  });

  // The row type is supplied through the `it.each` type argument, so the
  // table literals are checked against it directly without `as` assertions.
  it.each<{
    expectedTools: AiAgentToolName[];
    plannedTools: AiAgentToolName[];
    query: string;
    reason: string;
    route?: 'clarify' | 'direct' | 'tools';
  }>([
    {
      expectedTools: ['portfolio_analysis', 'risk_assessment'],
      plannedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan'],
      query: 'review portfolio concentration risk',
      reason: 'read-only intent strips rebalance'
    },
    {
      expectedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan'],
      plannedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan'],
      query: 'invest 2000 and rebalance',
      reason: 'action intent preserves rebalance'
    },
    {
      expectedTools: [
        'portfolio_analysis',
        'risk_assessment',
        'rebalance_plan',
        'market_data_lookup'
      ],
      plannedTools: [
        'portfolio_analysis',
        'risk_assessment',
        'rebalance_plan',
        'market_data_lookup'
      ],
      query: 'invest and rebalance after checking market quote for NVDA',
      reason: 'action + market intent keeps all planned tools'
    },
    {
      expectedTools: ['stress_test'],
      plannedTools: ['stress_test'],
      query: 'run stress scenario read-only',
      reason: 'read-only stress execution stays allowed'
    }
  ])(
    'applies policy gating: $reason',
    ({ expectedTools, plannedTools, query, route }) => {
      const decision = applyToolExecutionPolicy({
        plannedTools,
        query
      });

      // Rows without an explicit route expect the tools route.
      expect(decision.route).toBe(route ?? 'tools');
      expect(decision.toolsToExecute).toEqual(expectedTools);
    }
  );

  it('marks rebalance-only no-action prompts as clarify with needs_confirmation', () => {
    const decision = applyToolExecutionPolicy({
      plannedTools: ['rebalance_plan'],
      query: 'review concentration profile'
    });

    expect(decision.route).toBe('clarify');
    expect(decision.blockReason).toBe('needs_confirmation');
    expect(decision.blockedByPolicy).toBe(true);
    expect(decision.toolsToExecute).toEqual([]);
  });

  it('formats policy verification details with planned and executed tools', () => {
    const decision = applyToolExecutionPolicy({
      plannedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan'],
      query: 'review concentration risk'
    });

    const details = formatPolicyVerificationDetails({
      policyDecision: decision
    });

    expect(details).toContain('route=tools');
    expect(details).toContain('blocked_by_policy=true');
    expect(details).toContain('block_reason=needs_confirmation');
    expect(details).toContain(
      'planned_tools=portfolio_analysis, risk_assessment, rebalance_plan'
    );
    expect(details).toContain(
      'executed_tools=portfolio_analysis, risk_assessment'
    );
  });
});

52
apps/api/src/app/endpoints/ai/ai-agent.policy.utils.ts

@ -34,6 +34,11 @@ const SIMPLE_ASSISTANT_QUERY_PATTERNS = [
/^\s*(?:how do you work|how (?:can|do) i use (?:you|this))\s*[!.?]*\s*$/i,
/^\s*(?:help|assist(?: me)?|what can you help with)\s*[!.?]*\s*$/i
];
// Patterns used by createNoToolDirectResponse to choose a direct reply.
// They are unanchored and case-insensitive, so the phrase may appear anywhere
// in the query. They are tested in the order identity, usage, capability.

// Identity questions: "who are you", "what are you".
const DIRECT_IDENTITY_QUERY_PATTERN = /\b(?:who are you|what are you)\b/i;
// Usage questions: how the assistant works or how to phrase a request.
const DIRECT_USAGE_QUERY_PATTERN =
/\b(?:how do you work|how (?:can|do) i use (?:you|this)|how should i ask)\b/i;
// Capability questions. The bare word "help" also matches here, so any query
// containing it that was not caught by the two patterns above lands here.
const DIRECT_CAPABILITY_QUERY_PATTERN =
/\b(?:what can (?:you|i) do|help|assist(?: me)?|what can you help with)\b/i;
const READ_ONLY_TOOLS = new Set<AiAgentToolName>([
'portfolio_analysis',
'risk_assessment',
@ -292,6 +297,51 @@ function evaluateSimpleArithmetic(query: string) {
return `${expression} = ${formatNumericResult(result)}`;
}
/**
 * Builds the reply for queries that are answered directly, without running
 * any tool.
 *
 * The query is trimmed and lower-cased, then checked against the identity,
 * usage and capability patterns in that order; the first match picks the
 * reply. A missing query or one that matches none of them gets the generic
 * introduction.
 *
 * @param query Optional raw user query.
 * @returns The reply lines joined with newline characters.
 */
function createNoToolDirectResponse(query?: string) {
  const normalizedQuery = query?.trim().toLowerCase() ?? '';

  // Order matters: an earlier pattern wins when several match.
  const intentResponses: [RegExp, string[]][] = [
    [
      DIRECT_IDENTITY_QUERY_PATTERN,
      [
        'I am Ghostfolio AI, your portfolio copilot for this account.',
        'I analyze concentration risk, summarize holdings, fetch market quotes, run stress scenarios, and compose diversification or rebalance options.',
        'Try one of these:',
        '- "Give me a concentration risk summary"',
        '- "Show the latest prices for my top holdings"',
        '- "Help me diversify with 2-3 optioned plans"'
      ]
    ],
    [
      DIRECT_USAGE_QUERY_PATTERN,
      [
        'I am Ghostfolio AI. Use short direct prompts and include your goal or constraint.',
        'Good pattern: objective + scope + constraint (for example, "reduce top holding below 35% with low tax impact").',
        'I can return analysis, recommendation options, stress scenarios, and market snapshots with citations.',
        'If key constraints are missing, I will ask up to 3 follow-up questions before giving trade-style steps.'
      ]
    ],
    [
      DIRECT_CAPABILITY_QUERY_PATTERN,
      [
        'I am Ghostfolio AI. You can use me in three modes: diagnose, recommend, and verify.',
        'Diagnose: concentration risk, top exposures, and allocation summaries.',
        'Recommend: optioned diversification/rebalance plans with assumptions and next questions.',
        'Verify: live quote checks and stress-scenario impact estimates.',
        'Try next:',
        '- "Analyze my concentration risk"',
        '- "Help me diversify with new-money and sell/rotate options"',
        '- "Run a 20% downside stress test"'
      ]
    ]
  ];

  for (const [intentPattern, responseLines] of intentResponses) {
    if (intentPattern.test(normalizedQuery)) {
      return responseLines.join('\n');
    }
  }

  const genericLines = [
    'I am Ghostfolio AI. I can help with portfolio analysis, concentration risk, market prices, diversification options, and stress scenarios.',
    'Try one of these:',
    '- "Show my top holdings"',
    '- "What is my concentration risk?"',
    '- "Help me diversify with actionable options"'
  ];

  return genericLines.join('\n');
}
export function applyToolExecutionPolicy({
plannedTools,
query
@ -410,7 +460,7 @@ export function createPolicyRouteResponse({
return arithmeticResult;
}
return `I am your Ghostfolio AI assistant. I can help with portfolio analysis, concentration risk, market prices, rebalancing ideas, and stress scenarios. Try: "Show my top holdings" or "What is my concentration risk?".`;
return createNoToolDirectResponse(query);
}
return `I can help with portfolio analysis, concentration risk, market prices, and stress scenarios. Ask a portfolio question when you are ready.`;

292
apps/api/src/app/endpoints/ai/ai-agent.utils.spec.ts

@ -78,8 +78,10 @@ describe('AiAgentUtils', () => {
expect(decision.route).toBe('direct');
expect(decision.toolsToExecute).toEqual([]);
expect(decision.blockReason).toBe('no_tool_query');
expect(createPolicyRouteResponse({ policyDecision: decision })).toContain(
'Ghostfolio AI assistant'
expect(
createPolicyRouteResponse({ policyDecision: decision, query: 'Who are you?' })
).toContain(
'Ghostfolio AI'
);
});
@ -143,6 +145,14 @@ describe('AiAgentUtils', () => {
).toEqual(['portfolio_analysis', 'risk_assessment', 'rebalance_plan']);
});
it('selects recommendation tools for ambiguous action phrasing', () => {
expect(
determineToolPlan({
query: 'What can I do?'
})
).toEqual(['portfolio_analysis', 'risk_assessment', 'rebalance_plan']);
});
it('selects rebalance tool for rebalance-focused prompts', () => {
expect(
determineToolPlan({
@ -328,4 +338,282 @@ describe('AiAgentUtils', () => {
expect(qualityCheck.check).toBe('response_quality');
expect(qualityCheck.status).toBe('failed');
});
it.each([
{
expected: ['AAPL', 'MSFT'],
query: 'Need AAPL plus MSFT update'
},
{
expected: ['BRK.B', 'VTI'],
query: 'Quote BRK.B and VTI'
},
{
expected: ['QQQ', 'SPY'],
query: 'Check $qqq against $spy'
},
{
expected: ['AAPL'],
query: 'Price for AAPL and THE and WHAT'
},
{
expected: [],
query: 'price for appl and tsla in lowercase without prefixes'
},
{
expected: ['AMD', 'NVDA'],
query: 'Show AMD then $nvda'
},
{
expected: ['BTCUSD'],
query: 'ticker BTCUSD now'
},
{
expected: ['MSFT'],
query: 'Quote MSFT, msft, and $msft'
},
{
expected: ['SHOP.TO'],
query: 'market for SHOP.TO'
},
{
expected: [],
query: 'what can you do'
}
])('extractSymbolsFromQuery handles edge case: $query', ({ expected, query }) => {
expect(extractSymbolsFromQuery(query)).toEqual(expected);
});
it.each([
{
expectedTools: ['portfolio_analysis'],
query: 'portfolio overview'
},
{
expectedTools: ['portfolio_analysis'],
query: 'holdings summary'
},
{
expectedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan'],
query: 'allocation snapshot'
},
{
expectedTools: ['portfolio_analysis'],
query: 'performance review'
},
{
expectedTools: ['portfolio_analysis', 'risk_assessment'],
query: 'risk concentration report'
},
{
expectedTools: ['portfolio_analysis', 'risk_assessment'],
query: 'diversification check'
},
{
expectedTools: ['market_data_lookup'],
query: 'price for NVDA'
},
{
expectedTools: ['market_data_lookup'],
query: 'ticker quote for AAPL'
},
{
expectedTools: ['market_data_lookup'],
query: 'market context'
},
{
expectedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan'],
query: 'where should I invest next'
},
{
expectedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan'],
query: 'trim overweight positions'
},
{
expectedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan'],
query: 'sell and rebalance'
},
{
expectedTools: ['portfolio_analysis', 'risk_assessment', 'stress_test'],
query: 'run a crash stress test'
},
{
expectedTools: ['portfolio_analysis', 'risk_assessment', 'stress_test'],
query: 'drawdown shock analysis'
},
{
expectedTools: ['portfolio_analysis', 'risk_assessment', 'stress_test'],
query: 'stress scenario'
},
{
expectedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan', 'market_data_lookup'],
query: 'rebalance portfolio and quote NVDA'
},
{
expectedTools: ['portfolio_analysis', 'risk_assessment', 'market_data_lookup'],
query: 'analyze risk and market price'
},
{
expectedTools: [],
query: 'who are you'
},
{
expectedTools: [],
query: 'hello there'
},
{
expectedTools: [],
query: 'help me with account settings'
}
])(
'determineToolPlan returns expected tools for "$query"',
({ expectedTools, query }) => {
expect(determineToolPlan({ query })).toEqual(expectedTools);
}
);
it.each([
{
expected: true,
query: 'How should I rebalance and reduce concentration risk?',
text:
'Trim your top position by 4% and direct the next 1500 USD to two smaller holdings. Recheck concentration after each contribution.'
},
{
expected: true,
query: 'What is my market price exposure?',
text:
'AAPL is 210.12 USD and MSFT is 455.90 USD. Market exposure remains concentrated in your top position.'
},
{
expected: false,
query: 'Should I buy more MSFT?',
text:
'As an AI, I cannot provide financial advice and you should consult a financial advisor.'
},
{
expected: false,
query: 'What are my risk metrics right now?',
text:
'Risk seems elevated overall with concentration concerns but no specific values are available.'
},
{
expected: false,
query: 'Where should I invest next?',
text:
'Consider your long-term goals.'
},
{
expected: true,
query: 'Where should I invest next?',
text:
'Allocate 70% of new money to positions outside your top holding and 30% to broad-market exposure. This lowers concentration without forced selling.'
},
{
expected: true,
query: 'Run stress drawdown estimate',
text:
'Under a 20% shock, estimated drawdown is 3200 USD and projected value is 12800 USD. Reduce single-name concentration to improve downside stability.'
},
{
expected: false,
query: 'Run stress drawdown estimate',
text:
'Stress impact could be meaningful and diversification may help over time.'
},
{
expected: false,
query: 'What is concentration risk now?',
text: 'Risk is high.'
},
{
expected: true,
query: 'What is concentration risk now?',
text:
'Top holding is 52.4% with HHI 0.331. Trim 2-4 percentage points from the top position or add to underweight holdings.'
}
])(
'isGeneratedAnswerReliable=$expected for quality gate case',
({ expected, query, text }) => {
expect(
isGeneratedAnswerReliable({
answer: text,
query
})
).toBe(expected);
}
);
it.each([
{
expectedStatus: 'passed',
query: 'How should I rebalance risk?',
text:
'Top holding is 48%. Trim 3% from the largest position and add to two underweight holdings. Re-evaluate concentration in one week.'
},
{
expectedStatus: 'warning',
query: 'Show concentration and market price risk',
text:
'Concentration is elevated and diversification would improve resilience over time.'
},
{
expectedStatus: 'warning',
query: 'Where should I invest next?',
text:
'You can diversify over time by considering additional positions that fit your risk profile and timeline.'
},
{
expectedStatus: 'failed',
query: 'Where should I invest next?',
text:
'As an AI, I cannot provide financial advice and you should consult a financial advisor.'
},
{
expectedStatus: 'warning',
query: 'What is my drawdown risk right now?',
text:
'Drawdown risk exists and depends on current concentration and market volatility.'
},
{
expectedStatus: 'passed',
query: 'What is my drawdown risk right now?',
text:
'At a 20% shock, projected drawdown is 2600 USD. Reduce your top position by 2-3 points to lower downside risk concentration.'
},
{
expectedStatus: 'warning',
query: 'Show my market quote and risk',
text:
'AAPL is high and risk is elevated.'
},
{
expectedStatus: 'passed',
query: 'Show my market quote and risk',
text:
'AAPL is 212.40 USD and top holding concentration is 46.2%. Rebalance by directing new cash into lower-weight holdings.'
},
{
expectedStatus: 'warning',
query: 'Analyze performance and allocation',
text:
'Performance and allocation are stable.'
},
{
expectedStatus: 'passed',
query: 'Analyze performance and allocation',
text:
'Portfolio return is 8.4% and top allocation is 41.0%. Add to underweight positions to keep concentration from rising.'
}
])(
'evaluateAnswerQuality returns $expectedStatus',
({ expectedStatus, query, text }) => {
expect(
evaluateAnswerQuality({
answer: text,
query
}).status
).toBe(expectedStatus);
}
);
});

79
apps/api/src/app/endpoints/ai/ai.service.spec.ts

@ -255,7 +255,7 @@ describe('AiService', () => {
userId: 'user-direct-route'
});
expect(result.answer).toContain('Ghostfolio AI assistant');
expect(result.answer).toContain('I am Ghostfolio AI');
expect(result.toolCalls).toEqual([]);
expect(result.citations).toEqual([]);
expect(dataProviderService.getQuotes).not.toHaveBeenCalled();
@ -291,6 +291,82 @@ describe('AiService', () => {
expect(generateTextSpy).not.toHaveBeenCalled();
});
it('routes ambiguous action follow-up query through recommendation tools when finance memory exists', async () => {
  // Portfolio fixture: three holdings, with USD carrying the largest share (66.5%).
  const holdings = {
    USD: {
      allocationInPercentage: 0.665,
      dataSource: DataSource.MANUAL,
      symbol: 'USD',
      valueInBaseCurrency: 6650
    },
    VTI: {
      allocationInPercentage: 0.159,
      dataSource: DataSource.YAHOO,
      symbol: 'VTI',
      valueInBaseCurrency: 1590
    },
    AAPL: {
      allocationInPercentage: 0.085,
      dataSource: DataSource.YAHOO,
      symbol: 'AAPL',
      valueInBaseCurrency: 850
    }
  };

  // Serialized memory holding one earlier finance turn that recorded
  // successful portfolio_analysis and risk_assessment tool calls.
  const storedMemory = JSON.stringify({
    turns: [
      {
        answer:
          'Risk concentration is high. Top holding allocation is 66.5%.',
        query: 'help me diversify',
        timestamp: '2026-02-24T12:00:00.000Z',
        toolCalls: [
          { status: 'success', tool: 'portfolio_analysis' },
          { status: 'success', tool: 'risk_assessment' }
        ]
      }
    ]
  });

  // Plain prose without any "Option 1" / "Option 2" headings.
  const generatedText =
    'Improve concentration by redirecting new cash to underweight holdings, trimming the top position in stages, and reassessing risk after each rebalance checkpoint.';

  portfolioService.getDetails.mockResolvedValue({ holdings });

  // Only keys with this prefix resolve to the stored memory; every other
  // lookup yields undefined.
  // NOTE(review): the prefix presumably embeds the userId used below — confirm against the service's key builder.
  redisCacheService.get.mockImplementation(async (key: string) =>
    key.startsWith('ai-agent-memory-user-follow-up-')
      ? storedMemory
      : undefined
  );

  const generateTextMock = jest
    .spyOn(subject, 'generateText')
    .mockResolvedValue({ text: generatedText } as never);

  const { answer, toolCalls } = await subject.chat({
    languageCode: 'en',
    query: 'what can i do?',
    sessionId: 'session-follow-up',
    userCurrency: 'USD',
    userId: 'user-follow-up'
  });

  // The reply must carry both option sections.
  for (const heading of [
    'Option 1 (new money first):',
    'Option 2 (sell and rebalance):'
  ]) {
    expect(answer).toContain(heading);
  }

  // Both analysis tools must have run successfully for the follow-up query.
  const expectedToolCalls = ['portfolio_analysis', 'risk_assessment'].map(
    (tool) => expect.objectContaining({ status: 'success', tool })
  );
  expect(toolCalls).toEqual(expect.arrayContaining(expectedToolCalls));

  // The LLM prompt must include the recommendation context section.
  expect(generateTextMock).toHaveBeenCalledWith(
    expect.objectContaining({
      prompt: expect.stringContaining('Recommendation context (JSON):')
    })
  );
});
it('persists and recalls cross-session user preferences for the same user', async () => {
const redisStore = new Map<string, string>();
redisCacheService.get.mockImplementation(async (key: string) => {
@ -466,7 +542,6 @@ describe('AiService', () => {
userId: 'user-diversify-1'
});
expect(result.answer).toContain('Next-step allocation:');
expect(result.answer).toContain('AAPL');
expect(result.answer).toContain('Option 1 (new money first):');
expect(result.answer).toContain('Option 2 (sell and rebalance):');

5
apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts

@ -105,8 +105,9 @@ describe('AiReplyQualityEval', () => {
userId: 'quality-user'
});
expect(response.answer).toContain('Next-step allocation:');
expect(response.answer).toContain('Option 1 (new money first):');
expect(response.answer).toContain('Largest long allocations:');
expect(response.answer).toContain('Risk notes:');
expect(response.answer).not.toContain('As an AI');
expect(response.verification).toEqual(
expect.arrayContaining([
@ -127,7 +128,7 @@ describe('AiReplyQualityEval', () => {
const response = await subject.chat({
languageCode: 'en',
query: 'How should I rebalance and invest next month?',
query: 'Summarize my concentration risk and allocation status.',
sessionId: 'quality-eval-generated',
userCurrency: 'USD',
userId: 'quality-user'

4
apps/api/src/app/endpoints/ai/evals/dataset/edge-case.dataset.ts

@ -141,7 +141,7 @@ export const EDGE_CASE_EVAL_CASES: AiAgentMvpEvalCase[] = [
createEvalCase({
category: 'edge_case',
expected: {
answerIncludes: ['Session memory applied from 2 prior turn(s).'],
answerIncludes: ['Largest long allocations:'],
memoryTurnsAtLeast: 3,
requiredTools: ['portfolio_analysis']
},
@ -158,7 +158,7 @@ export const EDGE_CASE_EVAL_CASES: AiAgentMvpEvalCase[] = [
createEvalCase({
category: 'edge_case',
expected: {
answerIncludes: ['Session memory applied from 1 prior turn(s).'],
answerIncludes: ['Largest long allocations:'],
memoryTurnsAtLeast: 2,
requiredTools: ['portfolio_analysis']
},

2
apps/api/src/app/endpoints/ai/evals/dataset/multi-step.dataset.ts

@ -131,7 +131,7 @@ export const MULTI_STEP_EVAL_CASES: AiAgentMvpEvalCase[] = [
createEvalCase({
category: 'multi_step',
expected: {
answerIncludes: ['Session memory applied from 1 prior turn(s).'],
answerIncludes: ['Summary: concentration is'],
memoryTurnsAtLeast: 2,
requiredTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan']
},

5
apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.html

@ -77,12 +77,13 @@
aria-label="Show response details"
class="chat-details-trigger ml-2"
i18n-aria-label
mat-icon-button
mat-stroked-button
type="button"
[matMenuTriggerFor]="responseDetailsMenu"
(click)="onOpenResponseDetails(message.response)"
>
<mat-icon aria-hidden="true">info_outline</mat-icon>
<mat-icon aria-hidden="true">info</mat-icon>
<span i18n>Info</span>
</button>
}
</div>

14
apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.scss

@ -66,16 +66,20 @@ textarea::selection {
}
.chat-details-trigger {
align-items: center;
color: var(--ai-chat-muted-text);
height: 1.625rem;
display: inline-flex;
gap: 0.2rem;
height: 1.75rem;
line-height: 1;
width: 1.625rem;
min-width: 0;
padding: 0 0.4rem;
}
.chat-details-trigger mat-icon {
font-size: 1rem;
height: 1rem;
width: 1rem;
font-size: 0.95rem;
height: 0.95rem;
width: 0.95rem;
}
.prompt-list {

1
apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.spec.ts

@ -181,6 +181,7 @@ describe('GfAiChatPanelComponent', () => {
) as HTMLButtonElement | null;
expect(detailsTrigger).toBeTruthy();
expect(detailsTrigger?.textContent).toContain('Info');
detailsTrigger?.click();
fixture.detectChanges();

18
tasks/lessons.md

@ -36,6 +36,22 @@ Updated: 2026-02-24
Mistake: Treated npm publication as the only completion path for contribution evidence
Rule: When package publication is blocked, ship the tool in-repo and open upstream PRs in high-signal repositories to preserve external contribution progress.
9. Context: Memory feature validation after chat/session persistence rollout
9. Context: AI chat UX feedback on response readability
Mistake: Diagnostic metadata remained inline with assistant answers, reducing answer clarity
Rule: Keep primary assistant messages user-facing only and place diagnostics (confidence, citations, verification, observability) behind an explicit info-triggered popover.
10. Context: Memory feature validation after chat/session persistence rollout
Mistake: Session-scoped memory shipped without an explicit user-scoped preference path for cross-session continuity.
Rule: When memory requirements mention user preferences, implement and test both session memory and user-level memory keyed independently from session IDs.
11. Context: Large table-driven Jest test expansion for policy routing and arithmetic behavior
Mistake: Mixed tuple/string/object datasets under a single typed `it.each` signature created preventable TypeScript compile failures.
Rule: Keep each table shape typed independently (`it.each<[...tuple]>()` for positional rows and object generics only for object rows).
12. Context: Ambiguous user follow-up prompts in a finance assistant ("what can i do?")
Mistake: Capability-style routing captured actionable follow-up intent and bypassed tool-backed recommendation generation.
Rule: Treat ambiguous action follow-ups as recommendation intent when finance context exists, and lock this with deterministic service tests.
13. Context: Recommendation replies looked short and repetitive even when tool context was available
Mistake: Reliability gating accepted generic recommendation prose that lacked option sections and actionable structure.
Rule: For recommendation-intent prompts, enforce sectioned output quality gates (Option 1/2 + assumptions/risk notes/next questions) and fall back to deterministic option plans when structure is missing.

69
tasks/tasks.md

@ -226,14 +226,62 @@ Last updated: 2026-02-24
## Session Plan (2026-02-24, Chat Details Popover UX)
- [ ] Audit current AI chat response rendering and identify diagnostics shown inline.
- [ ] Move diagnostics (confidence, citations, verification, observability) behind an info-triggered popover per assistant message.
- [ ] Keep main assistant response focused on user-facing answer and retain feedback controls in primary view.
- [ ] Update chat panel tests to assert info-trigger behavior and diagnostics visibility expectations.
- [ ] Run focused frontend verification and update trackers (`Tasks.md`, `tasks/tasks.md`, `tasks/lessons.md`).
- [x] Audit current AI chat response rendering and identify diagnostics shown inline.
- [x] Move diagnostics (confidence, citations, verification, observability) behind an info-triggered popover per assistant message.
- [x] Keep main assistant response focused on user-facing answer and retain feedback controls in primary view.
- [x] Update chat panel tests to assert info-trigger behavior and diagnostics visibility expectations.
- [x] Run focused frontend verification and update trackers (`Tasks.md`, `tasks/tasks.md`, `tasks/lessons.md`).
## Session Plan (2026-02-24, Diversification Reply Actionability Fix)
- [x] Reproduce and inspect `help me diversify` routing and fallback behavior in AI service.
- [x] Add deterministic diversification-action fallback guidance when LLM output is unavailable or rejected.
- [x] Add targeted helper/service tests for diversification prompt behavior.
- [x] Run focused verification (`npm run test:ai`) and update tracker notes.
## Session Plan (2026-02-24, Recommendation Composer Prompting)
- [ ] Add action-intent recommendation mode detection in AI answer builder.
- [ ] Build and inject structured recommendation context (concentration + constraints placeholders) into LLM prompting.
- [ ] Strengthen recommendation instructions to produce option-based actionable plans with assumptions and follow-up questions.
- [ ] Add targeted tests for recommendation-mode prompt composition.
- [ ] Run focused verification (`npm run test:ai`) and update tracker notes.
## Session Plan (2026-02-24, Chat Reply Detail + Hidden Diagnostics Follow-up)
- [x] Remove session-memory status text from user-facing assistant answers.
- [x] Improve no-tool direct assistant replies so identity/help prompts produce distinct, more detailed responses.
- [x] Enforce recommendation-mode fallback structure when generated answers are too short or lack option sections.
- [x] Update chat diagnostics trigger to show `i` icon with visible `Info` label while keeping details hidden in popover.
- [x] Run focused verification on touched API/client suites and update tracker evidence.
## Session Plan (2026-02-24, AI + Eval Coverage to 100+)
- [x] Audit current AI test count and eval dataset size against 100+ targets.
- [x] Add deterministic AI unit tests for policy routing, arithmetic handling, planning heuristics, and response quality guards.
- [x] Expand eval datasets to at least 100 cases with balanced category coverage and edge/adversarial depth.
- [x] Raise eval runner guardrails to assert 100+ total dataset size and updated category minimums.
- [x] Run focused verification (`npm run test:ai`, `npm run test:mvp-eval`) and update trackers (`Tasks.md`, `tasks/tasks.md`, `tasks/lessons.md`).
## Session Plan (2026-02-24, Recommendation Follow-Up Routing Fix)
- [x] Reproduce and isolate why `what can i do?` returns capability fallback after a risk turn.
- [x] Route ambiguous action follow-up queries with recent finance memory through tools to unlock recommendation-mode generation.
- [x] Add deterministic AI service tests for follow-up recommendation behavior and guard against capability fallback regression.
- [x] Run focused verification (`npm run test:ai`) and update trackers (`Tasks.md`, `tasks/tasks.md`, `tasks/lessons.md`).
## Verification Notes
- Chat reply detail + hidden diagnostics follow-up verification (local, 2026-02-24):
- `npm run test:ai` (10/10 suites passed, 181/181 tests)
- `npm run test:mvp-eval` (1/1 suite passed, 2/2 tests)
- `npx dotenv-cli -e .env.example -- npx jest apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.spec.ts --config apps/client/jest.config.ts` (8/8 tests passed)
- `npx nx run api:lint` (passed with existing workspace warnings)
- `npx nx run client:lint` (passed with existing workspace warnings)
- Chat details popover UX verification (2026-02-24):
- `npx dotenv-cli -e .env.example -- npx jest apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.spec.ts --config apps/client/jest.config.ts`
- `npx nx run client:lint`
- `npx nx run client:build:development-en`
- `nx run api:lint` completed successfully (existing workspace warnings only).
- Full `nx test api` currently fails in pre-existing portfolio calculator suites unrelated to AI endpoint changes.
- Focused MVP verification passed:
@ -312,3 +360,14 @@ Last updated: 2026-02-24
- Cross-session user preference memory verification (local, 2026-02-24):
- `npm run test:ai` (9/9 suites passed, 54/54 tests)
- `npx nx run api:lint` (passes with existing workspace warnings)
- Diversification reply actionability fix verification (local, 2026-02-24):
- `npm run test:ai` (9/9 suites passed, 56/56 tests)
- `npx nx run api:lint` (passes with existing workspace warnings)
- AI + eval coverage to 100+ verification (local, 2026-02-24):
- `npm run test:ai` (10/10 suites passed, 176/176 tests)
- `npm run test:mvp-eval` (1/1 suite passed; dataset and pass-rate gates enforced)
- `npx tsx -e "import { AI_AGENT_MVP_EVAL_DATASET } from './apps/api/src/app/endpoints/ai/evals/mvp-eval.dataset.ts'; ..."` (109 total eval cases; category counts: happy_path 43, edge_case 26, adversarial 20, multi_step 20)
- Recommendation follow-up routing fix verification (local, 2026-02-24):
- `npx dotenv-cli -e .env.example -- npx jest apps/api/src/app/endpoints/ai/ai.service.spec.ts apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.spec.ts apps/api/src/app/endpoints/ai/ai-agent.utils.spec.ts apps/api/src/app/endpoints/ai/ai-agent.policy.utils.spec.ts --config apps/api/jest.config.ts` (4/4 suites passed, 167/167 tests)
- `npm run test:ai` (10/10 suites passed, 181/181 tests)
- `npm run test:mvp-eval` (1/1 suite passed)

Loading…
Cancel
Save