From fd4c8b29218c5759bf4a1dc2f441428e9fe2cd9a Mon Sep 17 00:00:00 2001 From: Max P Date: Tue, 24 Feb 2026 12:38:37 -0500 Subject: [PATCH] fix(ai-chat): hide memory line and enforce richer recommendation replies --- .../ai/ai-agent.chat.helpers.spec.ts | 415 +++++++++++++++++- .../app/endpoints/ai/ai-agent.chat.helpers.ts | 65 ++- .../ai/ai-agent.policy.utils.spec.ts | 272 ++++++++++++ .../app/endpoints/ai/ai-agent.policy.utils.ts | 52 ++- .../app/endpoints/ai/ai-agent.utils.spec.ts | 292 +++++++++++- .../src/app/endpoints/ai/ai.service.spec.ts | 79 +++- .../ai/evals/ai-quality-eval.spec.ts | 5 +- .../ai/evals/dataset/edge-case.dataset.ts | 4 +- .../ai/evals/dataset/multi-step.dataset.ts | 2 +- .../ai-chat-panel.component.html | 5 +- .../ai-chat-panel.component.scss | 14 +- .../ai-chat-panel.component.spec.ts | 1 + tasks/lessons.md | 18 +- tasks/tasks.md | 69 ++- 14 files changed, 1249 insertions(+), 44 deletions(-) create mode 100644 apps/api/src/app/endpoints/ai/ai-agent.policy.utils.spec.ts diff --git a/apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.spec.ts b/apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.spec.ts index 88c78cc1d..42adeccd8 100644 --- a/apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.spec.ts +++ b/apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.spec.ts @@ -66,7 +66,7 @@ describe('AiAgentChatHelpers', () => { }), languageCode: 'en', memory: { turns: [] }, - query: 'How should I rebalance and invest next?', + query: 'Summarize my concentration risk and next move.', userCurrency: 'USD' }); @@ -103,8 +103,55 @@ describe('AiAgentChatHelpers', () => { userCurrency: 'USD' }); - expect(answer).toContain('Next-step allocation:'); expect(answer).toContain('AAPL'); + expect(answer).toContain('Option 1 (new money first):'); + expect(answer).toContain('Option 2 (sell and rebalance):'); + expect(answer).toContain('Assumptions:'); + expect(answer).toContain('Next questions:'); + }); + + it('falls back to recommendation options when generated recommendation lacks option structure', async () => { + const generatedText = + 'Risk concentration is high and your top position remains elevated. Redirect contributions and gradually reduce concentration while monitoring drift every month.'; + + const answer = await buildAnswer({ + generateText: jest.fn().mockResolvedValue({ + text: generatedText + }), + languageCode: 'en', + memory: { turns: [] }, + portfolioAnalysis: { + allocationSum: 1, + holdings: [ + { + allocationInPercentage: 0.7, + dataSource: DataSource.YAHOO, + symbol: 'AAPL', + valueInBaseCurrency: 7000 + }, + { + allocationInPercentage: 0.3, + dataSource: DataSource.YAHOO, + symbol: 'MSFT', + valueInBaseCurrency: 3000 + } + ], + holdingsCount: 2, + totalValueInBaseCurrency: 10000 + }, + query: 'help me diversify', + riskAssessment: { + concentrationBand: 'high', + hhi: 0.58, + topHoldingAllocation: 0.7 + }, + userCurrency: 'USD' + }); + + expect(answer).not.toBe(generatedText); + expect(answer).toContain('Option 1 (new money first):'); + expect(answer).toContain('Option 2 (sell and rebalance):'); + expect(answer).toContain('Risk notes:'); }); it('uses recommendation-composer prompt structure for action-intent queries', async () => { @@ -186,4 +233,368 @@ describe('AiAgentChatHelpers', () => { expect(result).toEqual({}); }); + + it.each([ + 'What do you remember about me?', + 'show my preferences', + 'Show preferences', + 'What are my preferences?', + 'which preferences do you remember', + 'which preferences did you save' + ])('matches preference recall pattern for "%s"', (query) => { + expect(isPreferenceRecallQuery(query)).toBe(true); + }); + + it.each([ + 'Show my portfolio risk', + 'Rebalance my holdings', + 'hello', + 'help me diversify' + ])('does not match preference recall pattern for "%s"', (query) => { + expect(isPreferenceRecallQuery(query)).toBe(false); + }); + + it.each([ + 'keep answers concise', + 'answer briefly', + 'responses concise please', + 'keep replies short' + ])('detects concise preference phrase "%s"', (query) => { + const result = resolvePreferenceUpdate({ + query, + userPreferences: {} + }); + + expect(result.shouldPersist).toBe(true); + expect(result.userPreferences.responseStyle).toBe('concise'); + expect(result.acknowledgement).toContain('Saved preference'); + }); + + it.each([ + 'keep responses detailed', + 'answer in detail', + 'more detail please', + 'responses verbose' + ])('detects detailed preference phrase "%s"', (query) => { + const result = resolvePreferenceUpdate({ + query, + userPreferences: {} + }); + + expect(result.shouldPersist).toBe(true); + expect(result.userPreferences.responseStyle).toBe('detailed'); + expect(result.acknowledgement).toContain('Saved preference'); + }); + + it('returns no-op when preference query is ambiguous', () => { + const result = resolvePreferenceUpdate({ + query: 'keep responses concise and add more detail', + userPreferences: { + responseStyle: 'concise', + updatedAt: '2026-02-24T10:00:00.000Z' + } + }); + + expect(result.shouldPersist).toBe(false); + expect(result.userPreferences.responseStyle).toBe('concise'); + expect(result.acknowledgement).toBeUndefined(); + }); + + it('returns already-saved acknowledgement when style does not change', () => { + const result = resolvePreferenceUpdate({ + query: 'keep answers concise', + userPreferences: { + responseStyle: 'concise', + updatedAt: '2026-02-24T10:00:00.000Z' + } + }); + + expect(result.shouldPersist).toBe(false); + expect(result.acknowledgement).toContain('Preference already saved'); + }); + + it('clears stored preferences when clear command is issued', () => { + const result = resolvePreferenceUpdate({ + query: 'clear my saved preferences', + userPreferences: { + responseStyle: 'detailed', + updatedAt: '2026-02-24T10:00:00.000Z' + } + }); + + expect(result.shouldPersist).toBe(true); + expect(result.userPreferences).toEqual({}); + expect(result.acknowledgement).toContain('Cleared'); + }); + + it('returns no-op clear acknowledgement when no preferences exist', () => { + const result = resolvePreferenceUpdate({ + query: 'reset preferences', + userPreferences: {} + }); + + expect(result.shouldPersist).toBe(false); + expect(result.userPreferences).toEqual({}); + expect(result.acknowledgement).toContain('No saved cross-session preferences'); + }); + + it('returns deterministic summary for empty preference state', () => { + expect( + createPreferenceSummaryResponse({ + userPreferences: {} + }) + ).toBe('I have no saved cross-session preferences yet.'); + }); + + it('builds fallback with market snapshot when llm output is unavailable', async () => { + const answer = await buildAnswer({ + generateText: jest.fn().mockRejectedValue(new Error('offline')), + languageCode: 'en', + marketData: { + quotes: [ + { + currency: 'USD', + marketPrice: 210.12, + marketState: 'REGULAR', + symbol: 'AAPL' + } + ], + symbolsRequested: ['AAPL'] + }, + memory: { turns: [] }, + query: 'show market quote', + userCurrency: 'USD' + }); + + expect(answer).toContain('Market snapshot: AAPL: 210.12 USD'); + }); + + it('builds fallback with limited-coverage message when quotes are missing', async () => { + const answer = await buildAnswer({ + generateText: jest.fn().mockRejectedValue(new Error('offline')), + languageCode: 'en', + marketData: { + quotes: [], + symbolsRequested: ['AAPL', 'TSLA'] + }, + memory: { turns: [] }, + query: 'show market quote', + userCurrency: 'USD' + }); + + expect(answer).toContain( + 'Market data request completed with limited quote coverage for: AAPL, TSLA.' + ); + }); + + it('limits fallback output to two lines when concise preference is saved', async () => { + const answer = await buildAnswer({ + generateText: jest.fn().mockRejectedValue(new Error('offline')), + languageCode: 'en', + memory: { + turns: [ + { + answer: 'prior answer', + query: 'prior query', + timestamp: '2026-02-24T10:00:00.000Z', + toolCalls: [{ status: 'success', tool: 'portfolio_analysis' }] + } + ] + }, + portfolioAnalysis: { + allocationSum: 1, + holdings: [ + { + allocationInPercentage: 0.6, + dataSource: DataSource.YAHOO, + symbol: 'AAPL', + valueInBaseCurrency: 6000 + }, + { + allocationInPercentage: 0.4, + dataSource: DataSource.YAHOO, + symbol: 'MSFT', + valueInBaseCurrency: 4000 + } + ], + holdingsCount: 2, + totalValueInBaseCurrency: 10000 + }, + query: 'show allocation overview', + userCurrency: 'USD', + userPreferences: { + responseStyle: 'concise', + updatedAt: '2026-02-24T10:00:00.000Z' + } + }); + + expect(answer.split('\n').length).toBeLessThanOrEqual(2); + }); + + it('keeps fallback user-facing by omitting session-memory status lines', async () => { + const answer = await buildAnswer({ + generateText: jest.fn().mockRejectedValue(new Error('offline')), + languageCode: 'en', + memory: { + turns: [ + { + answer: 'prior answer', + query: 'prior query', + timestamp: '2026-02-24T10:00:00.000Z', + toolCalls: [{ status: 'success', tool: 'portfolio_analysis' }] + }, + { + answer: 'second answer', + query: 'second query', + timestamp: '2026-02-24T10:01:00.000Z', + toolCalls: [{ status: 'success', tool: 'risk_assessment' }] + } + ] + }, + portfolioAnalysis: { + allocationSum: 1, + holdings: [ + { + allocationInPercentage: 0.6, + dataSource: DataSource.YAHOO, + symbol: 'AAPL', + valueInBaseCurrency: 6000 + }, + { + allocationInPercentage: 0.4, + dataSource: DataSource.YAHOO, + symbol: 'MSFT', + valueInBaseCurrency: 4000 + } + ], + holdingsCount: 2, + totalValueInBaseCurrency: 10000 + }, + query: 'show allocation overview', + userCurrency: 'USD' + }); + + expect(answer).toContain('Largest long allocations:'); + expect(answer).not.toContain('Session memory applied'); + }); + + it('includes recommendation fallback options when recommendation query is unreliable', async () => { + const answer = await buildAnswer({ + generateText: jest.fn().mockResolvedValue({ + text: 'Diversify.' + }), + languageCode: 'en', + memory: { turns: [] }, + portfolioAnalysis: { + allocationSum: 1, + holdings: [ + { + allocationInPercentage: 0.7, + dataSource: DataSource.YAHOO, + symbol: 'AAPL', + valueInBaseCurrency: 7000 + }, + { + allocationInPercentage: 0.3, + dataSource: DataSource.YAHOO, + symbol: 'VTI', + valueInBaseCurrency: 3000 + } + ], + holdingsCount: 2, + totalValueInBaseCurrency: 10000 + }, + query: 'what should i do to diversify', + riskAssessment: { + concentrationBand: 'high', + hhi: 0.58, + topHoldingAllocation: 0.7 + }, + userCurrency: 'USD' + }); + + expect(answer).toContain('Option 1 (new money first)'); + expect(answer).toContain('Option 2 (sell and rebalance)'); + expect(answer).toContain('Next questions:'); + }); + + it('includes stress and rebalance fallback sections when llm fails', async () => { + const answer = await buildAnswer({ + generateText: jest.fn().mockRejectedValue(new Error('offline')), + languageCode: 'en', + memory: { turns: [] }, + query: 'run rebalance and stress test', + rebalancePlan: { + maxAllocationTarget: 0.35, + overweightHoldings: [ + { + currentAllocation: 0.55, + reductionNeeded: 0.2, + symbol: 'AAPL' + } + ], + underweightHoldings: [ + { + currentAllocation: 0.12, + symbol: 'VTI' + } + ] + }, + stressTest: { + estimatedDrawdownInBaseCurrency: 3200, + estimatedPortfolioValueAfterShock: 12800, + longExposureInBaseCurrency: 16000, + shockPercentage: 0.2 + }, + userCurrency: 'USD' + }); + + expect(answer).toContain('Rebalance priority'); + expect(answer).toContain('Stress test (20% downside)'); + }); + + it('falls back to guidance prompt when no context sections exist', async () => { + const answer = await buildAnswer({ + generateText: jest.fn().mockRejectedValue(new Error('offline')), + languageCode: 'en', + memory: { turns: [] }, + query: 'anything else?', + userCurrency: 'USD' + }); + + expect(answer).toContain( + 'Portfolio context is available. Ask about holdings, risk concentration, or symbol prices for deeper analysis.' + ); + }); + + it('sanitizes malformed user preference payload fields', async () => { + const redisCacheService = { + get: jest.fn().mockResolvedValue( + JSON.stringify({ + responseStyle: 'unsupported', + updatedAt: 12345 + }) + ) + }; + + const result = await getUserPreferences({ + redisCacheService: redisCacheService as never, + userId: 'user-1' + }); + + expect(result).toEqual({}); + }); + + it('returns empty preferences when cache lookup is empty', async () => { + const redisCacheService = { + get: jest.fn().mockResolvedValue(undefined) + }; + + const result = await getUserPreferences({ + redisCacheService: redisCacheService as never, + userId: 'user-1' + }); + + expect(result).toEqual({}); + }); }); diff --git a/apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.ts b/apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.ts index 1dc3bf46b..23c470e06 100644 --- a/apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.ts +++ b/apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.ts @@ -32,6 +32,17 @@ const PREFERENCE_RECALL_PATTERN = /\b(?:what do you remember about me|show (?:my )?preferences?|what are my preferences?|which preferences (?:do|did) you (?:remember|save))\b/i; const RECOMMENDATION_INTENT_PATTERN = /\b(?:how do i|what should i do|help me|fix|reduce|diversif|deconcentrat|rebalance|recommend|what can i do)\b/i; +const RECOMMENDATION_REQUIRED_SECTIONS = [ + /option 1/i, + /option 2/i +]; +const RECOMMENDATION_SUPPORTING_SECTIONS = [ + /summary:/i, + /assumptions:/i, + /risk notes:/i, + /next questions:/i +]; +const MINIMUM_RECOMMENDATION_WORDS = 45; export const AI_AGENT_MEMORY_MAX_TURNS = 10; @@ -80,7 +91,7 @@ function getResponseInstruction({ return `Write a concise response with actionable insight and avoid speculation.`; } -function isRecommendationIntentQuery(query: string) { +export function isRecommendationIntentQuery(query: string) { return RECOMMENDATION_INTENT_PATTERN.test(query.trim().toLowerCase()); } @@ -155,11 +166,9 @@ function buildRecommendationContext({ } function buildRecommendationFallback({ - memory, portfolioAnalysis, riskAssessment }: { - memory: AiAgentMemoryState; portfolioAnalysis?: PortfolioAnalysisResult; riskAssessment?: RiskAssessmentResult; }) { @@ -200,17 +209,13 @@ function buildRecommendationFallback({ .join(', '); const recommendationSections: string[] = []; - if (memory.turns.length > 0) { - recommendationSections.push( - `Session memory applied from ${memory.turns.length} prior turn(s).` - ); - } - recommendationSections.push( `Summary: concentration is ${riskAssessment?.concentrationBand ?? 'elevated'} with ${topHolding.symbol} at ${currentTopPct}% of long exposure.`, `Largest long allocations: ${topAllocationsSummary}.`, - `Option 1 (new money first): Next-step allocation: direct 80-100% of new contributions to positions outside ${topHolding.symbol} until the top holding approaches 35%.`, - `Option 2 (sell and rebalance): Next-step allocation: trim ${topHolding.symbol} by about ${reallocationGapPct} percentage points in staged rebalances and rotate into underweight diversified exposures.`, + `Option 1 (new money first): direct 80-100% of new contributions away from ${topHolding.symbol} until top concentration approaches 35%; a neutral split can start at 50-60% broad equity, 20-30% international equity, and 20-25% defensive exposure.`, + `Option 2 (sell and rebalance): trim ${topHolding.symbol} by about ${reallocationGapPct} percentage points in 2-3 staged rebalances and rotate proceeds into underweight diversified sleeves to reduce single-name dependence.`, + `Option 3 (risk-managed path): keep core holdings, reduce incremental exposure to ${topHolding.symbol}, and add defensive or uncorrelated assets while monitoring monthly drift back toward target concentration.`, + 'Risk notes: taxable accounts can trigger realized gains when trimming; include fees, spread, and currency exposure checks before execution.', 'Assumptions: taxable status, account type, and product universe were not provided.', 'Next questions: account type (taxable vs tax-advantaged), tax sensitivity (low/medium/high), and whether new-money-only rebalancing is preferred.' ); @@ -218,6 +223,28 @@ function buildRecommendationFallback({ return recommendationSections.join('\n'); } +function isDetailedRecommendationAnswer(answer: string) { + const normalizedAnswer = answer.trim(); + + if (!normalizedAnswer) { + return false; + } + + const words = normalizedAnswer.split(/\s+/).filter(Boolean); + const hasRequiredOptions = RECOMMENDATION_REQUIRED_SECTIONS.every((pattern) => { + return pattern.test(normalizedAnswer); + }); + const supportingSectionMatches = RECOMMENDATION_SUPPORTING_SECTIONS.filter((pattern) => { + return pattern.test(normalizedAnswer); + }).length; + + return ( + words.length >= MINIMUM_RECOMMENDATION_WORDS && + hasRequiredOptions && + supportingSectionMatches >= 2 + ); +} + export function isPreferenceRecallQuery(query: string) { return PREFERENCE_RECALL_PATTERN.test(query.trim().toLowerCase()); } @@ -346,12 +373,6 @@ export async function buildAnswer({ }); const hasRecommendationIntent = isRecommendationIntentQuery(query); - if (memory.turns.length > 0) { - fallbackSections.push( - `Session memory applied from ${memory.turns.length} prior turn(s).` - ); - } - if (riskAssessment) { fallbackSections.push( `Risk concentration is ${riskAssessment.concentrationBand}. Top holding allocation is ${(riskAssessment.topHoldingAllocation * 100).toFixed(2)}% with HHI ${riskAssessment.hhi.toFixed(3)}.` @@ -454,12 +475,16 @@ export async function buildAnswer({ `User currency: ${userCurrency}`, `Language code: ${languageCode}`, `Query: ${query}`, + `Session turns available: ${memory.turns.length}`, `Recommendation context (JSON):`, JSON.stringify(recommendationContext), `Context summary:`, fallbackAnswer, `Task: provide 2-3 policy-bounded options to improve diversification with concrete allocation targets or percentage ranges.`, `Output sections: Summary, Assumptions, Option 1 (new money first), Option 2 (sell and rebalance), Risk notes, Next questions (max 3).`, + `Each option must include concrete percentage ranges or target bands derived from the recommendation context.`, + `If constraints are missing, provide conditioned pathways for taxable vs tax-advantaged accounts.`, + `Use at least 120 words unless the user explicitly asked for concise responses.`, `Do not rely on a single hardcoded ETF unless the user explicitly requests a product. Ask for missing constraints when needed.`, getResponseInstruction({ userPreferences }) ].join('\n') @@ -468,6 +493,7 @@ export async function buildAnswer({ `User currency: ${userCurrency}`, `Language code: ${languageCode}`, `Query: ${query}`, + `Session turns available: ${memory.turns.length}`, `Context summary:`, fallbackAnswer, getResponseInstruction({ userPreferences }) @@ -500,7 +526,9 @@ export async function buildAnswer({ query }) ) { - return generatedAnswer; + if (!hasRecommendationIntent || isDetailedRecommendationAnswer(generatedAnswer)) { + return generatedAnswer; + } } } catch {} finally { @@ -511,7 +539,6 @@ export async function buildAnswer({ if (hasRecommendationIntent) { const recommendationFallback = buildRecommendationFallback({ - memory, portfolioAnalysis, riskAssessment }); diff --git a/apps/api/src/app/endpoints/ai/ai-agent.policy.utils.spec.ts b/apps/api/src/app/endpoints/ai/ai-agent.policy.utils.spec.ts new file mode 100644 index 000000000..10b946deb --- /dev/null +++ b/apps/api/src/app/endpoints/ai/ai-agent.policy.utils.spec.ts @@ -0,0 +1,272 @@ +import { AiAgentToolName } from './ai-agent.interfaces'; +import { + applyToolExecutionPolicy, + createPolicyRouteResponse, + formatPolicyVerificationDetails +} from './ai-agent.policy.utils'; + +describe('AiAgentPolicyUtils', () => { + it.each([ + 'hi', + 'hello', + 'hey', + 'thanks', + 'thank you', + 'good morning', + 'good afternoon', + 'good evening' + ])('routes greeting-like query "%s" to direct no-tool', (query) => { + const decision = applyToolExecutionPolicy({ + plannedTools: ['portfolio_analysis'], + query + }); + + expect(decision.route).toBe('direct'); + expect(decision.blockReason).toBe('no_tool_query'); + expect(decision.toolsToExecute).toEqual([]); + }); + + it.each([ + 'who are you', + 'what are you', + 'what can you do', + 'how do you work', + 'how can i use this', + 'help', + 'assist me', + 'what can you help with' + ])('routes assistant capability query "%s" to direct no-tool', (query) => { + const decision = applyToolExecutionPolicy({ + plannedTools: [], + query + }); + + expect(decision.route).toBe('direct'); + expect(decision.blockReason).toBe('no_tool_query'); + expect( + createPolicyRouteResponse({ policyDecision: decision, query }) + ).toContain( + 'Ghostfolio AI' + ); + }); + + it.each<[string, string]>([ + ['2+2', '2+2 = 4'], + ['what is 5 * 3', '5 * 3 = 15'], + ['(2+3)*4', '(2+3)*4 = 20'], + ['10 / 4', '10 / 4 = 2.5'], + ['7 - 10', '7 - 10 = -3'], + ['3.5 + 1.25', '3.5 + 1.25 = 4.75'], + ['(8 - 2) / 3', '(8 - 2) / 3 = 2'], + ['what is 3*(2+4)?', '3*(2+4) = 18'], + ['2 + (3 * (4 - 1))', '2 + (3 * (4 - 1)) = 11'], + ['10-3-2', '10-3-2 = 5'] + ])('returns arithmetic direct response for "%s"', (query, expected) => { + const decision = applyToolExecutionPolicy({ + plannedTools: [], + query + }); + + expect(decision.route).toBe('direct'); + expect( + createPolicyRouteResponse({ + policyDecision: decision, + query + }) + ).toBe(expected); + }); + + it.each(['1/0', '2+*2', '5 % 2'])( + 'falls back to capability response for unsupported arithmetic expression "%s"', + (query) => { + const decision = applyToolExecutionPolicy({ + plannedTools: [], + query + }); + + expect(decision.route).toBe('direct'); + expect( + createPolicyRouteResponse({ + policyDecision: decision, + query + }) + ).toContain('portfolio analysis'); + } + ); + + it('returns distinct direct no-tool responses for identity and capability prompts', () => { + const identityDecision = applyToolExecutionPolicy({ + plannedTools: [], + query: 'who are you?' + }); + const capabilityDecision = applyToolExecutionPolicy({ + plannedTools: [], + query: 'what can you do?' + }); + + const identityResponse = createPolicyRouteResponse({ + policyDecision: identityDecision, + query: 'who are you?' + }); + const capabilityResponse = createPolicyRouteResponse({ + policyDecision: capabilityDecision, + query: 'what can you do?' + }); + + expect(identityResponse).toContain('portfolio copilot'); + expect(capabilityResponse).toContain('three modes'); + expect(identityResponse).not.toBe(capabilityResponse); + }); + + it('routes finance read intent with empty planner output to clarify', () => { + const decision = applyToolExecutionPolicy({ + plannedTools: [], + query: 'Show portfolio risk and allocation' + }); + + expect(decision.route).toBe('clarify'); + expect(decision.blockReason).toBe('unknown'); + expect(createPolicyRouteResponse({ policyDecision: decision })).toContain( + 'Which one should I run next?' + ); + }); + + it('routes non-finance empty planner output to direct no-tool', () => { + const decision = applyToolExecutionPolicy({ + plannedTools: [], + query: 'Tell me a joke' + }); + + expect(decision.route).toBe('direct'); + expect(decision.blockReason).toBe('no_tool_query'); + }); + + it('deduplicates planned tools while preserving route decisions', () => { + const plannedTools: AiAgentToolName[] = [ + 'portfolio_analysis', + 'portfolio_analysis', + 'risk_assessment' + ]; + const decision = applyToolExecutionPolicy({ + plannedTools, + query: 'analyze concentration risk' + }); + + expect(decision.plannedTools).toEqual([ + 'portfolio_analysis', + 'risk_assessment' + ]); + expect(decision.toolsToExecute).toEqual([ + 'portfolio_analysis', + 'risk_assessment' + ]); + expect(decision.route).toBe('tools'); + }); + + it.each<{ + expectedTools: AiAgentToolName[]; + plannedTools: AiAgentToolName[]; + query: string; + reason: string; + route?: 'clarify' | 'direct' | 'tools'; + }>([ + { + expectedTools: ['portfolio_analysis', 'risk_assessment'] as AiAgentToolName[], + plannedTools: [ + 'portfolio_analysis', + 'risk_assessment', + 'rebalance_plan' + ] as AiAgentToolName[], + query: 'review portfolio concentration risk', + reason: 'read-only intent strips rebalance' + }, + { + expectedTools: [ + 'portfolio_analysis', + 'risk_assessment', + 'rebalance_plan' + ] as AiAgentToolName[], + plannedTools: [ + 'portfolio_analysis', + 'risk_assessment', + 'rebalance_plan' + ] as AiAgentToolName[], + query: 'invest 2000 and rebalance', + reason: 'action intent preserves rebalance' + }, + { + expectedTools: [ + 'portfolio_analysis', + 'risk_assessment', + 'rebalance_plan', + 'market_data_lookup' + ] as AiAgentToolName[], + plannedTools: [ + 'portfolio_analysis', + 'risk_assessment', + 'rebalance_plan', + 'market_data_lookup' + ] as AiAgentToolName[], + query: 'invest and rebalance after checking market quote for NVDA', + reason: 'action + market intent keeps all planned tools' + }, + { + expectedTools: ['stress_test'] as AiAgentToolName[], + plannedTools: ['stress_test'] as AiAgentToolName[], + query: 'run stress scenario read-only', + reason: 'read-only stress execution stays allowed' + } + ])( + 'applies policy gating: $reason', + ({ expectedTools, plannedTools, query, route }) => { + const decision = applyToolExecutionPolicy({ + plannedTools, + query + }); + + if (route) { + expect(decision.route).toBe(route); + } else { + expect(decision.route).toBe('tools'); + } + + expect(decision.toolsToExecute).toEqual(expectedTools); + } + ); + + it('marks rebalance-only no-action prompts as clarify with needs_confirmation', () => { + const decision = applyToolExecutionPolicy({ + plannedTools: ['rebalance_plan'], + query: 'review concentration profile' + }); + + expect(decision.route).toBe('clarify'); + expect(decision.blockReason).toBe('needs_confirmation'); + expect(decision.blockedByPolicy).toBe(true); + expect(decision.toolsToExecute).toEqual([]); + }); + + it('formats policy verification details with planned and executed tools', () => { + const decision = applyToolExecutionPolicy({ + plannedTools: [ + 'portfolio_analysis', + 'risk_assessment', + 'rebalance_plan' + ], + query: 'review concentration risk' + }); + const details = formatPolicyVerificationDetails({ + policyDecision: decision + }); + + expect(details).toContain('route=tools'); + expect(details).toContain('blocked_by_policy=true'); + expect(details).toContain('block_reason=needs_confirmation'); + expect(details).toContain( + 'planned_tools=portfolio_analysis, risk_assessment, rebalance_plan' + ); + expect(details).toContain( + 'executed_tools=portfolio_analysis, risk_assessment' + ); + }); +}); diff --git a/apps/api/src/app/endpoints/ai/ai-agent.policy.utils.ts b/apps/api/src/app/endpoints/ai/ai-agent.policy.utils.ts index 76e20ec9e..9364e0eb4 100644 --- a/apps/api/src/app/endpoints/ai/ai-agent.policy.utils.ts +++ b/apps/api/src/app/endpoints/ai/ai-agent.policy.utils.ts @@ -34,6 +34,11 @@ const SIMPLE_ASSISTANT_QUERY_PATTERNS = [ /^\s*(?:how do you work|how (?:can|do) i use (?:you|this))\s*[!.?]*\s*$/i, /^\s*(?:help|assist(?: me)?|what can you help with)\s*[!.?]*\s*$/i ]; +const DIRECT_IDENTITY_QUERY_PATTERN = /\b(?:who are you|what are you)\b/i; +const DIRECT_USAGE_QUERY_PATTERN = + /\b(?:how do you work|how (?:can|do) i use (?:you|this)|how should i ask)\b/i; +const DIRECT_CAPABILITY_QUERY_PATTERN = + /\b(?:what can (?:you|i) do|help|assist(?: me)?|what can you help with)\b/i; const READ_ONLY_TOOLS = new Set([ 'portfolio_analysis', 'risk_assessment', @@ -292,6 +297,51 @@ function evaluateSimpleArithmetic(query: string) { return `${expression} = ${formatNumericResult(result)}`; } +function createNoToolDirectResponse(query?: string) { + const normalizedQuery = query?.trim().toLowerCase() ?? ''; + + if (DIRECT_IDENTITY_QUERY_PATTERN.test(normalizedQuery)) { + return [ + 'I am Ghostfolio AI, your portfolio copilot for this account.', + 'I analyze concentration risk, summarize holdings, fetch market quotes, run stress scenarios, and compose diversification or rebalance options.', + 'Try one of these:', + '- "Give me a concentration risk summary"', + '- "Show the latest prices for my top holdings"', + '- "Help me diversify with 2-3 optioned plans"' + ].join('\n'); + } + + if (DIRECT_USAGE_QUERY_PATTERN.test(normalizedQuery)) { + return [ + 'I am Ghostfolio AI. Use short direct prompts and include your goal or constraint.', + 'Good pattern: objective + scope + constraint (for example, "reduce top holding below 35% with low tax impact").', + 'I can return analysis, recommendation options, stress scenarios, and market snapshots with citations.', + 'If key constraints are missing, I will ask up to 3 follow-up questions before giving trade-style steps.' + ].join('\n'); + } + + if (DIRECT_CAPABILITY_QUERY_PATTERN.test(normalizedQuery)) { + return [ + 'I am Ghostfolio AI. You can use me in three modes: diagnose, recommend, and verify.', + 'Diagnose: concentration risk, top exposures, and allocation summaries.', + 'Recommend: optioned diversification/rebalance plans with assumptions and next questions.', + 'Verify: live quote checks and stress-scenario impact estimates.', + 'Try next:', + '- "Analyze my concentration risk"', + '- "Help me diversify with new-money and sell/rotate options"', + '- "Run a 20% downside stress test"' + ].join('\n'); + } + + return [ + 'I am Ghostfolio AI. I can help with portfolio analysis, concentration risk, market prices, diversification options, and stress scenarios.', + 'Try one of these:', + '- "Show my top holdings"', + '- "What is my concentration risk?"', + '- "Help me diversify with actionable options"' + ].join('\n'); +} + export function applyToolExecutionPolicy({ plannedTools, query @@ -410,7 +460,7 @@ export function createPolicyRouteResponse({ return arithmeticResult; } - return `I am your Ghostfolio AI assistant. I can help with portfolio analysis, concentration risk, market prices, rebalancing ideas, and stress scenarios. Try: "Show my top holdings" or "What is my concentration risk?".`; + return createNoToolDirectResponse(query); } return `I can help with portfolio analysis, concentration risk, market prices, and stress scenarios. Ask a portfolio question when you are ready.`; diff --git a/apps/api/src/app/endpoints/ai/ai-agent.utils.spec.ts b/apps/api/src/app/endpoints/ai/ai-agent.utils.spec.ts index f41042b42..d90f201cb 100644 --- a/apps/api/src/app/endpoints/ai/ai-agent.utils.spec.ts +++ b/apps/api/src/app/endpoints/ai/ai-agent.utils.spec.ts @@ -78,8 +78,10 @@ describe('AiAgentUtils', () => { expect(decision.route).toBe('direct'); expect(decision.toolsToExecute).toEqual([]); expect(decision.blockReason).toBe('no_tool_query'); - expect(createPolicyRouteResponse({ policyDecision: decision })).toContain( - 'Ghostfolio AI assistant' + expect( + createPolicyRouteResponse({ policyDecision: decision, query: 'Who are you?' }) + ).toContain( + 'Ghostfolio AI' ); }); @@ -143,6 +145,14 @@ describe('AiAgentUtils', () => { ).toEqual(['portfolio_analysis', 'risk_assessment', 'rebalance_plan']); }); + it('selects recommendation tools for ambiguous action phrasing', () => { + expect( + determineToolPlan({ + query: 'What can I do?' + }) + ).toEqual(['portfolio_analysis', 'risk_assessment', 'rebalance_plan']); + }); + it('selects rebalance tool for rebalance-focused prompts', () => { expect( determineToolPlan({ @@ -328,4 +338,282 @@ describe('AiAgentUtils', () => { expect(qualityCheck.check).toBe('response_quality'); expect(qualityCheck.status).toBe('failed'); }); + + it.each([ + { + expected: ['AAPL', 'MSFT'], + query: 'Need AAPL plus MSFT update' + }, + { + expected: ['BRK.B', 'VTI'], + query: 'Quote BRK.B and VTI' + }, + { + expected: ['QQQ', 'SPY'], + query: 'Check $qqq against $spy' + }, + { + expected: ['AAPL'], + query: 'Price for AAPL and THE and WHAT' + }, + { + expected: [], + query: 'price for appl and tsla in lowercase without prefixes' + }, + { + expected: ['AMD', 'NVDA'], + query: 'Show AMD then $nvda' + }, + { + expected: ['BTCUSD'], + query: 'ticker BTCUSD now' + }, + { + expected: ['MSFT'], + query: 'Quote MSFT, msft, and $msft' + }, + { + expected: ['SHOP.TO'], + query: 'market for SHOP.TO' + }, + { + expected: [], + query: 'what can you do' + } + ])('extractSymbolsFromQuery handles edge case: $query', ({ expected, query }) => { + expect(extractSymbolsFromQuery(query)).toEqual(expected); + }); + + it.each([ + { + expectedTools: ['portfolio_analysis'], + query: 'portfolio overview' + }, + { + expectedTools: ['portfolio_analysis'], + query: 'holdings summary' + }, + { + expectedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan'], + query: 'allocation snapshot' + }, + { + expectedTools: ['portfolio_analysis'], + query: 'performance review' + }, + { + expectedTools: ['portfolio_analysis', 'risk_assessment'], + query: 'risk concentration report' + }, + { + expectedTools: ['portfolio_analysis', 'risk_assessment'], + query: 'diversification check' + }, + { + expectedTools: ['market_data_lookup'], + query: 'price for NVDA' + }, + { + expectedTools: ['market_data_lookup'], + query: 'ticker quote for AAPL' + }, + { + expectedTools: ['market_data_lookup'], + query: 'market context' + }, + { + expectedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan'], + query: 'where should I invest next' + }, + { + expectedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan'], + query: 'trim overweight positions' + }, + { + expectedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan'], + query: 'sell and rebalance' + }, + { + expectedTools: ['portfolio_analysis', 'risk_assessment', 'stress_test'], + query: 'run a crash stress test' + }, + { + expectedTools: ['portfolio_analysis', 'risk_assessment', 'stress_test'], + query: 'drawdown shock analysis' + }, + { + expectedTools: ['portfolio_analysis', 'risk_assessment', 'stress_test'], + query: 'stress scenario' + }, + { + expectedTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan', 'market_data_lookup'], + query: 'rebalance portfolio and quote NVDA' + }, + { + expectedTools: ['portfolio_analysis', 'risk_assessment', 'market_data_lookup'], + query: 'analyze risk and market price' + }, + { + expectedTools: [], + query: 'who are you' + }, + { + expectedTools: [], + query: 'hello there' + }, + { + expectedTools: [], + query: 'help me with account settings' + } + ])( + 'determineToolPlan returns expected tools for "$query"', + ({ expectedTools, query }) => { + expect(determineToolPlan({ query })).toEqual(expectedTools); + } + ); + + it.each([ + { + expected: true, + query: 'How should I rebalance and reduce concentration risk?', + text: + 'Trim your top position by 4% and direct the next 1500 USD to two smaller holdings. Recheck concentration after each contribution.' + }, + { + expected: true, + query: 'What is my market price exposure?', + text: + 'AAPL is 210.12 USD and MSFT is 455.90 USD. Market exposure remains concentrated in your top position.' + }, + { + expected: false, + query: 'Should I buy more MSFT?', + text: + 'As an AI, I cannot provide financial advice and you should consult a financial advisor.' + }, + { + expected: false, + query: 'What are my risk metrics right now?', + text: + 'Risk seems elevated overall with concentration concerns but no specific values are available.' + }, + { + expected: false, + query: 'Where should I invest next?', + text: + 'Consider your long-term goals.' + }, + { + expected: true, + query: 'Where should I invest next?', + text: + 'Allocate 70% of new money to positions outside your top holding and 30% to broad-market exposure. This lowers concentration without forced selling.' + }, + { + expected: true, + query: 'Run stress drawdown estimate', + text: + 'Under a 20% shock, estimated drawdown is 3200 USD and projected value is 12800 USD. Reduce single-name concentration to improve downside stability.' + }, + { + expected: false, + query: 'Run stress drawdown estimate', + text: + 'Stress impact could be meaningful and diversification may help over time.' + }, + { + expected: false, + query: 'What is concentration risk now?', + text: 'Risk is high.' + }, + { + expected: true, + query: 'What is concentration risk now?', + text: + 'Top holding is 52.4% with HHI 0.331. Trim 2-4 percentage points from the top position or add to underweight holdings.' + } + ])( + 'isGeneratedAnswerReliable=$expected for quality gate case', + ({ expected, query, text }) => { + expect( + isGeneratedAnswerReliable({ + answer: text, + query + }) + ).toBe(expected); + } + ); + + it.each([ + { + expectedStatus: 'passed', + query: 'How should I rebalance risk?', + text: + 'Top holding is 48%. Trim 3% from the largest position and add to two underweight holdings. Re-evaluate concentration in one week.' + }, + { + expectedStatus: 'warning', + query: 'Show concentration and market price risk', + text: + 'Concentration is elevated and diversification would improve resilience over time.' + }, + { + expectedStatus: 'warning', + query: 'Where should I invest next?', + text: + 'You can diversify over time by considering additional positions that fit your risk profile and timeline.' + }, + { + expectedStatus: 'failed', + query: 'Where should I invest next?', + text: + 'As an AI, I cannot provide financial advice and you should consult a financial advisor.' + }, + { + expectedStatus: 'warning', + query: 'What is my drawdown risk right now?', + text: + 'Drawdown risk exists and depends on current concentration and market volatility.' + }, + { + expectedStatus: 'passed', + query: 'What is my drawdown risk right now?', + text: + 'At a 20% shock, projected drawdown is 2600 USD. Reduce your top position by 2-3 points to lower downside risk concentration.' + }, + { + expectedStatus: 'warning', + query: 'Show my market quote and risk', + text: + 'AAPL is high and risk is elevated.' + }, + { + expectedStatus: 'passed', + query: 'Show my market quote and risk', + text: + 'AAPL is 212.40 USD and top holding concentration is 46.2%. Rebalance by directing new cash into lower-weight holdings.' + }, + { + expectedStatus: 'warning', + query: 'Analyze performance and allocation', + text: + 'Performance and allocation are stable.' + }, + { + expectedStatus: 'passed', + query: 'Analyze performance and allocation', + text: + 'Portfolio return is 8.4% and top allocation is 41.0%. Add to underweight positions to keep concentration from rising.' + } + ])( + 'evaluateAnswerQuality returns $expectedStatus', + ({ expectedStatus, query, text }) => { + expect( + evaluateAnswerQuality({ + answer: text, + query + }).status + ).toBe(expectedStatus); + } + ); }); diff --git a/apps/api/src/app/endpoints/ai/ai.service.spec.ts b/apps/api/src/app/endpoints/ai/ai.service.spec.ts index 255c6a233..09f98abb6 100644 --- a/apps/api/src/app/endpoints/ai/ai.service.spec.ts +++ b/apps/api/src/app/endpoints/ai/ai.service.spec.ts @@ -255,7 +255,7 @@ describe('AiService', () => { userId: 'user-direct-route' }); - expect(result.answer).toContain('Ghostfolio AI assistant'); + expect(result.answer).toContain('I am Ghostfolio AI'); expect(result.toolCalls).toEqual([]); expect(result.citations).toEqual([]); expect(dataProviderService.getQuotes).not.toHaveBeenCalled(); @@ -291,6 +291,82 @@ describe('AiService', () => { expect(generateTextSpy).not.toHaveBeenCalled(); }); + it('routes ambiguous action follow-up query through recommendation tools when finance memory exists', async () => { + portfolioService.getDetails.mockResolvedValue({ + holdings: { + USD: { + allocationInPercentage: 0.665, + dataSource: DataSource.MANUAL, + symbol: 'USD', + valueInBaseCurrency: 6650 + }, + VTI: { + allocationInPercentage: 0.159, + dataSource: DataSource.YAHOO, + symbol: 'VTI', + valueInBaseCurrency: 1590 + }, + AAPL: { + allocationInPercentage: 0.085, + dataSource: DataSource.YAHOO, + symbol: 'AAPL', + valueInBaseCurrency: 850 + } + } + }); + redisCacheService.get.mockImplementation(async (key: string) => { + if (key.startsWith('ai-agent-memory-user-follow-up-')) { + return JSON.stringify({ + turns: [ + { + answer: + 'Risk concentration is high. Top holding allocation is 66.5%.', + query: 'help me diversify', + timestamp: '2026-02-24T12:00:00.000Z', + toolCalls: [ + { status: 'success', tool: 'portfolio_analysis' }, + { status: 'success', tool: 'risk_assessment' } + ] + } + ] + }); + } + + return undefined; + }); + jest.spyOn(subject, 'generateText').mockResolvedValue({ + text: 'Improve concentration by redirecting new cash to underweight holdings, trimming the top position in stages, and reassessing risk after each rebalance checkpoint.' + } as never); + + const result = await subject.chat({ + languageCode: 'en', + query: 'what can i do?', + sessionId: 'session-follow-up', + userCurrency: 'USD', + userId: 'user-follow-up' + }); + + expect(result.answer).toContain('Option 1 (new money first):'); + expect(result.answer).toContain('Option 2 (sell and rebalance):'); + expect(result.toolCalls).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + status: 'success', + tool: 'portfolio_analysis' + }), + expect.objectContaining({ + status: 'success', + tool: 'risk_assessment' + }) + ]) + ); + expect(subject.generateText).toHaveBeenCalledWith( + expect.objectContaining({ + prompt: expect.stringContaining('Recommendation context (JSON):') + }) + ); + }); + it('persists and recalls cross-session user preferences for the same user', async () => { const redisStore = new Map(); redisCacheService.get.mockImplementation(async (key: string) => { @@ -466,7 +542,6 @@ describe('AiService', () => { userId: 'user-diversify-1' }); - expect(result.answer).toContain('Next-step allocation:'); expect(result.answer).toContain('AAPL'); expect(result.answer).toContain('Option 1 (new money first):'); expect(result.answer).toContain('Option 2 (sell and rebalance):'); diff --git a/apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts b/apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts index f9df25123..d72d4857a 100644 --- a/apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts +++ b/apps/api/src/app/endpoints/ai/evals/ai-quality-eval.spec.ts @@ -105,8 +105,9 @@ describe('AiReplyQualityEval', () => { userId: 'quality-user' }); - expect(response.answer).toContain('Next-step allocation:'); + expect(response.answer).toContain('Option 1 (new money first):'); expect(response.answer).toContain('Largest long allocations:'); + expect(response.answer).toContain('Risk notes:'); expect(response.answer).not.toContain('As an AI'); expect(response.verification).toEqual( expect.arrayContaining([ @@ -127,7 +128,7 @@ describe('AiReplyQualityEval', () => { const response = await subject.chat({ languageCode: 'en', - query: 'How should I rebalance and invest next month?', + query: 'Summarize my concentration risk and allocation status.', sessionId: 'quality-eval-generated', userCurrency: 'USD', userId: 'quality-user' diff --git a/apps/api/src/app/endpoints/ai/evals/dataset/edge-case.dataset.ts b/apps/api/src/app/endpoints/ai/evals/dataset/edge-case.dataset.ts index 88cebaf65..ec1f2d983 100644 --- a/apps/api/src/app/endpoints/ai/evals/dataset/edge-case.dataset.ts +++ b/apps/api/src/app/endpoints/ai/evals/dataset/edge-case.dataset.ts @@ -141,7 +141,7 @@ export const EDGE_CASE_EVAL_CASES: AiAgentMvpEvalCase[] = [ createEvalCase({ category: 'edge_case', expected: { - answerIncludes: ['Session memory applied from 2 prior turn(s).'], + answerIncludes: ['Largest long allocations:'], memoryTurnsAtLeast: 3, requiredTools: ['portfolio_analysis'] }, @@ -158,7 +158,7 @@ export const EDGE_CASE_EVAL_CASES: AiAgentMvpEvalCase[] = [ createEvalCase({ category: 'edge_case', expected: { - answerIncludes: ['Session memory applied from 1 prior turn(s).'], + answerIncludes: ['Largest long allocations:'], memoryTurnsAtLeast: 2, requiredTools: ['portfolio_analysis'] }, diff --git a/apps/api/src/app/endpoints/ai/evals/dataset/multi-step.dataset.ts b/apps/api/src/app/endpoints/ai/evals/dataset/multi-step.dataset.ts index dd0bde5fc..20f8a11a2 100644 --- a/apps/api/src/app/endpoints/ai/evals/dataset/multi-step.dataset.ts +++ b/apps/api/src/app/endpoints/ai/evals/dataset/multi-step.dataset.ts @@ -131,7 +131,7 @@ export const MULTI_STEP_EVAL_CASES: AiAgentMvpEvalCase[] = [ createEvalCase({ category: 'multi_step', expected: { - answerIncludes: ['Session memory applied from 1 prior turn(s).'], + answerIncludes: ['Summary: concentration is'], memoryTurnsAtLeast: 2, requiredTools: ['portfolio_analysis', 'risk_assessment', 'rebalance_plan'] }, diff --git a/apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.html b/apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.html index 4a1d45518..45faead43 100644 --- a/apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.html +++ b/apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.html @@ -77,12 +77,13 @@ aria-label="Show response details" class="chat-details-trigger ml-2" i18n-aria-label - mat-icon-button + mat-stroked-button type="button" [matMenuTriggerFor]="responseDetailsMenu" (click)="onOpenResponseDetails(message.response)" > - + + Info } diff --git a/apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.scss b/apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.scss index ff253e55d..791dce189 100644 --- a/apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.scss +++ b/apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.scss @@ -66,16 +66,20 @@ textarea::selection { } .chat-details-trigger { + align-items: center; color: var(--ai-chat-muted-text); - height: 1.625rem; + display: inline-flex; + gap: 0.2rem; + height: 1.75rem; line-height: 1; - width: 1.625rem; + min-width: 0; + padding: 0 0.4rem; } .chat-details-trigger mat-icon { - font-size: 1rem; - height: 1rem; - width: 1rem; + font-size: 0.95rem; + height: 0.95rem; + width: 0.95rem; } .prompt-list { diff --git a/apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.spec.ts b/apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.spec.ts index 98a626a72..1d58bbe24 100644 --- a/apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.spec.ts +++ b/apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.spec.ts @@ -181,6 +181,7 @@ describe('GfAiChatPanelComponent', () => { ) as HTMLButtonElement | null; expect(detailsTrigger).toBeTruthy(); + expect(detailsTrigger?.textContent).toContain('Info'); detailsTrigger?.click(); fixture.detectChanges(); diff --git a/tasks/lessons.md b/tasks/lessons.md index 53db3fc41..c706d38fa 100644 --- a/tasks/lessons.md +++ b/tasks/lessons.md @@ -36,6 +36,22 @@ Updated: 2026-02-24 Mistake: Treated npm publication as the only completion path for contribution evidence Rule: When package publication is blocked, ship the tool in-repo and open upstream PRs in high-signal repositories to preserve external contribution progress. -9. Context: Memory feature validation after chat/session persistence rollout +9. Context: AI chat UX feedback on response readability + Mistake: Diagnostic metadata remained inline with assistant answers, reducing answer clarity + Rule: Keep primary assistant messages user-facing only and place diagnostics (confidence, citations, verification, observability) behind an explicit info-triggered popover. + +10. Context: Memory feature validation after chat/session persistence rollout Mistake: Session-scoped memory shipped without an explicit user-scoped preference path for cross-session continuity. Rule: When memory requirements mention user preferences, implement and test both session memory and user-level memory keyed independently from session IDs. + +11. Context: Large table-driven Jest test expansion for policy routing and arithmetic behavior + Mistake: Mixed tuple/string/object datasets under a single typed `it.each` signature created preventable TypeScript compile failures. + Rule: Keep each table shape typed independently (`it.each<[...tuple]>()` for positional rows and object generics only for object rows). + +12. Context: Ambiguous user follow-up prompts in a finance assistant ("what can i do?") + Mistake: Capability-style routing captured actionable follow-up intent and bypassed tool-backed recommendation generation. + Rule: Treat ambiguous action follow-ups as recommendation intent when finance context exists, and lock this with deterministic service tests. + +13. Context: Recommendation replies looked short and repetitive even when tool context was available + Mistake: Reliability gating accepted generic recommendation prose that lacked option sections and actionable structure. + Rule: For recommendation-intent prompts, enforce sectioned output quality gates (Option 1/2 + assumptions/risk notes/next questions) and fall back to deterministic option plans when structure is missing. diff --git a/tasks/tasks.md b/tasks/tasks.md index 9b7db4e5a..0e47c1184 100644 --- a/tasks/tasks.md +++ b/tasks/tasks.md @@ -226,14 +226,62 @@ Last updated: 2026-02-24 ## Session Plan (2026-02-24, Chat Details Popover UX) -- [ ] Audit current AI chat response rendering and identify diagnostics shown inline. -- [ ] Move diagnostics (confidence, citations, verification, observability) behind an info-triggered popover per assistant message. -- [ ] Keep main assistant response focused on user-facing answer and retain feedback controls in primary view. -- [ ] Update chat panel tests to assert info-trigger behavior and diagnostics visibility expectations. -- [ ] Run focused frontend verification and update trackers (`Tasks.md`, `tasks/tasks.md`, `tasks/lessons.md`). +- [x] Audit current AI chat response rendering and identify diagnostics shown inline. +- [x] Move diagnostics (confidence, citations, verification, observability) behind an info-triggered popover per assistant message. +- [x] Keep main assistant response focused on user-facing answer and retain feedback controls in primary view. +- [x] Update chat panel tests to assert info-trigger behavior and diagnostics visibility expectations. +- [x] Run focused frontend verification and update trackers (`Tasks.md`, `tasks/tasks.md`, `tasks/lessons.md`). + +## Session Plan (2026-02-24, Diversification Reply Actionability Fix) + +- [x] Reproduce and inspect `help me diversify` routing and fallback behavior in AI service. +- [x] Add deterministic diversification-action fallback guidance when LLM output is unavailable or rejected. +- [x] Add targeted helper/service tests for diversification prompt behavior. +- [x] Run focused verification (`npm run test:ai`) and update tracker notes. + +## Session Plan (2026-02-24, Recommendation Composer Prompting) + +- [ ] Add action-intent recommendation mode detection in AI answer builder. +- [ ] Build and inject structured recommendation context (concentration + constraints placeholders) into LLM prompting. +- [ ] Strengthen recommendation instructions to produce option-based actionable plans with assumptions and follow-up questions. +- [ ] Add targeted tests for recommendation-mode prompt composition. +- [ ] Run focused verification (`npm run test:ai`) and update tracker notes. + +## Session Plan (2026-02-24, Chat Reply Detail + Hidden Diagnostics Follow-up) + +- [x] Remove session-memory status text from user-facing assistant answers. +- [x] Improve no-tool direct assistant replies so identity/help prompts produce distinct, more detailed responses. +- [x] Enforce recommendation-mode fallback structure when generated answers are too short or lack option sections. +- [x] Update chat diagnostics trigger to show `i` icon with visible `Info` label while keeping details hidden in popover. +- [x] Run focused verification on touched API/client suites and update tracker evidence. + +## Session Plan (2026-02-24, AI + Eval Coverage to 100+) + +- [x] Audit current AI test count and eval dataset size against 100+ targets. +- [x] Add deterministic AI unit tests for policy routing, arithmetic handling, planning heuristics, and response quality guards. +- [x] Expand eval datasets to at least 100 cases with balanced category coverage and edge/adversarial depth. +- [x] Raise eval runner guardrails to assert 100+ total dataset size and updated category minimums. +- [x] Run focused verification (`npm run test:ai`, `npm run test:mvp-eval`) and update trackers (`Tasks.md`, `tasks/tasks.md`, `tasks/lessons.md`). + +## Session Plan (2026-02-24, Recommendation Follow-Up Routing Fix) + +- [x] Reproduce and isolate why `what can i do?` returns capability fallback after a risk turn. +- [x] Route ambiguous action follow-up queries with recent finance memory through tools to unlock recommendation-mode generation. +- [x] Add deterministic AI service tests for follow-up recommendation behavior and guard against capability fallback regression. +- [x] Run focused verification (`npm run test:ai`) and update trackers (`Tasks.md`, `tasks/tasks.md`, `tasks/lessons.md`). ## Verification Notes +- Chat reply detail + hidden diagnostics follow-up verification (local, 2026-02-24): + - `npm run test:ai` (10/10 suites passed, 181/181 tests) + - `npm run test:mvp-eval` (1/1 suite passed, 2/2 tests) + - `npx dotenv-cli -e .env.example -- npx jest apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.spec.ts --config apps/client/jest.config.ts` (8/8 tests passed) + - `npx nx run api:lint` (passed with existing workspace warnings) + - `npx nx run client:lint` (passed with existing workspace warnings) +- Chat details popover UX verification (2026-02-24): + - `npx dotenv-cli -e .env.example -- npx jest apps/client/src/app/pages/portfolio/analysis/ai-chat-panel/ai-chat-panel.component.spec.ts --config apps/client/jest.config.ts` + - `npx nx run client:lint` + - `npx nx run client:build:development-en` - `nx run api:lint` completed successfully (existing workspace warnings only). - Full `nx test api` currently fails in pre-existing portfolio calculator suites unrelated to AI endpoint changes. - Focused MVP verification passed: @@ -312,3 +360,14 @@ Last updated: 2026-02-24 - Cross-session user preference memory verification (local, 2026-02-24): - `npm run test:ai` (9/9 suites passed, 54/54 tests) - `npx nx run api:lint` (passes with existing workspace warnings) +- Diversification reply actionability fix verification (local, 2026-02-24): + - `npm run test:ai` (9/9 suites passed, 56/56 tests) + - `npx nx run api:lint` (passes with existing workspace warnings) +- AI + eval coverage to 100+ verification (local, 2026-02-24): + - `npm run test:ai` (10/10 suites passed, 176/176 tests) + - `npm run test:mvp-eval` (1/1 suite passed; dataset and pass-rate gates enforced) + - `npx tsx -e "import { AI_AGENT_MVP_EVAL_DATASET } from './apps/api/src/app/endpoints/ai/evals/mvp-eval.dataset.ts'; ..."` (109 total eval cases; category counts: happy_path 43, edge_case 26, adversarial 20, multi_step 20) +- Recommendation follow-up routing fix verification (local, 2026-02-24): + - `npx dotenv-cli -e .env.example -- npx jest apps/api/src/app/endpoints/ai/ai.service.spec.ts apps/api/src/app/endpoints/ai/ai-agent.chat.helpers.spec.ts apps/api/src/app/endpoints/ai/ai-agent.utils.spec.ts apps/api/src/app/endpoints/ai/ai-agent.policy.utils.spec.ts --config apps/api/jest.config.ts` (4/4 suites passed, 167/167 tests) + - `npm run test:ai` (10/10 suites passed, 181/181 tests) + - `npm run test:mvp-eval` (1/1 suite passed)