// Source: mirror of https://github.com/ghostfolio/ghostfolio
/**
 * K1 Comparison Test — SC-006 Quality Gate
 *
 * Purpose: Validates that the new normalized K1LineItem pipeline produces
 * correct output by:
 *
 * Part A (Pipeline Verification):
 *   - Takes EXTRACTED import sessions
 *   - Runs verify → confirm through the DB layer (simulating the service)
 *   - Asserts K1LineItems match extraction fields exactly
 *
 * Part B (Key Coverage Verification):
 *   - Maps baseline descriptive keys to IRS box numbers
 *   - Asserts all 23 baseline data keys have a corresponding K1BoxDefinition
 *
 * Usage: node --experimental-strip-types test/import/k1-comparison.test.mts
 */

import { PrismaClient } from '@prisma/client';
|
|
import { readFileSync } from 'node:fs';
|
|
import { join, dirname } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
|
|
// Directory containing this test file; ES modules have no built-in __dirname,
// so it is derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Single Prisma client shared by every check in this file.
const prisma = new PrismaClient();

// ─── Descriptive key → IRS box number mapping ───────────────────────────────
// The 23 descriptive keys used in the seed data, each mapped to the box key
// under which the corresponding K1BoxDefinition is looked up.
const DESCRIPTIVE_KEY_TO_BOX: Record<string, string> = {
  // Income (boxes 1–11)
  ordinaryIncome: '1',
  netRentalIncome: '2',
  otherRentalIncome: '3',
  guaranteedPayments: '4',
  interestIncome: '5',
  dividends: '6a',
  royalties: '7',
  capitalGainLossShortTerm: '8',
  capitalGainLossLongTerm: '9a',
  // NOTE(review): on the IRS Schedule K-1 (Form 1065) unrecaptured section
  // 1250 gain appears to be box 9c (9b being collectibles gain) — confirm
  // that '9b' is the key actually used by the seeded K1BoxDefinition rows.
  unrecaptured1250Gain: '9b',
  section1231GainLoss: '10',
  otherIncome: '11',

  // Deductions, self-employment and foreign taxes (boxes 12–16)
  section179Deduction: '12',
  otherDeductions: '13',
  selfEmploymentEarnings: '14',
  foreignTaxesPaid: '16',

  // Distributions and adjustments (box 19)
  distributionsProperty: '19a',
  otherAdjustments: '19b',

  // Section L custom keys (prefixed "20-L-")
  beginningTaxBasis: '20-L-begin',
  endingTaxBasis: '20-L-end',
  k1CapitalAccount: '20-L-cap',
  endingGLBalance: '20-L-gl',

  // Free-text metadata rather than a box number
  activityNotes: 'notes'
};

// Running tallies of check outcomes for the whole run.
let passed = 0; // incremented by assert() when a check holds
let failed = 0; // incremented when a check does not hold
let skipped = 0; // incremented by skip()

/**
 * Records the outcome of one check in the module-level tallies and prints it.
 *
 * @param condition - Result of the check; `true` counts as a pass, `false` as a failure.
 * @param message - Description printed next to the pass/fail marker.
 */
function assert(condition: boolean, message: string): void {
  if (!condition) {
    failed++;
    console.error(` ✗ FAIL: ${message}`);
    return;
  }

  passed++;
  console.log(` ✓ ${message}`);
}

/**
 * Records a check that was intentionally not run and prints the reason.
 *
 * @param message - Explanation of why the check was skipped.
 */
function skip(message: string): void {
  skipped += 1;
  console.log(` ⊘ SKIP: ${message}`);
}

/**
 * Part A: drives every EXTRACTED import session through verify → confirm
 * directly against the database and checks the resulting K1LineItems.
 *
 * For each session this function:
 *   1. marks all extracted fields as reviewed and moves the session to VERIFIED,
 *   2. creates (or updates) the KDocument and writes one K1LineItem per
 *      deduplicated box key, then moves the session to CONFIRMED,
 *   3. reads the non-superseded K1LineItems back and compares them with what
 *      was written.
 *
 * Outcomes are recorded through assert()/skip(); nothing is returned.
 * NOTE: this mutates the database (session status, KDocument, K1LineItem and
 * possibly K1BoxDefinition rows) and does not roll anything back.
 */
async function partAPipelineVerification(): Promise<void> {
  console.log('\n══════════════════════════════════════════════════════════════');
  console.log('Part A: Pipeline Verification — EXTRACTED → VERIFIED → CONFIRMED');
  console.log('══════════════════════════════════════════════════════════════\n');

  /** A K1LineItem row as assembled from one verified field, before insertion. */
  type PendingLineItem = {
    boxKey: string;
    amount: number | null;
    textValue: string | null;
    rawText: string | null;
    confidence: number | null;
    sourcePage: number | null;
    sourceCoords: any;
    isUserEdited: boolean;
  };

  // An item "has a value" when it carries an amount, or text other than '' / '.'.
  const hasValue = (item: PendingLineItem): boolean =>
    item.amount !== null ||
    (item.textValue !== null && item.textValue !== '' && item.textValue !== '.');

  // Find EXTRACTED sessions with rawExtraction data
  const sessions = await prisma.k1ImportSession.findMany({
    where: { status: 'EXTRACTED' },
    include: {
      partnership: { select: { name: true } }
    }
  });

  if (sessions.length === 0) {
    skip('No EXTRACTED sessions found. Cannot verify pipeline.');
    return;
  }

  console.log(`Found ${sessions.length} EXTRACTED session(s) to test.\n`);

  for (const session of sessions) {
    console.log(`─── Testing: ${session.partnership.name} ${session.taxYear} (${session.fileName}) ───`);

    const raw = session.rawExtraction as any;
    const fields = raw?.fields || [];

    if (fields.length === 0) {
      skip(`No fields in rawExtraction for session ${session.id}`);
      continue;
    }

    // Step 1: Simulate verify — mark all fields as reviewed and store verified data
    const verifiedFields = fields.map((f: any) => ({
      ...f,
      isReviewed: true,
      confidenceLevel: f.confidenceLevel || 'HIGH'
    }));

    await prisma.k1ImportSession.update({
      where: { id: session.id },
      data: {
        status: 'VERIFIED',
        rawExtraction: {
          ...raw,
          verified: {
            fields: verifiedFields,
            unmappedItems: []
          }
        } as any
      }
    });

    assert(true, `Session ${session.id.substring(0, 8)}... advanced to VERIFIED with ${verifiedFields.length} fields`);

    // Step 2: Simulate confirm — create KDocument and K1LineItems
    // Check for existing KDocument
    const existingDoc = await prisma.kDocument.findUnique({
      where: {
        partnershipId_type_taxYear: {
          partnershipId: session.partnershipId,
          type: 'K1',
          taxYear: session.taxYear
        }
      }
    });

    // Build K1LineItem data from verified fields (mirrors K1ImportService.confirm logic)
    const lineItemMap = new Map<string, PendingLineItem>();
    const kDocData: Record<string, any> = {};

    for (const field of verifiedFields) {
      const boxKey = field.subtype
        ? `${field.boxNumber}-${field.subtype}`
        : field.boxNumber;

      // Auto-create box definition if missing
      const existing = await prisma.k1BoxDefinition.findUnique({
        where: { boxKey }
      });

      if (!existing) {
        // New definitions are appended after the current highest sortOrder
        // (or after 999 when no sortOrder exists yet).
        const sortAggregate: any = await prisma.k1BoxDefinition.aggregate({
          _max: { sortOrder: true }
        });
        const maxSort = sortAggregate._max.sortOrder ?? 999;

        await prisma.k1BoxDefinition.create({
          data: {
            boxKey,
            label: field.label || `Custom: ${boxKey}`,
            section: null,
            dataType: 'number',
            sortOrder: maxSort + 1,
            irsFormLine: null,
            description: `Auto-created during comparison test for box key "${boxKey}"`,
            isCustom: true
          }
        });
      }

      const isNumeric = field.numericValue !== undefined && field.numericValue !== null;
      kDocData[boxKey] = field.numericValue ?? field.rawValue ?? null;

      const newItem: PendingLineItem = {
        boxKey,
        amount: isNumeric ? field.numericValue : null,
        textValue: !isNumeric ? String(field.rawValue ?? '') : null,
        rawText: field.rawValue != null ? String(field.rawValue) : null,
        confidence: field.confidence ?? null,
        sourcePage: field.page ?? null,
        sourceCoords: field.boundingBox ?? null,
        isUserEdited: field.isReviewed === true
      };

      // Deduplicate by boxKey — the first entry wins unless it is empty and a
      // later entry carries actual data.
      const existingItem = lineItemMap.get(boxKey);
      if (!existingItem || (hasValue(newItem) && !hasValue(existingItem))) {
        lineItemMap.set(boxKey, newItem);
      }
    }

    const lineItemsToCreate = Array.from(lineItemMap.values());

    // Create or update KDocument
    if (existingDoc) {
      // Mark existing line items as superseded before the new set is written
      await prisma.k1LineItem.updateMany({
        where: { kDocumentId: existingDoc.id, isSuperseded: false },
        data: { isSuperseded: true }
      });
    }

    const kDocument = existingDoc
      ? await prisma.kDocument.update({
          where: { id: existingDoc.id },
          data: {
            filingStatus: 'FINAL',
            data: kDocData as any,
            documentFileId: session.documentId
          }
        })
      : await prisma.kDocument.create({
          data: {
            partnershipId: session.partnershipId,
            type: 'K1',
            taxYear: session.taxYear,
            filingStatus: 'FINAL',
            data: kDocData as any,
            documentFileId: session.documentId
          }
        });

    // Create K1LineItems
    if (lineItemsToCreate.length > 0) {
      await prisma.k1LineItem.createMany({
        data: lineItemsToCreate.map((item) => ({
          kDocumentId: kDocument.id,
          boxKey: item.boxKey,
          amount: item.amount,
          textValue: item.textValue,
          rawText: item.rawText,
          confidence: item.confidence,
          sourcePage: item.sourcePage,
          sourceCoords: item.sourceCoords,
          isUserEdited: item.isUserEdited,
          isSuperseded: false
        }))
      });
    }

    // Update session to CONFIRMED
    await prisma.k1ImportSession.update({
      where: { id: session.id },
      data: {
        status: 'CONFIRMED',
        kDocumentId: kDocument.id
      }
    });

    assert(true, `KDocument ${kDocument.id.substring(0, 8)}... created/updated with ${lineItemsToCreate.length} fields (${verifiedFields.length} raw, ${verifiedFields.length - lineItemsToCreate.length} dupes merged)`);

    // Step 3: Verify K1LineItems match deduplicated fields
    const lineItems = await prisma.k1LineItem.findMany({
      where: {
        kDocumentId: kDocument.id,
        isSuperseded: false
      },
      orderBy: { boxKey: 'asc' }
    });

    assert(
      lineItems.length === lineItemsToCreate.length,
      `K1LineItem count matches: ${lineItems.length} items (expected ${lineItemsToCreate.length})`
    );

    // Verify each deduplicated field has a corresponding K1LineItem
    const resultLineItemMap = new Map(lineItems.map((li) => [li.boxKey, li]));
    let fieldMismatches = 0;

    for (const item of lineItemsToCreate) {
      const li = resultLineItemMap.get(item.boxKey);

      if (!li) {
        console.error(` ✗ Missing K1LineItem for boxKey: ${item.boxKey}`);
        fieldMismatches++;
        continue;
      }

      if (item.amount !== null) {
        // Numeric field: compare amounts. The stored value is checked against
        // null/undefined explicitly so that a stored 0 is compared as 0
        // instead of being treated as "no amount".
        const actualAmount = li.amount != null ? Number(li.amount) : null;
        if (actualAmount !== item.amount) {
          console.error(
            ` ✗ Amount mismatch for ${item.boxKey}: expected ${item.amount}, got ${actualAmount}`
          );
          fieldMismatches++;
        }
      } else if (item.textValue !== null) {
        // Text/string comparison
        if (li.textValue !== item.textValue) {
          console.error(
            ` ✗ Text mismatch for ${item.boxKey}: expected "${item.textValue}", got "${li.textValue}"`
          );
          fieldMismatches++;
        }
      }
    }

    assert(
      fieldMismatches === 0,
      `All field values match K1LineItem data (${fieldMismatches} mismatches)`
    );

    // Verify no extra K1LineItems exist beyond what was extracted
    const expectedKeys = new Set(lineItemsToCreate.map((item) => item.boxKey));
    const extraLineItems = lineItems.filter((li) => !expectedKeys.has(li.boxKey));
    assert(
      extraLineItems.length === 0,
      `No extra K1LineItems beyond extraction (${extraLineItems.length} extra)`
    );

    console.log('');
  }
}

/**
 * Part B: checks that the descriptive keys in DESCRIPTIVE_KEY_TO_BOX resolve
 * to K1BoxDefinition rows and that the baseline file holds numeric data.
 *
 * Steps:
 *   1. loads `k1-comparison-baseline.json` from this file's directory and
 *      asserts it contains at least one document,
 *   2. looks up every mapped box key among all K1BoxDefinition rows; the
 *      'notes' key and missing "20-L-" keys are skipped, any other missing
 *      key is a failure,
 *   3. asserts every non-null baseline data value (except `activityNotes`)
 *      is a number.
 *
 * Outcomes are recorded through assert()/skip(); nothing is returned.
 *
 * @throws If the baseline file cannot be read or is not valid JSON.
 */
async function partBKeyCoverageVerification(): Promise<void> {
  console.log('\n══════════════════════════════════════════════════════════════');
  console.log('Part B: Key Coverage — Baseline descriptive keys → K1BoxDefinition');
  console.log('══════════════════════════════════════════════════════════════\n');

  // Load baseline
  const baselinePath = join(__dirname, 'k1-comparison-baseline.json');
  const baseline = JSON.parse(readFileSync(baselinePath, 'utf8'));

  // Normalise before use: a baseline without a `documents` array must show up
  // as a failed check, not as a TypeError while building the message below.
  const baselineDocuments: any[] = Array.isArray(baseline?.documents)
    ? baseline.documents
    : [];

  assert(
    baselineDocuments.length > 0,
    `Baseline loaded: ${baselineDocuments.length} documents`
  );

  // Get all K1BoxDefinitions
  const definitions = await prisma.k1BoxDefinition.findMany();
  const defMap = new Map(definitions.map((d) => [d.boxKey, d]));

  console.log(` K1BoxDefinition count: ${definitions.length}`);

  // Verify each descriptive key maps to a valid box number
  let unmappedCount = 0;

  for (const [descriptiveKey, boxKey] of Object.entries(DESCRIPTIVE_KEY_TO_BOX)) {
    if (boxKey === 'notes') {
      // activityNotes is metadata, not a box number
      skip(`${descriptiveKey} → ${boxKey} (metadata, not an IRS box)`);
      continue;
    }

    const def = defMap.get(boxKey);
    if (def) {
      assert(true, `${descriptiveKey} → ${boxKey} (${def.label})`);
      continue;
    }

    if (boxKey.startsWith('20-L-')) {
      // Section L custom keys may legitimately be absent at this point
      skip(`${descriptiveKey} → ${boxKey} (Section L custom key, will be auto-created on import)`);
      continue;
    }

    // Each missing definition counts as its own failure, in addition to the
    // summary assertion after the loop.
    console.error(` ✗ No K1BoxDefinition for ${descriptiveKey} → ${boxKey}`);
    unmappedCount++;
    failed++;
  }

  assert(
    unmappedCount === 0,
    `All standard IRS box keys have K1BoxDefinition entries (${unmappedCount} missing)`
  );

  // Verify baseline document data values are numeric (as expected)
  let nonNumericCount = 0;
  for (const doc of baselineDocuments) {
    // A document without a `data` object contributes no values to check.
    for (const [key, value] of Object.entries(doc?.data ?? {})) {
      if (key === 'activityNotes') continue; // Text field
      if (value !== null && typeof value !== 'number') {
        nonNumericCount++;
      }
    }
  }

  assert(
    nonNumericCount === 0,
    `All baseline numeric values are numbers (${nonNumericCount} non-numeric)`
  );
}

/**
 * Entry point: runs Part A and Part B, prints the result summary and exits
 * the process with code 1 when any check failed, 0 otherwise.
 *
 * An exception thrown by either part is logged and counted as a failure so
 * that the summary and the gate verdict are still printed. The Prisma client
 * is disconnected in all cases before the summary.
 */
async function main(): Promise<void> {
  console.log('╔══════════════════════════════════════════════════════════════╗');
  console.log('║ K1 Comparison Test — SC-006 Quality Gate ║');
  console.log('╚══════════════════════════════════════════════════════════════╝');

  try {
    await partAPipelineVerification();
    await partBKeyCoverageVerification();
  } catch (error: unknown) {
    // A crash must fail the gate and still produce the summary below.
    failed++;
    console.error(' ✗ FAIL: Unexpected error aborted the test run:', error);
  } finally {
    await prisma.$disconnect();
  }

  console.log('\n══════════════════════════════════════════════════════════════');
  console.log(`Results: ${passed} passed, ${failed} failed, ${skipped} skipped`);
  console.log('══════════════════════════════════════════════════════════════');

  if (failed > 0) {
    console.error('\n🔴 SC-006 GATE: FAILED — Do not commit.\n');
    process.exit(1);
  } else {
    console.log('\n🟢 SC-006 GATE: PASSED — Safe to commit.\n');
    process.exit(0);
  }
}

// main() handles errors from the test parts itself; this catch covers what
// can still reject (e.g. a failing disconnect) so the process exits non-zero.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});