You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

438 lines
15 KiB

/**
* K1 Comparison Test — SC-006 Quality Gate
*
* Purpose: Validates that the new normalized K1LineItem pipeline produces
* correct output by:
*
* Part A (Pipeline Verification):
* - Takes EXTRACTED import sessions
* - Runs verify → confirm through the DB layer (simulating the service)
* - Asserts K1LineItems match extraction fields exactly
*
* Part B (Key Coverage Verification):
* - Maps baseline descriptive keys to IRS box numbers
* - Asserts all 23 baseline data keys have a corresponding K1BoxDefinition
*
* Usage: node --experimental-strip-types test/import/k1-comparison.test.mts
*/
import { PrismaClient } from '@prisma/client';
import { readFileSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
const __dirname = dirname(fileURLToPath(import.meta.url));
const prisma = new PrismaClient();
// ─── Descriptive key → IRS box number mapping ───────────────────────────────
// These are the 23 keys used in the seed data mapped to their standard IRS box numbers
const DESCRIPTIVE_KEY_TO_BOX: Record<string, string> = {
ordinaryIncome: '1',
netRentalIncome: '2',
otherRentalIncome: '3',
guaranteedPayments: '4',
interestIncome: '5',
dividends: '6a',
royalties: '7',
capitalGainLossShortTerm: '8',
capitalGainLossLongTerm: '9a',
unrecaptured1250Gain: '9b',
section1231GainLoss: '10',
otherIncome: '11',
section179Deduction: '12',
otherDeductions: '13',
selfEmploymentEarnings: '14',
foreignTaxesPaid: '16',
distributionsProperty: '19a',
otherAdjustments: '19b',
beginningTaxBasis: '20-L-begin',
endingTaxBasis: '20-L-end',
k1CapitalAccount: '20-L-cap',
endingGLBalance: '20-L-gl',
activityNotes: 'notes'
};
let passed = 0;
let failed = 0;
let skipped = 0;
function assert(condition: boolean, message: string): void {
if (condition) {
passed++;
console.log(` ✓ ${message}`);
} else {
failed++;
console.error(` ✗ FAIL: ${message}`);
}
}
function skip(message: string): void {
skipped++;
console.log(` ⊘ SKIP: ${message}`);
}
async function partAPipelineVerification(): Promise<void> {
console.log('\n══════════════════════════════════════════════════════════════');
console.log('Part A: Pipeline Verification — EXTRACTED → VERIFIED → CONFIRMED');
console.log('══════════════════════════════════════════════════════════════\n');
// Find EXTRACTED sessions with rawExtraction data
const sessions = await prisma.k1ImportSession.findMany({
where: { status: 'EXTRACTED' },
include: {
partnership: { select: { name: true } }
}
});
if (sessions.length === 0) {
skip('No EXTRACTED sessions found. Cannot verify pipeline.');
return;
}
console.log(`Found ${sessions.length} EXTRACTED session(s) to test.\n`);
for (const session of sessions) {
console.log(`─── Testing: ${session.partnership.name} ${session.taxYear} (${session.fileName}) ───`);
const raw = session.rawExtraction as any;
const fields = raw?.fields || [];
if (fields.length === 0) {
skip(`No fields in rawExtraction for session ${session.id}`);
continue;
}
// Step 1: Simulate verify — mark all fields as reviewed and store verified data
const verifiedFields = fields.map((f: any) => ({
...f,
isReviewed: true,
confidenceLevel: f.confidenceLevel || 'HIGH'
}));
await prisma.k1ImportSession.update({
where: { id: session.id },
data: {
status: 'VERIFIED',
rawExtraction: {
...raw,
verified: {
fields: verifiedFields,
unmappedItems: []
}
} as any
}
});
assert(true, `Session ${session.id.substring(0, 8)}... advanced to VERIFIED with ${verifiedFields.length} fields`);
// Step 2: Simulate confirm — create KDocument and K1LineItems
// Check for existing KDocument
const existingDoc = await prisma.kDocument.findUnique({
where: {
partnershipId_type_taxYear: {
partnershipId: session.partnershipId,
type: 'K1',
taxYear: session.taxYear
}
}
});
// Build K1LineItem data from verified fields (mirrors K1ImportService.confirm logic)
const lineItemMap = new Map<
string,
{
boxKey: string;
amount: number | null;
textValue: string | null;
rawText: string | null;
confidence: number | null;
sourcePage: number | null;
sourceCoords: any;
isUserEdited: boolean;
}
>();
const kDocData: Record<string, any> = {};
for (const field of verifiedFields) {
const boxKey = field.subtype
? `${field.boxNumber}-${field.subtype}`
: field.boxNumber;
// Auto-create box definition if missing
const existing = await prisma.k1BoxDefinition.findUnique({
where: { boxKey }
});
if (!existing) {
const maxSort = await prisma.k1BoxDefinition
.aggregate({ _max: { sortOrder: true } })
.then((r: any) => r._max.sortOrder ?? 999);
await prisma.k1BoxDefinition.create({
data: {
boxKey,
label: field.label || `Custom: ${boxKey}`,
section: null,
dataType: 'number',
sortOrder: maxSort + 1,
irsFormLine: null,
description: `Auto-created during comparison test for box key "${boxKey}"`,
isCustom: true
}
});
}
const isNumeric = field.numericValue !== undefined && field.numericValue !== null;
kDocData[boxKey] = field.numericValue ?? field.rawValue ?? null;
// Deduplicate by boxKey — take the entry with actual data
const newItem = {
boxKey,
amount: isNumeric ? field.numericValue : null,
textValue: !isNumeric ? String(field.rawValue ?? '') : null,
rawText: field.rawValue != null ? String(field.rawValue) : null,
confidence: field.confidence ?? null,
sourcePage: field.page ?? null,
sourceCoords: field.boundingBox ?? null,
isUserEdited: field.isReviewed === true
};
const existingItem = lineItemMap.get(boxKey);
if (existingItem) {
const existingHasValue =
existingItem.amount !== null ||
(existingItem.textValue && existingItem.textValue !== '' && existingItem.textValue !== '.');
const newHasValue =
newItem.amount !== null ||
(newItem.textValue && newItem.textValue !== '' && newItem.textValue !== '.');
if (newHasValue && !existingHasValue) {
lineItemMap.set(boxKey, newItem);
}
} else {
lineItemMap.set(boxKey, newItem);
}
}
const lineItemsToCreate = Array.from(lineItemMap.values());
// Create or update KDocument
let kDocument;
if (existingDoc) {
// Mark existing line items as superseded
await prisma.k1LineItem.updateMany({
where: { kDocumentId: existingDoc.id, isSuperseded: false },
data: { isSuperseded: true }
});
kDocument = await prisma.kDocument.update({
where: { id: existingDoc.id },
data: {
filingStatus: 'FINAL',
data: kDocData as any,
documentFileId: session.documentId
}
});
} else {
kDocument = await prisma.kDocument.create({
data: {
partnershipId: session.partnershipId,
type: 'K1',
taxYear: session.taxYear,
filingStatus: 'FINAL',
data: kDocData as any,
documentFileId: session.documentId
}
});
}
// Create K1LineItems
if (lineItemsToCreate.length > 0) {
await prisma.k1LineItem.createMany({
data: lineItemsToCreate.map((item) => ({
kDocumentId: kDocument.id,
boxKey: item.boxKey,
amount: item.amount,
textValue: item.textValue,
rawText: item.rawText,
confidence: item.confidence,
sourcePage: item.sourcePage,
sourceCoords: item.sourceCoords,
isUserEdited: item.isUserEdited,
isSuperseded: false
}))
});
}
// Update session to CONFIRMED
await prisma.k1ImportSession.update({
where: { id: session.id },
data: {
status: 'CONFIRMED',
kDocumentId: kDocument.id
}
});
assert(true, `KDocument ${kDocument.id.substring(0, 8)}... created/updated with ${lineItemsToCreate.length} fields (${verifiedFields.length} raw, ${verifiedFields.length - lineItemsToCreate.length} dupes merged)`);
// Step 3: Verify K1LineItems match deduplicated fields
const lineItems = await prisma.k1LineItem.findMany({
where: {
kDocumentId: kDocument.id,
isSuperseded: false
},
orderBy: { boxKey: 'asc' }
});
assert(
lineItems.length === lineItemsToCreate.length,
`K1LineItem count matches: ${lineItems.length} items (expected ${lineItemsToCreate.length})`
);
// Verify each deduplicated field has a corresponding K1LineItem
const resultLineItemMap = new Map(lineItems.map((li) => [li.boxKey, li]));
let fieldMismatches = 0;
for (const item of lineItemsToCreate) {
const li = resultLineItemMap.get(item.boxKey);
if (!li) {
console.error(` ✗ Missing K1LineItem for boxKey: ${item.boxKey}`);
fieldMismatches++;
continue;
}
// For numeric fields, compare amounts
if (item.amount !== null) {
const actualAmount = li.amount ? Number(li.amount) : null;
if (actualAmount !== item.amount) {
console.error(
` ✗ Amount mismatch for ${item.boxKey}: expected ${item.amount}, got ${actualAmount}`
);
fieldMismatches++;
}
} else if (item.textValue !== null) {
// Text/string comparison
if (li.textValue !== item.textValue) {
console.error(
` ✗ Text mismatch for ${item.boxKey}: expected "${item.textValue}", got "${li.textValue}"`
);
fieldMismatches++;
}
}
}
assert(
fieldMismatches === 0,
`All field values match K1LineItem data (${fieldMismatches} mismatches)`
);
// Verify no extra K1LineItems exist beyond what was extracted
const expectedKeys = new Set(lineItemsToCreate.map((item) => item.boxKey));
const extraLineItems = lineItems.filter((li) => !expectedKeys.has(li.boxKey));
assert(
extraLineItems.length === 0,
`No extra K1LineItems beyond extraction (${extraLineItems.length} extra)`
);
console.log('');
}
}
async function partBKeyCoverageVerification(): Promise<void> {
console.log('\n══════════════════════════════════════════════════════════════');
console.log('Part B: Key Coverage — Baseline descriptive keys → K1BoxDefinition');
console.log('══════════════════════════════════════════════════════════════\n');
// Load baseline
const baselinePath = join(__dirname, 'k1-comparison-baseline.json');
const baseline = JSON.parse(readFileSync(baselinePath, 'utf8'));
assert(
baseline.documents && baseline.documents.length > 0,
`Baseline loaded: ${baseline.documents.length} documents`
);
// Get all K1BoxDefinitions
const definitions = await prisma.k1BoxDefinition.findMany();
const defMap = new Map(definitions.map((d) => [d.boxKey, d]));
console.log(` K1BoxDefinition count: ${definitions.length}`);
// Verify each descriptive key maps to a valid box number
const allKeys = Object.keys(DESCRIPTIVE_KEY_TO_BOX);
let unmappedCount = 0;
for (const descriptiveKey of allKeys) {
const boxKey = DESCRIPTIVE_KEY_TO_BOX[descriptiveKey];
if (boxKey === 'notes') {
// activityNotes is metadata, not a box number
skip(`${descriptiveKey} → ${boxKey} (metadata, not an IRS box)`);
continue;
}
// Check for box definition, allowing for custom keys like 20-L-begin
const def = defMap.get(boxKey);
if (def) {
assert(true, `${descriptiveKey} → ${boxKey} (${def.label})`);
} else {
// Check if it's a section 20 custom key
if (boxKey.startsWith('20-L-')) {
skip(`${descriptiveKey} → ${boxKey} (Section L custom key, will be auto-created on import)`);
} else {
console.error(` ✗ No K1BoxDefinition for ${descriptiveKey} → ${boxKey}`);
unmappedCount++;
failed++;
}
}
}
assert(
unmappedCount === 0,
`All standard IRS box keys have K1BoxDefinition entries (${unmappedCount} missing)`
);
// Verify baseline document data values are numeric (as expected)
let nonNumericCount = 0;
for (const doc of baseline.documents) {
for (const [key, value] of Object.entries(doc.data)) {
if (key === 'activityNotes') continue; // Text field
if (value !== null && typeof value !== 'number') {
nonNumericCount++;
}
}
}
assert(
nonNumericCount === 0,
`All baseline numeric values are numbers (${nonNumericCount} non-numeric)`
);
}
async function main(): Promise<void> {
console.log('╔══════════════════════════════════════════════════════════════╗');
console.log('║ K1 Comparison Test — SC-006 Quality Gate ║');
console.log('╚══════════════════════════════════════════════════════════════╝');
try {
await partAPipelineVerification();
await partBKeyCoverageVerification();
} finally {
await prisma.$disconnect();
}
console.log('\n══════════════════════════════════════════════════════════════');
console.log(`Results: ${passed} passed, ${failed} failed, ${skipped} skipped`);
console.log('══════════════════════════════════════════════════════════════');
if (failed > 0) {
console.error('\n🔴 SC-006 GATE: FAILED — Do not commit.\n');
process.exit(1);
} else {
console.log('\n🟢 SC-006 GATE: PASSED — Safe to commit.\n');
process.exit(0);
}
}
main();