diff --git a/apps/api/src/app/cell-mapping/cell-mapping.controller.ts b/apps/api/src/app/cell-mapping/cell-mapping.controller.ts index f35205e47..ca0ad2f37 100644 --- a/apps/api/src/app/cell-mapping/cell-mapping.controller.ts +++ b/apps/api/src/app/cell-mapping/cell-mapping.controller.ts @@ -6,6 +6,7 @@ import { Controller, Delete, Get, + Patch, Put, Query, UseGuards @@ -48,6 +49,7 @@ export class CellMappingController { boxNumber: string; label: string; description?: string; + cellType?: string; isCustom: boolean; }>; } @@ -71,6 +73,22 @@ export class CellMappingController { return this.cellMappingService.resetMappings(partnershipId); } + /** + * PATCH /api/v1/cell-mapping/toggle-ignored + * Toggle the isIgnored flag for a specific cell mapping. + */ + @HasPermission(permissions.updateKDocument) + @Patch('toggle-ignored') + @UseGuards(AuthGuard('jwt'), HasPermissionGuard) + public async toggleIgnored( + @Body() data: { partnershipId: string; boxNumber: string } + ) { + return this.cellMappingService.toggleIgnored( + data.partnershipId, + data.boxNumber + ); + } + /** * GET /api/v1/cell-mapping/aggregation-rules * Get aggregation rules for a partnership. 
diff --git a/apps/api/src/app/cell-mapping/cell-mapping.service.ts b/apps/api/src/app/cell-mapping/cell-mapping.service.ts index 3a08451e3..791d8517a 100644 --- a/apps/api/src/app/cell-mapping/cell-mapping.service.ts +++ b/apps/api/src/app/cell-mapping/cell-mapping.service.ts @@ -3,41 +3,113 @@ import { PrismaService } from '@ghostfolio/api/services/prisma/prisma.service'; import { HttpException, Injectable, OnModuleInit } from '@nestjs/common'; import { StatusCodes } from 'http-status-codes'; +/** Allowed cell types */ +type CellType = 'number' | 'string' | 'percentage' | 'boolean'; + /** Default IRS K-1 (Form 1065) cell mappings */ const IRS_DEFAULT_MAPPINGS: Array<{ boxNumber: string; label: string; description: string; + cellType: CellType; sortOrder: number; }> = [ - { boxNumber: '1', label: 'Ordinary business income (loss)', description: 'IRS Schedule K-1 Box 1', sortOrder: 1 }, - { boxNumber: '2', label: 'Net rental real estate income (loss)', description: 'IRS Schedule K-1 Box 2', sortOrder: 2 }, - { boxNumber: '3', label: 'Other net rental income (loss)', description: 'IRS Schedule K-1 Box 3', sortOrder: 3 }, - { boxNumber: '4', label: 'Guaranteed payments for services', description: 'IRS Schedule K-1 Box 4', sortOrder: 4 }, - { boxNumber: '4a', label: 'Guaranteed payments for capital', description: 'IRS Schedule K-1 Box 4a', sortOrder: 5 }, - { boxNumber: '4b', label: 'Total guaranteed payments', description: 'IRS Schedule K-1 Box 4b', sortOrder: 6 }, - { boxNumber: '5', label: 'Interest income', description: 'IRS Schedule K-1 Box 5', sortOrder: 7 }, - { boxNumber: '6a', label: 'Ordinary dividends', description: 'IRS Schedule K-1 Box 6a', sortOrder: 8 }, - { boxNumber: '6b', label: 'Qualified dividends', description: 'IRS Schedule K-1 Box 6b', sortOrder: 9 }, - { boxNumber: '6c', label: 'Dividend equivalents', description: 'IRS Schedule K-1 Box 6c', sortOrder: 10 }, - { boxNumber: '7', label: 'Royalties', description: 'IRS Schedule K-1 Box 7', sortOrder: 
11 }, - { boxNumber: '8', label: 'Net short-term capital gain (loss)', description: 'IRS Schedule K-1 Box 8', sortOrder: 12 }, - { boxNumber: '9a', label: 'Net long-term capital gain (loss)', description: 'IRS Schedule K-1 Box 9a', sortOrder: 13 }, - { boxNumber: '9b', label: 'Collectibles (28%) gain (loss)', description: 'IRS Schedule K-1 Box 9b', sortOrder: 14 }, - { boxNumber: '9c', label: 'Unrecaptured section 1250 gain', description: 'IRS Schedule K-1 Box 9c', sortOrder: 15 }, - { boxNumber: '10', label: 'Net section 1231 gain (loss)', description: 'IRS Schedule K-1 Box 10', sortOrder: 16 }, - { boxNumber: '11', label: 'Other income (loss)', description: 'IRS Schedule K-1 Box 11', sortOrder: 17 }, - { boxNumber: '12', label: 'Section 179 deduction', description: 'IRS Schedule K-1 Box 12', sortOrder: 18 }, - { boxNumber: '13', label: 'Other deductions', description: 'IRS Schedule K-1 Box 13', sortOrder: 19 }, - { boxNumber: '14', label: 'Self-employment earnings (loss)', description: 'IRS Schedule K-1 Box 14', sortOrder: 20 }, - { boxNumber: '15', label: 'Credits', description: 'IRS Schedule K-1 Box 15', sortOrder: 21 }, - { boxNumber: '16', label: 'Foreign transactions', description: 'IRS Schedule K-1 Box 16', sortOrder: 22 }, - { boxNumber: '17', label: 'Alternative minimum tax (AMT) items', description: 'IRS Schedule K-1 Box 17', sortOrder: 23 }, - { boxNumber: '18', label: 'Tax-exempt income and nondeductible expenses', description: 'IRS Schedule K-1 Box 18', sortOrder: 24 }, - { boxNumber: '19a', label: 'Distributions — Cash and marketable securities', description: 'IRS Schedule K-1 Box 19a', sortOrder: 25 }, - { boxNumber: '19b', label: 'Distributions — Other property', description: 'IRS Schedule K-1 Box 19b', sortOrder: 26 }, - { boxNumber: '20', label: 'Other information', description: 'IRS Schedule K-1 Box 20', sortOrder: 27 }, - { boxNumber: '21', label: 'Foreign taxes paid or accrued', description: 'IRS Schedule K-1 Box 21', sortOrder: 28 } + // ── 
Header / Metadata ────────────────────────────────────────────────── + { boxNumber: 'K1_DOCUMENT_ID', label: 'K-1 Document ID', description: 'Large-font ID at top right of K-1 form', cellType: 'string', sortOrder: 0 }, + { boxNumber: 'TAX_YEAR', label: 'Tax Year', description: 'Calendar year or tax year beginning/ending', cellType: 'string', sortOrder: 1 }, + { boxNumber: 'FINAL_K1', label: 'Final K-1', description: 'Check if this is a final K-1', cellType: 'boolean', sortOrder: 2 }, + { boxNumber: 'AMENDED_K1', label: 'Amended K-1', description: 'Check if this is an amended K-1', cellType: 'boolean', sortOrder: 3 }, + + // ── Part I — Information About the Partnership ───────────────────────── + { boxNumber: 'A', label: "Partnership's EIN", description: 'Part I, Line A — Employer identification number', cellType: 'string', sortOrder: 10 }, + { boxNumber: 'B', label: "Partnership's name, address, city, state, ZIP", description: 'Part I, Line B', cellType: 'string', sortOrder: 11 }, + { boxNumber: 'C', label: 'IRS center where partnership filed return', description: 'Part I, Line C', cellType: 'string', sortOrder: 12 }, + { boxNumber: 'D', label: 'Publicly traded partnership (PTP)', description: 'Part I, Line D — Check if PTP', cellType: 'boolean', sortOrder: 13 }, + + // ── Part II — Information About the Partner ──────────────────────────── + { boxNumber: 'E', label: "Partner's identifying number", description: 'Part II, Line E — SSN or TIN', cellType: 'string', sortOrder: 20 }, + { boxNumber: 'F', label: "Partner's name, address, city, state, ZIP", description: 'Part II, Line F', cellType: 'string', sortOrder: 21 }, + { boxNumber: 'G_GENERAL', label: 'General partner or LLC member-manager', description: 'Part II, Line G — General partner checkbox', cellType: 'boolean', sortOrder: 22 }, + { boxNumber: 'G_LIMITED', label: 'Limited partner or other LLC member', description: 'Part II, Line G — Limited partner checkbox', cellType: 'boolean', sortOrder: 23 }, + { 
boxNumber: 'H1_DOMESTIC', label: 'Domestic partner', description: 'Part II, Line H1 — Domestic', cellType: 'boolean', sortOrder: 24 }, + { boxNumber: 'H1_FOREIGN', label: 'Foreign partner', description: 'Part II, Line H1 — Foreign', cellType: 'boolean', sortOrder: 25 }, + { boxNumber: 'H2', label: 'Disregarded entity (DE)', description: 'Part II, Line H2 — DE checkbox', cellType: 'boolean', sortOrder: 26 }, + { boxNumber: 'H2_TIN', label: 'Disregarded entity TIN', description: 'Part II, Line H2 — DE taxpayer ID', cellType: 'string', sortOrder: 27 }, + { boxNumber: 'I1', label: 'Type of entity', description: 'Part II, Line I1 — Entity type of partner', cellType: 'string', sortOrder: 28 }, + { boxNumber: 'I2', label: 'Retirement plan (IRA/SEP/Keogh)', description: 'Part II, Line I2 — Retirement plan checkbox', cellType: 'boolean', sortOrder: 29 }, + + // ── Section J — Partner's Share of Profit, Loss, and Capital ─────────── + { boxNumber: 'J_PROFIT_BEGIN', label: 'Profit — Beginning %', description: 'Section J — Profit share beginning of year', cellType: 'percentage', sortOrder: 30 }, + { boxNumber: 'J_PROFIT_END', label: 'Profit — Ending %', description: 'Section J — Profit share end of year', cellType: 'percentage', sortOrder: 31 }, + { boxNumber: 'J_LOSS_BEGIN', label: 'Loss — Beginning %', description: 'Section J — Loss share beginning of year', cellType: 'percentage', sortOrder: 32 }, + { boxNumber: 'J_LOSS_END', label: 'Loss — Ending %', description: 'Section J — Loss share end of year', cellType: 'percentage', sortOrder: 33 }, + { boxNumber: 'J_CAPITAL_BEGIN', label: 'Capital — Beginning %', description: 'Section J — Capital share beginning of year', cellType: 'percentage', sortOrder: 34 }, + { boxNumber: 'J_CAPITAL_END', label: 'Capital — Ending %', description: 'Section J — Capital share end of year', cellType: 'percentage', sortOrder: 35 }, + { boxNumber: 'J_SALE', label: 'Decrease due to sale', description: 'Section J — Check if decrease is due to sale', 
cellType: 'boolean', sortOrder: 36 }, + { boxNumber: 'J_EXCHANGE', label: 'Exchange of partnership interest', description: 'Section J — Check if exchange', cellType: 'boolean', sortOrder: 37 }, + + // ── Section K — Partner's Share of Liabilities ───────────────────────── + { boxNumber: 'K_NONRECOURSE_BEGIN', label: 'Nonrecourse — Beginning', description: 'Section K — Nonrecourse liabilities beginning', cellType: 'number', sortOrder: 40 }, + { boxNumber: 'K_NONRECOURSE_END', label: 'Nonrecourse — Ending', description: 'Section K — Nonrecourse liabilities ending', cellType: 'number', sortOrder: 41 }, + { boxNumber: 'K_QUAL_NONRECOURSE_BEGIN', label: 'Qualified nonrecourse — Beginning', description: 'Section K — Qualified nonrecourse financing beginning', cellType: 'number', sortOrder: 42 }, + { boxNumber: 'K_QUAL_NONRECOURSE_END', label: 'Qualified nonrecourse — Ending', description: 'Section K — Qualified nonrecourse financing ending', cellType: 'number', sortOrder: 43 }, + { boxNumber: 'K_RECOURSE_BEGIN', label: 'Recourse — Beginning', description: 'Section K — Recourse liabilities beginning', cellType: 'number', sortOrder: 44 }, + { boxNumber: 'K_RECOURSE_END', label: 'Recourse — Ending', description: 'Section K — Recourse liabilities ending', cellType: 'number', sortOrder: 45 }, + { boxNumber: 'K2', label: 'Includes lower-tier partnership liabilities', description: 'Section K2 — Checkbox', cellType: 'boolean', sortOrder: 46 }, + { boxNumber: 'K3', label: 'Liability subject to guarantees', description: 'Section K3 — Checkbox', cellType: 'boolean', sortOrder: 47 }, + + // ── Section L — Partner's Capital Account Analysis ───────────────────── + { boxNumber: 'L_BEG_CAPITAL', label: 'Beginning capital account', description: 'Section L — Beginning capital', cellType: 'number', sortOrder: 50 }, + { boxNumber: 'L_CONTRIBUTED', label: 'Capital contributed during year', description: 'Section L — Capital contributed', cellType: 'number', sortOrder: 51 }, + { boxNumber: 
'L_CURR_YR_INCOME', label: 'Current year net income (loss)', description: 'Section L — Current year income/loss', cellType: 'number', sortOrder: 52 }, + { boxNumber: 'L_OTHER', label: 'Other increase (decrease)', description: 'Section L — Other adjustments', cellType: 'number', sortOrder: 53 }, + { boxNumber: 'L_WITHDRAWALS', label: 'Withdrawals and distributions', description: 'Section L — Withdrawals/distributions', cellType: 'number', sortOrder: 54 }, + { boxNumber: 'L_END_CAPITAL', label: 'Ending capital account', description: 'Section L — Ending capital', cellType: 'number', sortOrder: 55 }, + + // ── Section M — Contributed Property ─────────────────────────────────── + { boxNumber: 'M_YES', label: 'Contributed property with built-in gain/loss — Yes', description: 'Section M — Yes checkbox', cellType: 'boolean', sortOrder: 60 }, + { boxNumber: 'M_NO', label: 'Contributed property with built-in gain/loss — No', description: 'Section M — No checkbox', cellType: 'boolean', sortOrder: 61 }, + + // ── Section N — Net Unrecognized Section 704(c) ──────────────────────── + { boxNumber: 'N_BEGINNING', label: 'Net 704(c) gain/loss — Beginning', description: 'Section N — Beginning balance', cellType: 'number', sortOrder: 62 }, + { boxNumber: 'N_ENDING', label: 'Net 704(c) gain/loss — Ending', description: 'Section N — Ending balance', cellType: 'number', sortOrder: 63 }, + + // ── Part III — Partner's Share of Current Year Income, Deductions, etc. 
─ + { boxNumber: '1', label: 'Ordinary business income (loss)', description: 'IRS Schedule K-1 Box 1', cellType: 'number', sortOrder: 100 }, + { boxNumber: '2', label: 'Net rental real estate income (loss)', description: 'IRS Schedule K-1 Box 2', cellType: 'number', sortOrder: 101 }, + { boxNumber: '3', label: 'Other net rental income (loss)', description: 'IRS Schedule K-1 Box 3', cellType: 'number', sortOrder: 102 }, + { boxNumber: '4', label: 'Guaranteed payments for services', description: 'IRS Schedule K-1 Box 4', cellType: 'number', sortOrder: 103 }, + { boxNumber: '4a', label: 'Guaranteed payments for capital', description: 'IRS Schedule K-1 Box 4a', cellType: 'number', sortOrder: 104 }, + { boxNumber: '4b', label: 'Total guaranteed payments', description: 'IRS Schedule K-1 Box 4b', cellType: 'number', sortOrder: 105 }, + { boxNumber: '5', label: 'Interest income', description: 'IRS Schedule K-1 Box 5', cellType: 'number', sortOrder: 106 }, + { boxNumber: '6a', label: 'Ordinary dividends', description: 'IRS Schedule K-1 Box 6a', cellType: 'number', sortOrder: 107 }, + { boxNumber: '6b', label: 'Qualified dividends', description: 'IRS Schedule K-1 Box 6b', cellType: 'number', sortOrder: 108 }, + { boxNumber: '6c', label: 'Dividend equivalents', description: 'IRS Schedule K-1 Box 6c', cellType: 'number', sortOrder: 109 }, + { boxNumber: '7', label: 'Royalties', description: 'IRS Schedule K-1 Box 7', cellType: 'number', sortOrder: 110 }, + { boxNumber: '8', label: 'Net short-term capital gain (loss)', description: 'IRS Schedule K-1 Box 8', cellType: 'number', sortOrder: 111 }, + { boxNumber: '9a', label: 'Net long-term capital gain (loss)', description: 'IRS Schedule K-1 Box 9a', cellType: 'number', sortOrder: 112 }, + { boxNumber: '9b', label: 'Collectibles (28%) gain (loss)', description: 'IRS Schedule K-1 Box 9b', cellType: 'number', sortOrder: 113 }, + { boxNumber: '9c', label: 'Unrecaptured section 1250 gain', description: 'IRS Schedule K-1 Box 9c', 
cellType: 'number', sortOrder: 114 }, + { boxNumber: '10', label: 'Net section 1231 gain (loss)', description: 'IRS Schedule K-1 Box 10', cellType: 'number', sortOrder: 115 }, + { boxNumber: '11', label: 'Other income (loss)', description: 'IRS Schedule K-1 Box 11', cellType: 'number', sortOrder: 116 }, + { boxNumber: '12', label: 'Section 179 deduction', description: 'IRS Schedule K-1 Box 12', cellType: 'number', sortOrder: 117 }, + { boxNumber: '13', label: 'Other deductions', description: 'IRS Schedule K-1 Box 13', cellType: 'number', sortOrder: 118 }, + { boxNumber: '14', label: 'Self-employment earnings (loss)', description: 'IRS Schedule K-1 Box 14', cellType: 'number', sortOrder: 119 }, + { boxNumber: '15', label: 'Credits', description: 'IRS Schedule K-1 Box 15', cellType: 'number', sortOrder: 120 }, + { boxNumber: '16', label: 'Foreign transactions', description: 'IRS Schedule K-1 Box 16', cellType: 'number', sortOrder: 121 }, + { boxNumber: '16_K3', label: 'Schedule K-3 is attached', description: 'IRS Schedule K-1 Box 16 K-3 checkbox', cellType: 'boolean', sortOrder: 122 }, + { boxNumber: '17', label: 'Alternative minimum tax (AMT) items', description: 'IRS Schedule K-1 Box 17', cellType: 'number', sortOrder: 123 }, + { boxNumber: '18', label: 'Tax-exempt income and nondeductible expenses', description: 'IRS Schedule K-1 Box 18', cellType: 'number', sortOrder: 124 }, + { boxNumber: '19', label: 'Distributions', description: 'IRS Schedule K-1 Box 19', cellType: 'number', sortOrder: 125 }, + { boxNumber: '19a', label: 'Distributions — Cash and marketable securities', description: 'IRS Schedule K-1 Box 19a', cellType: 'number', sortOrder: 126 }, + { boxNumber: '19b', label: 'Distributions — Other property', description: 'IRS Schedule K-1 Box 19b', cellType: 'number', sortOrder: 127 }, + { boxNumber: '20A', label: 'Other information — Code A', description: 'IRS Schedule K-1 Box 20, Code A', cellType: 'number', sortOrder: 128 }, + { boxNumber: '20B', label: 
'Other information — Code B', description: 'IRS Schedule K-1 Box 20, Code B', cellType: 'number', sortOrder: 129 }, + { boxNumber: '20V', label: 'Other information — Code V', description: 'IRS Schedule K-1 Box 20, Code V', cellType: 'number', sortOrder: 130 }, + { boxNumber: '20_WILDCARD', label: 'Other information — Other codes', description: 'IRS Schedule K-1 Box 20, all other codes', cellType: 'number', sortOrder: 131 }, + { boxNumber: '21', label: 'Foreign taxes paid or accrued', description: 'IRS Schedule K-1 Box 21', cellType: 'number', sortOrder: 132 }, + { boxNumber: '22', label: 'More than one activity for at-risk purposes', description: 'IRS Schedule K-1 Box 22 — Checkbox', cellType: 'boolean', sortOrder: 133 }, + { boxNumber: '23', label: 'More than one activity for passive activity purposes', description: 'IRS Schedule K-1 Box 23 — Checkbox', cellType: 'boolean', sortOrder: 134 } ]; /** Default aggregation rules */ @@ -77,24 +149,54 @@ export class CellMappingService implements OnModuleInit { } /** - * Seed default IRS cell mappings (partnershipId = null) if they don't exist + * Seed default IRS cell mappings (partnershipId = null) if they don't exist. + * Also adds any new default mappings that may have been introduced in updates. 
*/ public async seedDefaultMappings() { - const existingCount = await this.prismaService.cellMapping.count({ + const existing = await this.prismaService.cellMapping.findMany({ where: { partnershipId: null } }); + const existingBoxNumbers = new Set(existing.map((m) => m.boxNumber)); - if (existingCount > 0) { - return; + const newMappings = IRS_DEFAULT_MAPPINGS.filter( + (m) => !existingBoxNumbers.has(m.boxNumber) + ); + + if (newMappings.length > 0) { + await this.prismaService.cellMapping.createMany({ + data: newMappings.map((mapping) => ({ + ...mapping, + partnershipId: null, + isCustom: false, + isIgnored: false, + cellType: mapping.cellType + })) + }); } - await this.prismaService.cellMapping.createMany({ - data: IRS_DEFAULT_MAPPINGS.map((mapping) => ({ - ...mapping, - partnershipId: null, - isCustom: false - })) - }); + // Backfill cellType on existing defaults that were seeded before the cellType column existed + for (const defaultMapping of IRS_DEFAULT_MAPPINGS) { + const existingRow = existing.find((e) => e.boxNumber === defaultMapping.boxNumber); + if (existingRow && (existingRow as any).cellType === 'number' && defaultMapping.cellType !== 'number') { + await this.prismaService.cellMapping.update({ + where: { id: existingRow.id }, + data: { cellType: defaultMapping.cellType } + }); + } + } + + // Clean up stale parent-level box "20" that was replaced by 20A/20B/20V/20_WILDCARD + const validBoxNumbers = new Set(IRS_DEFAULT_MAPPINGS.map((m) => m.boxNumber)); + const staleDefaults = existing.filter( + (m) => !m.isCustom && !validBoxNumbers.has(m.boxNumber) + ); + if (staleDefaults.length > 0) { + await this.prismaService.cellMapping.deleteMany({ + where: { + id: { in: staleDefaults.map((m) => m.id) } + } + }); + } } /** @@ -190,6 +292,7 @@ export class CellMappingService implements OnModuleInit { boxNumber: string; label: string; description?: string; + cellType?: string; isCustom: boolean; }> ) { @@ -197,6 +300,16 @@ export class CellMappingService 
implements OnModuleInit { for (let i = 0; i < mappings.length; i++) { const mapping = mappings[i]; + const updateData: Record = { + label: mapping.label, + description: mapping.description || null, + isCustom: mapping.isCustom, + sortOrder: i + 1 + }; + if (mapping.cellType) { + updateData.cellType = mapping.cellType; + } + const result = await this.prismaService.cellMapping.upsert({ where: { partnershipId_boxNumber: { @@ -204,17 +317,13 @@ export class CellMappingService implements OnModuleInit { boxNumber: mapping.boxNumber } }, - update: { - label: mapping.label, - description: mapping.description || null, - isCustom: mapping.isCustom, - sortOrder: i + 1 - }, + update: updateData, create: { partnershipId, boxNumber: mapping.boxNumber, label: mapping.label, description: mapping.description || null, + cellType: mapping.cellType || 'number', isCustom: mapping.isCustom, sortOrder: i + 1 } @@ -237,6 +346,53 @@ export class CellMappingService implements OnModuleInit { return { deleted: true, partnershipId }; } + /** + * Toggle the isIgnored flag on a cell mapping. + * If a partnership-specific override exists, toggles it. + * If only the global default exists, creates a partnership-specific override with isIgnored toggled. 
+ */ + public async toggleIgnored( + partnershipId: string, + boxNumber: string + ) { + // Check for partnership-specific mapping first + const existing = await this.prismaService.cellMapping.findUnique({ + where: { partnershipId_boxNumber: { partnershipId, boxNumber } } + }); + + if (existing) { + return this.prismaService.cellMapping.update({ + where: { id: existing.id }, + data: { isIgnored: !existing.isIgnored } + }); + } + + // No partnership override — check for global default and create an override + const globalMapping = await this.prismaService.cellMapping.findFirst({ + where: { partnershipId: null, boxNumber } + }); + + if (globalMapping) { + return this.prismaService.cellMapping.create({ + data: { + partnershipId, + boxNumber: globalMapping.boxNumber, + label: globalMapping.label, + description: globalMapping.description, + cellType: globalMapping.cellType, + isCustom: false, + isIgnored: true, + sortOrder: globalMapping.sortOrder + } + }); + } + + throw new HttpException( + `No cell mapping found for box ${boxNumber}`, + StatusCodes.NOT_FOUND + ); + } + /** * Update aggregation rules for a partnership. 
*/ diff --git a/apps/api/src/app/k1-import/extractors/k1-position-regions.ts b/apps/api/src/app/k1-import/extractors/k1-position-regions.ts index 39f997602..3a8af2b92 100644 --- a/apps/api/src/app/k1-import/extractors/k1-position-regions.ts +++ b/apps/api/src/app/k1-import/extractors/k1-position-regions.ts @@ -56,6 +56,20 @@ export interface K1PositionRegion { // Verified: FINAL_K1 'X' at (324.3, 746.2), TAX_YEAR '20'+'25' at (236.8/262.1, 727.7) // ============================================================================ const HEADER_REGIONS: K1PositionRegion[] = [ + { + fieldId: 'K1_DOCUMENT_ID', + boxNumber: 'K1_DOCUMENT_ID', + label: 'K-1 Document ID', + fieldCategory: 'METADATA', + valueType: 'text', + xMin: 500, + xMax: 580, + yMin: 750, + yMax: 770, + hasSubtype: false, + subtypeXMin: null, + subtypeXMax: null + }, { fieldId: 'TAX_YEAR', boxNumber: 'TAX_YEAR', @@ -1143,19 +1157,61 @@ const PART_III_RIGHT_REGIONS: K1PositionRegion[] = [ subtypeXMax: 510 }, { - fieldId: 'BOX_20', - boxNumber: '20', - label: 'Other information', + fieldId: 'BOX_20A', + boxNumber: '20A', + label: 'Other information — Code A', fieldCategory: 'PART_III', valueType: 'numeric', xMin: 510, xMax: 600, - yMin: 284, + yMin: 356, yMax: 396, hasSubtype: true, subtypeXMin: 445, subtypeXMax: 510 }, + { + fieldId: 'BOX_20B', + boxNumber: '20B', + label: 'Other information — Code B', + fieldCategory: 'PART_III', + valueType: 'numeric', + xMin: 510, + xMax: 600, + yMin: 332, + yMax: 356, + hasSubtype: true, + subtypeXMin: 445, + subtypeXMax: 510 + }, + { + fieldId: 'BOX_20V', + boxNumber: '20V', + label: 'Other information — Code V', + fieldCategory: 'PART_III', + valueType: 'numeric', + xMin: 510, + xMax: 600, + yMin: 308, + yMax: 332, + hasSubtype: true, + subtypeXMin: 445, + subtypeXMax: 510 + }, + { + fieldId: 'BOX_20_WILDCARD', + boxNumber: '20_WILDCARD', + label: 'Other information — Other codes', + fieldCategory: 'PART_III', + valueType: 'numeric', + xMin: 510, + xMax: 600, + yMin: 
284, + yMax: 308, + hasSubtype: true, + subtypeXMin: 445, + subtypeXMax: 510 + }, { fieldId: 'BOX_21', boxNumber: '21', diff --git a/apps/api/src/app/k1-import/extractors/pdf-parse-extractor.ts b/apps/api/src/app/k1-import/extractors/pdf-parse-extractor.ts index cc6db3e82..6cd0bcf68 100644 --- a/apps/api/src/app/k1-import/extractors/pdf-parse-extractor.ts +++ b/apps/api/src/app/k1-import/extractors/pdf-parse-extractor.ts @@ -472,7 +472,7 @@ export class PdfParseExtractor implements K1Extractor { if (!item) return; const numericValue = - region.valueType === 'checkbox' + region.valueType === 'checkbox' || region.valueType === 'text' ? null : this.parseNumericValue(item.text); const { confidence, confidenceLevel } = this.computeConfidence( @@ -531,6 +531,21 @@ export class PdfParseExtractor implements K1Extractor { for (const item of taxYearItems) { item.matched = true; } + // Also emit as a field so it appears in cell mapping + fields.push({ + boxNumber: 'TAX_YEAR', + label: 'Tax Year', + customLabel: null, + rawValue: String(year), + numericValue: null, + confidence: 1.0, + confidenceLevel: 'HIGH', + isUserEdited: false, + isReviewed: false, + subtype: null, + fieldCategory: 'METADATA', + isCheckbox: false + }); } } else if (taxYearItems.length === 1) { const text = taxYearItems[0].text; @@ -538,6 +553,20 @@ export class PdfParseExtractor implements K1Extractor { if (text.length === 4 && year >= 1900 && year <= 2100) { metadata.taxYear = year; taxYearItems[0].matched = true; + fields.push({ + boxNumber: 'TAX_YEAR', + label: 'Tax Year', + customLabel: null, + rawValue: String(year), + numericValue: null, + confidence: 1.0, + confidenceLevel: 'HIGH', + isUserEdited: false, + isReviewed: false, + subtype: null, + fieldCategory: 'METADATA', + isCheckbox: false + }); } } @@ -688,6 +717,9 @@ export class PdfParseExtractor implements K1Extractor { const assignments = this.assignItemsToRegions(dataItems, checkboxRegions); + // Track which checkbox regions were matched 
(checked) + const checkedRegionIds = new Set(); + for (const [region, item] of assignments) { const isChecked = item.text.toUpperCase() === 'X' || @@ -696,6 +728,8 @@ export class PdfParseExtractor implements K1Extractor { if (!isChecked) continue; + checkedRegionIds.add(region.fieldId); + const { confidence, confidenceLevel } = this.computeConfidence( item.x, item.y, @@ -706,7 +740,7 @@ export class PdfParseExtractor implements K1Extractor { boxNumber: region.boxNumber, label: region.label, customLabel: null, - rawValue: 'X', + rawValue: 'true', numericValue: null, confidence, confidenceLevel, @@ -726,6 +760,26 @@ export class PdfParseExtractor implements K1Extractor { metadata.isAmended = true; } } + + // Emit false for all unchecked checkbox regions + for (const region of checkboxRegions) { + if (checkedRegionIds.has(region.fieldId)) continue; + + fields.push({ + boxNumber: region.boxNumber, + label: region.label, + customLabel: null, + rawValue: 'false', + numericValue: null, + confidence: 1.0, + confidenceLevel: 'HIGH', + isUserEdited: false, + isReviewed: false, + subtype: null, + fieldCategory: 'CHECKBOX', + isCheckbox: true + }); + } } // ========================================================================== diff --git a/apps/api/src/app/k1-import/k1-aggregation.service.ts b/apps/api/src/app/k1-import/k1-aggregation.service.ts index 7c8f36ef3..5e1990d9c 100644 --- a/apps/api/src/app/k1-import/k1-aggregation.service.ts +++ b/apps/api/src/app/k1-import/k1-aggregation.service.ts @@ -50,7 +50,7 @@ export class K1AggregationService { name: rule.name, operation: rule.operation, sourceCells, - computedValue: Math.round(computedValue * 100) / 100, + computedValue, breakdown }; }); diff --git a/apps/api/src/app/k1-import/k1-field-mapper.service.ts b/apps/api/src/app/k1-import/k1-field-mapper.service.ts index 5c213a604..832ba3387 100644 --- a/apps/api/src/app/k1-import/k1-field-mapper.service.ts +++ b/apps/api/src/app/k1-import/k1-field-mapper.service.ts @@ 
-39,11 +39,20 @@ export class K1FieldMapperService { const mapping = mappingMap.get(field.boxNumber); if (mapping) { + // Skip ignored fields — they are filtered out of extraction results + if (mapping.isIgnored) { + this.logger.debug( + `Skipping ignored field: box ${field.boxNumber}` + ); + continue; + } + mappedFields.push({ ...field, label: mapping.label, - customLabel: mapping.isCustom ? mapping.label : field.customLabel - }); + customLabel: mapping.isCustom ? mapping.label : field.customLabel, + cellType: mapping.cellType + } as any); } else { // Field has a box number but no corresponding cell mapping this.logger.debug( @@ -103,6 +112,11 @@ export class K1FieldMapperService { const missingFields: K1ExtractedField[] = []; for (const mapping of mappings) { + // Skip ignored mappings — don't generate empty placeholder rows + if (mapping.isIgnored) { + continue; + } + if (!existingBoxes.has(mapping.boxNumber)) { missingFields.push({ boxNumber: mapping.boxNumber, @@ -113,8 +127,9 @@ export class K1FieldMapperService { confidence: 1.0, // Empty fields have full confidence confidenceLevel: 'HIGH', isUserEdited: false, - isReviewed: true // No review needed for empty fields - }); + isReviewed: true, // No review needed for empty fields + cellType: mapping.cellType + } as any); } } diff --git a/apps/api/src/app/k1-import/k1-import.service.ts b/apps/api/src/app/k1-import/k1-import.service.ts index 33de080b3..133bc8a84 100644 --- a/apps/api/src/app/k1-import/k1-import.service.ts +++ b/apps/api/src/app/k1-import/k1-import.service.ts @@ -631,13 +631,14 @@ export class K1ImportService { } // Build KDocument data from verified fields - const kDocumentData: Record = {}; + const kDocumentData: Record = {}; for (const field of verifiedData.fields) { // For subtype fields (e.g., box 11 "ZZ*", box 20 "A"), create unique key const key = field.subtype ? `${field.boxNumber}-${field.subtype}` : field.boxNumber; - kDocumentData[key] = field.numericValue ?? 
null; + // Persist numericValue for numeric fields, rawValue for text/checkbox/string fields + kDocumentData[key] = field.numericValue ?? field.rawValue ?? null; } // FR-012: Create or update KDocument diff --git a/apps/client/src/app/pages/cell-mapping/cell-mapping-page.component.ts b/apps/client/src/app/pages/cell-mapping/cell-mapping-page.component.ts index 316479a06..a71705e09 100644 --- a/apps/client/src/app/pages/cell-mapping/cell-mapping-page.component.ts +++ b/apps/client/src/app/pages/cell-mapping/cell-mapping-page.component.ts @@ -24,10 +24,13 @@ interface EditableMapping { boxNumber: string; label: string; description: string; + cellType: string; isCustom: boolean; + isIgnored: boolean; isEditing: boolean; editLabel: string; editDescription: string; + editCellType: string; } interface EditableRule { @@ -69,13 +72,21 @@ export class CellMappingPageComponent implements OnInit { // New custom cell form public newBoxNumber = ''; + public newCellType = 'number'; public newLabel = ''; // New rule form public newRuleName = ''; public newRuleSourceCells = ''; - public displayedColumns = ['boxNumber', 'label', 'description', 'isCustom', 'actions']; + public cellTypeOptions = [ + { value: 'number', label: 'Number ($)' }, + { value: 'string', label: 'String' }, + { value: 'percentage', label: 'Percentage (%)' }, + { value: 'boolean', label: 'Boolean' } + ]; + + public displayedColumns = ['boxNumber', 'label', 'description', 'cellType', 'isCustom', 'isIgnored', 'actions']; public constructor( private readonly changeDetectorRef: ChangeDetectorRef, @@ -101,12 +112,14 @@ export class CellMappingPageComponent implements OnInit { mapping.isEditing = true; mapping.editLabel = mapping.label; mapping.editDescription = mapping.description; + mapping.editCellType = mapping.cellType; this.changeDetectorRef.markForCheck(); } public saveEditMapping(mapping: EditableMapping): void { mapping.label = mapping.editLabel; mapping.description = mapping.editDescription; + 
mapping.cellType = mapping.editCellType; mapping.isEditing = false; this.changeDetectorRef.markForCheck(); } @@ -116,6 +129,30 @@ export class CellMappingPageComponent implements OnInit { this.changeDetectorRef.markForCheck(); } + public toggleIgnored(mapping: EditableMapping): void { + if (!this.selectedPartnershipId) { + return; + } + + this.k1ImportDataService + .toggleFieldIgnored({ + partnershipId: this.selectedPartnershipId, + boxNumber: mapping.boxNumber + }) + .pipe(takeUntilDestroyed(this.destroyRef)) + .subscribe({ + next: (result: any) => { + mapping.isIgnored = result.isIgnored; + this.changeDetectorRef.markForCheck(); + }, + error: (err) => { + this.error = + err?.error?.message || 'Failed to toggle ignored state.'; + this.changeDetectorRef.markForCheck(); + } + }); + } + public addCustomCell(): void { if (!this.newBoxNumber || !this.newLabel) { return; @@ -125,14 +162,18 @@ export class CellMappingPageComponent implements OnInit { boxNumber: this.newBoxNumber, label: this.newLabel, description: '', + cellType: this.newCellType, isCustom: true, + isIgnored: false, isEditing: false, editLabel: '', - editDescription: '' + editDescription: '', + editCellType: this.newCellType }); this.newBoxNumber = ''; this.newLabel = ''; + this.newCellType = 'number'; this.changeDetectorRef.markForCheck(); } @@ -158,6 +199,7 @@ export class CellMappingPageComponent implements OnInit { boxNumber: m.boxNumber, label: m.label, description: m.description, + cellType: m.cellType, isCustom: m.isCustom })) }) @@ -286,10 +328,13 @@ export class CellMappingPageComponent implements OnInit { boxNumber: m.boxNumber, label: m.label, description: m.description || '', + cellType: m.cellType || 'number', isCustom: m.isCustom, + isIgnored: m.isIgnored ?? 
false, isEditing: false, editLabel: '', - editDescription: '' + editDescription: '', + editCellType: m.cellType || 'number' })); this.changeDetectorRef.markForCheck(); }, diff --git a/apps/client/src/app/pages/cell-mapping/cell-mapping-page.html b/apps/client/src/app/pages/cell-mapping/cell-mapping-page.html index ffcd37f3e..ca8916f7a 100644 --- a/apps/client/src/app/pages/cell-mapping/cell-mapping-page.html +++ b/apps/client/src/app/pages/cell-mapping/cell-mapping-page.html @@ -37,9 +37,7 @@ Label @if (row.isEditing) { - - - + } @else { {{ row.label }} } @@ -51,9 +49,7 @@ Description @if (row.isEditing) { - - - + } @else { {{ row.description }} } @@ -70,6 +66,34 @@ + + + Type + + @if (row.isEditing) { + + @for (opt of cellTypeOptions; track opt.value) { + {{ opt.label }} + } + + } @else { + {{ row.cellType }} + } + + + + + + Ignored + + + + + + Actions @@ -108,6 +132,14 @@ Label + + Type + + @for (opt of cellTypeOptions; track opt.value) { + {{ opt.label }} + } + + diff --git a/apps/client/src/app/pages/cell-mapping/cell-mapping-page.scss b/apps/client/src/app/pages/cell-mapping/cell-mapping-page.scss index f5caf3783..5e681b560 100644 --- a/apps/client/src/app/pages/cell-mapping/cell-mapping-page.scss +++ b/apps/client/src/app/pages/cell-mapping/cell-mapping-page.scss @@ -3,7 +3,7 @@ } .container { - max-width: 960px; + max-width: 1400px; margin: 0 auto; padding: 1.5rem; } @@ -56,11 +56,74 @@ h2 { .mappings-table { width: 100%; margin-bottom: 1rem; + + // Let browser auto-size columns based on content + th.mat-mdc-header-cell, + td.mat-mdc-cell { + padding: 8px 12px; + vertical-align: middle; + } + + .mat-column-boxNumber { + white-space: nowrap; + font-family: 'Roboto Mono', monospace; + font-size: 0.8125rem; + color: rgba(0, 0, 0, 0.72); + width: 1%; // shrink-to-fit trick for auto layout + } + + .mat-column-label { + white-space: nowrap; + } + + .mat-column-description { + color: rgba(0, 0, 0, 0.6); + font-size: 0.8125rem; + // Allow wrapping for long 
descriptions + word-break: break-word; + } + + .mat-column-cellType { + width: 1%; + white-space: nowrap; + } + + .mat-column-isCustom { + width: 1%; + white-space: nowrap; + text-align: center; + } + + .mat-column-isIgnored { + width: 1%; + white-space: nowrap; + text-align: center; + } + + .mat-column-actions { + width: 1%; + white-space: nowrap; + } } -.inline-edit { +// Lightweight inline cell inputs (no mat-form-field wrapper) +.cell-input { width: 100%; - max-width: 200px; + min-width: 160px; + box-sizing: border-box; + padding: 6px 8px; + font-size: 0.8125rem; + font-family: inherit; + border: 1px solid rgba(0, 0, 0, 0.24); + border-radius: 4px; + background: transparent; + outline: none; + transition: border-color 0.15s ease; + + &:focus { + border-color: var(--primary-color, #1976d2); + box-shadow: 0 0 0 1px var(--primary-color, #1976d2); + } } .custom-badge { @@ -68,6 +131,42 @@ h2 { font-size: 20px; } +// Type badge styling +.type-badge { + display: inline-block; + font-size: 0.75rem; + font-weight: 500; + padding: 2px 8px; + border-radius: 10px; + text-transform: capitalize; + white-space: nowrap; +} + +.type-number { + background-color: #e3f2fd; + color: #1565c0; +} + +.type-string { + background-color: #f3e5f5; + color: #7b1fa2; +} + +.type-percentage { + background-color: #e8f5e9; + color: #2e7d32; +} + +.type-boolean { + background-color: #fff3e0; + color: #e65100; +} + +// Inline type selector (no mat-form-field wrapper) +.type-select { + min-width: 110px; +} + .add-row { display: flex; align-items: center; diff --git a/apps/client/src/app/pages/k-documents/k-document-detail/k-document-detail.component.ts b/apps/client/src/app/pages/k-documents/k-document-detail/k-document-detail.component.ts index c3bb3d3b7..39a621ddd 100644 --- a/apps/client/src/app/pages/k-documents/k-document-detail/k-document-detail.component.ts +++ b/apps/client/src/app/pages/k-documents/k-document-detail/k-document-detail.component.ts @@ -37,11 +37,26 @@ import { 
ActivatedRoute, Router, RouterModule } from '@angular/router'; export class KDocumentDetailComponent implements OnInit { public aggregations: K1AggregationResult[] = []; public boxColumns = ['boxNumber', 'value']; - public boxData: Array<{ boxNumber: string; value: number | null }> = []; + public boxData: Array<{ boxNumber: string; value: number | string | null }> = []; public error: string | null = null; public kDocument: any = null; public kDocumentId: string; + /** Box numbers that represent percentage values (Section J) */ + private static readonly PERCENTAGE_BOXES = new Set([ + 'J_PROFIT_BEGIN', 'J_PROFIT_END', + 'J_LOSS_BEGIN', 'J_LOSS_END', + 'J_CAPITAL_BEGIN', 'J_CAPITAL_END' + ]); + + public isPercentage(boxNumber: string): boolean { + return KDocumentDetailComponent.PERCENTAGE_BOXES.has(boxNumber); + } + + public isNumeric(value: any): boolean { + return typeof value === 'number'; + } + public constructor( private readonly activatedRoute: ActivatedRoute, private readonly changeDetectorRef: ChangeDetectorRef, @@ -77,7 +92,7 @@ export class KDocumentDetailComponent implements OnInit { this.boxData = Object.entries(data) .map(([boxNumber, value]) => ({ boxNumber, - value: typeof value === 'number' ? value : null + value: value ?? null })) .sort((a, b) => this.compareBoxNumbers(a.boxNumber, b.boxNumber)); } diff --git a/apps/client/src/app/pages/k-documents/k-document-detail/k-document-detail.html b/apps/client/src/app/pages/k-documents/k-document-detail/k-document-detail.html index 6ade119bc..627db29ba 100644 --- a/apps/client/src/app/pages/k-documents/k-document-detail/k-document-detail.html +++ b/apps/client/src/app/pages/k-documents/k-document-detail/k-document-detail.html @@ -52,14 +52,14 @@
- {{ agg.value | currency:'USD':'symbol':'1.2-2' }} + {{ agg.value | currency:'USD':'symbol':'1.2-6' }}
@if (agg.breakdown && agg.breakdown.length > 0) {
@for (item of agg.breakdown; track item.boxNumber) {
Box {{ item.boxNumber }}: - {{ item.value | currency:'USD':'symbol':'1.2-2' }} + {{ item.value | currency:'USD':'symbol':'1.2-6' }}
}
@@ -82,10 +82,14 @@ Value - @if (row.value !== null) { - {{ row.value | currency:'USD':'symbol':'1.2-2' }} - } @else { + @if (row.value === null || row.value === '') { + } @else if (isPercentage(row.boxNumber)) { + {{ row.value | number:'1.2-6' }}% + } @else if (isNumeric(row.value)) { + {{ row.value | currency:'USD':'symbol':'1.2-6' }} + } @else { + {{ row.value }} } diff --git a/apps/client/src/app/pages/k-documents/k-documents-page.component.ts b/apps/client/src/app/pages/k-documents/k-documents-page.component.ts index 3c3a5d76d..62f30a6b4 100644 --- a/apps/client/src/app/pages/k-documents/k-documents-page.component.ts +++ b/apps/client/src/app/pages/k-documents/k-documents-page.component.ts @@ -129,7 +129,7 @@ export class KDocumentsPageComponent implements OnInit { public onFormSubmit(event: { filingStatus: string; - data: Record; + data: Record; }): void { if (this.editingDoc) { this.familyOfficeDataService diff --git a/apps/client/src/app/pages/k1-import/k1-verification/k1-verification.component.ts b/apps/client/src/app/pages/k1-import/k1-verification/k1-verification.component.ts index 66935e2ba..30ec0bca9 100644 --- a/apps/client/src/app/pages/k1-import/k1-verification/k1-verification.component.ts +++ b/apps/client/src/app/pages/k1-import/k1-verification/k1-verification.component.ts @@ -37,6 +37,8 @@ interface EditableField extends K1ExtractedField { isEditing: boolean; editValue: string; editLabel: string; + cellType: string; + editCellType: string; } interface EditableUnmappedItem extends K1UnmappedItem { @@ -75,12 +77,20 @@ export class K1VerificationComponent implements OnInit { public taxYear: number; public unmappedItems: EditableUnmappedItem[] = []; + public cellTypeOptions = [ + { value: 'number', label: 'Number ($)' }, + { value: 'string', label: 'String' }, + { value: 'percentage', label: 'Percentage (%)' }, + { value: 'boolean', label: 'Boolean' } + ]; + // Column definitions for the fields table public displayedColumns = [ 'boxNumber', 'label', 
'rawValue', 'numericValue', + 'cellType', 'confidence', 'reviewed', 'actions' @@ -132,6 +142,7 @@ export class K1VerificationComponent implements OnInit { field.isEditing = true; field.editValue = field.rawValue; field.editLabel = field.customLabel || field.label; + field.editCellType = field.cellType; this.changeDetectorRef.markForCheck(); } @@ -142,17 +153,27 @@ export class K1VerificationComponent implements OnInit { field.rawValue = field.editValue; field.customLabel = field.editLabel !== field.label ? field.editLabel : null; + field.cellType = field.editCellType; field.isUserEdited = true; field.isReviewed = true; field.isEditing = false; - // Try to parse numeric value - const cleaned = field.editValue - .replace(/[$,]/g, '') - .replace(/\(([^)]+)\)/, '-$1') - .trim(); - const parsed = parseFloat(cleaned); - field.numericValue = isNaN(parsed) ? null : parsed; + // Parse value based on cell type + if (field.cellType === 'boolean') { + const lower = field.editValue.toLowerCase().trim(); + field.numericValue = null; + field.rawValue = (lower === 'true' || lower === 'yes' || lower === '1' || lower === 'x') ? 'true' : 'false'; + } else if (field.cellType === 'string') { + field.numericValue = null; + } else { + // number or percentage + const cleaned = field.editValue + .replace(/[$,%]/g, '') + .replace(/\(([^)]+)\)/, '-$1') + .trim(); + const parsed = parseFloat(cleaned); + field.numericValue = isNaN(parsed) ? 
null : parsed; + } this.recalculateAggregations(); this.checkConfirmability(); @@ -219,6 +240,7 @@ export class K1VerificationComponent implements OnInit { customLabel: f.customLabel, rawValue: f.rawValue, numericValue: f.numericValue, + cellType: f.cellType, confidence: f.confidence, confidenceLevel: f.confidenceLevel, isUserEdited: f.isUserEdited, @@ -300,7 +322,9 @@ export class K1VerificationComponent implements OnInit { ...f, isEditing: false, editValue: f.rawValue, - editLabel: f.customLabel || f.label + editLabel: f.customLabel || f.label, + cellType: (f as any).cellType || 'number', + editCellType: (f as any).cellType || 'number' }) ); diff --git a/apps/client/src/app/pages/k1-import/k1-verification/k1-verification.html b/apps/client/src/app/pages/k1-import/k1-verification/k1-verification.html index 8efa85729..af5ca6d5d 100644 --- a/apps/client/src/app/pages/k1-import/k1-verification/k1-verification.html +++ b/apps/client/src/app/pages/k1-import/k1-verification/k1-verification.html @@ -68,13 +68,31 @@ Parsed @if (field.numericValue !== null && field.numericValue !== undefined) { - {{ field.numericValue | number:'1.2-2' }} + {{ field.numericValue | number:'1.2-6' }} + } @else if (field.rawValue) { + {{ field.rawValue }} } @else { }
+ + + Type + + @if (field.isEditing) { + + @for (opt of cellTypeOptions; track opt.value) { + {{ opt.label }} + } + + } @else { + {{ field.cellType }} + } + + + Confidence @@ -152,7 +170,7 @@ {{ item.rawValue }} @if (item.numericValue !== null) { - ({{ item.numericValue | number:'1.2-2' }}) + ({{ item.numericValue | number:'1.2-6' }}) } Page {{ item.pageNumber }} @@ -207,7 +225,7 @@
- {{ agg.computedValue | number:'1.2-2' }} + {{ agg.computedValue | number:'1.2-6' }}
} diff --git a/apps/client/src/app/pages/k1-import/k1-verification/k1-verification.scss b/apps/client/src/app/pages/k1-import/k1-verification/k1-verification.scss index 8b05d2e1c..f643dd7c6 100644 --- a/apps/client/src/app/pages/k1-import/k1-verification/k1-verification.scss +++ b/apps/client/src/app/pages/k1-import/k1-verification/k1-verification.scss @@ -2,17 +2,61 @@ display: block; } +// Column width hints – give Label the most room, keep Box and actions compact +.mat-column-boxNumber { + width: 60px; + white-space: nowrap; +} + +.mat-column-label { + min-width: 200px; + width: 28%; +} + +.mat-column-rawValue { + min-width: 140px; + width: 18%; +} + +.mat-column-numericValue { + white-space: nowrap; +} + +.mat-column-cellType { + width: 1%; + white-space: nowrap; +} + +.mat-column-confidence { + width: 90px; + white-space: nowrap; +} + +.mat-column-reviewed { + width: 60px; +} + +.mat-column-actions { + width: 80px; + white-space: nowrap; +} + .fields-section { .table-responsive { overflow-x: auto; } .compact-field { - width: 160px; + width: 100%; + min-width: 120px; .mat-mdc-form-field-infix { padding: 4px 0; } + + input.mat-mdc-input-element { + font-size: 0.8125rem; + } } .user-edited { @@ -75,7 +119,11 @@ } .compact-field { - width: 140px; + width: 160px; + + input.mat-mdc-input-element { + font-size: 0.8125rem; + } } } @@ -114,3 +162,38 @@ .actions { padding-bottom: 2rem; } + +// Type badge styling +.type-badge { + display: inline-block; + font-size: 0.7rem; + font-weight: 500; + padding: 2px 8px; + border-radius: 10px; + text-transform: capitalize; + white-space: nowrap; +} + +.type-number { + background-color: #e3f2fd; + color: #1565c0; +} + +.type-string { + background-color: #f3e5f5; + color: #7b1fa2; +} + +.type-percentage { + background-color: #e8f5e9; + color: #2e7d32; +} + +.type-boolean { + background-color: #fff3e0; + color: #e65100; +} + +.type-select { + min-width: 110px; +} diff --git 
a/apps/client/src/app/services/family-office-data.service.ts b/apps/client/src/app/services/family-office-data.service.ts index 32e5f79e4..76a149e62 100644 --- a/apps/client/src/app/services/family-office-data.service.ts +++ b/apps/client/src/app/services/family-office-data.service.ts @@ -306,7 +306,7 @@ export class FamilyOfficeDataService { type: string; taxYear: number; filingStatus?: string; - data: Record; + data: Record; }): Observable { return this.http.post('/api/v1/k-document', data); } @@ -332,7 +332,7 @@ export class FamilyOfficeDataService { public updateKDocument( kDocumentId: string, - data: { filingStatus?: string; data?: Record } + data: { filingStatus?: string; data?: Record } ): Observable { return this.http.put(`/api/v1/k-document/${kDocumentId}`, data); } diff --git a/apps/client/src/app/services/k1-import-data.service.ts b/apps/client/src/app/services/k1-import-data.service.ts index 4682f282b..40667711b 100644 --- a/apps/client/src/app/services/k1-import-data.service.ts +++ b/apps/client/src/app/services/k1-import-data.service.ts @@ -124,6 +124,7 @@ export class K1ImportDataService { boxNumber: string; label: string; description?: string; + cellType?: string; isCustom: boolean; }>; }): Observable { @@ -142,6 +143,17 @@ export class K1ImportDataService { }); } + /** + * Toggle the isIgnored flag for a cell mapping. + * PATCH /api/v1/cell-mapping/toggle-ignored + */ + public toggleFieldIgnored(data: { + partnershipId: string; + boxNumber: string; + }): Observable { + return this.http.patch('/api/v1/cell-mapping/toggle-ignored', data); + } + // ── Aggregation Rule Endpoints ─────────────────────────────────── /** diff --git a/k1-positions-dump.txt b/k1-positions-dump.txt new file mode 100644 index 000000000..aea776dbd --- /dev/null +++ b/k1-positions-dump.txt @@ -0,0 +1,274 @@ +Pages: 1 + +=== PAGE 1 === + +DATA | x= 524.2 | y= 758.7 | font=monospace | "651123" +TMPL | x= 511 | y= 748.4 | font=serif | "OMB No. 
1545-0123" +DATA | x= 324.3 | y= 746.2 | font=sans-serif | "X" +TMPL | x= 336 | y= 746 | font=serif | "Final K-1" +TMPL | x= 415.2 | y= 746 | font=serif | "Amended K-1" +TMPL | x= 36 | y= 735.8 | font=serif | "Schedule K-1" +TMPL | x= 319.1 | y= 734.9 | font=serif | "Part III" +TMPL | x= 360 | y= 735.4 | font=serif | "PartnerΓÇÖs Share of Current Year Income," +DATA | x= 236.8 | y= 727.7 | font=sans-serif | "20" +DATA | x= 262.1 | y= 727.7 | font=sans-serif | "25" +TMPL | x= 36 | y= 723.8 | font=serif | "(Form 1065)" +TMPL | x= 360 | y= 723.4 | font=serif | "Deductions, Credits, and Other Items" +TMPL | x= 36 | y= 713.5 | font=serif | "Department of the Treasury" +TMPL | x= 318.5 | y= 712 | font=serif | "1" +TMPL | x= 334.2 | y= 712 | font=serif | "Ordinary business income (loss)" +TMPL | x= 453.3 | y= 712 | font=serif | "14" +TMPL | x= 471 | y= 712 | font=serif | "Self-employment earnings (loss)" +TMPL | x= 36 | y= 705.5 | font=serif | "Internal Revenue Service" +TMPL | x= 193.2 | y= 703 | font=serif | "For calendar year 2025, or tax year" +TMPL | x= 71 | y= 686 | font=serif | "beginning" +TMPL | x= 129.6 | y= 687 | font=serif | "/" +DATA | x= 151.2 | y= 686.8 | font=sans-serif | "/" +DATA | x= 159 | y= 686.8 | font=sans-serif | "2025" +TMPL | x= 195.6 | y= 686 | font=serif | "ending" +TMPL | x= 244.8 | y= 687 | font=serif | "/" +TMPL | x= 266.4 | y= 687 | font=serif | "/" +TMPL | x= 318.5 | y= 688 | font=serif | "2" +TMPL | x= 333.2 | y= 688 | font=serif | "Net rental real estate income (loss)" +TMPL | x= 36 | y= 669.6 | font=serif | "PartnerΓÇÖs Share of Income, Deductions," +TMPL | x= 318.5 | y= 664 | font=serif | "3" +TMPL | x= 334.2 | y= 664.1 | font=serif | "Other net rental income (loss)" +TMPL | x= 453.3 | y= 664 | font=serif | "15" +TMPL | x= 471 | y= 664 | font=serif | "Credits" +TMPL | x= 36 | y= 656.6 | font=serif | "Credits, etc." +TMPL | x= 215.2 | y= 656.8 | font=serif | "See separate instructions." 
+TMPL | x= 48.4 | y= 638.9 | font=serif | "Part I" +TMPL | x= 86.4 | y= 638.9 | font=serif | "Information About the Partnership" +TMPL | x= 316.4 | y= 640 | font=serif | "4a" +TMPL | x= 334.2 | y= 640 | font=serif | "Guaranteed payments for services" +TMPL | x= 40.8 | y= 626 | font=serif | "A" +TMPL | x= 316.3 | y= 616 | font=serif | "4b" +TMPL | x= 334.2 | y= 616 | font=serif | "Guaranteed payments for capital" +TMPL | x= 453.3 | y= 616 | font=serif | "16" +TMPL | x= 472 | y= 616 | font=serif | "Schedule K-3 is attached if" +TMPL | x= 472 | y= 606 | font=serif | "checked" +TMPL | x= 504 | y= 606 | font=serif | "." +TMPL | x= 516 | y= 606 | font=serif | "." +TMPL | x= 528 | y= 606 | font=serif | "." +TMPL | x= 540 | y= 606 | font=serif | "." +TMPL | x= 552 | y= 606 | font=serif | "." +TMPL | x= 40.7 | y= 602 | font=serif | "B" +DATA | x= 563.3 | y= 603.8 | font=sans-serif | "X" +TMPL | x= 316.4 | y= 592 | font=serif | "4c" +TMPL | x= 334.2 | y= 592 | font=serif | "Total guaranteed payments" +TMPL | x= 453.3 | y= 592 | font=serif | "17" +TMPL | x= 471 | y= 592 | font=serif | "Alternative minimum tax (AMT) items" +TMPL | x= 318.5 | y= 568 | font=serif | "5" +TMPL | x= 334.2 | y= 568 | font=serif | "Interest income" +TMPL | x= 40.6 | y= 554.5 | font=serif | "C" +TMPL | x= 58.4 | y= 554.5 | font=serif | "IRS center where partnership filed return:" +DATA | x= 185.4 | y= 553.7 | font=sans-serif | "E-FILE" +TMPL | x= 40.6 | y= 543 | font=serif | "D" +TMPL | x= 72 | y= 543 | font=serif | "Check if this is a publicly traded partnership (PTP)" +TMPL | x= 316.4 | y= 544 | font=serif | "6a" +TMPL | x= 334.2 | y= 544 | font=serif | "Ordinary dividends" +TMPL | x= 46.9 | y= 530.9 | font=serif | "Part II" +TMPL | x= 86.4 | y= 530.9 | font=serif | "Information About the Partner" +TMPL | x= 40.9 | y= 518 | font=serif | "E" +TMPL | x= 316.3 | y= 520 | font=serif | "6b" +TMPL | x= 334.2 | y= 520 | font=serif | "Qualified dividends" +TMPL | x= 453.3 | y= 520 | font=serif | "18" +TMPL 
| x= 471 | y= 520 | font=serif | "Tax-exempt income and" +TMPL | x= 471 | y= 511.6 | font=serif | "nondeductible expenses" +TMPL | x= 41.1 | y= 494 | font=serif | "F" +TMPL | x= 316.4 | y= 496 | font=serif | "6c" +TMPL | x= 334.2 | y= 496 | font=serif | "Dividend equivalents" +TMPL | x= 318.5 | y= 472 | font=serif | "7" +TMPL | x= 334.2 | y= 472 | font=serif | "Royalties" +TMPL | x= 40.5 | y= 447 | font=serif | "G" +TMPL | x= 72 | y= 446.6 | font=serif | "General partner or LLC" +DATA | x= 180.3 | y= 446.6 | font=sans-serif | "X" +TMPL | x= 194.4 | y= 446.6 | font=serif | "Limited partner or other LLC" +TMPL | x= 318.5 | y= 448 | font=serif | "8" +TMPL | x= 334.2 | y= 448 | font=serif | "Net short-term capital gain (loss)" +TMPL | x= 72 | y= 438.2 | font=serif | "member-manager" +TMPL | x= 194.4 | y= 438.2 | font=serif | "member" +TMPL | x= 453.3 | y= 436 | font=serif | "19" +TMPL | x= 471 | y= 436 | font=serif | "Distributions" +TMPL | x= 38.7 | y= 423 | font=serif | "H1" +DATA | x= 58 | y= 422.9 | font=sans-serif | "X" +TMPL | x= 72 | y= 422 | font=serif | "Domestic partner" +TMPL | x= 194.4 | y= 422 | font=serif | "Foreign partner" +TMPL | x= 316.4 | y= 424 | font=serif | "9a" +TMPL | x= 334.2 | y= 424 | font=serif | "Net long-term capital gain (loss)" +DATA | x= 455.2 | y= 423.2 | font=sans-serif | "A" +DATA | x= 530.6 | y= 422 | font=sans-serif | "4,493,757" +TMPL | x= 38.7 | y= 411 | font=serif | "H2" +DATA | x= 57.9 | y= 410.5 | font=sans-serif | "X" +TMPL | x= 72 | y= 410 | font=serif | "If the partner is a disregarded entity (DE), enter the partnerΓÇÖs:" +TMPL | x= 57.6 | y= 398.1 | font=serif | "TIN" +TMPL | x= 144 | y= 398.1 | font=serif | "Name" +TMPL | x= 316.3 | y= 400 | font=serif | "9b" +TMPL | x= 334.2 | y= 400 | font=serif | "Collectibles (28%) gain (loss)" +TMPL | x= 40.2 | y= 386 | font=serif | "I1" +TMPL | x= 57.6 | y= 387 | font=serif | "What type of entity is this partner?" 
+TMPL | x= 453.3 | y= 388 | font=serif | "20" +TMPL | x= 471 | y= 388 | font=serif | "Other information" +TMPL | x= 40.2 | y= 374 | font=serif | "I2" +TMPL | x= 57.6 | y= 374 | font=serif | "If this partner is a retirement plan (IRA/SEP/Keogh/etc.), check here" +TMPL | x= 276 | y= 374 | font=serif | "." +TMPL | x= 316.4 | y= 376 | font=serif | "9c" +TMPL | x= 334.2 | y= 376 | font=serif | "Unrecaptured section 1250 gain" +TMPL | x= 41.3 | y= 362 | font=serif | "J" +TMPL | x= 57.6 | y= 362 | font=serif | "PartnerΓÇÖs share of profit, loss, and capital (see instructions):" +DATA | x= 455.2 | y= 362.8 | font=sans-serif | "A" +DATA | x= 525.6 | y= 362.8 | font=sans-serif | "SEE STMT" +TMPL | x= 110 | y= 352.5 | font=serif | "Beginning" +TMPL | x= 229.8 | y= 352.5 | font=serif | "Ending" +TMPL | x= 316.5 | y= 352 | font=serif | "10" +TMPL | x= 334.2 | y= 352 | font=serif | "Net section 1231 gain (loss)" +TMPL | x= 57.6 | y= 338 | font=serif | "Profit" +DATA | x= 139.1 | y= 339.1 | font=sans-serif | "3.032900" +TMPL | x= 183.7 | y= 338 | font=serif | "%" +DATA | x= 250.1 | y= 339.1 | font=sans-serif | "0.000000" +TMPL | x= 291.7 | y= 338 | font=serif | "%" +DATA | x= 455.2 | y= 338.5 | font=sans-serif | "B" +DATA | x= 525.6 | y= 339 | font=sans-serif | "SEE STMT" +TMPL | x= 57.6 | y= 326 | font=serif | "Loss" +DATA | x= 139.1 | y= 326.1 | font=sans-serif | "3.032900" +TMPL | x= 183.7 | y= 326 | font=serif | "%" +DATA | x= 250.1 | y= 326.1 | font=sans-serif | "0.000000" +TMPL | x= 291.7 | y= 326 | font=serif | "%" +TMPL | x= 316.5 | y= 328 | font=serif | "11" +TMPL | x= 334.2 | y= 328 | font=serif | "Other income (loss)" +TMPL | x= 57.6 | y= 314.5 | font=serif | "Capital" +DATA | x= 139.1 | y= 314.2 | font=sans-serif | "3.032900" +TMPL | x= 183.7 | y= 314 | font=serif | "%" +DATA | x= 250.1 | y= 314.2 | font=sans-serif | "0.000000" +TMPL | x= 291.7 | y= 314 | font=serif | "%" +DATA | x= 314.2 | y= 314.4 | font=sans-serif | "ZZ*" +DATA | x= 403.9 | y= 314.4 | 
font=sans-serif | "(409,615)" +DATA | x= 455.2 | y= 315.6 | font=sans-serif | "V" +DATA | x= 525.6 | y= 314.6 | font=sans-serif | "SEE STMT" +TMPL | x= 57.6 | y= 302 | font=serif | "Check if decrease is due to:" +TMPL | x= 72 | y= 290 | font=serif | "Sale" +TMPL | x= 89.9 | y= 290 | font=serif | "or" +TMPL | x= 115.2 | y= 290 | font=serif | "Exchange of partnership interest. See instructions." +DATA | x= 456.4 | y= 291.3 | font=sans-serif | "*" +DATA | x= 525.6 | y= 290.3 | font=sans-serif | "SEE STMT" +TMPL | x= 38.7 | y= 278 | font=serif | "K1" +TMPL | x= 57.6 | y= 278 | font=serif | "PartnerΓÇÖs share of liabilities:" +TMPL | x= 316.5 | y= 280 | font=serif | "12" +TMPL | x= 334.2 | y= 280 | font=serif | "Section 179 deduction" +TMPL | x= 453.3 | y= 280 | font=serif | "21" +TMPL | x= 471 | y= 280 | font=serif | "Foreign taxes paid or accrued" +TMPL | x= 160.6 | y= 268.5 | font=serif | "Beginning" +TMPL | x= 251.2 | y= 268.5 | font=serif | "Ending" +DATA | x= 456.4 | y= 267.1 | font=sans-serif | "*" +DATA | x= 555.6 | y= 266.1 | font=sans-serif | "196" +TMPL | x= 57.6 | y= 254 | font=serif | "Nonrecourse" +TMPL | x= 108 | y= 254 | font=serif | "." +TMPL | x= 120 | y= 254 | font=serif | "." +TMPL | x= 134.9 | y= 254 | font=serif | "$" +DATA | x= 180.8 | y= 254.5 | font=sans-serif | "498,211" +TMPL | x= 221.3 | y= 254 | font=serif | "$" +TMPL | x= 316.5 | y= 256 | font=serif | "13" +TMPL | x= 334.2 | y= 256 | font=serif | "Other deductions" +TMPL | x= 57.6 | y= 238.4 | font=serif | "Qualified nonrecourse" +TMPL | x= 57.6 | y= 230 | font=serif | "financing" +TMPL | x= 96 | y= 230 | font=serif | "." +TMPL | x= 108 | y= 230 | font=serif | "." +TMPL | x= 120 | y= 230 | font=serif | "." +TMPL | x= 134.9 | y= 230 | font=serif | "$" +TMPL | x= 221.3 | y= 230 | font=serif | "$" +TMPL | x= 57.6 | y= 218.5 | font=serif | "Recourse" +TMPL | x= 96 | y= 218.5 | font=serif | "." +TMPL | x= 108 | y= 218.5 | font=serif | "." +TMPL | x= 120 | y= 218.5 | font=serif | "." 
+TMPL | x= 134.9 | y= 218.5 | font=serif | "$" +TMPL | x= 221.3 | y= 218.5 | font=serif | "$" +TMPL | x= 38.7 | y= 207 | font=serif | "K2" +TMPL | x= 57.6 | y= 207 | font=serif | "Check this box if item K1 includes liability amounts from lower-tier partnerships" +DATA | x= 294.9 | y= 205.8 | font=sans-serif | "X" +TMPL | x= 38.7 | y= 195 | font=serif | "K3" +TMPL | x= 57.6 | y= 195 | font=serif | "Check if any of the above liability is subject to guarantees or other" +TMPL | x= 57.6 | y= 186 | font=serif | "payment obligations by the partner. See instructions" +TMPL | x= 228 | y= 186 | font=serif | "." +TMPL | x= 240 | y= 186 | font=serif | "." +TMPL | x= 252 | y= 186 | font=serif | "." +TMPL | x= 264 | y= 186 | font=serif | "." +TMPL | x= 276 | y= 186 | font=serif | "." +TMPL | x= 316.2 | y= 183.1 | font=serif | "22" +TMPL | x= 345.6 | y= 183 | font=serif | "More than one activity for at-risk purposes*" +TMPL | x= 41.1 | y= 170 | font=serif | "L" +TMPL | x= 122.3 | y= 170.5 | font=serif | "PartnerΓÇÖs Capital Account Analysis" +TMPL | x= 316.2 | y= 171.1 | font=serif | "23" +TMPL | x= 345.6 | y= 171 | font=serif | "More than one activity for passive activity purposes*" +TMPL | x= 57.6 | y= 158 | font=serif | "Beginning capital account" +TMPL | x= 156 | y= 158 | font=serif | "." +TMPL | x= 168 | y= 158 | font=serif | "." +TMPL | x= 180 | y= 158 | font=serif | "." +TMPL | x= 189.5 | y= 157.6 | font=serif | "$" +DATA | x= 257.8 | y= 157.4 | font=sans-serif | "4,903,568" +TMPL | x= 316.6 | y= 158.6 | font=serif | "*See attached statement for additional information." +TMPL | x= 57.6 | y= 146 | font=serif | "Capital contributed during the year" +TMPL | x= 168 | y= 146 | font=serif | "." +TMPL | x= 180 | y= 146 | font=serif | "." +TMPL | x= 189.5 | y= 145.6 | font=serif | "$" +TMPL | x= 57.6 | y= 134 | font=serif | "Current year net income (loss)" +TMPL | x= 156 | y= 134 | font=serif | "." +TMPL | x= 168 | y= 134 | font=serif | "." 
+TMPL | x= 180 | y= 134 | font=serif | "." +TMPL | x= 189.5 | y= 133.6 | font=serif | "$" +DATA | x= 259.3 | y= 133.7 | font=sans-serif | "(409,811)" +TMPL | x= 57.6 | y= 122 | font=serif | "Other increase (decrease) (attach explanation)" +TMPL | x= 189.5 | y= 121.6 | font=serif | "$" +TMPL | x= 57.6 | y= 110 | font=serif | "Withdrawals and distributions" +TMPL | x= 156 | y= 110 | font=serif | "." +TMPL | x= 168 | y= 110 | font=serif | "." +TMPL | x= 180 | y= 110 | font=serif | "." +TMPL | x= 189.5 | y= 109.6 | font=serif | "$" +TMPL | x= 195.4 | y= 110.5 | font=serif | "(" +DATA | x= 257.8 | y= 109.4 | font=sans-serif | "4,493,757" +TMPL | x= 300.4 | y= 110.5 | font=serif | ")" +TMPL | x= 57.6 | y= 99 | font=serif | "Ending capital account" +TMPL | x= 144 | y= 99 | font=serif | "." +TMPL | x= 156 | y= 99 | font=serif | "." +TMPL | x= 168 | y= 99 | font=serif | "." +TMPL | x= 180 | y= 99 | font=serif | "." +TMPL | x= 189.5 | y= 97.6 | font=serif | "$" +TMPL | x= 40 | y= 86 | font=serif | "M" +TMPL | x= 58.4 | y= 86 | font=serif | "Did the partner contribute property with a built-in gain (loss)?" +TMPL | x= 72 | y= 74 | font=serif | "Yes" +DATA | x= 101.2 | y= 74.2 | font=sans-serif | "X" +TMPL | x= 115.2 | y= 74 | font=serif | "No" +TMPL | x= 136.8 | y= 74 | font=serif | "If ΓÇ£Yes,ΓÇ¥ attach statement. See instructions." +TMPL | x= 40.6 | y= 62 | font=serif | "N" +TMPL | x= 70.1 | y= 62 | font=serif | "PartnerΓÇÖs Share of Net Unrecognized Section 704(c) Gain or (Loss)" +TMPL | x= 323 | y= 61.3 | font=serif | "For IRS Use Only" +TMPL | x= 57.6 | y= 51 | font=serif | "Beginning" +TMPL | x= 96 | y= 51 | font=serif | "." +TMPL | x= 108 | y= 51 | font=serif | "." +TMPL | x= 120 | y= 51 | font=serif | "." +TMPL | x= 132 | y= 51 | font=serif | "." +TMPL | x= 144 | y= 51 | font=serif | "." +TMPL | x= 156 | y= 51 | font=serif | "." +TMPL | x= 168 | y= 51 | font=serif | "." +TMPL | x= 180 | y= 51 | font=serif | "." 
+TMPL | x= 189.1 | y= 51 | font=serif | "$" +DATA | x= 271.5 | y= 49.7 | font=sans-serif | "(5,373)" +TMPL | x= 57.6 | y= 39 | font=serif | "Ending" +TMPL | x= 84 | y= 39 | font=serif | "." +TMPL | x= 96 | y= 39 | font=serif | "." +TMPL | x= 108 | y= 39 | font=serif | "." +TMPL | x= 120 | y= 39 | font=serif | "." +TMPL | x= 132 | y= 39 | font=serif | "." +TMPL | x= 144 | y= 39 | font=serif | "." +TMPL | x= 156 | y= 39 | font=serif | "." +TMPL | x= 168 | y= 39 | font=serif | "." +TMPL | x= 180 | y= 39 | font=serif | "." +TMPL | x= 189.1 | y= 39 | font=serif | "$" +TMPL | x= 36 | y= 26 | font=serif | "For Paperwork Reduction Act Notice, see the Instructions for Form 1065." +TMPL | x= 283.9 | y= 26 | font=serif | "www.irs.gov/Form1065" +TMPL | x= 362.7 | y= 26 | font=serif | "Cat. No. 11394R" +TMPL | x= 419.6 | y= 26 | font=serif | "Schedule K-1 (Form 1065) 2025" +TMPL | x= 524.9 | y= 26 | font=serif | "Created 2/26/25" +DATA | x= 285.6 | y= 5.5 | font=sans-serif | "Page 2 of 31" +DATA | x= 92.1 | y= 2.8 | font=sans-serif | "(409,811)" + +Done. 
diff --git a/libs/ui/src/lib/k-document-form/k-document-form.component.ts b/libs/ui/src/lib/k-document-form/k-document-form.component.ts index 6b8a9816c..e784c6cf9 100644 --- a/libs/ui/src/lib/k-document-form/k-document-form.component.ts +++ b/libs/ui/src/lib/k-document-form/k-document-form.component.ts @@ -1,5 +1,3 @@ -import type { K1Data } from '@ghostfolio/common/interfaces'; - import { CommonModule } from '@angular/common'; import { ChangeDetectionStrategy, @@ -9,123 +7,179 @@ import { OnChanges, Output } from '@angular/core'; -import { - FormControl, - FormGroup, - ReactiveFormsModule, - Validators -} from '@angular/forms'; +import { FormsModule } from '@angular/forms'; import { MatButtonModule } from '@angular/material/button'; +import { MatCheckboxModule } from '@angular/material/checkbox'; import { MatFormFieldModule } from '@angular/material/form-field'; +import { MatIconModule } from '@angular/material/icon'; import { MatInputModule } from '@angular/material/input'; import { MatSelectModule } from '@angular/material/select'; +import { MatTooltipModule } from '@angular/material/tooltip'; + +// ── Field types ────────────────────────────────────────────────────────── +type FieldType = 'currency' | 'percent' | 'text' | 'checkbox'; -const K1_FIELD_CONFIG: { - key: keyof K1Data; +interface K1FieldDef { + boxNumber: string; label: string; - section: string; -}[] = [ - { - key: 'ordinaryIncome', - label: 'Ordinary Income (Box 1)', - section: 'Income' - }, - { - key: 'netRentalIncome', - label: 'Net Rental Income (Box 2)', - section: 'Income' - }, - { - key: 'otherRentalIncome', - label: 'Other Rental Income (Box 3)', - section: 'Income' - }, - { - key: 'guaranteedPayments', - label: 'Guaranteed Payments (Box 4)', - section: 'Income' - }, - { - key: 'interestIncome', - label: 'Interest Income (Box 5)', - section: 'Income' - }, - { key: 'dividends', label: 'Dividends (Box 6a)', section: 'Income' }, - { - key: 'qualifiedDividends', - label: 'Qualified Dividends 
(Box 6b)', - section: 'Income' - }, - { key: 'royalties', label: 'Royalties (Box 7)', section: 'Income' }, - { - key: 'capitalGainLossShortTerm', - label: 'Short-Term Capital Gain/Loss (Box 8)', - section: 'Capital' - }, + type: FieldType; +} + +interface K1Section { + title: string; + description?: string; + fields: K1FieldDef[]; + collapsed?: boolean; +} + +// ── Section definitions matching the real IRS Schedule K-1 ─────────────── +const K1_SECTIONS: K1Section[] = [ { - key: 'capitalGainLossLongTerm', - label: 'Long-Term Capital Gain/Loss (Box 9a)', - section: 'Capital' + title: 'Header / Metadata', + fields: [ + { boxNumber: 'K1_DOCUMENT_ID', label: 'K-1 Document ID', type: 'text' }, + { boxNumber: 'TAX_YEAR', label: 'Tax Year', type: 'text' }, + { boxNumber: 'FINAL_K1', label: 'Final K-1', type: 'checkbox' }, + { boxNumber: 'AMENDED_K1', label: 'Amended K-1', type: 'checkbox' } + ], + collapsed: true }, { - key: 'unrecaptured1250Gain', - label: 'Unrecaptured Section 1250 Gain (Box 9b)', - section: 'Capital' + title: 'Part I — Partnership Information', + fields: [ + { boxNumber: 'A', label: "A — Partnership's EIN", type: 'text' }, + { boxNumber: 'B', label: "B — Partnership's name / address", type: 'text' }, + { boxNumber: 'C', label: 'C — IRS center where return filed', type: 'text' }, + { boxNumber: 'D', label: 'D — Publicly traded partnership', type: 'checkbox' } + ], + collapsed: true }, { - key: 'section1231GainLoss', - label: 'Section 1231 Gain/Loss (Box 10)', - section: 'Capital' + title: 'Part II — Partner Information', + fields: [ + { boxNumber: 'E', label: "E — Partner's identifying number", type: 'text' }, + { boxNumber: 'F', label: "F — Partner's name / address", type: 'text' }, + { boxNumber: 'G_GENERAL', label: 'G — General partner / LLC member-manager', type: 'checkbox' }, + { boxNumber: 'G_LIMITED', label: 'G — Limited partner / other LLC member', type: 'checkbox' }, + { boxNumber: 'H1_DOMESTIC', label: 'H1 — Domestic partner', type: 'checkbox' 
}, + { boxNumber: 'H1_FOREIGN', label: 'H1 — Foreign partner', type: 'checkbox' }, + { boxNumber: 'H2', label: 'H2 — Disregarded entity', type: 'checkbox' }, + { boxNumber: 'H2_TIN', label: 'H2 — DE taxpayer ID', type: 'text' }, + { boxNumber: 'I1', label: 'I1 — Type of entity', type: 'text' }, + { boxNumber: 'I2', label: 'I2 — IRA / SEP / Keogh', type: 'checkbox' } + ], + collapsed: true }, - { key: 'otherIncome', label: 'Other Income (Box 11)', section: 'Capital' }, { - key: 'section179Deduction', - label: 'Section 179 Deduction (Box 12)', - section: 'Deductions' + title: "Section J — Partner's Share of Profit, Loss & Capital", + fields: [ + { boxNumber: 'J_PROFIT_BEGIN', label: 'Profit — Beginning', type: 'percent' }, + { boxNumber: 'J_PROFIT_END', label: 'Profit — Ending', type: 'percent' }, + { boxNumber: 'J_LOSS_BEGIN', label: 'Loss — Beginning', type: 'percent' }, + { boxNumber: 'J_LOSS_END', label: 'Loss — Ending', type: 'percent' }, + { boxNumber: 'J_CAPITAL_BEGIN', label: 'Capital — Beginning', type: 'percent' }, + { boxNumber: 'J_CAPITAL_END', label: 'Capital — Ending', type: 'percent' }, + { boxNumber: 'J_SALE', label: 'Decrease due to sale', type: 'checkbox' }, + { boxNumber: 'J_EXCHANGE', label: 'Exchange of partnership interest', type: 'checkbox' } + ] }, { - key: 'otherDeductions', - label: 'Other Deductions (Box 13)', - section: 'Deductions' + title: "Section K — Partner's Share of Liabilities", + fields: [ + { boxNumber: 'K_NONRECOURSE_BEGIN', label: 'Nonrecourse — Beginning', type: 'currency' }, + { boxNumber: 'K_NONRECOURSE_END', label: 'Nonrecourse — Ending', type: 'currency' }, + { boxNumber: 'K_QUAL_NONRECOURSE_BEGIN', label: 'Qualified nonrecourse — Beginning', type: 'currency' }, + { boxNumber: 'K_QUAL_NONRECOURSE_END', label: 'Qualified nonrecourse — Ending', type: 'currency' }, + { boxNumber: 'K_RECOURSE_BEGIN', label: 'Recourse — Beginning', type: 'currency' }, + { boxNumber: 'K_RECOURSE_END', label: 'Recourse — Ending', type: 'currency' 
}, + { boxNumber: 'K2', label: 'Includes lower-tier partnership liabilities', type: 'checkbox' }, + { boxNumber: 'K3', label: 'Liability subject to guarantees', type: 'checkbox' } + ] }, { - key: 'selfEmploymentEarnings', - label: 'Self-Employment Earnings (Box 14)', - section: 'Other' + title: "Section L — Partner's Capital Account", + fields: [ + { boxNumber: 'L_BEG_CAPITAL', label: 'Beginning capital account', type: 'currency' }, + { boxNumber: 'L_CONTRIBUTED', label: 'Capital contributed during year', type: 'currency' }, + { boxNumber: 'L_CURR_YR_INCOME', label: 'Current year net income (loss)', type: 'currency' }, + { boxNumber: 'L_OTHER', label: 'Other increase (decrease)', type: 'currency' }, + { boxNumber: 'L_WITHDRAWALS', label: 'Withdrawals & distributions', type: 'currency' }, + { boxNumber: 'L_END_CAPITAL', label: 'Ending capital account', type: 'currency' } + ] }, { - key: 'foreignTaxesPaid', - label: 'Foreign Taxes Paid (Box 16)', - section: 'Other' + title: 'Sections M & N', + fields: [ + { boxNumber: 'M_YES', label: 'M — Contributed property: Yes', type: 'checkbox' }, + { boxNumber: 'M_NO', label: 'M — Contributed property: No', type: 'checkbox' }, + { boxNumber: 'N_BEGINNING', label: 'N — Net 704(c) gain/loss: Beginning', type: 'currency' }, + { boxNumber: 'N_ENDING', label: 'N — Net 704(c) gain/loss: Ending', type: 'currency' } + ] }, { - key: 'alternativeMinimumTaxItems', - label: 'AMT Items (Box 17)', - section: 'Other' + title: 'Part III — Income & Gains (Boxes 1–11)', + fields: [ + { boxNumber: '1', label: '1 — Ordinary business income (loss)', type: 'currency' }, + { boxNumber: '2', label: '2 — Net rental real estate income (loss)', type: 'currency' }, + { boxNumber: '3', label: '3 — Other net rental income (loss)', type: 'currency' }, + { boxNumber: '4', label: '4 — Guaranteed payments for services', type: 'currency' }, + { boxNumber: '4a', label: '4a — Guaranteed payments for capital', type: 'currency' }, + { boxNumber: '4b', label: '4b — 
Total guaranteed payments', type: 'currency' }, + { boxNumber: '5', label: '5 — Interest income', type: 'currency' }, + { boxNumber: '6a', label: '6a — Ordinary dividends', type: 'currency' }, + { boxNumber: '6b', label: '6b — Qualified dividends', type: 'currency' }, + { boxNumber: '6c', label: '6c — Dividend equivalents', type: 'currency' }, + { boxNumber: '7', label: '7 — Royalties', type: 'currency' }, + { boxNumber: '8', label: '8 — Net short-term capital gain (loss)', type: 'currency' }, + { boxNumber: '9a', label: '9a — Net long-term capital gain (loss)', type: 'currency' }, + { boxNumber: '9b', label: '9b — Collectibles (28%) gain (loss)', type: 'currency' }, + { boxNumber: '9c', label: '9c — Unrecaptured §1250 gain', type: 'currency' }, + { boxNumber: '10', label: '10 — Net §1231 gain (loss)', type: 'currency' }, + { boxNumber: '11', label: '11 — Other income (loss)', type: 'currency' } + ] }, { - key: 'distributionsCash', - label: 'Cash Distributions (Box 19a)', - section: 'Distributions' + title: 'Part III — Deductions & Credits (Boxes 12–18)', + fields: [ + { boxNumber: '12', label: '12 — §179 deduction', type: 'currency' }, + { boxNumber: '13', label: '13 — Other deductions', type: 'currency' }, + { boxNumber: '14', label: '14 — Self-employment earnings (loss)', type: 'currency' }, + { boxNumber: '15', label: '15 — Credits', type: 'currency' }, + { boxNumber: '16', label: '16 — Foreign transactions', type: 'currency' }, + { boxNumber: '16_K3', label: '16 — Schedule K-3 attached', type: 'checkbox' }, + { boxNumber: '17', label: '17 — AMT items', type: 'currency' }, + { boxNumber: '18', label: '18 — Tax-exempt income / nondeductible expenses', type: 'currency' } + ] }, { - key: 'distributionsProperty', - label: 'Property Distributions (Box 19b)', - section: 'Distributions' + title: 'Part III — Distributions & Other (Boxes 19–23)', + fields: [ + { boxNumber: '19', label: '19 — Distributions', type: 'currency' }, + { boxNumber: '19a', label: '19a — Cash & 
marketable securities', type: 'currency' }, + { boxNumber: '19b', label: '19b — Other property', type: 'currency' }, + { boxNumber: '20A', label: '20A — Other information: Code A', type: 'currency' }, + { boxNumber: '20B', label: '20B — Other information: Code B', type: 'currency' }, + { boxNumber: '20V', label: '20V — Other information: Code V', type: 'currency' }, + { boxNumber: '20_WILDCARD', label: '20 — Other information: Other codes', type: 'currency' }, + { boxNumber: '21', label: '21 — Foreign taxes paid or accrued', type: 'currency' }, + { boxNumber: '22', label: '22 — At-risk: more than one activity', type: 'checkbox' }, + { boxNumber: '23', label: '23 — Passive: more than one activity', type: 'checkbox' } + ] } ]; -const SECTIONS = ['Income', 'Capital', 'Deductions', 'Other', 'Distributions']; - @Component({ changeDetection: ChangeDetectionStrategy.OnPush, imports: [ CommonModule, + FormsModule, MatButtonModule, + MatCheckboxModule, MatFormFieldModule, + MatIconModule, MatInputModule, MatSelectModule, - ReactiveFormsModule + MatTooltipModule ], selector: 'gf-k-document-form', standalone: true, @@ -135,135 +189,342 @@ const SECTIONS = ['Income', 'Capital', 'Deductions', 'Other', 'Distributions']; display: block; } - .section-title { - font-size: 14px; + .form-header { + display: flex; + align-items: center; + justify-content: space-between; + flex-wrap: wrap; + gap: 12px; + margin-bottom: 20px; + } + + /* Collapsible sections */ + .k1-section { + margin-bottom: 12px; + border: 1px solid rgba(0, 0, 0, 0.08); + border-radius: 8px; + overflow: hidden; + } + + .section-header { + display: flex; + align-items: center; + gap: 8px; + padding: 10px 16px; + background: rgba(0, 0, 0, 0.03); + cursor: pointer; + user-select: none; font-weight: 500; - color: rgba(var(--dark-primary-text), 0.7); - margin: 16px 0 8px; - padding-bottom: 4px; - border-bottom: 1px solid rgba(var(--dark-dividers), 0.12); + font-size: 14px; + transition: background 0.15s; + } + + 
.section-header:hover { + background: rgba(0, 0, 0, 0.06); + } + + .section-header mat-icon { + font-size: 18px; + width: 18px; + height: 18px; + transition: transform 0.2s; + } + + .section-header mat-icon.expanded { + transform: rotate(90deg); } + .section-header .section-desc { + font-weight: 400; + font-size: 12px; + color: rgba(0, 0, 0, 0.5); + margin-left: auto; + } + + .section-body { + padding: 12px 16px 4px; + } + + /* Two-column grid */ .fields-grid { display: grid; - grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); - gap: 0 16px; + grid-template-columns: 1fr 1fr; + gap: 2px 24px; + } + + @media (max-width: 700px) { + .fields-grid { + grid-template-columns: 1fr; + } + } + + /* Field rows */ + .field-row { + display: flex; + align-items: center; + gap: 8px; + padding: 4px 0; + min-height: 34px; + } + + .field-label { + flex: 1 1 auto; + font-size: 13px; + color: rgba(0, 0, 0, 0.72); + line-height: 1.3; + min-width: 0; } - .status-row { + .field-input { + flex: 0 0 140px; display: flex; - gap: 16px; - margin-bottom: 16px; + align-items: center; + } + + .field-input input { + width: 100%; + box-sizing: border-box; + padding: 5px 8px; + font-size: 13px; + font-family: 'Roboto Mono', monospace; + border: 1px solid rgba(0, 0, 0, 0.18); + border-radius: 4px; + background: transparent; + outline: none; + text-align: right; + transition: border-color 0.15s; + } + + .field-input input:focus { + border-color: #1976d2; + box-shadow: 0 0 0 1px #1976d2; + } + + .field-input input.text-input { + text-align: left; + font-family: inherit; + } + + .field-input .unit-suffix { + font-size: 12px; + color: rgba(0, 0, 0, 0.45); + margin-left: 3px; + flex-shrink: 0; } - .actions { + .field-input .unit-prefix { + font-size: 12px; + color: rgba(0, 0, 0, 0.45); + margin-right: 3px; + flex-shrink: 0; + } + + .field-input input.is-zero { + color: rgba(0, 0, 0, 0.3); + } + + /* Checkbox row */ + .field-row-checkbox { + cursor: pointer; + } + + .field-row-checkbox 
.cb-label { + font-size: 13px; + color: rgba(0, 0, 0, 0.72); + } + + /* Footer */ + .form-footer { display: flex; justify-content: flex-end; gap: 8px; - margin-top: 16px; + margin-top: 20px; + padding-top: 12px; + border-top: 1px solid rgba(0, 0, 0, 0.08); } ` ], template: ` -
-
- - Filing Status - - Draft - Estimated - Final - - -
+
+ + Filing Status + + Draft + Estimated + Final + + +
- @for (section of sections; track section) { -
{{ section }}
-
- @for (field of getFieldsForSection(section); track field.key) { - - {{ field.label }} - - + @for (section of sections; track section.title) { +
+
+ chevron_right + {{ section.title }} + @if (section.description) { + {{ section.description }} }
- } - -
- - + @if (!section.collapsed) { +
+
+ @for (field of section.fields; track field.boxNumber) { + @if (field.type === 'checkbox') { +
+ + {{ field.label }} + +
+ } @else if (field.type === 'text') { +
+ {{ field.label }} +
+ +
+
+ } @else if (field.type === 'percent') { +
+ {{ field.label }} +
+ + % +
+
+ } @else { +
+ {{ field.label }} +
+ $ + +
+
+ } + } +
+
+ }
- + } + + ` }) export class GfKDocumentFormComponent implements OnChanges { - @Input() public data: K1Data | null = null; + @Input() public data: Record | null = null; @Input() public filingStatus: string = 'DRAFT'; @Input() public isEditMode: boolean = false; @Output() public cancelled = new EventEmitter(); @Output() public submitted = new EventEmitter<{ filingStatus: string; - data: Record; + data: Record; }>(); - public form: FormGroup; - public sections = SECTIONS; + public filingStatusValue = 'DRAFT'; + public sections: K1Section[] = []; + + /** Internal data store keyed by boxNumber */ + private values: Record = {}; public constructor() { - const controls: Record = { - filingStatus: new FormControl('DRAFT', Validators.required) - }; + this.sections = K1_SECTIONS.map((s) => ({ + ...s, + fields: [...s.fields], + collapsed: s.collapsed ?? false + })); + } + + public ngOnChanges(): void { + this.filingStatusValue = this.filingStatus || 'DRAFT'; - for (const field of K1_FIELD_CONFIG) { - controls[field.key] = new FormControl(0); + if (this.data) { + this.values = { ...this.data }; + } else { + this.values = {}; } + } + + // ── Value accessors ──────────────────────────────────────────────────── - this.form = new FormGroup(controls); + public isChecked(boxNumber: string): boolean { + const v = this.values[boxNumber]; + return v === 'true' || v === 1 || v === '1'; } - public ngOnChanges(): void { - if (this.data) { - const patchData: Record = { - filingStatus: this.filingStatus - }; + public setCheckbox(boxNumber: string, checked: boolean): void { + this.values[boxNumber] = checked ? 'true' : 'false'; + } - for (const field of K1_FIELD_CONFIG) { - patchData[field.key] = this.data[field.key] ?? 0; - } + public getTextValue(boxNumber: string): string { + const v = this.values[boxNumber]; + return v != null ? 
String(v) : ''; + } - this.form.patchValue(patchData); + public setTextValue(boxNumber: string, event: Event): void { + const input = event.target as HTMLInputElement; + this.values[boxNumber] = input.value || null; + } + + public getNumericDisplay(boxNumber: string): string { + const v = this.values[boxNumber]; + if (v == null || v === '') { + return ''; } + const n = Number(v); + return isNaN(n) ? '' : String(n); } - public getFieldsForSection( - section: string - ): { key: keyof K1Data; label: string; section: string }[] { - return K1_FIELD_CONFIG.filter((f) => f.section === section); + public isZero(boxNumber: string): boolean { + const v = this.values[boxNumber]; + return v === 0 || v === '0'; } + public setNumericValue(boxNumber: string, event: Event): void { + const input = event.target as HTMLInputElement; + const raw = input.value; + if (raw === '' || raw == null) { + this.values[boxNumber] = null; + } else { + const n = parseFloat(raw); + this.values[boxNumber] = isNaN(n) ? null : n; + } + } + + // ── Submit ───────────────────────────────────────────────────────────── + public onSubmit(): void { - if (this.form.valid) { - const value = this.form.value; - const data: Record = {}; + const data: Record = {}; - for (const field of K1_FIELD_CONFIG) { - data[field.key] = Number(value[field.key]) || 0; + for (const section of this.sections) { + for (const field of section.fields) { + const v = this.values[field.boxNumber]; + if (v != null && v !== '') { + data[field.boxNumber] = v; + } } - - this.submitted.emit({ - data, - filingStatus: value.filingStatus - }); } + + this.submitted.emit({ + data, + filingStatus: this.filingStatusValue + }); } } diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 26c33cf68..dca2bbd86 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -676,7 +676,9 @@ model CellMapping { boxNumber String label String description String? 
+ cellType String @default("number") isCustom Boolean @default(false) + isIgnored Boolean @default(false) sortOrder Int createdAt DateTime @default(now()) updatedAt DateTime @updatedAt diff --git a/specs/001-family-office-transform/research-normalized-k1-model.md b/specs/001-family-office-transform/research-normalized-k1-model.md new file mode 100644 index 000000000..f23b8044d --- /dev/null +++ b/specs/001-family-office-transform/research-normalized-k1-model.md @@ -0,0 +1,535 @@ +# Research: Normalized Relational Model for K-1 Financial Data + +**Phase 0 Output** | **Date**: 2026-03-20 | **Research Only — No Code** + +--- + +## Context + +The current system stores K-1 box data as a flat JSON blob on `KDocument.data`: + +```json +{"1": 50000, "9a": -1200, "11-ZZ*": 500, "20-A": 1200} +``` + +Aggregations are computed on-the-fly in `k1-aggregation.service.ts` by iterating JSON keys. `CellMapping` provides label metadata, and `CellAggregationRule` defines which box keys to SUM. The system currently has ~80+ possible K-1 fields (boxes 1–21 with subtypes, Sections J/K/L/M/N, metadata fields like A–I). + +The goal is to evaluate whether and how to transform this into a normalized relational model. + +--- + +## Topic 1: Wide vs Normalized Financial Data Models + +### Decision + +**Move to a normalized fact table** (`K1LineItem`) for Part III financial data (boxes 1–21), but **keep a JSON metadata column** for Part I/II identity fields (A–I, J–N) that are queried infrequently. + +### Rationale + +The current JSON blob approach has these specific weaknesses for analytics: + +**Query limitations observed in this codebase:** +1. **No SQL-level filtering or aggregation** — The `computeForKDocument()` method in `k1-aggregation.service.ts` must fetch the entire `KDocument` row, deserialize JSON, and loop through `Object.entries(data)` in application code. This means you cannot write `SELECT SUM(amount) FROM ... 
WHERE box_number = '1' AND tax_year BETWEEN 2020 AND 2025` — every aggregation requires fetching and deserializing all rows. +2. **No practical indexes on values** — Range queries on `data->'1'` cannot use a general-purpose index in PostgreSQL JSONB. GIN indexes support containment (`@>`) but don't help with `>`, `<`, or `BETWEEN` on numeric values within the JSON; the only option is a separate B-tree expression index per key (e.g. on `((data->>'1')::numeric)`), which is impractical for 80+ box keys. +3. **No referential integrity** — A typo like `"9A"` vs `"9a"` silently creates bad data. The current `CellMapping` table defines valid box numbers, but nothing enforces that `KDocument.data` keys match them. +4. **Cross-document aggregation is O(n) deserialization** — To compute "total ordinary income (Box 1) across all partnerships for 2025," every KDocument row matching the year must be fetched and parsed (50+ JSON deserializations for one number); a five-year total across 50+ partnerships requires 250+. +5. **No partial update tracking** — When a KDocument transitions from ESTIMATED → FINAL, the entire JSON blob is replaced. `previousData` preserves the old blob but provides no field-level diff. +6. **Schema evolution is invisible** — If the IRS adds a Box 6d in 2027, there's no migration — it just appears as a new JSON key. This sounds convenient but means no validation, no type checking, and no discoverability for future NL-to-SQL. 
+ +**When the wide/JSON model is acceptable:** +- Archival storage of the complete raw extraction (already served by `K1ImportSession.rawExtraction`) +- Rarely-queried metadata fields (Part I/II: partnership name, EIN, addresses) +- Configurations and user preferences (already used for `Settings.settings`) +- Fewer than ~10 documents with no cross-document queries needed + +**When it breaks down (the current situation):** +- Cross-entity/cross-year aggregation (core family office use case) +- Performance analytics over time (partnership returns by year) +- Tax planning queries ("show me all partnerships with Section 1231 losses > $10K") +- Audit trail at field granularity +- LLM-generated SQL queries (LLMs cannot reliably generate JSONB path expressions) + +### Alternatives Considered + +| Alternative | Pros | Cons | +|---|---|---| +| **Keep JSON blob** (status quo) | No migration, flexible schema | All query limitations above; blocks analytics roadmap | +| **JSONB with generated columns** | No schema change for K-1 fields; PostgreSQL 12+ supports `GENERATED ALWAYS AS ((data->>'1')::numeric) STORED` | Max ~30 generated columns practical; doesn't scale to 80+ fields; still no FK integrity | +| **Wide table with 80+ columns** | Simple queries, strong typing | Extremely sparse (most K-1s populate ~20 of 80+ boxes); ALTER TABLE for every IRS form change; NULL-heavy | +| **Normalized fact table** (chosen) | SQL aggregation, indexes, FK integrity, LLM-friendly, field-level audit trail | More JOINs; migration effort; slightly more complex insert logic | + +--- + +## Topic 2: EAV vs Normalized Tables for Tax Document Fields + +### Decision + +**Use a hybrid approach**: a single EAV-style fact table (`K1LineItem`) for all Part III financial line items, combined with a reference/dimension table (`K1BoxDefinition`) that provides metadata, typing, and validation rules. Keep Part I/II identity metadata as structured JSON on the KDocument. 
+ +This is technically EAV but with strong constraints — it's closer to a **typed fact table** pattern than classic unconstrained EAV. + +### Rationale + +**Why EAV is appropriate here (and usually isn't):** + +Classic EAV fails because it loses type safety, makes queries verbose, and resists validation. K-1 data avoids these pitfalls because: + +1. **Uniform value type** — All Part III financial values (boxes 1–21) are `Decimal` amounts. Unlike generic EAV where attributes might be strings, dates, booleans, or blobs, K-1 line items are uniformly monetary amounts with a known currency. This eliminates the "value_string / value_number / value_date" anti-pattern. + +2. **Closed attribute set** — The IRS defines ~50 Part III line items. This is not open-ended. The `K1BoxDefinition` reference table enumerates all valid attributes, so there's no unbounded attribute sprawl. + +3. **Natural query pattern** — The primary queries are aggregations across one attribute dimension: `SUM(amount) WHERE box_key = '1'`. This is exactly what EAV is good at — pivot-style aggregation across a known set of attributes. + +4. **Sparse data** — A typical K-1 populates 15–25 of ~50 possible line items. A wide table would be 50–70% NULL. The EAV/fact table stores only populated fields, which is both space-efficient and semantically clearer. 
+ +**Proposed structure (conceptual):** + +``` +K1BoxDefinition (reference/dimension table) +├── boxKey VARCHAR PK -- "1", "9a", "11-ZZ*", "20-A" +├── label VARCHAR -- "Ordinary business income (loss)" +├── section VARCHAR -- "PART_III", "PART_I", "SECTION_J" +├── dataType VARCHAR -- "CURRENCY", "PERCENTAGE", "BOOLEAN", "TEXT" +├── sortOrder INT +├── irsFormLine VARCHAR -- "Box 1", "Box 9a", "Section J, Line 1" +└── description TEXT + +K1LineItem (fact table — one row per box per KDocument) +├── id UUID PK +├── kDocumentId UUID FK → KDocument.id +├── boxKey VARCHAR FK → K1BoxDefinition.boxKey +├── amount DECIMAL(15,2) -- financial value (null for non-monetary) +├── textValue VARCHAR -- for text/boolean fields if needed +├── sourceConfidence DECIMAL(3,2) -- 0.00–1.00, from extraction +├── sourcePageNumber INT -- PDF page where extracted +├── sourceCoordinates JSON -- {x, y, width, height} on the page +├── isUserEdited BOOLEAN -- true if user modified during verification +├── createdAt TIMESTAMP +├── updatedAt TIMESTAMP +└── @@unique([kDocumentId, boxKey]) +``` + +**Why not separate normalized tables for each box category:** + +An alternative is dedicated tables: `K1IncomeItems`, `K1DeductionItems`, `K1CreditItems`, `K1CapitalAccount`, etc. 
This was rejected because: +- K-1 boxes don't cleanly partition into fixed categories (Box 11 "Other income" spans multiple categories via sub-codes) +- Sub-code boxes (11-A through 11-ZZ*, 13-A through 13-ZZ*, 20-A through 20-ZZ*) have partnership-specific meaning — the same structural pattern repeats across boxes +- It would require 6–8 tables with identical column shapes, making queries harder, not easier +- The `K1BoxDefinition` reference table provides the categorical metadata without needing separate physical tables + +**Treatment of Part I/II metadata fields:** + +Fields like Partnership EIN (Box A), Partner name (Box F), Section J percentages, and Section L capital account data are better stored as structured JSON on `KDocument` in a `metadata` column because: +- They're queried for display, not for aggregation +- They have heterogeneous types (strings, booleans, percentages, addresses) +- They identify the document rather than representing financial facts +- There are ~30 of them, and they're almost all populated (not sparse) + +### Alternatives Considered + +| Alternative | Pros | Cons | +|---|---|---| +| **Pure EAV (no reference table)** | Maximum flexibility | No validation of box keys; `CellMapping` already serves this role but without FK enforcement | +| **Wide table (one column per box)** | Simple SELECTs for specific boxes | 80+ columns; 50–70% NULLs; ALTER TABLE for new boxes; poor for cross-box aggregation | +| **Separate tables per box category** | Strong typing per category | 6–8 near-identical tables; complex UNION queries; sub-code boxes don't fit cleanly | +| **Hybrid EAV + reference table** (chosen) | Uniform fact table; strong FK validation; sparse-friendly; single query pattern for aggregation; field-level provenance | Pivot queries needed for "show one K-1 as a form"; slightly more complex writes | + +--- + +## Topic 3: Financial Fact Tables for Tax Data + +### Decision + +**Model K-1 line items as a financial fact table** in a 
star-schema-inspired design, with KDocument as the central bridge to dimension tables (Partnership, Entity, TaxYear). Monetary values stored as `DECIMAL(15,2)` with explicit currency. + +### Rationale + +Financial data warehouses consistently use a fact/dimension pattern for tax line items: + +**Star schema mapping for K-1 data:** + +``` + ┌──────────────┐ + │ Partnership │ (dimension) + │ ────────── │ + │ id, name, │ + │ type, ein │ + └──────┬───────┘ + │ +┌──────────────┐ ┌──────┴───────┐ ┌──────────────────┐ +│ Entity │────│ KDocument │────│ K1BoxDefinition │ (dimension) +│ (dimension) │ │ (bridge) │ │ ────────────────│ +│ ────────── │ │ ────────── │ │ boxKey, label, │ +│ id, name, │ │ id, taxYear,│ │ section, type │ +│ type, taxId │ │ status │ └──────────────────┘ +└──────────────┘ └──────┬───────┘ + │ + ┌──────┴───────┐ + │ K1LineItem │ (FACT) + │ ────────── │ + │ amount, │ + │ boxKey, │ + │ confidence │ + └──────────────┘ +``` + +**Best practices from financial data warehousing applied here:** + +1. **Additive facts only** — `K1LineItem.amount` is fully additive: you can SUM across tax years, partnerships, entities, or box types. Non-additive data (percentages, booleans, text) is stored separately in `textValue` or on the KDocument metadata. + +2. **Grain = one box value per K-1 document** — Each row in `K1LineItem` represents one financial amount from one K-1 for one tax year. This is the atomic grain. Aggregation rules from `CellAggregationRule` operate on this grain. + +3. **Slowly changing dimensions** — `PartnershipMembership` already handles SCD Type 2 (effective dates) for ownership percentages. `K1BoxDefinition` is SCD Type 1 (overwritten on IRS form changes, with version tracking if needed). + +4. **Conformed dimensions** — `Partnership` and `Entity` serve as conformed dimensions shared between K-1 facts, Distribution facts, and Valuation facts. A single `Entity` dimension joins to multiple fact tables. + +5. 
**Currency handling** — Store amounts in the source currency with a `currency` column. The KDocument inherits currency from Partnership. Conversion to reporting currency happens at query time or in materialized views, never by mutating the fact. + +6. **Decimal precision** — `DECIMAL(15,2)` covers amounts up to $9,999,999,999,999.99. K-1 amounts from large partnerships (PE funds, hedge funds) can reach tens of millions. 15 digits provides headroom. Use 2 decimal places to match IRS reporting precision. + +**Aggregation queries enabled by this model:** + +```sql +-- Total ordinary income across all partnerships for 2025 +SELECT SUM(li.amount) +FROM k1_line_item li +JOIN k_document kd ON li.k_document_id = kd.id +WHERE li.box_key = '1' AND kd.tax_year = 2025; + +-- Income breakdown by entity for tax year 2025 +SELECT e.name, li.box_key, SUM(li.amount) +FROM k1_line_item li +JOIN k_document kd ON li.k_document_id = kd.id +JOIN partnership p ON kd.partnership_id = p.id +JOIN partnership_membership pm ON pm.partnership_id = p.id +JOIN entity e ON pm.entity_id = e.id +WHERE kd.tax_year = 2025 +GROUP BY e.name, li.box_key; + +-- Partnership performance: Box 1 over time +SELECT kd.tax_year, p.name, li.amount +FROM k1_line_item li +JOIN k_document kd ON li.k_document_id = kd.id +JOIN partnership p ON kd.partnership_id = p.id +WHERE li.box_key = '1' +ORDER BY kd.tax_year; +``` + +These queries are impossible or impractical with the current JSON blob model. 
+ +### Alternatives Considered + +| Alternative | Pros | Cons | +|---|---|---| +| **Snowflake schema (more normalization)** | Normalized box categories into sub-dimensions | Over-normalized for ~50 box types; extra JOINs for no benefit | +| **Flat denormalized reporting table** | Fastest reads; no JOINs | Write complexity; data duplication; hard to keep consistent | +| **OLAP cube / column store** | Best aggregation performance | Overkill for <10K rows; adds infrastructure complexity | +| **Star-schema-inspired fact table** (chosen) | Natural fit for K-1 aggregation queries; leverages existing dimensions; PostgreSQL handles this scale trivially | Requires JOINs for full context (acceptable) | + +--- + +## Topic 4: Source Traceability in Financial Systems + +### Decision + +**Store extraction provenance at the line-item grain** — each `K1LineItem` records the source page number, bounding-box coordinates, raw extracted text, confidence score, and whether it was user-edited. The `K1ImportSession` retains the complete raw extraction as an immutable JSON snapshot. + +### Rationale + +The audit trail must support this flow: + +``` +Displayed aggregated number + → K1LineItem (individual box value) + → KDocument (which K-1, which year, which partnership) + → K1ImportSession (extraction record) + → Document (source PDF file) + → Specific page + coordinates on that page + → Raw extracted text before parsing +``` + +**Granularity levels and what to store where:** + +| Level | Table | Fields | Purpose | +|---|---|---|---| +| **Aggregation** | Computed at query time | SUM/formula from `CellAggregationRule` | "Where does this total come from?" → list of K1LineItems | +| **Line item** | `K1LineItem` | `amount`, `boxKey`, `sourceConfidence`, `sourcePageNumber`, `sourceCoordinates`, `rawExtractedText`, `isUserEdited` | "What exactly was extracted and from where?" 
| +| **Document** | `K1ImportSession` | `rawExtraction` (full JSON), `extractionMethod`, `fileName` | "What did the system originally see?" (immutable after extraction) | +| **File** | `Document` | `filePath`, `fileSize`, `mimeType` | "Where is the original PDF?" | + +**Key design principles:** + +1. **Immutability of raw extraction** — `K1ImportSession.rawExtraction` is written once at extraction time and never modified. `verifiedData` captures user edits. This provides a complete before/after audit trail. + +2. **Coordinate-level provenance** — Current `k1-positions-dump.txt` shows the parser already extracts `x, y` coordinates for each text element. Storing `sourceCoordinates: {x, y, width, height}` on each `K1LineItem` enables a future "click to highlight in PDF" feature. + +3. **Confidence as first-class data** — The system already computes confidence scores (0.0–1.0) during extraction. Persisting this on the line item (not just in the import session JSON) enables queries like "show me all low-confidence values across all partnerships" and supports audit prioritization. + +4. **User edit tracking** — `isUserEdited: boolean` distinguishes machine-extracted values from human-verified overrides. This is critical for audit and for training future extraction models. + +5. **No deletion of source data** — When a KDocument transitions from ESTIMATED → FINAL, the old line items should be soft-versioned (via `KDocument.previousData` or a separate version table), not deleted. 
+ +**What NOT to store at line-item level:** +- Full PDF binary (stay on Document/filesystem) +- Complete OCR output for the entire page (stay on K1ImportSession.rawExtraction) +- Rendering coordinates for non-K-1 text on the page (not relevant) + +### Alternatives Considered + +| Alternative | Pros | Cons | +|---|---|---| +| **Provenance only at document level** | Simpler; fewer columns | Cannot trace an individual number back to a specific location on a page | +| **Separate provenance table** (K1LineItemProvenance) | Clean separation of concerns | Extra JOIN for every audit query; 1:1 relationship is usually better as columns | +| **Store full page image crops per line item** | Visual proof | Massive storage; PDF coordinates + original file are sufficient for re-rendering | +| **Provenance on line item** (chosen) | Direct traceability; no extra JOINs; enables "highlight in PDF"; supports audit queries | Slightly wider rows (acceptable for <10K rows) | + +--- + +## Topic 5: PostgreSQL Materialized Views for Financial Reporting + +### Decision + +**Use materialized views for cross-partnership/cross-year aggregation dashboards**, refreshed on a schedule or triggered by KDocument changes. Use regular views for single-document or single-partnership queries. Do **not** use denormalized reporting tables. 
+ +### Rationale + +**When to use each approach in this system:** + +| Scenario | Approach | Reason | +|---|---|---| +| "Show Box 1–21 for one K-1" | Regular query on `K1LineItem` | Small result set; no aggregation; fast enough | +| "Total income by box for one partnership across years" | Regular SQL `GROUP BY` | <20 rows × <10 years = <200 rows; trivial for PostgreSQL | +| "Dashboard: all partnerships × all entities × 5 years" | **Materialized view** | Cross-joins across dimensions; 50 partnerships × 5 entities × 5 years × 20 boxes = 25,000 aggregated values; worth pre-computing | +| "Tax planning: find partnerships with specific loss patterns" | Materialized view with indexes on the filter columns (PostgreSQL has no separate "indexed view" feature) | Complex filtering across many K-1s | +| "YoY change in Box 1 by partnership" | Materialized view | Window functions over multiple years | + +**Proposed materialized views:** + +```sql +-- MV 1: K-1 Summary by Partnership/Year +CREATE MATERIALIZED VIEW mv_k1_partnership_year_summary AS +SELECT + kd.partnership_id, + kd.tax_year, + li.box_key, + bd.label, + bd.section, + SUM(li.amount) AS total_amount, + COUNT(*) AS line_count, + kd.filing_status +FROM k1_line_item li +JOIN k_document kd ON li.k_document_id = kd.id +JOIN k1_box_definition bd ON li.box_key = bd.box_key +GROUP BY kd.partnership_id, kd.tax_year, li.box_key, bd.label, bd.section, kd.filing_status; + +-- MV 2: Entity-level Income Aggregation +CREATE MATERIALIZED VIEW mv_entity_income_summary AS +SELECT + e.id AS entity_id, + e.name AS entity_name, + kd.tax_year, + li.box_key, + SUM(li.amount * pm.ownership_percent / 100) AS allocated_amount +FROM k1_line_item li +JOIN k_document kd ON li.k_document_id = kd.id +JOIN partnership_membership pm ON pm.partnership_id = kd.partnership_id +JOIN entity e ON pm.entity_id = e.id +WHERE pm.effective_date <= make_date(kd.tax_year, 12, 31) + AND (pm.end_date IS NULL OR pm.end_date > make_date(kd.tax_year, 12, 31)) +GROUP BY e.id, e.name, kd.tax_year, li.box_key; +``` + +**Refresh 
strategy:** + +- **Trigger-based refresh**: After any KDocument insert/update/delete or status change to FINAL, refresh affected materialized views. In NestJS, this is a `@OnEvent('k-document.changed')` handler that calls `REFRESH MATERIALIZED VIEW CONCURRENTLY`. +- **`CONCURRENTLY` keyword**: Allows reads during refresh (requires a unique index on the MV). Essential for a multi-user system. +- **Frequency**: For a family office with <100 K-1s updated per year, refresh takes <1 second. No scheduling needed — event-driven refresh is sufficient. + +**Why not denormalized reporting tables:** + +Denormalized tables (duplicating data into a flat reporting structure) require write-time consistency management — every KDocument change must update the reporting table transactionally. This is the pattern used in high-write OLTP systems, but K-1 data is low-write (<100 writes/year) and high-read (dashboards queried many times). Materialized views handle this perfectly with zero application-level sync logic. + +**Why not computed/generated columns:** + +PostgreSQL generated columns cannot reference other tables. Since aggregations span KDocument → K1LineItem → Partnership → Entity, generated columns are structurally insufficient. 
+ +### Alternatives Considered + +| Alternative | Pros | Cons | +|---|---|---| +| **Application-level caching** (Redis/in-memory) | No DB schema changes | Cache invalidation complexity; doesn't help SQL-based analytics | +| **Denormalized reporting tables** | Fastest reads; works at any scale | Write-time maintenance burden; consistency bugs; overkill for <10K rows | +| **Regular views** (not materialized) | Always fresh; no refresh needed | Recomputed on every query; slow for cross-entity dashboards | +| **Materialized views** (chosen) | Pre-computed; concurrent reads; event-driven refresh; zero application-level sync | Slight staleness (mitigated by event-driven refresh); requires unique indexes for CONCURRENTLY | + +--- + +## Topic 6: Migration Strategy from JSON Blob to Normalized Tables + +### Decision + +**Phase the migration in 3 steps**: (1) Create new tables alongside existing JSON, (2) Dual-write to both during a transition period, (3) Make normalized tables authoritative. **Keep the JSON blob immutable as an archive** — never delete it. 
+ +### Rationale + +**Step 1: Additive schema changes (zero breaking changes)** + +``` +Migration 1: Create K1BoxDefinition table, seed with IRS default box definitions +Migration 2: Create K1LineItem table with FK to KDocument and K1BoxDefinition +Migration 3: Backfill K1LineItem from existing KDocument.data JSON blobs +``` + +The backfill for Migration 3 (still part of Step 1): + +```sql +-- Pseudocode: For each KDocument, iterate JSON keys and insert K1LineItems +INSERT INTO k1_line_item (id, k_document_id, box_key, amount, created_at, updated_at) +SELECT + gen_random_uuid(), + kd.id, + je.key, + (je.value)::decimal, + kd.created_at, + NOW() +FROM k_document kd, + jsonb_each(kd.data::jsonb) AS je(key, value) +WHERE jsonb_typeof(je.value) = 'number'; +``` + +**Step 2: Dual-write transition period** + +During the transition: +- `k1-import.service.ts` `confirmImport()` writes to **both** `KDocument.data` (JSON) and `K1LineItem` (rows) +- Read operations gradually migrate from JSON-based to K1LineItem-based +- `k1-aggregation.service.ts` switches from JSON iteration to `SELECT SUM` on K1LineItem +- Run validation queries comparing JSON-derived totals to K1LineItem-derived totals + +**Step 3: K1LineItem becomes authoritative** + +- New features (dashboards, tax planning, LLM queries) read only from K1LineItem +- `KDocument.data` is retained as immutable archive but no longer written to for new documents +- `CellAggregationRule.sourceCells` continues to work — the boxKey values are the same strings +- `CellMapping` evolves into or is replaced by `K1BoxDefinition` + +**Should the old JSON be kept immutable?** + +**Yes, permanently.** Reasons: +1. **Audit requirement** — The JSON blob is the original imported representation. Regulatory and audit standards require preserving source data in its original form. +2. **Rollback safety** — If the migration has bugs, the JSON blob is the recovery source. +3. **Storage is trivial** — A JSON blob with ~30 key-value pairs is <1 KB. 
Even 1,000 KDocuments = <1 MB total. There's no storage pressure to delete it. +4. **Import session already preserves extraction** — `K1ImportSession.rawExtraction` holds the pre-verification extraction. `KDocument.data` holds the post-verification snapshot. Both should survive indefinitely. + +**Backward compatibility considerations:** + +- The `KDocument.data` column type stays `Json` (not nullable, not removed) +- The existing `k-document-form.component.ts` UI reads from `KDocument.data` — it continues to work during transition +- The `computeForKDocument()` aggregation service works against JSON through the transition, then switches to K1LineItem queries +- No existing API contracts change — `GET /k-documents/:id` returns the same shape + +**Handling the CellMapping → K1BoxDefinition transition:** + +The existing `CellMapping` table (per-partnership box definitions) maps closely to the proposed `K1BoxDefinition`. The migration strategy: +- `K1BoxDefinition` absorbs the global (partnershipId = null) CellMapping records +- Per-partnership CellMapping overrides become per-partnership `K1BoxDefinition` rows (or remain as display-layer configuration separate from the data model) +- `CellMapping` fields like `isIgnored`, `isCustom` are presentation concerns that may not belong on the data-layer `K1BoxDefinition` + +### Alternatives Considered + +| Alternative | Pros | Cons | +|---|---|---| +| **Big-bang migration** (drop JSON, create tables, migrate in one step) | Clean; no dual-write complexity | Risk of data loss; requires full feature freeze; hard to validate | +| **Dual-write indefinitely** | Maximum safety | Permanent write overhead; divergence risk between JSON and rows | +| **Keep JSON as authoritative, add views** | No migration of writes | Doesn't solve the core query limitation; views over JSONB are slow | +| **Phased migration with immutable archive** (chosen) | Zero-downtime; incremental validation; rollback possible; preserves audit trail | Dual-write 
period adds complexity (bounded to weeks, not permanent) | + +--- + +## Topic 7: Schema Design for Future LLM NL-to-SQL + +### Decision + +**Design tables with self-documenting names, add PostgreSQL `COMMENT ON` annotations for every table and column, use consistent naming conventions, and avoid ambiguity between similarly-named entities.** + +### Rationale + +LLMs generating SQL (via text-to-SQL or NL-to-SQL) work by receiving the schema as context and mapping natural language to table/column references. The schema itself is the prompt. Research from the Spider benchmark (Yale), BIRD benchmark, and production NL-to-SQL systems (e.g., Vanna.ai, DataHerald) identifies these factors as most impactful: + +**1. Naming conventions that LLMs parse correctly:** + +| Current Name | Problem | Proposed Name | Why Better | +|---|---|---|---| +| `KDocument` | "K" is ambiguous to LLMs | `k1_document` | Explicitly says "K-1" | +| `KDocument.data` | "data" is the most generic possible name | `k1_document.raw_data_json` | Describes what it holds | +| `K1LineItem.amount` | Could be confused with Distribution.amount | `k1_line_item.reported_amount` | Disambiguates | +| `CellMapping` | "Cell" is a spreadsheet term, not a tax term | `k1_box_definition` | Domain-specific | +| `CellAggregationRule` | LLMs may not connect "cell" to K-1 boxes | `k1_aggregation_rule` | Clearer context | + +**Naming conventions to adopt:** +- `snake_case` for all table and column names (PostgreSQL convention; LLMs are trained on more snake_case SQL than camelCase SQL) +- Prefix K-1-specific tables with `k1_` to create a namespace +- Use `_id` suffix for all foreign keys +- Avoid abbreviations (`partnership_id` not `ptnr_id`) +- Use `_at` suffix for timestamps (`created_at`, `updated_at`) +- Use descriptive names over short names (`tax_year` not `yr`, `filing_status` not `status`) + +**2. 
PostgreSQL COMMENT annotations:** + +```sql +COMMENT ON TABLE k1_line_item IS 'Individual financial line item from an IRS Schedule K-1 (Form 1065). One row per box number per K-1 document.'; +COMMENT ON COLUMN k1_line_item.box_key IS 'IRS K-1 box identifier such as "1" for ordinary income, "9a" for long-term capital gains, or "20-A" for other information code A.'; +COMMENT ON COLUMN k1_line_item.reported_amount IS 'Dollar amount reported on this K-1 line item, in the partnership base currency. Negative values represent losses.'; +COMMENT ON TABLE k1_box_definition IS 'Reference table of IRS Schedule K-1 box definitions. Maps box identifiers to human-readable labels and categories.'; +``` + +LLM NL-to-SQL systems extract these comments as schema context. A model asked "what is total ordinary income?" can map "ordinary income" → `k1_box_definition.label = 'Ordinary business income (loss)'` → `box_key = '1'` → join to `k1_line_item`. + +**3. Avoiding ambiguity:** + +Current pain points for LLM-generated SQL: +- `Distribution.amount` vs `K1LineItem.amount` — an LLM asked "total distributions" might query the wrong table. Solution: `k1_line_item.reported_amount` vs `distribution.distribution_amount`. +- `Partnership` has `distributions`, `kDocuments`, `valuations` — naming all FK columns `partnership_id` is correct and expected by LLMs. +- `Entity` is overloaded (database entities, legal entities). The table comment must clarify: "A legal person or structure (trust, LLC, individual) that owns assets and receives K-1 allocations." + +**4. Schema metadata table for LLM context:** + +Consider a lightweight `schema_metadata` table or a markdown document that provides the LLM with: +- Table relationships in natural language +- Common query patterns with examples +- Business rules ("Box 19a distributions are allocated to entities by ownership percentage") +- Valid values for enum columns + +This is cheaper than fine-tuning and more maintainable than few-shot prompts. + +**5. 
Avoid patterns that confuse LLMs:** + +| Anti-pattern | Why It Confuses LLMs | Alternative | +|---|---|---| +| JSON columns for queryable data | LLMs generate `->` / `->>` operators inconsistently | Normalized columns | +| Composite primary keys | LLMs often forget one part of the key in JOINs | Surrogate UUID PK + unique constraint | +| Polymorphic FKs (one FK, multiple target tables) | LLMs can't determine which table to JOIN | Separate FK columns | +| Generic column names (`type`, `status`, `data`, `value`) | Ambiguous across tables | Prefix with table context (`filing_status`, `box_data_type`) | +| Soft deletes (`is_deleted`) | LLMs forget the `WHERE is_deleted = false` filter | Use `end_date IS NULL` pattern (already in use for memberships) | + +### Alternatives Considered + +| Alternative | Pros | Cons | +|---|---|---| +| **No schema changes for LLM** | No work | LLM accuracy drops significantly with ambiguous/generic names; JSONB columns are nearly unusable for NL-to-SQL | +| **Fine-tune LLM on this schema** | Can handle any naming convention | Expensive; needs retraining on every schema change; vendor lock-in | +| **RAG over schema docs** | Flexible; schema-aware | Still limited by underlying schema quality; garbage-in-garbage-out | +| **Self-documenting schema + COMMENT annotations** (chosen) | Works with any LLM; zero runtime cost; maintainable; improves human readability too | Requires discipline to maintain comments on schema changes | + +--- + +## Summary of Decisions + +| # | Topic | Decision | +|---|---|---| +| 1 | Wide vs Normalized | Normalized fact table for Part III financial data; JSON retained for Part I/II metadata | +| 2 | EAV vs Normalized | Hybrid: typed EAV fact table (`K1LineItem`) with reference dimension (`K1BoxDefinition`); uniform `DECIMAL` value type avoids classic EAV pitfalls | +| 3 | Financial fact tables | Star-schema-inspired design with `K1LineItem` as fact, `KDocument`/`Partnership`/`Entity` as dimensions | +| 4 | Source 
traceability | Per-line-item provenance (page, coordinates, confidence, raw text, user-edit flag); K1ImportSession.rawExtraction as immutable full extraction archive | +| 5 | Materialized views | Event-driven materialized views for cross-entity dashboards; regular queries for single-document access | +| 6 | Migration strategy | 3-phase: additive tables → dual-write → K1LineItem authoritative; JSON blob kept immutable forever | +| 7 | LLM NL-to-SQL | Self-documenting `snake_case` names, `COMMENT ON` annotations, disambiguation of similar columns, `k1_` table prefix namespace | diff --git a/specs/005-k1-parser-fix/plan.md b/specs/005-k1-parser-fix/plan.md index 37f413e15..c2ce3bfe1 100644 --- a/specs/005-k1-parser-fix/plan.md +++ b/specs/005-k1-parser-fix/plan.md @@ -1,40 +1,41 @@ # Implementation Plan: Fix K-1 PDF Parser — Position-Based Extraction -**Branch**: `005-k1-parser-fix` | **Date**: 2026-03-18 | **Spec**: [spec.md](spec.md) +**Branch**: `005-k1-parser-fix` | **Date**: 2026-03-20 | **Spec**: [spec.md](spec.md) **Input**: Feature specification from `/specs/005-k1-parser-fix/spec.md` **Note**: This template is filled in by the `/speckit.plan` command. See `.specify/templates/plan-template.md` for the execution workflow. ## Summary -Rewrite the K-1 PDF extractor from a broken regex-based label matcher to a position-based extraction engine using pdfjs-dist. The core approach: use `page.getTextContent()` to get all text items with (x, y) coordinates and font info, discriminate data values from template text by font, then map each data value to a K-1 form field based on position regions (bounding boxes). Supports Part III boxes 1-21 with subtype codes, Part I/II metadata, sections J/K/L/M/N, and checkboxes. Unmapped values go to a fallback list for manual user assignment. +Rewrite the K-1 PDF parser from regex-based label matching to position-based text extraction using `pdfjs-dist`. 
The current regex parser incorrectly matches cell numbers instead of actual data values. The new parser will use font discrimination (data fonts vs template fonts) and (x,y) coordinate mapping to bounding-box regions for each K-1 form field. This fixes extraction for all Part I/II metadata, Part III boxes 1-21 (including subtypes, multi-value fields, and SEE STMT references), checkboxes, and Sections J/K/L/M/N. The existing `PdfParseExtractor` already implements position-based extraction — this spec refines its accuracy and adds confidence scoring, unmapped item handling, and dynamic font identification. ## Technical Context -**Language/Version**: TypeScript 5.x (Node.js runtime) -**Primary Dependencies**: NestJS 11.x, pdfjs-dist 5.4.x (already installed via pdf-parse), pdf-parse 2.4.x (kept for `isDigitalK1` detection) -**Storage**: PostgreSQL via Prisma ORM (existing K1ImportSession, Document tables) -**Testing**: Jest (unit tests for extraction logic, position mapping, value parsing) -**Target Platform**: Node.js server (NestJS API), Angular 21 client (existing review UI) -**Project Type**: Web service (monorepo: api + common libs) -**Performance Goals**: < 5 seconds extraction for a single-page K-1 PDF -**Constraints**: Must preserve existing `K1Extractor` interface contract; no new npm dependencies (pdfjs-dist is already transitive) -**Scale/Scope**: Single-file parser rewrite + interface expansion in common lib; ~2 files modified, ~1 new file +**Language/Version**: TypeScript 5.x, Node.js ≥22.18.0 +**Primary Dependencies**: NestJS 11+, Angular 21+, pdfjs-dist (position-based text extraction), Prisma ORM +**Storage**: PostgreSQL (via Prisma), Redis (caching), filesystem (uploaded PDFs) +**Testing**: Jest (unit + integration) +**Target Platform**: Linux server (Docker) / local dev (Windows/macOS) +**Project Type**: Web application (Nx monorepo: api + client + common + ui) +**Performance Goals**: <5 seconds for single-page K-1 extraction (SC-009) 
+**Constraints**: Zero data loss during extraction (SC-007); preserve existing API contract (FR-025) +**Scale/Scope**: Single-user family office; ~10-50 K-1 PDFs per tax year ## Constitution Check _GATE: Must pass before Phase 0 research. Re-check after Phase 1 design._ -| Principle | Status | Notes | -|-----------|--------|-------| -| I. Nx Monorepo Structure | PASS | Changes in `apps/api` (extractor) and `libs/common` (interfaces). No new projects. | -| II. NestJS Module Pattern | PASS | PdfParseExtractor is already a `@Injectable()` provider in K1ImportModule. Rewriting internals only. | -| III. Prisma Data Layer | PASS | No schema changes. Existing tables sufficient. | -| IV. TypeScript Strict Conventions | PASS | Will follow `noUnusedLocals`, `noUnusedParameters`, path aliases. | -| V. Simplicity First | PASS | Rewriting one file, expanding one interface. No new architectural layers. | -| VI. Interface-First Design | PASS | K1ExtractedField interface expanded first, then implementation follows. 
| +| Gate | Rule | Status | Notes | +|------|------|--------|-------| +| Nx boundary | Features respect project boundaries (api/client/common/ui) | ✅ PASS | Parser in `@ghostfolio/api`, interfaces in `@ghostfolio/common`, UI in `@ghostfolio/client` | +| NestJS module pattern | Module + Controller + Service structure | ✅ PASS | `K1ImportModule` already exists with proper DI | +| Prisma data layer | No direct SQL; use PrismaService | ✅ PASS | All DB access via Prisma ORM | +| TypeScript strict | No unused locals/params, path aliases | ✅ PASS | Existing codebase conventions followed | +| Simplicity first | YAGNI, minimal abstractions | ✅ PASS | Modifying existing `PdfParseExtractor`, not adding new layers | +| Interface-first design | Shared interfaces in `@ghostfolio/common` | ✅ PASS | `K1ExtractionResult`, `K1ExtractedField`, `K1UnmappedItem` already defined | +| Max 3 Nx projects per feature | api + common typical | ✅ PASS | Touches api + common only (client UI already exists, no changes needed) | -No gate violations. Proceeding to Phase 0. +**All gates pass. 
No violations requiring justification.** ## Project Structure @@ -47,8 +48,7 @@ specs/005-k1-parser-fix/ ├── data-model.md # Phase 1 output ├── quickstart.md # Phase 1 output ├── contracts/ # Phase 1 output -│ └── extraction.md # Extractor interface contract -└── tasks.md # Phase 2 output (created by /speckit.tasks) +└── tasks.md # Phase 2 output (/speckit.tasks) ``` ### Source Code (repository root) @@ -56,27 +56,26 @@ specs/005-k1-parser-fix/ ```text apps/api/src/app/k1-import/ ├── extractors/ -│ ├── k1-extractor.interface.ts # Unchanged -│ ├── pdf-parse-extractor.ts # REWRITE: position-based extraction -│ ├── k1-position-regions.ts # NEW: bounding box definitions for K-1 form fields -│ ├── azure-extractor.ts # Unchanged -│ └── tesseract-extractor.ts # Unchanged -├── k1-import.module.ts # Unchanged -├── k1-import.service.ts # Minor: handle new subtype field in K1ExtractedField -├── k1-import.controller.ts # Unchanged -└── ... +│ ├── k1-extractor.interface.ts # K1Extractor contract (no changes) +│ ├── k1-position-regions.ts # MODIFY: refine bounding boxes, add tolerance config +│ ├── pdf-parse-extractor.ts # MODIFY: core rewrite — font discrimination, position mapping +│ ├── azure-extractor.ts # No changes (Tier 2) +│ └── tesseract-extractor.ts # No changes (Tier 2 fallback) +├── k1-import.service.ts # Minor: add warning generation for unmapped items +├── k1-import.controller.ts # No changes +├── k1-field-mapper.service.ts # Minor: handle new confidence levels +├── k1-confidence.service.ts # MODIFY: integrate position-match confidence +└── k1-import.module.ts # No changes libs/common/src/lib/interfaces/ -└── k1-import.interface.ts # MODIFY: add subtype, fieldCategory, isCheckbox to K1ExtractedField +└── k1-import.interface.ts # Minor: add fontName/position to K1UnmappedItem if needed -tests/ -└── apps/api/src/app/k1-import/ - └── extractors/ - └── pdf-parse-extractor.spec.ts # NEW: unit tests +prisma/ +└── schema.prisma # No changes (existing schema sufficient) 
``` -**Structure Decision**: Minimalist approach — rewrite one extractor file, add one position-region data file, expand one interface. Follows the existing module structure with no new architectural patterns. +**Structure Decision**: Existing Nx monorepo structure is used. The core change is within `apps/api/src/app/k1-import/extractors/` — specifically `pdf-parse-extractor.ts` and `k1-position-regions.ts`. No new modules, no new Nx projects. ## Complexity Tracking -No constitution violations. Table intentionally empty. +> No violations detected. All changes fit within existing module boundaries. diff --git a/tmp-check-users.mjs b/tmp-check-users.mjs new file mode 100644 index 000000000..be250484a --- /dev/null +++ b/tmp-check-users.mjs @@ -0,0 +1,21 @@ +import { PrismaClient } from '@prisma/client'; +const p = new PrismaClient(); + +// Delete all data in dependency order +await p.access.deleteMany(); +await p.order.deleteMany(); +await p.accountBalance.deleteMany(); +await p.account.deleteMany(); +await p.symbolProfile.deleteMany(); +await p.marketData.deleteMany(); +await p.settings.deleteMany(); +await p.subscription.deleteMany(); +await p.authDevice.deleteMany(); +await p.analytics.deleteMany(); +await p.user.deleteMany(); + +console.log('All users deleted.'); + +const users = await p.user.findMany({ select: { id: true, role: true } }); +console.log('USERS after delete:', JSON.stringify(users)); +await p.$disconnect(); diff --git a/tools/test-k1-parse.mjs b/tools/test-k1-parse.mjs index 6aac6e296..e9913a4b3 100644 --- a/tools/test-k1-parse.mjs +++ b/tools/test-k1-parse.mjs @@ -192,14 +192,16 @@ function assignItemsToRegions(items, regions) { // 1. 
Checkboxes (closest-center assignment) const checkboxRegions = K1_POSITION_REGIONS.filter(r => r.valueType === 'checkbox'); const cbAssignments = assignItemsToRegions(dataItems, checkboxRegions); +const checkedRegionIds = new Set(); for (const [region, item] of cbAssignments) { const isChecked = ['X', '✓', '✗'].includes(item.text.toUpperCase()); if (!isChecked) continue; + checkedRegionIds.add(region.fieldId); fields.push({ fieldId: region.fieldId, boxNumber: region.boxNumber, label: region.label, - rawValue: 'X', + rawValue: 'true', numericValue: null, fieldCategory: 'CHECKBOX', isCheckbox: true, @@ -209,6 +211,20 @@ for (const [region, item] of cbAssignments) { if (region.fieldId === 'FINAL_K1') metadata.isFinal = true; if (region.fieldId === 'AMENDED_K1') metadata.isAmended = true; } +// Emit false for unchecked checkbox regions +for (const region of checkboxRegions) { + if (checkedRegionIds.has(region.fieldId)) continue; + fields.push({ + fieldId: region.fieldId, + boxNumber: region.boxNumber, + label: region.label, + rawValue: 'false', + numericValue: null, + fieldCategory: 'CHECKBOX', + isCheckbox: true, + subtype: null + }); +} // 2. Part III — subtype regions first, then simple const partIIIRegions = K1_POSITION_REGIONS.filter(