Browse Source

feat(k1-import): position-based PDF extraction and field mapping

pull/6701/head
Robert Patch 2 months ago
parent
commit
92cca05cb7
  1. 18
      apps/api/src/app/cell-mapping/cell-mapping.controller.ts
  2. 238
      apps/api/src/app/cell-mapping/cell-mapping.service.ts
  3. 64
      apps/api/src/app/k1-import/extractors/k1-position-regions.ts
  4. 58
      apps/api/src/app/k1-import/extractors/pdf-parse-extractor.ts
  5. 2
      apps/api/src/app/k1-import/k1-aggregation.service.ts
  6. 23
      apps/api/src/app/k1-import/k1-field-mapper.service.ts
  7. 5
      apps/api/src/app/k1-import/k1-import.service.ts
  8. 51
      apps/client/src/app/pages/cell-mapping/cell-mapping-page.component.ts
  9. 44
      apps/client/src/app/pages/cell-mapping/cell-mapping-page.html
  10. 105
      apps/client/src/app/pages/cell-mapping/cell-mapping-page.scss
  11. 19
      apps/client/src/app/pages/k-documents/k-document-detail/k-document-detail.component.ts
  12. 14
      apps/client/src/app/pages/k-documents/k-document-detail/k-document-detail.html
  13. 2
      apps/client/src/app/pages/k-documents/k-documents-page.component.ts
  14. 30
      apps/client/src/app/pages/k1-import/k1-verification/k1-verification.component.ts
  15. 24
      apps/client/src/app/pages/k1-import/k1-verification/k1-verification.html
  16. 87
      apps/client/src/app/pages/k1-import/k1-verification/k1-verification.scss
  17. 4
      apps/client/src/app/services/family-office-data.service.ts
  18. 12
      apps/client/src/app/services/k1-import-data.service.ts
  19. 274
      k1-positions-dump.txt
  20. 555
      libs/ui/src/lib/k-document-form/k-document-form.component.ts
  21. 2
      prisma/schema.prisma
  22. 535
      specs/001-family-office-transform/research-normalized-k1-model.md
  23. 75
      specs/005-k1-parser-fix/plan.md
  24. 21
      tmp-check-users.mjs
  25. 18
      tools/test-k1-parse.mjs

18
apps/api/src/app/cell-mapping/cell-mapping.controller.ts

@ -6,6 +6,7 @@ import {
Controller,
Delete,
Get,
Patch,
Put,
Query,
UseGuards
@ -48,6 +49,7 @@ export class CellMappingController {
boxNumber: string;
label: string;
description?: string;
cellType?: string;
isCustom: boolean;
}>;
}
@ -71,6 +73,22 @@ export class CellMappingController {
return this.cellMappingService.resetMappings(partnershipId);
}
/**
 * PATCH /api/v1/cell-mapping/toggle-ignored
 *
 * Flips the isIgnored flag of a single cell mapping by delegating to
 * CellMappingService.toggleIgnored. The request body carries the
 * partnership id and the box number identifying the mapping.
 * Guarded by JWT auth and the updateKDocument permission.
 */
@HasPermission(permissions.updateKDocument)
@Patch('toggle-ignored')
@UseGuards(AuthGuard('jwt'), HasPermissionGuard)
public async toggleIgnored(
  @Body() data: { partnershipId: string; boxNumber: string }
) {
  const { partnershipId, boxNumber } = data;

  return this.cellMappingService.toggleIgnored(partnershipId, boxNumber);
}
/**
* GET /api/v1/cell-mapping/aggregation-rules
* Get aggregation rules for a partnership.

238
apps/api/src/app/cell-mapping/cell-mapping.service.ts

@ -3,41 +3,113 @@ import { PrismaService } from '@ghostfolio/api/services/prisma/prisma.service';
import { HttpException, Injectable, OnModuleInit } from '@nestjs/common';
import { StatusCodes } from 'http-status-codes';
/** Allowed cell types */
type CellType = 'number' | 'string' | 'percentage' | 'boolean';
/** Default IRS K-1 (Form 1065) cell mappings */
const IRS_DEFAULT_MAPPINGS: Array<{
boxNumber: string;
label: string;
description: string;
cellType: CellType;
sortOrder: number;
}> = [
{ boxNumber: '1', label: 'Ordinary business income (loss)', description: 'IRS Schedule K-1 Box 1', sortOrder: 1 },
{ boxNumber: '2', label: 'Net rental real estate income (loss)', description: 'IRS Schedule K-1 Box 2', sortOrder: 2 },
{ boxNumber: '3', label: 'Other net rental income (loss)', description: 'IRS Schedule K-1 Box 3', sortOrder: 3 },
{ boxNumber: '4', label: 'Guaranteed payments for services', description: 'IRS Schedule K-1 Box 4', sortOrder: 4 },
{ boxNumber: '4a', label: 'Guaranteed payments for capital', description: 'IRS Schedule K-1 Box 4a', sortOrder: 5 },
{ boxNumber: '4b', label: 'Total guaranteed payments', description: 'IRS Schedule K-1 Box 4b', sortOrder: 6 },
{ boxNumber: '5', label: 'Interest income', description: 'IRS Schedule K-1 Box 5', sortOrder: 7 },
{ boxNumber: '6a', label: 'Ordinary dividends', description: 'IRS Schedule K-1 Box 6a', sortOrder: 8 },
{ boxNumber: '6b', label: 'Qualified dividends', description: 'IRS Schedule K-1 Box 6b', sortOrder: 9 },
{ boxNumber: '6c', label: 'Dividend equivalents', description: 'IRS Schedule K-1 Box 6c', sortOrder: 10 },
{ boxNumber: '7', label: 'Royalties', description: 'IRS Schedule K-1 Box 7', sortOrder: 11 },
{ boxNumber: '8', label: 'Net short-term capital gain (loss)', description: 'IRS Schedule K-1 Box 8', sortOrder: 12 },
{ boxNumber: '9a', label: 'Net long-term capital gain (loss)', description: 'IRS Schedule K-1 Box 9a', sortOrder: 13 },
{ boxNumber: '9b', label: 'Collectibles (28%) gain (loss)', description: 'IRS Schedule K-1 Box 9b', sortOrder: 14 },
{ boxNumber: '9c', label: 'Unrecaptured section 1250 gain', description: 'IRS Schedule K-1 Box 9c', sortOrder: 15 },
{ boxNumber: '10', label: 'Net section 1231 gain (loss)', description: 'IRS Schedule K-1 Box 10', sortOrder: 16 },
{ boxNumber: '11', label: 'Other income (loss)', description: 'IRS Schedule K-1 Box 11', sortOrder: 17 },
{ boxNumber: '12', label: 'Section 179 deduction', description: 'IRS Schedule K-1 Box 12', sortOrder: 18 },
{ boxNumber: '13', label: 'Other deductions', description: 'IRS Schedule K-1 Box 13', sortOrder: 19 },
{ boxNumber: '14', label: 'Self-employment earnings (loss)', description: 'IRS Schedule K-1 Box 14', sortOrder: 20 },
{ boxNumber: '15', label: 'Credits', description: 'IRS Schedule K-1 Box 15', sortOrder: 21 },
{ boxNumber: '16', label: 'Foreign transactions', description: 'IRS Schedule K-1 Box 16', sortOrder: 22 },
{ boxNumber: '17', label: 'Alternative minimum tax (AMT) items', description: 'IRS Schedule K-1 Box 17', sortOrder: 23 },
{ boxNumber: '18', label: 'Tax-exempt income and nondeductible expenses', description: 'IRS Schedule K-1 Box 18', sortOrder: 24 },
{ boxNumber: '19a', label: 'Distributions — Cash and marketable securities', description: 'IRS Schedule K-1 Box 19a', sortOrder: 25 },
{ boxNumber: '19b', label: 'Distributions — Other property', description: 'IRS Schedule K-1 Box 19b', sortOrder: 26 },
{ boxNumber: '20', label: 'Other information', description: 'IRS Schedule K-1 Box 20', sortOrder: 27 },
{ boxNumber: '21', label: 'Foreign taxes paid or accrued', description: 'IRS Schedule K-1 Box 21', sortOrder: 28 }
// ── Header / Metadata ──────────────────────────────────────────────────
{ boxNumber: 'K1_DOCUMENT_ID', label: 'K-1 Document ID', description: 'Large-font ID at top right of K-1 form', cellType: 'string', sortOrder: 0 },
{ boxNumber: 'TAX_YEAR', label: 'Tax Year', description: 'Calendar year or tax year beginning/ending', cellType: 'string', sortOrder: 1 },
{ boxNumber: 'FINAL_K1', label: 'Final K-1', description: 'Check if this is a final K-1', cellType: 'boolean', sortOrder: 2 },
{ boxNumber: 'AMENDED_K1', label: 'Amended K-1', description: 'Check if this is an amended K-1', cellType: 'boolean', sortOrder: 3 },
// ── Part I — Information About the Partnership ─────────────────────────
{ boxNumber: 'A', label: "Partnership's EIN", description: 'Part I, Line A — Employer identification number', cellType: 'string', sortOrder: 10 },
{ boxNumber: 'B', label: "Partnership's name, address, city, state, ZIP", description: 'Part I, Line B', cellType: 'string', sortOrder: 11 },
{ boxNumber: 'C', label: 'IRS center where partnership filed return', description: 'Part I, Line C', cellType: 'string', sortOrder: 12 },
{ boxNumber: 'D', label: 'Publicly traded partnership (PTP)', description: 'Part I, Line D — Check if PTP', cellType: 'boolean', sortOrder: 13 },
// ── Part II — Information About the Partner ────────────────────────────
{ boxNumber: 'E', label: "Partner's identifying number", description: 'Part II, Line E — SSN or TIN', cellType: 'string', sortOrder: 20 },
{ boxNumber: 'F', label: "Partner's name, address, city, state, ZIP", description: 'Part II, Line F', cellType: 'string', sortOrder: 21 },
{ boxNumber: 'G_GENERAL', label: 'General partner or LLC member-manager', description: 'Part II, Line G — General partner checkbox', cellType: 'boolean', sortOrder: 22 },
{ boxNumber: 'G_LIMITED', label: 'Limited partner or other LLC member', description: 'Part II, Line G — Limited partner checkbox', cellType: 'boolean', sortOrder: 23 },
{ boxNumber: 'H1_DOMESTIC', label: 'Domestic partner', description: 'Part II, Line H1 — Domestic', cellType: 'boolean', sortOrder: 24 },
{ boxNumber: 'H1_FOREIGN', label: 'Foreign partner', description: 'Part II, Line H1 — Foreign', cellType: 'boolean', sortOrder: 25 },
{ boxNumber: 'H2', label: 'Disregarded entity (DE)', description: 'Part II, Line H2 — DE checkbox', cellType: 'boolean', sortOrder: 26 },
{ boxNumber: 'H2_TIN', label: 'Disregarded entity TIN', description: 'Part II, Line H2 — DE taxpayer ID', cellType: 'string', sortOrder: 27 },
{ boxNumber: 'I1', label: 'Type of entity', description: 'Part II, Line I1 — Entity type of partner', cellType: 'string', sortOrder: 28 },
{ boxNumber: 'I2', label: 'Retirement plan (IRA/SEP/Keogh)', description: 'Part II, Line I2 — Retirement plan checkbox', cellType: 'boolean', sortOrder: 29 },
// ── Section J — Partner's Share of Profit, Loss, and Capital ───────────
{ boxNumber: 'J_PROFIT_BEGIN', label: 'Profit — Beginning %', description: 'Section J — Profit share beginning of year', cellType: 'percentage', sortOrder: 30 },
{ boxNumber: 'J_PROFIT_END', label: 'Profit — Ending %', description: 'Section J — Profit share end of year', cellType: 'percentage', sortOrder: 31 },
{ boxNumber: 'J_LOSS_BEGIN', label: 'Loss — Beginning %', description: 'Section J — Loss share beginning of year', cellType: 'percentage', sortOrder: 32 },
{ boxNumber: 'J_LOSS_END', label: 'Loss — Ending %', description: 'Section J — Loss share end of year', cellType: 'percentage', sortOrder: 33 },
{ boxNumber: 'J_CAPITAL_BEGIN', label: 'Capital — Beginning %', description: 'Section J — Capital share beginning of year', cellType: 'percentage', sortOrder: 34 },
{ boxNumber: 'J_CAPITAL_END', label: 'Capital — Ending %', description: 'Section J — Capital share end of year', cellType: 'percentage', sortOrder: 35 },
{ boxNumber: 'J_SALE', label: 'Decrease due to sale', description: 'Section J — Check if decrease is due to sale', cellType: 'boolean', sortOrder: 36 },
{ boxNumber: 'J_EXCHANGE', label: 'Exchange of partnership interest', description: 'Section J — Check if exchange', cellType: 'boolean', sortOrder: 37 },
// ── Section K — Partner's Share of Liabilities ─────────────────────────
{ boxNumber: 'K_NONRECOURSE_BEGIN', label: 'Nonrecourse — Beginning', description: 'Section K — Nonrecourse liabilities beginning', cellType: 'number', sortOrder: 40 },
{ boxNumber: 'K_NONRECOURSE_END', label: 'Nonrecourse — Ending', description: 'Section K — Nonrecourse liabilities ending', cellType: 'number', sortOrder: 41 },
{ boxNumber: 'K_QUAL_NONRECOURSE_BEGIN', label: 'Qualified nonrecourse — Beginning', description: 'Section K — Qualified nonrecourse financing beginning', cellType: 'number', sortOrder: 42 },
{ boxNumber: 'K_QUAL_NONRECOURSE_END', label: 'Qualified nonrecourse — Ending', description: 'Section K — Qualified nonrecourse financing ending', cellType: 'number', sortOrder: 43 },
{ boxNumber: 'K_RECOURSE_BEGIN', label: 'Recourse — Beginning', description: 'Section K — Recourse liabilities beginning', cellType: 'number', sortOrder: 44 },
{ boxNumber: 'K_RECOURSE_END', label: 'Recourse — Ending', description: 'Section K — Recourse liabilities ending', cellType: 'number', sortOrder: 45 },
{ boxNumber: 'K2', label: 'Includes lower-tier partnership liabilities', description: 'Section K2 — Checkbox', cellType: 'boolean', sortOrder: 46 },
{ boxNumber: 'K3', label: 'Liability subject to guarantees', description: 'Section K3 — Checkbox', cellType: 'boolean', sortOrder: 47 },
// ── Section L — Partner's Capital Account Analysis ─────────────────────
{ boxNumber: 'L_BEG_CAPITAL', label: 'Beginning capital account', description: 'Section L — Beginning capital', cellType: 'number', sortOrder: 50 },
{ boxNumber: 'L_CONTRIBUTED', label: 'Capital contributed during year', description: 'Section L — Capital contributed', cellType: 'number', sortOrder: 51 },
{ boxNumber: 'L_CURR_YR_INCOME', label: 'Current year net income (loss)', description: 'Section L — Current year income/loss', cellType: 'number', sortOrder: 52 },
{ boxNumber: 'L_OTHER', label: 'Other increase (decrease)', description: 'Section L — Other adjustments', cellType: 'number', sortOrder: 53 },
{ boxNumber: 'L_WITHDRAWALS', label: 'Withdrawals and distributions', description: 'Section L — Withdrawals/distributions', cellType: 'number', sortOrder: 54 },
{ boxNumber: 'L_END_CAPITAL', label: 'Ending capital account', description: 'Section L — Ending capital', cellType: 'number', sortOrder: 55 },
// ── Section M — Contributed Property ───────────────────────────────────
{ boxNumber: 'M_YES', label: 'Contributed property with built-in gain/loss — Yes', description: 'Section M — Yes checkbox', cellType: 'boolean', sortOrder: 60 },
{ boxNumber: 'M_NO', label: 'Contributed property with built-in gain/loss — No', description: 'Section M — No checkbox', cellType: 'boolean', sortOrder: 61 },
// ── Section N — Net Unrecognized Section 704(c) ────────────────────────
{ boxNumber: 'N_BEGINNING', label: 'Net 704(c) gain/loss — Beginning', description: 'Section N — Beginning balance', cellType: 'number', sortOrder: 62 },
{ boxNumber: 'N_ENDING', label: 'Net 704(c) gain/loss — Ending', description: 'Section N — Ending balance', cellType: 'number', sortOrder: 63 },
// ── Part III — Partner's Share of Current Year Income, Deductions, etc. ─
{ boxNumber: '1', label: 'Ordinary business income (loss)', description: 'IRS Schedule K-1 Box 1', cellType: 'number', sortOrder: 100 },
{ boxNumber: '2', label: 'Net rental real estate income (loss)', description: 'IRS Schedule K-1 Box 2', cellType: 'number', sortOrder: 101 },
{ boxNumber: '3', label: 'Other net rental income (loss)', description: 'IRS Schedule K-1 Box 3', cellType: 'number', sortOrder: 102 },
{ boxNumber: '4', label: 'Guaranteed payments for services', description: 'IRS Schedule K-1 Box 4', cellType: 'number', sortOrder: 103 },
{ boxNumber: '4a', label: 'Guaranteed payments for capital', description: 'IRS Schedule K-1 Box 4a', cellType: 'number', sortOrder: 104 },
{ boxNumber: '4b', label: 'Total guaranteed payments', description: 'IRS Schedule K-1 Box 4b', cellType: 'number', sortOrder: 105 },
{ boxNumber: '5', label: 'Interest income', description: 'IRS Schedule K-1 Box 5', cellType: 'number', sortOrder: 106 },
{ boxNumber: '6a', label: 'Ordinary dividends', description: 'IRS Schedule K-1 Box 6a', cellType: 'number', sortOrder: 107 },
{ boxNumber: '6b', label: 'Qualified dividends', description: 'IRS Schedule K-1 Box 6b', cellType: 'number', sortOrder: 108 },
{ boxNumber: '6c', label: 'Dividend equivalents', description: 'IRS Schedule K-1 Box 6c', cellType: 'number', sortOrder: 109 },
{ boxNumber: '7', label: 'Royalties', description: 'IRS Schedule K-1 Box 7', cellType: 'number', sortOrder: 110 },
{ boxNumber: '8', label: 'Net short-term capital gain (loss)', description: 'IRS Schedule K-1 Box 8', cellType: 'number', sortOrder: 111 },
{ boxNumber: '9a', label: 'Net long-term capital gain (loss)', description: 'IRS Schedule K-1 Box 9a', cellType: 'number', sortOrder: 112 },
{ boxNumber: '9b', label: 'Collectibles (28%) gain (loss)', description: 'IRS Schedule K-1 Box 9b', cellType: 'number', sortOrder: 113 },
{ boxNumber: '9c', label: 'Unrecaptured section 1250 gain', description: 'IRS Schedule K-1 Box 9c', cellType: 'number', sortOrder: 114 },
{ boxNumber: '10', label: 'Net section 1231 gain (loss)', description: 'IRS Schedule K-1 Box 10', cellType: 'number', sortOrder: 115 },
{ boxNumber: '11', label: 'Other income (loss)', description: 'IRS Schedule K-1 Box 11', cellType: 'number', sortOrder: 116 },
{ boxNumber: '12', label: 'Section 179 deduction', description: 'IRS Schedule K-1 Box 12', cellType: 'number', sortOrder: 117 },
{ boxNumber: '13', label: 'Other deductions', description: 'IRS Schedule K-1 Box 13', cellType: 'number', sortOrder: 118 },
{ boxNumber: '14', label: 'Self-employment earnings (loss)', description: 'IRS Schedule K-1 Box 14', cellType: 'number', sortOrder: 119 },
{ boxNumber: '15', label: 'Credits', description: 'IRS Schedule K-1 Box 15', cellType: 'number', sortOrder: 120 },
{ boxNumber: '16', label: 'Foreign transactions', description: 'IRS Schedule K-1 Box 16', cellType: 'number', sortOrder: 121 },
{ boxNumber: '16_K3', label: 'Schedule K-3 is attached', description: 'IRS Schedule K-1 Box 16 K-3 checkbox', cellType: 'boolean', sortOrder: 122 },
{ boxNumber: '17', label: 'Alternative minimum tax (AMT) items', description: 'IRS Schedule K-1 Box 17', cellType: 'number', sortOrder: 123 },
{ boxNumber: '18', label: 'Tax-exempt income and nondeductible expenses', description: 'IRS Schedule K-1 Box 18', cellType: 'number', sortOrder: 124 },
{ boxNumber: '19', label: 'Distributions', description: 'IRS Schedule K-1 Box 19', cellType: 'number', sortOrder: 125 },
{ boxNumber: '19a', label: 'Distributions — Cash and marketable securities', description: 'IRS Schedule K-1 Box 19a', cellType: 'number', sortOrder: 126 },
{ boxNumber: '19b', label: 'Distributions — Other property', description: 'IRS Schedule K-1 Box 19b', cellType: 'number', sortOrder: 127 },
{ boxNumber: '20A', label: 'Other information — Code A', description: 'IRS Schedule K-1 Box 20, Code A', cellType: 'number', sortOrder: 128 },
{ boxNumber: '20B', label: 'Other information — Code B', description: 'IRS Schedule K-1 Box 20, Code B', cellType: 'number', sortOrder: 129 },
{ boxNumber: '20V', label: 'Other information — Code V', description: 'IRS Schedule K-1 Box 20, Code V', cellType: 'number', sortOrder: 130 },
{ boxNumber: '20_WILDCARD', label: 'Other information — Other codes', description: 'IRS Schedule K-1 Box 20, all other codes', cellType: 'number', sortOrder: 131 },
{ boxNumber: '21', label: 'Foreign taxes paid or accrued', description: 'IRS Schedule K-1 Box 21', cellType: 'number', sortOrder: 132 },
{ boxNumber: '22', label: 'More than one activity for at-risk purposes', description: 'IRS Schedule K-1 Box 22 — Checkbox', cellType: 'boolean', sortOrder: 133 },
{ boxNumber: '23', label: 'More than one activity for passive activity purposes', description: 'IRS Schedule K-1 Box 23 — Checkbox', cellType: 'boolean', sortOrder: 134 }
];
/** Default aggregation rules */
@ -77,26 +149,56 @@ export class CellMappingService implements OnModuleInit {
}
/**
* Seed default IRS cell mappings (partnershipId = null) if they don't exist
* Seed default IRS cell mappings (partnershipId = null) if they don't exist.
* Also adds any new default mappings that may have been introduced in updates.
*/
public async seedDefaultMappings() {
const existingCount = await this.prismaService.cellMapping.count({
const existing = await this.prismaService.cellMapping.findMany({
where: { partnershipId: null }
});
const existingBoxNumbers = new Set(existing.map((m) => m.boxNumber));
if (existingCount > 0) {
return;
}
const newMappings = IRS_DEFAULT_MAPPINGS.filter(
(m) => !existingBoxNumbers.has(m.boxNumber)
);
if (newMappings.length > 0) {
await this.prismaService.cellMapping.createMany({
data: IRS_DEFAULT_MAPPINGS.map((mapping) => ({
data: newMappings.map((mapping) => ({
...mapping,
partnershipId: null,
isCustom: false
isCustom: false,
isIgnored: false,
cellType: mapping.cellType
}))
});
}
// Backfill cellType on existing defaults that were seeded before the cellType column existed
for (const defaultMapping of IRS_DEFAULT_MAPPINGS) {
const existingRow = existing.find((e) => e.boxNumber === defaultMapping.boxNumber);
if (existingRow && (existingRow as any).cellType === 'number' && defaultMapping.cellType !== 'number') {
await this.prismaService.cellMapping.update({
where: { id: existingRow.id },
data: { cellType: defaultMapping.cellType }
});
}
}
// Clean up stale parent-level box "20" that was replaced by 20A/20B/20V/20_WILDCARD
const validBoxNumbers = new Set(IRS_DEFAULT_MAPPINGS.map((m) => m.boxNumber));
const staleDefaults = existing.filter(
(m) => !m.isCustom && !validBoxNumbers.has(m.boxNumber)
);
if (staleDefaults.length > 0) {
await this.prismaService.cellMapping.deleteMany({
where: {
id: { in: staleDefaults.map((m) => m.id) }
}
});
}
}
/**
* Seed default aggregation rules (partnershipId = null) if they don't exist
*/
@ -190,6 +292,7 @@ export class CellMappingService implements OnModuleInit {
boxNumber: string;
label: string;
description?: string;
cellType?: string;
isCustom: boolean;
}>
) {
@ -197,6 +300,16 @@ export class CellMappingService implements OnModuleInit {
for (let i = 0; i < mappings.length; i++) {
const mapping = mappings[i];
const updateData: Record<string, any> = {
label: mapping.label,
description: mapping.description || null,
isCustom: mapping.isCustom,
sortOrder: i + 1
};
if (mapping.cellType) {
updateData.cellType = mapping.cellType;
}
const result = await this.prismaService.cellMapping.upsert({
where: {
partnershipId_boxNumber: {
@ -204,17 +317,13 @@ export class CellMappingService implements OnModuleInit {
boxNumber: mapping.boxNumber
}
},
update: {
label: mapping.label,
description: mapping.description || null,
isCustom: mapping.isCustom,
sortOrder: i + 1
},
update: updateData,
create: {
partnershipId,
boxNumber: mapping.boxNumber,
label: mapping.label,
description: mapping.description || null,
cellType: mapping.cellType || 'number',
isCustom: mapping.isCustom,
sortOrder: i + 1
}
@ -237,6 +346,53 @@ export class CellMappingService implements OnModuleInit {
return { deleted: true, partnershipId };
}
/**
 * Toggle the isIgnored flag of a cell mapping for one partnership.
 *
 * Resolution order:
 * 1. A partnership-specific row (looked up via the composite key
 *    partnershipId_boxNumber) has its flag flipped in place.
 * 2. Otherwise, if a global default (partnershipId = null) exists for the
 *    box, it is copied into a new partnership-specific override whose
 *    isIgnored is the inverse of the default's current value.
 *
 * @param partnershipId Partnership whose mapping should be toggled.
 * @param boxNumber Box identifier of the mapping (e.g. '1', '20A').
 * @returns The updated or newly created cell mapping row.
 * @throws HttpException 400 when either identifier is missing,
 *         404 when neither an override nor a global default exists.
 */
public async toggleIgnored(
  partnershipId: string,
  boxNumber: string
) {
  // The controller passes the raw request body through unvalidated; reject
  // missing identifiers here instead of letting the query layer fail opaquely.
  if (!partnershipId || !boxNumber) {
    throw new HttpException(
      'partnershipId and boxNumber are required',
      StatusCodes.BAD_REQUEST
    );
  }

  // Check for partnership-specific mapping first
  const existing = await this.prismaService.cellMapping.findUnique({
    where: { partnershipId_boxNumber: { partnershipId, boxNumber } }
  });

  if (existing) {
    return this.prismaService.cellMapping.update({
      where: { id: existing.id },
      data: { isIgnored: !existing.isIgnored }
    });
  }

  // No partnership override — check for global default and create an override
  const globalMapping = await this.prismaService.cellMapping.findFirst({
    where: { partnershipId: null, boxNumber }
  });

  if (globalMapping) {
    return this.prismaService.cellMapping.create({
      data: {
        partnershipId,
        boxNumber: globalMapping.boxNumber,
        label: globalMapping.label,
        description: globalMapping.description,
        cellType: globalMapping.cellType,
        isCustom: false,
        // Invert the effective (inherited) state rather than forcing `true`,
        // so toggling an already-ignored default un-ignores it.
        isIgnored: !globalMapping.isIgnored,
        sortOrder: globalMapping.sortOrder
      }
    });
  }

  throw new HttpException(
    `No cell mapping found for box ${boxNumber}`,
    StatusCodes.NOT_FOUND
  );
}
/**
* Update aggregation rules for a partnership.
*/

64
apps/api/src/app/k1-import/extractors/k1-position-regions.ts

@ -56,6 +56,20 @@ export interface K1PositionRegion {
// Verified: FINAL_K1 'X' at (324.3, 746.2), TAX_YEAR '20'+'25' at (236.8/262.1, 727.7)
// ============================================================================
const HEADER_REGIONS: K1PositionRegion[] = [
{
fieldId: 'K1_DOCUMENT_ID',
boxNumber: 'K1_DOCUMENT_ID',
label: 'K-1 Document ID',
fieldCategory: 'METADATA',
valueType: 'text',
xMin: 500,
xMax: 580,
yMin: 750,
yMax: 770,
hasSubtype: false,
subtypeXMin: null,
subtypeXMax: null
},
{
fieldId: 'TAX_YEAR',
boxNumber: 'TAX_YEAR',
@ -1143,19 +1157,61 @@ const PART_III_RIGHT_REGIONS: K1PositionRegion[] = [
subtypeXMax: 510
},
{
fieldId: 'BOX_20',
boxNumber: '20',
label: 'Other information',
fieldId: 'BOX_20A',
boxNumber: '20A',
label: 'Other information — Code A',
fieldCategory: 'PART_III',
valueType: 'numeric',
xMin: 510,
xMax: 600,
yMin: 284,
yMin: 356,
yMax: 396,
hasSubtype: true,
subtypeXMin: 445,
subtypeXMax: 510
},
{
fieldId: 'BOX_20B',
boxNumber: '20B',
label: 'Other information — Code B',
fieldCategory: 'PART_III',
valueType: 'numeric',
xMin: 510,
xMax: 600,
yMin: 332,
yMax: 356,
hasSubtype: true,
subtypeXMin: 445,
subtypeXMax: 510
},
{
fieldId: 'BOX_20V',
boxNumber: '20V',
label: 'Other information — Code V',
fieldCategory: 'PART_III',
valueType: 'numeric',
xMin: 510,
xMax: 600,
yMin: 308,
yMax: 332,
hasSubtype: true,
subtypeXMin: 445,
subtypeXMax: 510
},
{
fieldId: 'BOX_20_WILDCARD',
boxNumber: '20_WILDCARD',
label: 'Other information — Other codes',
fieldCategory: 'PART_III',
valueType: 'numeric',
xMin: 510,
xMax: 600,
yMin: 284,
yMax: 308,
hasSubtype: true,
subtypeXMin: 445,
subtypeXMax: 510
},
{
fieldId: 'BOX_21',
boxNumber: '21',

58
apps/api/src/app/k1-import/extractors/pdf-parse-extractor.ts

@ -472,7 +472,7 @@ export class PdfParseExtractor implements K1Extractor {
if (!item) return;
const numericValue =
region.valueType === 'checkbox'
region.valueType === 'checkbox' || region.valueType === 'text'
? null
: this.parseNumericValue(item.text);
const { confidence, confidenceLevel } = this.computeConfidence(
@ -531,6 +531,21 @@ export class PdfParseExtractor implements K1Extractor {
for (const item of taxYearItems) {
item.matched = true;
}
// Also emit as a field so it appears in cell mapping
fields.push({
boxNumber: 'TAX_YEAR',
label: 'Tax Year',
customLabel: null,
rawValue: String(year),
numericValue: null,
confidence: 1.0,
confidenceLevel: 'HIGH',
isUserEdited: false,
isReviewed: false,
subtype: null,
fieldCategory: 'METADATA',
isCheckbox: false
});
}
} else if (taxYearItems.length === 1) {
const text = taxYearItems[0].text;
@ -538,6 +553,20 @@ export class PdfParseExtractor implements K1Extractor {
if (text.length === 4 && year >= 1900 && year <= 2100) {
metadata.taxYear = year;
taxYearItems[0].matched = true;
fields.push({
boxNumber: 'TAX_YEAR',
label: 'Tax Year',
customLabel: null,
rawValue: String(year),
numericValue: null,
confidence: 1.0,
confidenceLevel: 'HIGH',
isUserEdited: false,
isReviewed: false,
subtype: null,
fieldCategory: 'METADATA',
isCheckbox: false
});
}
}
@ -688,6 +717,9 @@ export class PdfParseExtractor implements K1Extractor {
const assignments = this.assignItemsToRegions(dataItems, checkboxRegions);
// Track which checkbox regions were matched (checked)
const checkedRegionIds = new Set<string>();
for (const [region, item] of assignments) {
const isChecked =
item.text.toUpperCase() === 'X' ||
@ -696,6 +728,8 @@ export class PdfParseExtractor implements K1Extractor {
if (!isChecked) continue;
checkedRegionIds.add(region.fieldId);
const { confidence, confidenceLevel } = this.computeConfidence(
item.x,
item.y,
@ -706,7 +740,7 @@ export class PdfParseExtractor implements K1Extractor {
boxNumber: region.boxNumber,
label: region.label,
customLabel: null,
rawValue: 'X',
rawValue: 'true',
numericValue: null,
confidence,
confidenceLevel,
@ -726,6 +760,26 @@ export class PdfParseExtractor implements K1Extractor {
metadata.isAmended = true;
}
}
// Emit false for all unchecked checkbox regions
for (const region of checkboxRegions) {
if (checkedRegionIds.has(region.fieldId)) continue;
fields.push({
boxNumber: region.boxNumber,
label: region.label,
customLabel: null,
rawValue: 'false',
numericValue: null,
confidence: 1.0,
confidenceLevel: 'HIGH',
isUserEdited: false,
isReviewed: false,
subtype: null,
fieldCategory: 'CHECKBOX',
isCheckbox: true
});
}
}
// ==========================================================================

2
apps/api/src/app/k1-import/k1-aggregation.service.ts

@ -50,7 +50,7 @@ export class K1AggregationService {
name: rule.name,
operation: rule.operation,
sourceCells,
computedValue: Math.round(computedValue * 100) / 100,
computedValue,
breakdown
};
});

23
apps/api/src/app/k1-import/k1-field-mapper.service.ts

@ -39,11 +39,20 @@ export class K1FieldMapperService {
const mapping = mappingMap.get(field.boxNumber);
if (mapping) {
// Skip ignored fields — they are filtered out of extraction results
if (mapping.isIgnored) {
this.logger.debug(
`Skipping ignored field: box ${field.boxNumber}`
);
continue;
}
mappedFields.push({
...field,
label: mapping.label,
customLabel: mapping.isCustom ? mapping.label : field.customLabel
});
customLabel: mapping.isCustom ? mapping.label : field.customLabel,
cellType: mapping.cellType
} as any);
} else {
// Field has a box number but no corresponding cell mapping
this.logger.debug(
@ -103,6 +112,11 @@ export class K1FieldMapperService {
const missingFields: K1ExtractedField[] = [];
for (const mapping of mappings) {
// Skip ignored mappings — don't generate empty placeholder rows
if (mapping.isIgnored) {
continue;
}
if (!existingBoxes.has(mapping.boxNumber)) {
missingFields.push({
boxNumber: mapping.boxNumber,
@ -113,8 +127,9 @@ export class K1FieldMapperService {
confidence: 1.0, // Empty fields have full confidence
confidenceLevel: 'HIGH',
isUserEdited: false,
isReviewed: true // No review needed for empty fields
});
isReviewed: true, // No review needed for empty fields
cellType: mapping.cellType
} as any);
}
}

5
apps/api/src/app/k1-import/k1-import.service.ts

@ -631,13 +631,14 @@ export class K1ImportService {
}
// Build KDocument data from verified fields
const kDocumentData: Record<string, number | null> = {};
const kDocumentData: Record<string, number | string | null> = {};
for (const field of verifiedData.fields) {
// For subtype fields (e.g., box 11 "ZZ*", box 20 "A"), create unique key
const key = field.subtype
? `${field.boxNumber}-${field.subtype}`
: field.boxNumber;
kDocumentData[key] = field.numericValue ?? null;
// Persist numericValue for numeric fields, rawValue for text/checkbox/string fields
kDocumentData[key] = field.numericValue ?? field.rawValue ?? null;
}
// FR-012: Create or update KDocument

51
apps/client/src/app/pages/cell-mapping/cell-mapping-page.component.ts

@ -24,10 +24,13 @@ interface EditableMapping {
boxNumber: string;
label: string;
description: string;
cellType: string;
isCustom: boolean;
isIgnored: boolean;
isEditing: boolean;
editLabel: string;
editDescription: string;
editCellType: string;
}
interface EditableRule {
@ -69,13 +72,21 @@ export class CellMappingPageComponent implements OnInit {
// New custom cell form
public newBoxNumber = '';
public newCellType = 'number';
public newLabel = '';
// New rule form
public newRuleName = '';
public newRuleSourceCells = '';
public displayedColumns = ['boxNumber', 'label', 'description', 'isCustom', 'actions'];
public cellTypeOptions = [
{ value: 'number', label: 'Number ($)' },
{ value: 'string', label: 'String' },
{ value: 'percentage', label: 'Percentage (%)' },
{ value: 'boolean', label: 'Boolean' }
];
public displayedColumns = ['boxNumber', 'label', 'description', 'cellType', 'isCustom', 'isIgnored', 'actions'];
public constructor(
private readonly changeDetectorRef: ChangeDetectorRef,
@ -101,12 +112,14 @@ export class CellMappingPageComponent implements OnInit {
mapping.isEditing = true;
mapping.editLabel = mapping.label;
mapping.editDescription = mapping.description;
mapping.editCellType = mapping.cellType;
this.changeDetectorRef.markForCheck();
}
/**
 * Commit the row's edit buffer (editLabel / editDescription / editCellType)
 * into its displayed fields, leave edit mode, and schedule a view refresh.
 */
public saveEditMapping(mapping: EditableMapping): void {
  Object.assign(mapping, {
    label: mapping.editLabel,
    description: mapping.editDescription,
    cellType: mapping.editCellType,
    isEditing: false
  });

  this.changeDetectorRef.markForCheck();
}
@ -116,6 +129,30 @@ export class CellMappingPageComponent implements OnInit {
this.changeDetectorRef.markForCheck();
}
/**
 * Ask the backend to toggle the ignored state of the given mapping for the
 * currently selected partnership. Does nothing when no partnership is
 * selected. On success the row's isIgnored is set from the response; on
 * failure the component's error message is set.
 */
public toggleIgnored(mapping: EditableMapping): void {
  const partnershipId = this.selectedPartnershipId;

  if (!partnershipId) {
    return;
  }

  const toggle$ = this.k1ImportDataService
    .toggleFieldIgnored({ partnershipId, boxNumber: mapping.boxNumber })
    .pipe(takeUntilDestroyed(this.destroyRef));

  toggle$.subscribe({
    next: (result: any) => {
      // Trust the server's value instead of flipping the flag locally
      mapping.isIgnored = result.isIgnored;
      this.changeDetectorRef.markForCheck();
    },
    error: (err) => {
      const serverMessage = err?.error?.message;
      this.error = serverMessage || 'Failed to toggle ignored state.';
      this.changeDetectorRef.markForCheck();
    }
  });
}
public addCustomCell(): void {
if (!this.newBoxNumber || !this.newLabel) {
return;
@ -125,14 +162,18 @@ export class CellMappingPageComponent implements OnInit {
boxNumber: this.newBoxNumber,
label: this.newLabel,
description: '',
cellType: this.newCellType,
isCustom: true,
isIgnored: false,
isEditing: false,
editLabel: '',
editDescription: ''
editDescription: '',
editCellType: this.newCellType
});
this.newBoxNumber = '';
this.newLabel = '';
this.newCellType = 'number';
this.changeDetectorRef.markForCheck();
}
@ -158,6 +199,7 @@ export class CellMappingPageComponent implements OnInit {
boxNumber: m.boxNumber,
label: m.label,
description: m.description,
cellType: m.cellType,
isCustom: m.isCustom
}))
})
@ -286,10 +328,13 @@ export class CellMappingPageComponent implements OnInit {
boxNumber: m.boxNumber,
label: m.label,
description: m.description || '',
cellType: m.cellType || 'number',
isCustom: m.isCustom,
isIgnored: m.isIgnored ?? false,
isEditing: false,
editLabel: '',
editDescription: ''
editDescription: '',
editCellType: m.cellType || 'number'
}));
this.changeDetectorRef.markForCheck();
},

44
apps/client/src/app/pages/cell-mapping/cell-mapping-page.html

@ -37,9 +37,7 @@
<th mat-header-cell *matHeaderCellDef>Label</th>
<td mat-cell *matCellDef="let row">
@if (row.isEditing) {
<mat-form-field appearance="outline" class="inline-edit">
<input matInput [(ngModel)]="row.editLabel" />
</mat-form-field>
<input class="cell-input" [(ngModel)]="row.editLabel" />
} @else {
{{ row.label }}
}
@ -51,9 +49,7 @@
<th mat-header-cell *matHeaderCellDef>Description</th>
<td mat-cell *matCellDef="let row">
@if (row.isEditing) {
<mat-form-field appearance="outline" class="inline-edit">
<input matInput [(ngModel)]="row.editDescription" />
</mat-form-field>
<input class="cell-input" [(ngModel)]="row.editDescription" />
} @else {
{{ row.description }}
}
@ -70,6 +66,34 @@
</td>
</ng-container>
<!-- Cell Type -->
<ng-container matColumnDef="cellType">
<th mat-header-cell *matHeaderCellDef>Type</th>
<td mat-cell *matCellDef="let row">
@if (row.isEditing) {
<mat-select class="type-select" [(ngModel)]="row.editCellType">
@for (opt of cellTypeOptions; track opt.value) {
<mat-option [value]="opt.value">{{ opt.label }}</mat-option>
}
</mat-select>
} @else {
<span class="type-badge type-{{ row.cellType }}">{{ row.cellType }}</span>
}
</td>
</ng-container>
<!-- Is Ignored -->
<ng-container matColumnDef="isIgnored">
<th mat-header-cell *matHeaderCellDef>Ignored</th>
<td mat-cell *matCellDef="let row">
<mat-checkbox
[checked]="row.isIgnored"
(change)="toggleIgnored(row)"
matTooltip="Ignored fields are excluded from scan results">
</mat-checkbox>
</td>
</ng-container>
<!-- Actions -->
<ng-container matColumnDef="actions">
<th mat-header-cell *matHeaderCellDef>Actions</th>
@ -108,6 +132,14 @@
<mat-label>Label</mat-label>
<input matInput [(ngModel)]="newLabel" placeholder="e.g. Other deductions" />
</mat-form-field>
<mat-form-field appearance="outline">
<mat-label>Type</mat-label>
<mat-select [(ngModel)]="newCellType">
@for (opt of cellTypeOptions; track opt.value) {
<mat-option [value]="opt.value">{{ opt.label }}</mat-option>
}
</mat-select>
</mat-form-field>
<button mat-stroked-button (click)="addCustomCell()" [disabled]="!newBoxNumber || !newLabel">
<mat-icon>add</mat-icon> Add Custom Cell
</button>

105
apps/client/src/app/pages/cell-mapping/cell-mapping-page.scss

@ -3,7 +3,7 @@
}
.container {
max-width: 960px;
max-width: 1400px;
margin: 0 auto;
padding: 1.5rem;
}
@ -56,11 +56,74 @@ h2 {
.mappings-table {
width: 100%;
margin-bottom: 1rem;
// Let browser auto-size columns based on content
th.mat-mdc-header-cell,
td.mat-mdc-cell {
padding: 8px 12px;
vertical-align: middle;
}
.mat-column-boxNumber {
white-space: nowrap;
font-family: 'Roboto Mono', monospace;
font-size: 0.8125rem;
color: rgba(0, 0, 0, 0.72);
width: 1%; // shrink-to-fit trick for auto layout
}
.mat-column-label {
white-space: nowrap;
}
.mat-column-description {
color: rgba(0, 0, 0, 0.6);
font-size: 0.8125rem;
// Allow wrapping for long descriptions
word-break: break-word;
}
.mat-column-cellType {
width: 1%;
white-space: nowrap;
}
.mat-column-isCustom {
width: 1%;
white-space: nowrap;
text-align: center;
}
.mat-column-isIgnored {
width: 1%;
white-space: nowrap;
text-align: center;
}
.mat-column-actions {
width: 1%;
white-space: nowrap;
}
}
.inline-edit {
// Lightweight inline cell inputs (no mat-form-field wrapper)
.cell-input {
width: 100%;
max-width: 200px;
min-width: 160px;
box-sizing: border-box;
padding: 6px 8px;
font-size: 0.8125rem;
font-family: inherit;
border: 1px solid rgba(0, 0, 0, 0.24);
border-radius: 4px;
background: transparent;
outline: none;
transition: border-color 0.15s ease;
&:focus {
border-color: var(--primary-color, #1976d2);
box-shadow: 0 0 0 1px var(--primary-color, #1976d2);
}
}
.custom-badge {
@ -68,6 +131,42 @@ h2 {
font-size: 20px;
}
// Pill-shaped badge that displays a cell's type
.type-badge {
  display: inline-block;
  padding: 2px 8px;
  border-radius: 10px;
  font-size: 0.75rem;
  font-weight: 500;
  text-transform: capitalize;
  white-space: nowrap;
}

// One colour pair per cell type: <type> <background> <text colour>
$type-badge-colors: (
  'number' #e3f2fd #1565c0,
  'string' #f3e5f5 #7b1fa2,
  'percentage' #e8f5e9 #2e7d32,
  'boolean' #fff3e0 #e65100
);

// Emits .type-number, .type-string, .type-percentage and .type-boolean
@each $kind, $background, $foreground in $type-badge-colors {
  .type-#{$kind} {
    background-color: $background;
    color: $foreground;
  }
}

// Inline type selector (no mat-form-field wrapper)
.type-select {
  min-width: 110px;
}
.add-row {
display: flex;
align-items: center;

19
apps/client/src/app/pages/k-documents/k-document-detail/k-document-detail.component.ts

@ -37,11 +37,26 @@ import { ActivatedRoute, Router, RouterModule } from '@angular/router';
export class KDocumentDetailComponent implements OnInit {
public aggregations: K1AggregationResult[] = [];
public boxColumns = ['boxNumber', 'value'];
public boxData: Array<{ boxNumber: string; value: number | null }> = [];
public boxData: Array<{ boxNumber: string; value: number | string | null }> = [];
public error: string | null = null;
public kDocument: any = null;
public kDocumentId: string;
/**
 * Section J box numbers (profit, loss and capital; beginning and ending)
 * whose values represent percentages.
 */
private static readonly PERCENTAGE_BOXES = new Set([
  'J_PROFIT_BEGIN',
  'J_PROFIT_END',
  'J_LOSS_BEGIN',
  'J_LOSS_END',
  'J_CAPITAL_BEGIN',
  'J_CAPITAL_END'
]);

/** Tells whether `boxNumber` is one of the Section J percentage boxes. */
public isPercentage(boxNumber: string): boolean {
  const { PERCENTAGE_BOXES: percentageBoxes } = KDocumentDetailComponent;

  return percentageBoxes.has(boxNumber);
}
/**
 * Type guard: true only when `value` is a JavaScript number.
 *
 * Numeric strings such as '12.5' are not numbers and yield false; NaN has
 * type 'number' and therefore yields true.
 *
 * @param value Any box value (number, string, null, ...).
 * @returns Whether `value` has the runtime type 'number'.
 */
public isNumeric(value: unknown): value is number {
  return typeof value === 'number';
}
public constructor(
private readonly activatedRoute: ActivatedRoute,
private readonly changeDetectorRef: ChangeDetectorRef,
@ -77,7 +92,7 @@ export class KDocumentDetailComponent implements OnInit {
this.boxData = Object.entries(data)
.map(([boxNumber, value]) => ({
boxNumber,
value: typeof value === 'number' ? value : null
value: value ?? null
}))
.sort((a, b) => this.compareBoxNumbers(a.boxNumber, b.boxNumber));
}

14
apps/client/src/app/pages/k-documents/k-document-detail/k-document-detail.html

@ -52,14 +52,14 @@
</mat-card-header>
<mat-card-content>
<div class="aggregation-value">
{{ agg.value | currency:'USD':'symbol':'1.2-2' }}
{{ agg.value | currency:'USD':'symbol':'1.2-6' }}
</div>
@if (agg.breakdown && agg.breakdown.length > 0) {
<div class="breakdown">
@for (item of agg.breakdown; track item.boxNumber) {
<div class="breakdown-row">
<span class="box-label">Box {{ item.boxNumber }}:</span>
<span class="box-value">{{ item.value | currency:'USD':'symbol':'1.2-2' }}</span>
<span class="box-value">{{ item.value | currency:'USD':'symbol':'1.2-6' }}</span>
</div>
}
</div>
@ -82,10 +82,14 @@
<ng-container matColumnDef="value">
<th mat-header-cell *matHeaderCellDef>Value</th>
<td mat-cell *matCellDef="let row">
@if (row.value !== null) {
{{ row.value | currency:'USD':'symbol':'1.2-2' }}
} @else {
@if (row.value === null || row.value === '') {
<span class="text-muted"></span>
} @else if (isPercentage(row.boxNumber)) {
{{ row.value | number:'1.2-6' }}%
} @else if (isNumeric(row.value)) {
{{ row.value | currency:'USD':'symbol':'1.2-6' }}
} @else {
{{ row.value }}
}
</td>
</ng-container>

2
apps/client/src/app/pages/k-documents/k-documents-page.component.ts

@ -129,7 +129,7 @@ export class KDocumentsPageComponent implements OnInit {
public onFormSubmit(event: {
filingStatus: string;
data: Record<string, number>;
data: Record<string, number | string | null>;
}): void {
if (this.editingDoc) {
this.familyOfficeDataService

30
apps/client/src/app/pages/k1-import/k1-verification/k1-verification.component.ts

@ -37,6 +37,8 @@ interface EditableField extends K1ExtractedField {
isEditing: boolean;
editValue: string;
editLabel: string;
cellType: string;
editCellType: string;
}
interface EditableUnmappedItem extends K1UnmappedItem {
@ -75,12 +77,20 @@ export class K1VerificationComponent implements OnInit {
public taxYear: number;
public unmappedItems: EditableUnmappedItem[] = [];
public cellTypeOptions = [
{ value: 'number', label: 'Number ($)' },
{ value: 'string', label: 'String' },
{ value: 'percentage', label: 'Percentage (%)' },
{ value: 'boolean', label: 'Boolean' }
];
// Column definitions for the fields table
public displayedColumns = [
'boxNumber',
'label',
'rawValue',
'numericValue',
'cellType',
'confidence',
'reviewed',
'actions'
@ -132,6 +142,7 @@ export class K1VerificationComponent implements OnInit {
field.isEditing = true;
field.editValue = field.rawValue;
field.editLabel = field.customLabel || field.label;
field.editCellType = field.cellType;
this.changeDetectorRef.markForCheck();
}
@ -142,17 +153,27 @@ export class K1VerificationComponent implements OnInit {
field.rawValue = field.editValue;
field.customLabel =
field.editLabel !== field.label ? field.editLabel : null;
field.cellType = field.editCellType;
field.isUserEdited = true;
field.isReviewed = true;
field.isEditing = false;
// Try to parse numeric value
// Parse value based on cell type
if (field.cellType === 'boolean') {
const lower = field.editValue.toLowerCase().trim();
field.numericValue = null;
field.rawValue = (lower === 'true' || lower === 'yes' || lower === '1' || lower === 'x') ? 'true' : 'false';
} else if (field.cellType === 'string') {
field.numericValue = null;
} else {
// number or percentage
const cleaned = field.editValue
.replace(/[$,]/g, '')
.replace(/[$,%]/g, '')
.replace(/\(([^)]+)\)/, '-$1')
.trim();
const parsed = parseFloat(cleaned);
field.numericValue = isNaN(parsed) ? null : parsed;
}
this.recalculateAggregations();
this.checkConfirmability();
@ -219,6 +240,7 @@ export class K1VerificationComponent implements OnInit {
customLabel: f.customLabel,
rawValue: f.rawValue,
numericValue: f.numericValue,
cellType: f.cellType,
confidence: f.confidence,
confidenceLevel: f.confidenceLevel,
isUserEdited: f.isUserEdited,
@ -300,7 +322,9 @@ export class K1VerificationComponent implements OnInit {
...f,
isEditing: false,
editValue: f.rawValue,
editLabel: f.customLabel || f.label
editLabel: f.customLabel || f.label,
cellType: (f as any).cellType || 'number',
editCellType: (f as any).cellType || 'number'
})
);

24
apps/client/src/app/pages/k1-import/k1-verification/k1-verification.html

@ -68,13 +68,31 @@
<th mat-header-cell *matHeaderCellDef>Parsed</th>
<td mat-cell *matCellDef="let field">
@if (field.numericValue !== null && field.numericValue !== undefined) {
{{ field.numericValue | number:'1.2-2' }}
{{ field.numericValue | number:'1.2-6' }}
} @else if (field.rawValue) {
{{ field.rawValue }}
} @else {
<span class="text-muted"></span>
}
</td>
</ng-container>
<!-- Cell Type Column -->
<ng-container matColumnDef="cellType">
<th mat-header-cell *matHeaderCellDef>Type</th>
<td mat-cell *matCellDef="let field">
@if (field.isEditing) {
<mat-select class="type-select" [(ngModel)]="field.editCellType">
@for (opt of cellTypeOptions; track opt.value) {
<mat-option [value]="opt.value">{{ opt.label }}</mat-option>
}
</mat-select>
} @else {
<span class="type-badge type-{{ field.cellType }}">{{ field.cellType }}</span>
}
</td>
</ng-container>
<!-- Confidence Column -->
<ng-container matColumnDef="confidence">
<th mat-header-cell *matHeaderCellDef>Confidence</th>
@ -152,7 +170,7 @@
<span class="ms-2">{{ item.rawValue }}</span>
@if (item.numericValue !== null) {
<small class="text-muted ms-1">
({{ item.numericValue | number:'1.2-2' }})
({{ item.numericValue | number:'1.2-6' }})
</small>
}
<small class="text-muted d-block">Page {{ item.pageNumber }}</small>
@ -207,7 +225,7 @@
</small>
</div>
<div class="aggregation-value">
<strong>{{ agg.computedValue | number:'1.2-2' }}</strong>
<strong>{{ agg.computedValue | number:'1.2-6' }}</strong>
</div>
</div>
}

87
apps/client/src/app/pages/k1-import/k1-verification/k1-verification.scss

@ -2,17 +2,61 @@
display: block;
}
// Column width hints give Label the most room, keep Box and actions compact
.mat-column-boxNumber {
width: 60px;
white-space: nowrap;
}
.mat-column-label {
min-width: 200px;
width: 28%;
}
.mat-column-rawValue {
min-width: 140px;
width: 18%;
}
.mat-column-numericValue {
white-space: nowrap;
}
.mat-column-cellType {
width: 1%;
white-space: nowrap;
}
.mat-column-confidence {
width: 90px;
white-space: nowrap;
}
.mat-column-reviewed {
width: 60px;
}
.mat-column-actions {
width: 80px;
white-space: nowrap;
}
.fields-section {
.table-responsive {
overflow-x: auto;
}
.compact-field {
width: 160px;
width: 100%;
min-width: 120px;
.mat-mdc-form-field-infix {
padding: 4px 0;
}
input.mat-mdc-input-element {
font-size: 0.8125rem;
}
}
.user-edited {
@ -75,7 +119,11 @@
}
.compact-field {
width: 140px;
width: 160px;
input.mat-mdc-input-element {
font-size: 0.8125rem;
}
}
}
@ -114,3 +162,38 @@
.actions {
padding-bottom: 2rem;
}
// Pill-shaped badge that displays a field's cell type
.type-badge {
  display: inline-block;
  padding: 2px 8px;
  border-radius: 10px;
  font-size: 0.7rem;
  font-weight: 500;
  text-transform: capitalize;
  white-space: nowrap;
}

// One colour pair per cell type: <type> <background> <text colour>
$type-badge-colors: (
  'number' #e3f2fd #1565c0,
  'string' #f3e5f5 #7b1fa2,
  'percentage' #e8f5e9 #2e7d32,
  'boolean' #fff3e0 #e65100
);

// Emits .type-number, .type-string, .type-percentage and .type-boolean
@each $kind, $background, $foreground in $type-badge-colors {
  .type-#{$kind} {
    background-color: $background;
    color: $foreground;
  }
}

// Inline type selector shown while a row is being edited
.type-select {
  min-width: 110px;
}

4
apps/client/src/app/services/family-office-data.service.ts

@ -306,7 +306,7 @@ export class FamilyOfficeDataService {
type: string;
taxYear: number;
filingStatus?: string;
data: Record<string, number>;
data: Record<string, number | string | null>;
}): Observable<IKDocument> {
return this.http.post<IKDocument>('/api/v1/k-document', data);
}
@ -332,7 +332,7 @@ export class FamilyOfficeDataService {
public updateKDocument(
kDocumentId: string,
data: { filingStatus?: string; data?: Record<string, number> }
data: { filingStatus?: string; data?: Record<string, number | string | null> }
): Observable<IKDocument> {
return this.http.put<IKDocument>(`/api/v1/k-document/${kDocumentId}`, data);
}

12
apps/client/src/app/services/k1-import-data.service.ts

@ -124,6 +124,7 @@ export class K1ImportDataService {
boxNumber: string;
label: string;
description?: string;
cellType?: string;
isCustom: boolean;
}>;
}): Observable<any[]> {
@ -142,6 +143,17 @@ export class K1ImportDataService {
});
}
/**
 * Flip the isIgnored flag of a single cell mapping, identified by
 * partnership and box number.
 * PATCH /api/v1/cell-mapping/toggle-ignored
 */
public toggleFieldIgnored(data: {
  partnershipId: string;
  boxNumber: string;
}): Observable<any> {
  const endpoint = '/api/v1/cell-mapping/toggle-ignored';
  const response$ = this.http.patch(endpoint, data);

  return response$;
}
// ── Aggregation Rule Endpoints ───────────────────────────────────
/**

274
k1-positions-dump.txt

@ -0,0 +1,274 @@
Pages: 1
=== PAGE 1 ===
DATA | x= 524.2 | y= 758.7 | font=monospace | "651123"
TMPL | x= 511 | y= 748.4 | font=serif | "OMB No. 1545-0123"
DATA | x= 324.3 | y= 746.2 | font=sans-serif | "X"
TMPL | x= 336 | y= 746 | font=serif | "Final K-1"
TMPL | x= 415.2 | y= 746 | font=serif | "Amended K-1"
TMPL | x= 36 | y= 735.8 | font=serif | "Schedule K-1"
TMPL | x= 319.1 | y= 734.9 | font=serif | "Part III"
TMPL | x= 360 | y= 735.4 | font=serif | "Partner’s Share of Current Year Income,"
DATA | x= 236.8 | y= 727.7 | font=sans-serif | "20"
DATA | x= 262.1 | y= 727.7 | font=sans-serif | "25"
TMPL | x= 36 | y= 723.8 | font=serif | "(Form 1065)"
TMPL | x= 360 | y= 723.4 | font=serif | "Deductions, Credits, and Other Items"
TMPL | x= 36 | y= 713.5 | font=serif | "Department of the Treasury"
TMPL | x= 318.5 | y= 712 | font=serif | "1"
TMPL | x= 334.2 | y= 712 | font=serif | "Ordinary business income (loss)"
TMPL | x= 453.3 | y= 712 | font=serif | "14"
TMPL | x= 471 | y= 712 | font=serif | "Self-employment earnings (loss)"
TMPL | x= 36 | y= 705.5 | font=serif | "Internal Revenue Service"
TMPL | x= 193.2 | y= 703 | font=serif | "For calendar year 2025, or tax year"
TMPL | x= 71 | y= 686 | font=serif | "beginning"
TMPL | x= 129.6 | y= 687 | font=serif | "/"
DATA | x= 151.2 | y= 686.8 | font=sans-serif | "/"
DATA | x= 159 | y= 686.8 | font=sans-serif | "2025"
TMPL | x= 195.6 | y= 686 | font=serif | "ending"
TMPL | x= 244.8 | y= 687 | font=serif | "/"
TMPL | x= 266.4 | y= 687 | font=serif | "/"
TMPL | x= 318.5 | y= 688 | font=serif | "2"
TMPL | x= 333.2 | y= 688 | font=serif | "Net rental real estate income (loss)"
TMPL | x= 36 | y= 669.6 | font=serif | "Partner’s Share of Income, Deductions,"
TMPL | x= 318.5 | y= 664 | font=serif | "3"
TMPL | x= 334.2 | y= 664.1 | font=serif | "Other net rental income (loss)"
TMPL | x= 453.3 | y= 664 | font=serif | "15"
TMPL | x= 471 | y= 664 | font=serif | "Credits"
TMPL | x= 36 | y= 656.6 | font=serif | "Credits, etc."
TMPL | x= 215.2 | y= 656.8 | font=serif | "See separate instructions."
TMPL | x= 48.4 | y= 638.9 | font=serif | "Part I"
TMPL | x= 86.4 | y= 638.9 | font=serif | "Information About the Partnership"
TMPL | x= 316.4 | y= 640 | font=serif | "4a"
TMPL | x= 334.2 | y= 640 | font=serif | "Guaranteed payments for services"
TMPL | x= 40.8 | y= 626 | font=serif | "A"
TMPL | x= 316.3 | y= 616 | font=serif | "4b"
TMPL | x= 334.2 | y= 616 | font=serif | "Guaranteed payments for capital"
TMPL | x= 453.3 | y= 616 | font=serif | "16"
TMPL | x= 472 | y= 616 | font=serif | "Schedule K-3 is attached if"
TMPL | x= 472 | y= 606 | font=serif | "checked"
TMPL | x= 504 | y= 606 | font=serif | "."
TMPL | x= 516 | y= 606 | font=serif | "."
TMPL | x= 528 | y= 606 | font=serif | "."
TMPL | x= 540 | y= 606 | font=serif | "."
TMPL | x= 552 | y= 606 | font=serif | "."
TMPL | x= 40.7 | y= 602 | font=serif | "B"
DATA | x= 563.3 | y= 603.8 | font=sans-serif | "X"
TMPL | x= 316.4 | y= 592 | font=serif | "4c"
TMPL | x= 334.2 | y= 592 | font=serif | "Total guaranteed payments"
TMPL | x= 453.3 | y= 592 | font=serif | "17"
TMPL | x= 471 | y= 592 | font=serif | "Alternative minimum tax (AMT) items"
TMPL | x= 318.5 | y= 568 | font=serif | "5"
TMPL | x= 334.2 | y= 568 | font=serif | "Interest income"
TMPL | x= 40.6 | y= 554.5 | font=serif | "C"
TMPL | x= 58.4 | y= 554.5 | font=serif | "IRS center where partnership filed return:"
DATA | x= 185.4 | y= 553.7 | font=sans-serif | "E-FILE"
TMPL | x= 40.6 | y= 543 | font=serif | "D"
TMPL | x= 72 | y= 543 | font=serif | "Check if this is a publicly traded partnership (PTP)"
TMPL | x= 316.4 | y= 544 | font=serif | "6a"
TMPL | x= 334.2 | y= 544 | font=serif | "Ordinary dividends"
TMPL | x= 46.9 | y= 530.9 | font=serif | "Part II"
TMPL | x= 86.4 | y= 530.9 | font=serif | "Information About the Partner"
TMPL | x= 40.9 | y= 518 | font=serif | "E"
TMPL | x= 316.3 | y= 520 | font=serif | "6b"
TMPL | x= 334.2 | y= 520 | font=serif | "Qualified dividends"
TMPL | x= 453.3 | y= 520 | font=serif | "18"
TMPL | x= 471 | y= 520 | font=serif | "Tax-exempt income and"
TMPL | x= 471 | y= 511.6 | font=serif | "nondeductible expenses"
TMPL | x= 41.1 | y= 494 | font=serif | "F"
TMPL | x= 316.4 | y= 496 | font=serif | "6c"
TMPL | x= 334.2 | y= 496 | font=serif | "Dividend equivalents"
TMPL | x= 318.5 | y= 472 | font=serif | "7"
TMPL | x= 334.2 | y= 472 | font=serif | "Royalties"
TMPL | x= 40.5 | y= 447 | font=serif | "G"
TMPL | x= 72 | y= 446.6 | font=serif | "General partner or LLC"
DATA | x= 180.3 | y= 446.6 | font=sans-serif | "X"
TMPL | x= 194.4 | y= 446.6 | font=serif | "Limited partner or other LLC"
TMPL | x= 318.5 | y= 448 | font=serif | "8"
TMPL | x= 334.2 | y= 448 | font=serif | "Net short-term capital gain (loss)"
TMPL | x= 72 | y= 438.2 | font=serif | "member-manager"
TMPL | x= 194.4 | y= 438.2 | font=serif | "member"
TMPL | x= 453.3 | y= 436 | font=serif | "19"
TMPL | x= 471 | y= 436 | font=serif | "Distributions"
TMPL | x= 38.7 | y= 423 | font=serif | "H1"
DATA | x= 58 | y= 422.9 | font=sans-serif | "X"
TMPL | x= 72 | y= 422 | font=serif | "Domestic partner"
TMPL | x= 194.4 | y= 422 | font=serif | "Foreign partner"
TMPL | x= 316.4 | y= 424 | font=serif | "9a"
TMPL | x= 334.2 | y= 424 | font=serif | "Net long-term capital gain (loss)"
DATA | x= 455.2 | y= 423.2 | font=sans-serif | "A"
DATA | x= 530.6 | y= 422 | font=sans-serif | "4,493,757"
TMPL | x= 38.7 | y= 411 | font=serif | "H2"
DATA | x= 57.9 | y= 410.5 | font=sans-serif | "X"
TMPL | x= 72 | y= 410 | font=serif | "If the partner is a disregarded entity (DE), enter the partner’s:"
TMPL | x= 57.6 | y= 398.1 | font=serif | "TIN"
TMPL | x= 144 | y= 398.1 | font=serif | "Name"
TMPL | x= 316.3 | y= 400 | font=serif | "9b"
TMPL | x= 334.2 | y= 400 | font=serif | "Collectibles (28%) gain (loss)"
TMPL | x= 40.2 | y= 386 | font=serif | "I1"
TMPL | x= 57.6 | y= 387 | font=serif | "What type of entity is this partner?"
TMPL | x= 453.3 | y= 388 | font=serif | "20"
TMPL | x= 471 | y= 388 | font=serif | "Other information"
TMPL | x= 40.2 | y= 374 | font=serif | "I2"
TMPL | x= 57.6 | y= 374 | font=serif | "If this partner is a retirement plan (IRA/SEP/Keogh/etc.), check here"
TMPL | x= 276 | y= 374 | font=serif | "."
TMPL | x= 316.4 | y= 376 | font=serif | "9c"
TMPL | x= 334.2 | y= 376 | font=serif | "Unrecaptured section 1250 gain"
TMPL | x= 41.3 | y= 362 | font=serif | "J"
TMPL | x= 57.6 | y= 362 | font=serif | "Partner’s share of profit, loss, and capital (see instructions):"
DATA | x= 455.2 | y= 362.8 | font=sans-serif | "A"
DATA | x= 525.6 | y= 362.8 | font=sans-serif | "SEE STMT"
TMPL | x= 110 | y= 352.5 | font=serif | "Beginning"
TMPL | x= 229.8 | y= 352.5 | font=serif | "Ending"
TMPL | x= 316.5 | y= 352 | font=serif | "10"
TMPL | x= 334.2 | y= 352 | font=serif | "Net section 1231 gain (loss)"
TMPL | x= 57.6 | y= 338 | font=serif | "Profit"
DATA | x= 139.1 | y= 339.1 | font=sans-serif | "3.032900"
TMPL | x= 183.7 | y= 338 | font=serif | "%"
DATA | x= 250.1 | y= 339.1 | font=sans-serif | "0.000000"
TMPL | x= 291.7 | y= 338 | font=serif | "%"
DATA | x= 455.2 | y= 338.5 | font=sans-serif | "B"
DATA | x= 525.6 | y= 339 | font=sans-serif | "SEE STMT"
TMPL | x= 57.6 | y= 326 | font=serif | "Loss"
DATA | x= 139.1 | y= 326.1 | font=sans-serif | "3.032900"
TMPL | x= 183.7 | y= 326 | font=serif | "%"
DATA | x= 250.1 | y= 326.1 | font=sans-serif | "0.000000"
TMPL | x= 291.7 | y= 326 | font=serif | "%"
TMPL | x= 316.5 | y= 328 | font=serif | "11"
TMPL | x= 334.2 | y= 328 | font=serif | "Other income (loss)"
TMPL | x= 57.6 | y= 314.5 | font=serif | "Capital"
DATA | x= 139.1 | y= 314.2 | font=sans-serif | "3.032900"
TMPL | x= 183.7 | y= 314 | font=serif | "%"
DATA | x= 250.1 | y= 314.2 | font=sans-serif | "0.000000"
TMPL | x= 291.7 | y= 314 | font=serif | "%"
DATA | x= 314.2 | y= 314.4 | font=sans-serif | "ZZ*"
DATA | x= 403.9 | y= 314.4 | font=sans-serif | "(409,615)"
DATA | x= 455.2 | y= 315.6 | font=sans-serif | "V"
DATA | x= 525.6 | y= 314.6 | font=sans-serif | "SEE STMT"
TMPL | x= 57.6 | y= 302 | font=serif | "Check if decrease is due to:"
TMPL | x= 72 | y= 290 | font=serif | "Sale"
TMPL | x= 89.9 | y= 290 | font=serif | "or"
TMPL | x= 115.2 | y= 290 | font=serif | "Exchange of partnership interest. See instructions."
DATA | x= 456.4 | y= 291.3 | font=sans-serif | "*"
DATA | x= 525.6 | y= 290.3 | font=sans-serif | "SEE STMT"
TMPL | x= 38.7 | y= 278 | font=serif | "K1"
TMPL | x= 57.6 | y= 278 | font=serif | "Partner’s share of liabilities:"
TMPL | x= 316.5 | y= 280 | font=serif | "12"
TMPL | x= 334.2 | y= 280 | font=serif | "Section 179 deduction"
TMPL | x= 453.3 | y= 280 | font=serif | "21"
TMPL | x= 471 | y= 280 | font=serif | "Foreign taxes paid or accrued"
TMPL | x= 160.6 | y= 268.5 | font=serif | "Beginning"
TMPL | x= 251.2 | y= 268.5 | font=serif | "Ending"
DATA | x= 456.4 | y= 267.1 | font=sans-serif | "*"
DATA | x= 555.6 | y= 266.1 | font=sans-serif | "196"
TMPL | x= 57.6 | y= 254 | font=serif | "Nonrecourse"
TMPL | x= 108 | y= 254 | font=serif | "."
TMPL | x= 120 | y= 254 | font=serif | "."
TMPL | x= 134.9 | y= 254 | font=serif | "$"
DATA | x= 180.8 | y= 254.5 | font=sans-serif | "498,211"
TMPL | x= 221.3 | y= 254 | font=serif | "$"
TMPL | x= 316.5 | y= 256 | font=serif | "13"
TMPL | x= 334.2 | y= 256 | font=serif | "Other deductions"
TMPL | x= 57.6 | y= 238.4 | font=serif | "Qualified nonrecourse"
TMPL | x= 57.6 | y= 230 | font=serif | "financing"
TMPL | x= 96 | y= 230 | font=serif | "."
TMPL | x= 108 | y= 230 | font=serif | "."
TMPL | x= 120 | y= 230 | font=serif | "."
TMPL | x= 134.9 | y= 230 | font=serif | "$"
TMPL | x= 221.3 | y= 230 | font=serif | "$"
TMPL | x= 57.6 | y= 218.5 | font=serif | "Recourse"
TMPL | x= 96 | y= 218.5 | font=serif | "."
TMPL | x= 108 | y= 218.5 | font=serif | "."
TMPL | x= 120 | y= 218.5 | font=serif | "."
TMPL | x= 134.9 | y= 218.5 | font=serif | "$"
TMPL | x= 221.3 | y= 218.5 | font=serif | "$"
TMPL | x= 38.7 | y= 207 | font=serif | "K2"
TMPL | x= 57.6 | y= 207 | font=serif | "Check this box if item K1 includes liability amounts from lower-tier partnerships"
DATA | x= 294.9 | y= 205.8 | font=sans-serif | "X"
TMPL | x= 38.7 | y= 195 | font=serif | "K3"
TMPL | x= 57.6 | y= 195 | font=serif | "Check if any of the above liability is subject to guarantees or other"
TMPL | x= 57.6 | y= 186 | font=serif | "payment obligations by the partner. See instructions"
TMPL | x= 228 | y= 186 | font=serif | "."
TMPL | x= 240 | y= 186 | font=serif | "."
TMPL | x= 252 | y= 186 | font=serif | "."
TMPL | x= 264 | y= 186 | font=serif | "."
TMPL | x= 276 | y= 186 | font=serif | "."
TMPL | x= 316.2 | y= 183.1 | font=serif | "22"
TMPL | x= 345.6 | y= 183 | font=serif | "More than one activity for at-risk purposes*"
TMPL | x= 41.1 | y= 170 | font=serif | "L"
TMPL | x= 122.3 | y= 170.5 | font=serif | "Partner’s Capital Account Analysis"
TMPL | x= 316.2 | y= 171.1 | font=serif | "23"
TMPL | x= 345.6 | y= 171 | font=serif | "More than one activity for passive activity purposes*"
TMPL | x= 57.6 | y= 158 | font=serif | "Beginning capital account"
TMPL | x= 156 | y= 158 | font=serif | "."
TMPL | x= 168 | y= 158 | font=serif | "."
TMPL | x= 180 | y= 158 | font=serif | "."
TMPL | x= 189.5 | y= 157.6 | font=serif | "$"
DATA | x= 257.8 | y= 157.4 | font=sans-serif | "4,903,568"
TMPL | x= 316.6 | y= 158.6 | font=serif | "*See attached statement for additional information."
TMPL | x= 57.6 | y= 146 | font=serif | "Capital contributed during the year"
TMPL | x= 168 | y= 146 | font=serif | "."
TMPL | x= 180 | y= 146 | font=serif | "."
TMPL | x= 189.5 | y= 145.6 | font=serif | "$"
TMPL | x= 57.6 | y= 134 | font=serif | "Current year net income (loss)"
TMPL | x= 156 | y= 134 | font=serif | "."
TMPL | x= 168 | y= 134 | font=serif | "."
TMPL | x= 180 | y= 134 | font=serif | "."
TMPL | x= 189.5 | y= 133.6 | font=serif | "$"
DATA | x= 259.3 | y= 133.7 | font=sans-serif | "(409,811)"
TMPL | x= 57.6 | y= 122 | font=serif | "Other increase (decrease) (attach explanation)"
TMPL | x= 189.5 | y= 121.6 | font=serif | "$"
TMPL | x= 57.6 | y= 110 | font=serif | "Withdrawals and distributions"
TMPL | x= 156 | y= 110 | font=serif | "."
TMPL | x= 168 | y= 110 | font=serif | "."
TMPL | x= 180 | y= 110 | font=serif | "."
TMPL | x= 189.5 | y= 109.6 | font=serif | "$"
TMPL | x= 195.4 | y= 110.5 | font=serif | "("
DATA | x= 257.8 | y= 109.4 | font=sans-serif | "4,493,757"
TMPL | x= 300.4 | y= 110.5 | font=serif | ")"
TMPL | x= 57.6 | y= 99 | font=serif | "Ending capital account"
TMPL | x= 144 | y= 99 | font=serif | "."
TMPL | x= 156 | y= 99 | font=serif | "."
TMPL | x= 168 | y= 99 | font=serif | "."
TMPL | x= 180 | y= 99 | font=serif | "."
TMPL | x= 189.5 | y= 97.6 | font=serif | "$"
TMPL | x= 40 | y= 86 | font=serif | "M"
TMPL | x= 58.4 | y= 86 | font=serif | "Did the partner contribute property with a built-in gain (loss)?"
TMPL | x= 72 | y= 74 | font=serif | "Yes"
DATA | x= 101.2 | y= 74.2 | font=sans-serif | "X"
TMPL | x= 115.2 | y= 74 | font=serif | "No"
TMPL | x= 136.8 | y= 74 | font=serif | "If “Yes,” attach statement. See instructions."
TMPL | x= 40.6 | y= 62 | font=serif | "N"
TMPL | x= 70.1 | y= 62 | font=serif | "Partner’s Share of Net Unrecognized Section 704(c) Gain or (Loss)"
TMPL | x= 323 | y= 61.3 | font=serif | "For IRS Use Only"
TMPL | x= 57.6 | y= 51 | font=serif | "Beginning"
TMPL | x= 96 | y= 51 | font=serif | "."
TMPL | x= 108 | y= 51 | font=serif | "."
TMPL | x= 120 | y= 51 | font=serif | "."
TMPL | x= 132 | y= 51 | font=serif | "."
TMPL | x= 144 | y= 51 | font=serif | "."
TMPL | x= 156 | y= 51 | font=serif | "."
TMPL | x= 168 | y= 51 | font=serif | "."
TMPL | x= 180 | y= 51 | font=serif | "."
TMPL | x= 189.1 | y= 51 | font=serif | "$"
DATA | x= 271.5 | y= 49.7 | font=sans-serif | "(5,373)"
TMPL | x= 57.6 | y= 39 | font=serif | "Ending"
TMPL | x= 84 | y= 39 | font=serif | "."
TMPL | x= 96 | y= 39 | font=serif | "."
TMPL | x= 108 | y= 39 | font=serif | "."
TMPL | x= 120 | y= 39 | font=serif | "."
TMPL | x= 132 | y= 39 | font=serif | "."
TMPL | x= 144 | y= 39 | font=serif | "."
TMPL | x= 156 | y= 39 | font=serif | "."
TMPL | x= 168 | y= 39 | font=serif | "."
TMPL | x= 180 | y= 39 | font=serif | "."
TMPL | x= 189.1 | y= 39 | font=serif | "$"
TMPL | x= 36 | y= 26 | font=serif | "For Paperwork Reduction Act Notice, see the Instructions for Form 1065."
TMPL | x= 283.9 | y= 26 | font=serif | "www.irs.gov/Form1065"
TMPL | x= 362.7 | y= 26 | font=serif | "Cat. No. 11394R"
TMPL | x= 419.6 | y= 26 | font=serif | "Schedule K-1 (Form 1065) 2025"
TMPL | x= 524.9 | y= 26 | font=serif | "Created 2/26/25"
DATA | x= 285.6 | y= 5.5 | font=sans-serif | "Page 2 of 31"
DATA | x= 92.1 | y= 2.8 | font=sans-serif | "(409,811)"
Done.

555
libs/ui/src/lib/k-document-form/k-document-form.component.ts

@ -1,5 +1,3 @@
import type { K1Data } from '@ghostfolio/common/interfaces';
import { CommonModule } from '@angular/common';
import {
ChangeDetectionStrategy,
@ -9,123 +7,179 @@ import {
OnChanges,
Output
} from '@angular/core';
import {
FormControl,
FormGroup,
ReactiveFormsModule,
Validators
} from '@angular/forms';
import { FormsModule } from '@angular/forms';
import { MatButtonModule } from '@angular/material/button';
import { MatCheckboxModule } from '@angular/material/checkbox';
import { MatFormFieldModule } from '@angular/material/form-field';
import { MatIconModule } from '@angular/material/icon';
import { MatInputModule } from '@angular/material/input';
import { MatSelectModule } from '@angular/material/select';
import { MatTooltipModule } from '@angular/material/tooltip';
const K1_FIELD_CONFIG: {
key: keyof K1Data;
// ── Field types ──────────────────────────────────────────────────────────
type FieldType = 'currency' | 'percent' | 'text' | 'checkbox';
interface K1FieldDef {
boxNumber: string;
label: string;
section: string;
}[] = [
{
key: 'ordinaryIncome',
label: 'Ordinary Income (Box 1)',
section: 'Income'
},
{
key: 'netRentalIncome',
label: 'Net Rental Income (Box 2)',
section: 'Income'
},
{
key: 'otherRentalIncome',
label: 'Other Rental Income (Box 3)',
section: 'Income'
},
{
key: 'guaranteedPayments',
label: 'Guaranteed Payments (Box 4)',
section: 'Income'
},
{
key: 'interestIncome',
label: 'Interest Income (Box 5)',
section: 'Income'
},
{ key: 'dividends', label: 'Dividends (Box 6a)', section: 'Income' },
{
key: 'qualifiedDividends',
label: 'Qualified Dividends (Box 6b)',
section: 'Income'
},
{ key: 'royalties', label: 'Royalties (Box 7)', section: 'Income' },
{
key: 'capitalGainLossShortTerm',
label: 'Short-Term Capital Gain/Loss (Box 8)',
section: 'Capital'
},
type: FieldType;
}
/** One collapsible group of fields rendered by the K-1 document form. */
interface K1Section {
// Heading text shown in the section header; also used as the @for track key.
title: string;
// Optional secondary text; rendered in the header only when present.
description?: string;
// Fields rendered inside the section body, in array order.
fields: K1FieldDef[];
// When true the section body is not rendered; clicking the header toggles it.
collapsed?: boolean;
}
// ── Section definitions matching the real IRS Schedule K-1 ───────────────
const K1_SECTIONS: K1Section[] = [
{
key: 'capitalGainLossLongTerm',
label: 'Long-Term Capital Gain/Loss (Box 9a)',
section: 'Capital'
title: 'Header / Metadata',
fields: [
{ boxNumber: 'K1_DOCUMENT_ID', label: 'K-1 Document ID', type: 'text' },
{ boxNumber: 'TAX_YEAR', label: 'Tax Year', type: 'text' },
{ boxNumber: 'FINAL_K1', label: 'Final K-1', type: 'checkbox' },
{ boxNumber: 'AMENDED_K1', label: 'Amended K-1', type: 'checkbox' }
],
collapsed: true
},
{
key: 'unrecaptured1250Gain',
label: 'Unrecaptured Section 1250 Gain (Box 9b)',
section: 'Capital'
title: 'Part I — Partnership Information',
fields: [
{ boxNumber: 'A', label: "A — Partnership's EIN", type: 'text' },
{ boxNumber: 'B', label: "B — Partnership's name / address", type: 'text' },
{ boxNumber: 'C', label: 'C — IRS center where return filed', type: 'text' },
{ boxNumber: 'D', label: 'D — Publicly traded partnership', type: 'checkbox' }
],
collapsed: true
},
{
key: 'section1231GainLoss',
label: 'Section 1231 Gain/Loss (Box 10)',
section: 'Capital'
title: 'Part II — Partner Information',
fields: [
{ boxNumber: 'E', label: "E — Partner's identifying number", type: 'text' },
{ boxNumber: 'F', label: "F — Partner's name / address", type: 'text' },
{ boxNumber: 'G_GENERAL', label: 'G — General partner / LLC member-manager', type: 'checkbox' },
{ boxNumber: 'G_LIMITED', label: 'G — Limited partner / other LLC member', type: 'checkbox' },
{ boxNumber: 'H1_DOMESTIC', label: 'H1 — Domestic partner', type: 'checkbox' },
{ boxNumber: 'H1_FOREIGN', label: 'H1 — Foreign partner', type: 'checkbox' },
{ boxNumber: 'H2', label: 'H2 — Disregarded entity', type: 'checkbox' },
{ boxNumber: 'H2_TIN', label: 'H2 — DE taxpayer ID', type: 'text' },
{ boxNumber: 'I1', label: 'I1 — Type of entity', type: 'text' },
{ boxNumber: 'I2', label: 'I2 — IRA / SEP / Keogh', type: 'checkbox' }
],
collapsed: true
},
{ key: 'otherIncome', label: 'Other Income (Box 11)', section: 'Capital' },
{
key: 'section179Deduction',
label: 'Section 179 Deduction (Box 12)',
section: 'Deductions'
title: "Section J — Partner's Share of Profit, Loss & Capital",
fields: [
{ boxNumber: 'J_PROFIT_BEGIN', label: 'Profit — Beginning', type: 'percent' },
{ boxNumber: 'J_PROFIT_END', label: 'Profit — Ending', type: 'percent' },
{ boxNumber: 'J_LOSS_BEGIN', label: 'Loss — Beginning', type: 'percent' },
{ boxNumber: 'J_LOSS_END', label: 'Loss — Ending', type: 'percent' },
{ boxNumber: 'J_CAPITAL_BEGIN', label: 'Capital — Beginning', type: 'percent' },
{ boxNumber: 'J_CAPITAL_END', label: 'Capital — Ending', type: 'percent' },
{ boxNumber: 'J_SALE', label: 'Decrease due to sale', type: 'checkbox' },
{ boxNumber: 'J_EXCHANGE', label: 'Exchange of partnership interest', type: 'checkbox' }
]
},
{
key: 'otherDeductions',
label: 'Other Deductions (Box 13)',
section: 'Deductions'
title: "Section K — Partner's Share of Liabilities",
fields: [
{ boxNumber: 'K_NONRECOURSE_BEGIN', label: 'Nonrecourse — Beginning', type: 'currency' },
{ boxNumber: 'K_NONRECOURSE_END', label: 'Nonrecourse — Ending', type: 'currency' },
{ boxNumber: 'K_QUAL_NONRECOURSE_BEGIN', label: 'Qualified nonrecourse — Beginning', type: 'currency' },
{ boxNumber: 'K_QUAL_NONRECOURSE_END', label: 'Qualified nonrecourse — Ending', type: 'currency' },
{ boxNumber: 'K_RECOURSE_BEGIN', label: 'Recourse — Beginning', type: 'currency' },
{ boxNumber: 'K_RECOURSE_END', label: 'Recourse — Ending', type: 'currency' },
{ boxNumber: 'K2', label: 'Includes lower-tier partnership liabilities', type: 'checkbox' },
{ boxNumber: 'K3', label: 'Liability subject to guarantees', type: 'checkbox' }
]
},
{
key: 'selfEmploymentEarnings',
label: 'Self-Employment Earnings (Box 14)',
section: 'Other'
title: "Section L — Partner's Capital Account",
fields: [
{ boxNumber: 'L_BEG_CAPITAL', label: 'Beginning capital account', type: 'currency' },
{ boxNumber: 'L_CONTRIBUTED', label: 'Capital contributed during year', type: 'currency' },
{ boxNumber: 'L_CURR_YR_INCOME', label: 'Current year net income (loss)', type: 'currency' },
{ boxNumber: 'L_OTHER', label: 'Other increase (decrease)', type: 'currency' },
{ boxNumber: 'L_WITHDRAWALS', label: 'Withdrawals & distributions', type: 'currency' },
{ boxNumber: 'L_END_CAPITAL', label: 'Ending capital account', type: 'currency' }
]
},
{
key: 'foreignTaxesPaid',
label: 'Foreign Taxes Paid (Box 16)',
section: 'Other'
title: 'Sections M & N',
fields: [
{ boxNumber: 'M_YES', label: 'M — Contributed property: Yes', type: 'checkbox' },
{ boxNumber: 'M_NO', label: 'M — Contributed property: No', type: 'checkbox' },
{ boxNumber: 'N_BEGINNING', label: 'N — Net 704(c) gain/loss: Beginning', type: 'currency' },
{ boxNumber: 'N_ENDING', label: 'N — Net 704(c) gain/loss: Ending', type: 'currency' }
]
},
{
key: 'alternativeMinimumTaxItems',
label: 'AMT Items (Box 17)',
section: 'Other'
title: 'Part III — Income & Gains (Boxes 1–11)',
fields: [
{ boxNumber: '1', label: '1 — Ordinary business income (loss)', type: 'currency' },
{ boxNumber: '2', label: '2 — Net rental real estate income (loss)', type: 'currency' },
{ boxNumber: '3', label: '3 — Other net rental income (loss)', type: 'currency' },
{ boxNumber: '4', label: '4 — Guaranteed payments for services', type: 'currency' },
{ boxNumber: '4a', label: '4a — Guaranteed payments for capital', type: 'currency' },
{ boxNumber: '4b', label: '4b — Total guaranteed payments', type: 'currency' },
{ boxNumber: '5', label: '5 — Interest income', type: 'currency' },
{ boxNumber: '6a', label: '6a — Ordinary dividends', type: 'currency' },
{ boxNumber: '6b', label: '6b — Qualified dividends', type: 'currency' },
{ boxNumber: '6c', label: '6c — Dividend equivalents', type: 'currency' },
{ boxNumber: '7', label: '7 — Royalties', type: 'currency' },
{ boxNumber: '8', label: '8 — Net short-term capital gain (loss)', type: 'currency' },
{ boxNumber: '9a', label: '9a — Net long-term capital gain (loss)', type: 'currency' },
{ boxNumber: '9b', label: '9b — Collectibles (28%) gain (loss)', type: 'currency' },
{ boxNumber: '9c', label: '9c — Unrecaptured §1250 gain', type: 'currency' },
{ boxNumber: '10', label: '10 — Net §1231 gain (loss)', type: 'currency' },
{ boxNumber: '11', label: '11 — Other income (loss)', type: 'currency' }
]
},
{
key: 'distributionsCash',
label: 'Cash Distributions (Box 19a)',
section: 'Distributions'
title: 'Part III — Deductions & Credits (Boxes 12–18)',
fields: [
{ boxNumber: '12', label: '12 — §179 deduction', type: 'currency' },
{ boxNumber: '13', label: '13 — Other deductions', type: 'currency' },
{ boxNumber: '14', label: '14 — Self-employment earnings (loss)', type: 'currency' },
{ boxNumber: '15', label: '15 — Credits', type: 'currency' },
{ boxNumber: '16', label: '16 — Foreign transactions', type: 'currency' },
{ boxNumber: '16_K3', label: '16 — Schedule K-3 attached', type: 'checkbox' },
{ boxNumber: '17', label: '17 — AMT items', type: 'currency' },
{ boxNumber: '18', label: '18 — Tax-exempt income / nondeductible expenses', type: 'currency' }
]
},
{
key: 'distributionsProperty',
label: 'Property Distributions (Box 19b)',
section: 'Distributions'
title: 'Part III — Distributions & Other (Boxes 19–23)',
fields: [
{ boxNumber: '19', label: '19 — Distributions', type: 'currency' },
{ boxNumber: '19a', label: '19a — Cash & marketable securities', type: 'currency' },
{ boxNumber: '19b', label: '19b — Other property', type: 'currency' },
{ boxNumber: '20A', label: '20A — Other information: Code A', type: 'currency' },
{ boxNumber: '20B', label: '20B — Other information: Code B', type: 'currency' },
{ boxNumber: '20V', label: '20V — Other information: Code V', type: 'currency' },
{ boxNumber: '20_WILDCARD', label: '20 — Other information: Other codes', type: 'currency' },
{ boxNumber: '21', label: '21 — Foreign taxes paid or accrued', type: 'currency' },
{ boxNumber: '22', label: '22 — At-risk: more than one activity', type: 'checkbox' },
{ boxNumber: '23', label: '23 — Passive: more than one activity', type: 'checkbox' }
]
}
];
const SECTIONS = ['Income', 'Capital', 'Deductions', 'Other', 'Distributions'];
@Component({
changeDetection: ChangeDetectionStrategy.OnPush,
imports: [
CommonModule,
FormsModule,
MatButtonModule,
MatCheckboxModule,
MatFormFieldModule,
MatIconModule,
MatInputModule,
MatSelectModule,
ReactiveFormsModule
MatTooltipModule
],
selector: 'gf-k-document-form',
standalone: true,
@ -135,41 +189,166 @@ const SECTIONS = ['Income', 'Capital', 'Deductions', 'Other', 'Distributions'];
display: block;
}
.section-title {
font-size: 14px;
.form-header {
display: flex;
align-items: center;
justify-content: space-between;
flex-wrap: wrap;
gap: 12px;
margin-bottom: 20px;
}
/* Collapsible sections */
.k1-section {
margin-bottom: 12px;
border: 1px solid rgba(0, 0, 0, 0.08);
border-radius: 8px;
overflow: hidden;
}
.section-header {
display: flex;
align-items: center;
gap: 8px;
padding: 10px 16px;
background: rgba(0, 0, 0, 0.03);
cursor: pointer;
user-select: none;
font-weight: 500;
color: rgba(var(--dark-primary-text), 0.7);
margin: 16px 0 8px;
padding-bottom: 4px;
border-bottom: 1px solid rgba(var(--dark-dividers), 0.12);
font-size: 14px;
transition: background 0.15s;
}
.section-header:hover {
background: rgba(0, 0, 0, 0.06);
}
.section-header mat-icon {
font-size: 18px;
width: 18px;
height: 18px;
transition: transform 0.2s;
}
.section-header mat-icon.expanded {
transform: rotate(90deg);
}
.section-header .section-desc {
font-weight: 400;
font-size: 12px;
color: rgba(0, 0, 0, 0.5);
margin-left: auto;
}
.section-body {
padding: 12px 16px 4px;
}
/* Two-column grid */
.fields-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
gap: 0 16px;
grid-template-columns: 1fr 1fr;
gap: 2px 24px;
}
@media (max-width: 700px) {
.fields-grid {
grid-template-columns: 1fr;
}
}
/* Field rows */
.field-row {
display: flex;
align-items: center;
gap: 8px;
padding: 4px 0;
min-height: 34px;
}
.status-row {
.field-label {
flex: 1 1 auto;
font-size: 13px;
color: rgba(0, 0, 0, 0.72);
line-height: 1.3;
min-width: 0;
}
.field-input {
flex: 0 0 140px;
display: flex;
gap: 16px;
margin-bottom: 16px;
align-items: center;
}
.field-input input {
width: 100%;
box-sizing: border-box;
padding: 5px 8px;
font-size: 13px;
font-family: 'Roboto Mono', monospace;
border: 1px solid rgba(0, 0, 0, 0.18);
border-radius: 4px;
background: transparent;
outline: none;
text-align: right;
transition: border-color 0.15s;
}
.field-input input:focus {
border-color: #1976d2;
box-shadow: 0 0 0 1px #1976d2;
}
.field-input input.text-input {
text-align: left;
font-family: inherit;
}
.field-input .unit-suffix {
font-size: 12px;
color: rgba(0, 0, 0, 0.45);
margin-left: 3px;
flex-shrink: 0;
}
.field-input .unit-prefix {
font-size: 12px;
color: rgba(0, 0, 0, 0.45);
margin-right: 3px;
flex-shrink: 0;
}
.field-input input.is-zero {
color: rgba(0, 0, 0, 0.3);
}
/* Checkbox row */
.field-row-checkbox {
cursor: pointer;
}
.actions {
.field-row-checkbox .cb-label {
font-size: 13px;
color: rgba(0, 0, 0, 0.72);
}
/* Footer */
.form-footer {
display: flex;
justify-content: flex-end;
gap: 8px;
margin-top: 16px;
margin-top: 20px;
padding-top: 12px;
border-top: 1px solid rgba(0, 0, 0, 0.08);
}
`
],
template: `
<form [formGroup]="form" (ngSubmit)="onSubmit()">
<div class="status-row">
<mat-form-field>
<div class="form-header">
<mat-form-field style="min-width: 180px">
<mat-label>Filing Status</mat-label>
<mat-select formControlName="filingStatus">
<mat-select [(ngModel)]="filingStatusValue">
<mat-option value="DRAFT">Draft</mat-option>
<mat-option value="ESTIMATED">Estimated</mat-option>
<mat-option value="FINAL">Final</mat-option>
@ -177,93 +356,175 @@ const SECTIONS = ['Income', 'Capital', 'Deductions', 'Other', 'Distributions'];
</mat-form-field>
</div>
@for (section of sections; track section) {
<div class="section-title">{{ section }}</div>
@for (section of sections; track section.title) {
<div class="k1-section">
<div class="section-header" (click)="section.collapsed = !section.collapsed">
<mat-icon [class.expanded]="!section.collapsed">chevron_right</mat-icon>
<span>{{ section.title }}</span>
@if (section.description) {
<span class="section-desc">{{ section.description }}</span>
}
</div>
@if (!section.collapsed) {
<div class="section-body">
<div class="fields-grid">
@for (field of getFieldsForSection(section); track field.key) {
<mat-form-field>
<mat-label>{{ field.label }}</mat-label>
<input matInput type="number" [formControlName]="field.key" />
</mat-form-field>
@for (field of section.fields; track field.boxNumber) {
@if (field.type === 'checkbox') {
<div class="field-row field-row-checkbox">
<mat-checkbox
[checked]="isChecked(field.boxNumber)"
(change)="setCheckbox(field.boxNumber, $event.checked)">
<span class="cb-label">{{ field.label }}</span>
</mat-checkbox>
</div>
} @else if (field.type === 'text') {
<div class="field-row">
<span class="field-label">{{ field.label }}</span>
<div class="field-input">
<input class="text-input"
[value]="getTextValue(field.boxNumber)"
(input)="setTextValue(field.boxNumber, $event)"
placeholder="—" />
</div>
</div>
} @else if (field.type === 'percent') {
<div class="field-row">
<span class="field-label">{{ field.label }}</span>
<div class="field-input">
<input type="number" step="any"
[value]="getNumericDisplay(field.boxNumber)"
[class.is-zero]="isZero(field.boxNumber)"
(input)="setNumericValue(field.boxNumber, $event)"
placeholder="0" />
<span class="unit-suffix">%</span>
</div>
</div>
} @else {
<div class="field-row">
<span class="field-label">{{ field.label }}</span>
<div class="field-input">
<span class="unit-prefix">$</span>
<input type="number" step="any"
[value]="getNumericDisplay(field.boxNumber)"
[class.is-zero]="isZero(field.boxNumber)"
(input)="setNumericValue(field.boxNumber, $event)"
placeholder="0" />
</div>
</div>
}
}
</div>
</div>
}
</div>
}
<div class="actions">
<button mat-button type="button" (click)="cancelled.emit()">
Cancel
</button>
<button
color="primary"
mat-flat-button
type="submit"
[disabled]="!form.valid"
>
<div class="form-footer">
<button mat-button type="button" (click)="cancelled.emit()">Cancel</button>
<button mat-flat-button color="primary" (click)="onSubmit()">
{{ isEditMode ? 'Update' : 'Create' }}
</button>
</div>
</form>
`
})
export class GfKDocumentFormComponent implements OnChanges {
@Input() public data: K1Data | null = null;
@Input() public data: Record<string, number | string | null> | null = null;
@Input() public filingStatus: string = 'DRAFT';
@Input() public isEditMode: boolean = false;
@Output() public cancelled = new EventEmitter<void>();
@Output() public submitted = new EventEmitter<{
filingStatus: string;
data: Record<string, number>;
data: Record<string, number | string | null>;
}>();
public form: FormGroup;
public sections = SECTIONS;
public filingStatusValue = 'DRAFT';
public sections: K1Section[] = [];
/** Internal data store keyed by boxNumber */
private values: Record<string, number | string | null> = {};
public constructor() {
const controls: Record<string, FormControl> = {
filingStatus: new FormControl('DRAFT', Validators.required)
};
this.sections = K1_SECTIONS.map((s) => ({
...s,
fields: [...s.fields],
collapsed: s.collapsed ?? false
}));
}
/**
 * Re-synchronises the local editing state from the component inputs.
 * The filing status falls back to 'DRAFT' when the input is empty, and the
 * internal value store becomes a shallow copy of `data` (or an empty object
 * when no data is bound), so edits never mutate the caller's object.
 */
public ngOnChanges(): void {
  this.filingStatusValue = this.filingStatus || 'DRAFT';
  this.values = this.data ? { ...this.data } : {};
}
for (const field of K1_FIELD_CONFIG) {
controls[field.key] = new FormControl(0);
// ── Value accessors ────────────────────────────────────────────────────
/**
 * Tells whether the checkbox for `boxNumber` should render as ticked.
 * Only the stored values 'true', 1 and '1' count as checked; anything else
 * (including a missing entry) is unchecked.
 */
public isChecked(boxNumber: string): boolean {
  const stored = this.values[boxNumber];
  switch (stored) {
    case 'true':
    case 1:
    case '1':
      return true;
    default:
      return false;
  }
}
this.form = new FormGroup(controls);
/**
 * Records a checkbox toggle for `boxNumber`. The state is stored as the
 * string 'true' or 'false' rather than as a boolean.
 */
public setCheckbox(boxNumber: string, checked: boolean): void {
  if (checked) {
    this.values[boxNumber] = 'true';
  } else {
    this.values[boxNumber] = 'false';
  }
}
public ngOnChanges(): void {
if (this.data) {
const patchData: Record<string, unknown> = {
filingStatus: this.filingStatus
};
/**
 * Returns the stored value for `boxNumber` as a string for a text input,
 * or the empty string when nothing (null/undefined) is stored.
 */
public getTextValue(boxNumber: string): string {
  const stored = this.values[boxNumber];
  if (stored === null || stored === undefined) {
    return '';
  }
  return String(stored);
}
for (const field of K1_FIELD_CONFIG) {
patchData[field.key] = this.data[field.key] ?? 0;
/**
 * Stores the text typed into the input that fired `event`.
 * An empty input is stored as null so the box counts as "not provided".
 */
public setTextValue(boxNumber: string, event: Event): void {
  const { value: typed } = event.target as HTMLInputElement;
  this.values[boxNumber] = typed ? typed : null;
}
this.form.patchValue(patchData);
/**
 * Produces the string shown in a numeric (currency / percent) input.
 * Missing or empty values, and values that do not convert to a number,
 * display as the empty string; everything else is normalised via Number().
 */
public getNumericDisplay(boxNumber: string): string {
  const stored = this.values[boxNumber];
  const isBlank = stored == null || stored === '';
  if (isBlank) {
    return '';
  }
  const parsed = Number(stored);
  return Number.isNaN(parsed) ? '' : String(parsed);
}
public getFieldsForSection(
section: string
): { key: keyof K1Data; label: string; section: string }[] {
return K1_FIELD_CONFIG.filter((f) => f.section === section);
/**
 * Tells whether the value stored for `boxNumber` is numerically zero, which
 * the template uses to render the input in a muted colour.
 *
 * The value is normalised with Number(), the same conversion
 * getNumericDisplay() applies, so string forms such as "0.00" or "0.0"
 * (which are displayed as "0") are also recognised as zero. Missing or
 * empty values are not zero: they render as an empty input.
 */
public isZero(boxNumber: string): boolean {
  const stored = this.values[boxNumber];
  if (stored == null || stored === '') {
    return false;
  }
  // Non-numeric strings convert to NaN, which never equals 0.
  return Number(stored) === 0;
}
/**
 * Stores the number typed into the numeric input that fired `event`.
 * An empty input, or text that parseFloat cannot read, is stored as null;
 * otherwise the parsed number is stored.
 */
public setNumericValue(boxNumber: string, event: Event): void {
  const { value: raw } = event.target as HTMLInputElement;
  const parsed = raw === '' || raw == null ? NaN : parseFloat(raw);
  this.values[boxNumber] = Number.isNaN(parsed) ? null : parsed;
}
// ── Submit ─────────────────────────────────────────────────────────────
public onSubmit(): void {
if (this.form.valid) {
const value = this.form.value;
const data: Record<string, number> = {};
const data: Record<string, number | string | null> = {};
for (const field of K1_FIELD_CONFIG) {
data[field.key] = Number(value[field.key]) || 0;
for (const section of this.sections) {
for (const field of section.fields) {
const v = this.values[field.boxNumber];
if (v != null && v !== '') {
data[field.boxNumber] = v;
}
}
}
this.submitted.emit({
data,
filingStatus: value.filingStatus
filingStatus: this.filingStatusValue
});
}
}
}

2
prisma/schema.prisma

@ -676,7 +676,9 @@ model CellMapping {
boxNumber String
label String
description String?
cellType String @default("number")
isCustom Boolean @default(false)
isIgnored Boolean @default(false)
sortOrder Int
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt

535
specs/001-family-office-transform/research-normalized-k1-model.md

@ -0,0 +1,535 @@
# Research: Normalized Relational Model for K-1 Financial Data
**Phase 0 Output** | **Date**: 2026-03-20 | **Research Only — No Code**
---
## Context
The current system stores K-1 box data as a flat JSON blob on `KDocument.data`:
```json
{"1": 50000, "9a": -1200, "11-ZZ*": 500, "20-A": 1200}
```
Aggregations are computed on-the-fly in `k1-aggregation.service.ts` by iterating JSON keys. `CellMapping` provides label metadata, and `CellAggregationRule` defines which box keys to SUM. The system currently has ~80+ possible K-1 fields (boxes 1–21 with subtypes, Sections J/K/L/M/N, metadata fields like A–I).
The goal is to evaluate whether and how to transform this into a normalized relational model.
---
## Topic 1: Wide vs Normalized Financial Data Models
### Decision
**Move to a normalized fact table** (`K1LineItem`) for Part III financial data (boxes 1–21), but **keep a JSON metadata column** for Part I/II identity fields (A–I, J–N) that are queried infrequently.
### Rationale
The current JSON blob approach has these specific weaknesses for analytics:
**Query limitations observed in this codebase:**
1. **No SQL-level filtering or aggregation** — The `computeForKDocument()` method in `k1-aggregation.service.ts` must fetch the entire `KDocument` row, deserialize JSON, and loop through `Object.entries(data)` in application code. This means you cannot write `SELECT SUM(amount) FROM ... WHERE box_number = '1' AND tax_year BETWEEN 2020 AND 2025` — every aggregation requires fetching and deserializing all rows.
2. **No indexes on values** — Cannot index `data->'1'` effectively in PostgreSQL JSONB for range queries. While GIN indexes support containment (`@>`), they don't help with `>`, `<`, or `BETWEEN` on numeric values within the JSON.
3. **No referential integrity** — A typo like `"9A"` vs `"9a"` silently creates bad data. The current `CellMapping` table defines valid box numbers, but nothing enforces that `KDocument.data` keys match them.
4. **Cross-document aggregation is O(n) deserialization** — To compute "total ordinary income (Box 1) across all partnerships for 2025," every KDocument row matching the year must be fetched and parsed. With 50+ partnerships that is 50+ JSON deserializations for a single year's figure, and a five-year trend (50+ partnerships × 5 years) requires 250+ deserializations for one chart.
5. **No partial update tracking** — When a KDocument transitions from ESTIMATED → FINAL, the entire JSON blob is replaced. `previousData` preserves the old blob but provides no field-level diff.
6. **Schema evolution is invisible** — If the IRS adds a Box 6d in 2027, there's no migration — it just appears as a new JSON key. This sounds convenient but means no validation, no type checking, and no discoverability for future NL-to-SQL.
**When the wide/JSON model is acceptable:**
- Archival storage of the complete raw extraction (already served by `K1ImportSession.rawExtraction`)
- Rarely-queried metadata fields (Part I/II: partnership name, EIN, addresses)
- Configurations and user preferences (already used for `Settings.settings`)
- Fewer than ~10 documents with no cross-document queries needed
**When it breaks down (the current situation):**
- Cross-entity/cross-year aggregation (core family office use case)
- Performance analytics over time (partnership returns by year)
- Tax planning queries ("show me all partnerships with Section 1231 losses > $10K")
- Audit trail at field granularity
- LLM-generated SQL queries (LLMs cannot reliably generate JSONB path expressions)
### Alternatives Considered
| Alternative | Pros | Cons |
|---|---|---|
| **Keep JSON blob** (status quo) | No migration, flexible schema | All query limitations above; blocks analytics roadmap |
| **JSONB with generated columns** | No schema change for K-1 fields; PostgreSQL 12+ supports `GENERATED ALWAYS AS ((data->>'1')::numeric) STORED` | Max ~30 generated columns practical; doesn't scale to 80+ fields; still no FK integrity |
| **Wide table with 80+ columns** | Simple queries, strong typing | Extremely sparse (most K-1s populate ~20 of 80+ boxes); ALTER TABLE for every IRS form change; NULL-heavy |
| **Normalized fact table** (chosen) | SQL aggregation, indexes, FK integrity, LLM-friendly, field-level audit trail | More JOINs; migration effort; slightly more complex insert logic |
---
## Topic 2: EAV vs Normalized Tables for Tax Document Fields
### Decision
**Use a hybrid approach**: a single EAV-style fact table (`K1LineItem`) for all Part III financial line items, combined with a reference/dimension table (`K1BoxDefinition`) that provides metadata, typing, and validation rules. Keep Part I/II identity metadata as structured JSON on the KDocument.
This is technically EAV but with strong constraints — it's closer to a **typed fact table** pattern than classic unconstrained EAV.
### Rationale
**Why EAV is appropriate here (and usually isn't):**
Classic EAV fails because it loses type safety, makes queries verbose, and resists validation. K-1 data avoids these pitfalls because:
1. **Uniform value type** — All Part III financial values (boxes 1–21) are `Decimal` amounts. Unlike generic EAV where attributes might be strings, dates, booleans, or blobs, K-1 line items are uniformly monetary amounts with a known currency. This eliminates the "value_string / value_number / value_date" anti-pattern.
2. **Closed attribute set** — The IRS defines ~50 Part III line items. This is not open-ended. The `K1BoxDefinition` reference table enumerates all valid attributes, so there's no unbounded attribute sprawl.
3. **Natural query pattern** — The primary queries are aggregations across one attribute dimension: `SUM(amount) WHERE box_key = '1'`. This is exactly what EAV is good at — pivot-style aggregation across a known set of attributes.
4. **Sparse data** — A typical K-1 populates 15–25 of ~50 possible line items. A wide table would be 50–70% NULL. The EAV/fact table stores only populated fields, which is both space-efficient and semantically clearer.
**Proposed structure (conceptual):**
```
K1BoxDefinition (reference/dimension table)
├── boxKey VARCHAR PK -- "1", "9a", "11-ZZ*", "20-A"
├── label VARCHAR -- "Ordinary business income (loss)"
├── section VARCHAR -- "PART_III", "PART_I", "SECTION_J"
├── dataType VARCHAR -- "CURRENCY", "PERCENTAGE", "BOOLEAN", "TEXT"
├── sortOrder INT
├── irsFormLine VARCHAR -- "Box 1", "Box 9a", "Section J, Line 1"
└── description TEXT
K1LineItem (fact table — one row per box per KDocument)
├── id UUID PK
├── kDocumentId UUID FK → KDocument.id
├── boxKey VARCHAR FK → K1BoxDefinition.boxKey
├── amount DECIMAL(15,2) -- financial value (null for non-monetary)
├── textValue VARCHAR -- for text/boolean fields if needed
├── sourceConfidence DECIMAL(3,2) -- 0.00–1.00, from extraction
├── sourcePageNumber INT -- PDF page where extracted
├── sourceCoordinates JSON -- {x, y, width, height} on the page
├── isUserEdited BOOLEAN -- true if user modified during verification
├── createdAt TIMESTAMP
├── updatedAt TIMESTAMP
└── @@unique([kDocumentId, boxKey])
```
**Why not separate normalized tables for each box category:**
An alternative is dedicated tables: `K1IncomeItems`, `K1DeductionItems`, `K1CreditItems`, `K1CapitalAccount`, etc. This was rejected because:
- K-1 boxes don't cleanly partition into fixed categories (Box 11 "Other income" spans multiple categories via sub-codes)
- Sub-code boxes (11-A through 11-ZZ*, 13-A through 13-ZZ*, 20-A through 20-ZZ*) have partnership-specific meaning — the same structural pattern repeats across boxes
- It would require 6–8 tables with identical column shapes, making queries harder, not easier
- The `K1BoxDefinition` reference table provides the categorical metadata without needing separate physical tables
**Treatment of Part I/II metadata fields:**
Fields like Partnership EIN (Box A), Partner name (Box F), Section J percentages, and Section L capital account data are better stored as structured JSON on `KDocument` in a `metadata` column because:
- They're queried for display, not for aggregation
- They have heterogeneous types (strings, booleans, percentages, addresses)
- They identify the document rather than representing financial facts
- There are ~30 of them, and they're almost all populated (not sparse)
### Alternatives Considered
| Alternative | Pros | Cons |
|---|---|---|
| **Pure EAV (no reference table)** | Maximum flexibility | No validation of box keys; `CellMapping` already serves this role but without FK enforcement |
| **Wide table (one column per box)** | Simple SELECTs for specific boxes | 80+ columns; 50–70% NULLs; ALTER TABLE for new boxes; poor for cross-box aggregation |
| **Separate tables per box category** | Strong typing per category | 6–8 near-identical tables; complex UNION queries; sub-code boxes don't fit cleanly |
| **Hybrid EAV + reference table** (chosen) | Uniform fact table; strong FK validation; sparse-friendly; single query pattern for aggregation; field-level provenance | Pivot queries needed for "show one K-1 as a form"; slightly more complex writes |
---
## Topic 3: Financial Fact Tables for Tax Data
### Decision
**Model K-1 line items as a financial fact table** in a star-schema-inspired design, with KDocument as the central bridge to dimension tables (Partnership, Entity, TaxYear). Monetary values stored as `DECIMAL(15,2)` with explicit currency.
### Rationale
Financial data warehouses consistently use a fact/dimension pattern for tax line items:
**Star schema mapping for K-1 data:**
```
┌──────────────┐
│ Partnership │ (dimension)
│ ────────── │
│ id, name, │
│ type, ein │
└──────┬───────┘
┌──────────────┐ ┌──────┴───────┐ ┌──────────────────┐
│ Entity │────│ KDocument │────│ K1BoxDefinition │ (dimension)
│ (dimension) │ │ (bridge) │ │ ────────────────│
│ ────────── │ │ ────────── │ │ boxKey, label, │
│ id, name, │ │ id, taxYear,│ │ section, type │
│ type, taxId │ │ status │ └──────────────────┘
└──────────────┘ └──────┬───────┘
┌──────┴───────┐
│ K1LineItem │ (FACT)
│ ────────── │
│ amount, │
│ boxKey, │
│ confidence │
└──────────────┘
```
**Best practices from financial data warehousing applied here:**
1. **Additive facts only** — `K1LineItem.amount` is fully additive: you can SUM across tax years, partnerships, entities, or box types. Non-additive data (percentages, booleans, text) is stored separately in `textValue` or on the KDocument metadata.
2. **Grain = one box value per K-1 document** — Each row in `K1LineItem` represents one financial amount from one K-1 for one tax year. This is the atomic grain. Aggregation rules from `CellAggregationRule` operate on this grain.
3. **Slowly changing dimensions** — `PartnershipMembership` already handles SCD Type 2 (effective dates) for ownership percentages. `K1BoxDefinition` is SCD Type 1 (overwritten on IRS form changes, with version tracking if needed).
4. **Conformed dimensions** — `Partnership` and `Entity` serve as conformed dimensions shared between K-1 facts, Distribution facts, and Valuation facts. A single `Entity` dimension joins to multiple fact tables.
5. **Currency handling** — Store amounts in the source currency with a `currency` column. The KDocument inherits currency from Partnership. Conversion to reporting currency happens at query time or in materialized views, never by mutating the fact.
6. **Decimal precision** — `DECIMAL(15,2)` covers amounts up to $9,999,999,999,999.99. K-1 amounts from large partnerships (PE funds, hedge funds) can reach tens of millions, so 15 digits of precision provide ample headroom. Use 2 decimal places to match IRS reporting precision.
**Aggregation queries enabled by this model:**
```sql
-- Total ordinary income across all partnerships for 2025
SELECT SUM(li.amount)
FROM k1_line_item li
JOIN k_document kd ON li.k_document_id = kd.id
WHERE li.box_key = '1' AND kd.tax_year = 2025;
-- Income breakdown by entity for tax year 2025
SELECT e.name, li.box_key, SUM(li.amount)
FROM k1_line_item li
JOIN k_document kd ON li.k_document_id = kd.id
JOIN partnership p ON kd.partnership_id = p.id
JOIN partnership_membership pm ON pm.partnership_id = p.id
JOIN entity e ON pm.entity_id = e.id
WHERE kd.tax_year = 2025
GROUP BY e.name, li.box_key;
-- Partnership performance: Box 1 over time
SELECT kd.tax_year, p.name, li.amount
FROM k1_line_item li
JOIN k_document kd ON li.k_document_id = kd.id
JOIN partnership p ON kd.partnership_id = p.id
WHERE li.box_key = '1'
ORDER BY kd.tax_year;
```
These queries are impossible or impractical with the current JSON blob model.
### Alternatives Considered
| Alternative | Pros | Cons |
|---|---|---|
| **Snowflake schema (more normalization)** | Normalized box categories into sub-dimensions | Over-normalized for ~50 box types; extra JOINs for no benefit |
| **Flat denormalized reporting table** | Fastest reads; no JOINs | Write complexity; data duplication; hard to keep consistent |
| **OLAP cube / column store** | Best aggregation performance | Overkill for <10K rows; adds infrastructure complexity |
| **Star-schema-inspired fact table** (chosen) | Natural fit for K-1 aggregation queries; leverages existing dimensions; PostgreSQL handles this scale trivially | Requires JOINs for full context (acceptable) |
---
## Topic 4: Source Traceability in Financial Systems
### Decision
**Store extraction provenance at the line-item grain** — each `K1LineItem` records the source page number, bounding-box coordinates, raw extracted text, confidence score, and whether it was user-edited. The `K1ImportSession` retains the complete raw extraction as an immutable JSON snapshot.
### Rationale
The audit trail must support this flow:
```
Displayed aggregated number
→ K1LineItem (individual box value)
→ KDocument (which K-1, which year, which partnership)
→ K1ImportSession (extraction record)
→ Document (source PDF file)
→ Specific page + coordinates on that page
→ Raw extracted text before parsing
```
**Granularity levels and what to store where:**
| Level | Table | Fields | Purpose |
|---|---|---|---|
| **Aggregation** | Computed at query time | SUM/formula from `CellAggregationRule` | "Where does this total come from?" → list of K1LineItems |
| **Line item** | `K1LineItem` | `amount`, `boxKey`, `sourceConfidence`, `sourcePageNumber`, `sourceCoordinates`, `rawExtractedText`, `isUserEdited` | "What exactly was extracted and from where?" |
| **Document** | `K1ImportSession` | `rawExtraction` (full JSON), `extractionMethod`, `fileName` | "What did the system originally see?" (immutable after extraction) |
| **File** | `Document` | `filePath`, `fileSize`, `mimeType` | "Where is the original PDF?" |
**Key design principles:**
1. **Immutability of raw extraction** — `K1ImportSession.rawExtraction` is written once at extraction time and never modified. `verifiedData` captures user edits. This provides a complete before/after audit trail.
2. **Coordinate-level provenance** — Current `k1-positions-dump.txt` shows the parser already extracts `x, y` coordinates for each text element. Storing `sourceCoordinates: {x, y, width, height}` on each `K1LineItem` enables a future "click to highlight in PDF" feature.
3. **Confidence as first-class data** — The system already computes confidence scores (0.0–1.0) during extraction. Persisting this on the line item (not just in the import session JSON) enables queries like "show me all low-confidence values across all partnerships" and supports audit prioritization.
4. **User edit tracking** — `isUserEdited: boolean` distinguishes machine-extracted values from human-verified overrides. This is critical for audit and for training future extraction models.
5. **No deletion of source data** — When a KDocument transitions from ESTIMATED → FINAL, the old line items should be soft-versioned (via `KDocument.previousData` or a separate version table), not deleted.
**What NOT to store at line-item level:**
- Full PDF binary (stay on Document/filesystem)
- Complete OCR output for the entire page (stay on K1ImportSession.rawExtraction)
- Rendering coordinates for non-K-1 text on the page (not relevant)
### Alternatives Considered
| Alternative | Pros | Cons |
|---|---|---|
| **Provenance only at document level** | Simpler; fewer columns | Cannot trace an individual number back to a specific location on a page |
| **Separate provenance table** (K1LineItemProvenance) | Clean separation of concerns | Extra JOIN for every audit query; 1:1 relationship is usually better as columns |
| **Store full page image crops per line item** | Visual proof | Massive storage; PDF coordinates + original file are sufficient for re-rendering |
| **Provenance on line item** (chosen) | Direct traceability; no extra JOINs; enables "highlight in PDF"; supports audit queries | Slightly wider rows (acceptable for <10K rows) |
---
## Topic 5: PostgreSQL Materialized Views for Financial Reporting
### Decision
**Use materialized views for cross-partnership/cross-year aggregation dashboards**, refreshed on a schedule or triggered by KDocument changes. Use regular views for single-document or single-partnership queries. Do **not** use denormalized reporting tables.
### Rationale
**When to use each approach in this system:**
| Scenario | Approach | Reason |
|---|---|---|
| "Show Box 1–21 for one K-1" | Regular query on `K1LineItem` | Small result set; no aggregation; fast enough |
| "Total income by box for one partnership across years" | Regular SQL `GROUP BY` | <20 rows × <10 years = <200 rows; trivial for PostgreSQL |
| "Dashboard: all partnerships × all entities × 5 years" | **Materialized view** | Cross-joins across dimensions; 50 partnerships × 5 entities × 5 years × 20 boxes = 25,000 aggregated values; worth pre-computing |
| "Tax planning: find partnerships with specific loss patterns" | Materialized view or indexed view | Complex filtering across many K-1s |
| "YoY change in Box 1 by partnership" | Materialized view | Window functions over multiple years |
**Proposed materialized views:**
```sql
-- NOTE: `REFRESH MATERIALIZED VIEW CONCURRENTLY` requires a unique index on
-- each materialized view. This snippet does not create one; add a
-- `CREATE UNIQUE INDEX` per view before relying on concurrent refresh.

-- MV 1: K-1 Summary by Partnership/Year
-- One row per (partnership, tax year, box, filing status): the summed amount
-- and the number of contributing line items, with the box label/section
-- pulled from k1_box_definition.
CREATE MATERIALIZED VIEW mv_k1_partnership_year_summary AS
SELECT
kd.partnership_id,
kd.tax_year,
li.box_key,
bd.label,
bd.section,
SUM(li.amount) AS total_amount,
COUNT(*) AS line_count,
kd.filing_status
FROM k1_line_item li
JOIN k_document kd ON li.k_document_id = kd.id
-- Inner join: line items whose box_key has no k1_box_definition row are excluded.
JOIN k1_box_definition bd ON li.box_key = bd.box_key
GROUP BY kd.partnership_id, kd.tax_year, li.box_key, bd.label, bd.section, kd.filing_status;
-- MV 2: Entity-level Income Aggregation
-- One row per (entity, tax year, box): each line item amount is scaled by the
-- member entity's ownership_percent (treated as a 0-100 value, hence the /100).
CREATE MATERIALIZED VIEW mv_entity_income_summary AS
SELECT
e.id AS entity_id,
e.name AS entity_name,
kd.tax_year,
li.box_key,
SUM(li.amount * pm.ownership_percent / 100) AS allocated_amount
FROM k1_line_item li
JOIN k_document kd ON li.k_document_id = kd.id
JOIN partnership_membership pm ON pm.partnership_id = kd.partnership_id
JOIN entity e ON pm.entity_id = e.id
-- Only memberships in effect on Dec 31 of the tax year are counted:
-- started on or before that date and either open-ended or ending after it.
WHERE pm.effective_date <= make_date(kd.tax_year, 12, 31)
AND (pm.end_date IS NULL OR pm.end_date > make_date(kd.tax_year, 12, 31))
GROUP BY e.id, e.name, kd.tax_year, li.box_key;
```
**Refresh strategy:**
- **Trigger-based refresh**: After any KDocument insert/update/delete or status change to FINAL, refresh affected materialized views. In NestJS, this is a `@OnEvent('k-document.changed')` handler that calls `REFRESH MATERIALIZED VIEW CONCURRENTLY`.
- **`CONCURRENTLY` keyword**: Allows reads during refresh (requires a unique index on the MV). Essential for a multi-user system.
- **Frequency**: For a family office with <100 K-1s updated per year, refresh takes <1 second. No scheduling is needed — event-driven refresh is sufficient.
**Why not denormalized reporting tables:**
Denormalized tables (duplicating data into a flat reporting structure) require write-time consistency management — every KDocument change must update the reporting table transactionally. This is the pattern used in high-write OLTP systems, but K-1 data is low-write (<100 writes/year) and high-read (dashboards queried many times). Materialized views handle this perfectly with zero application-level sync logic.
**Why not computed/generated columns:**
PostgreSQL generated columns cannot reference other tables. Since aggregations span KDocument → K1LineItem → Partnership → Entity, generated columns are structurally insufficient.
### Alternatives Considered
| Alternative | Pros | Cons |
|---|---|---|
| **Application-level caching** (Redis/in-memory) | No DB schema changes | Cache invalidation complexity; doesn't help SQL-based analytics |
| **Denormalized reporting tables** | Fastest reads; works at any scale | Write-time maintenance burden; consistency bugs; overkill for <10K rows |
| **Regular views** (not materialized) | Always fresh; no refresh needed | Recomputed on every query; slow for cross-entity dashboards |
| **Materialized views** (chosen) | Pre-computed; concurrent reads; event-driven refresh; zero application-level sync | Slight staleness (mitigated by event-driven refresh); requires unique indexes for CONCURRENTLY |
---
## Topic 6: Migration Strategy from JSON Blob to Normalized Tables
### Decision
**Phase the migration in 3 steps**: (1) Create new tables alongside existing JSON, (2) Dual-write to both during a transition period, (3) Make normalized tables authoritative. **Keep the JSON blob immutable as an archive** — never delete it.
### Rationale
**Step 1: Additive schema changes (zero breaking changes)**
```
Migration 1: Create K1BoxDefinition table, seed with IRS default box definitions
Migration 2: Create K1LineItem table with FK to KDocument and K1BoxDefinition
Migration 3: Backfill K1LineItem from existing KDocument.data JSON blobs
```
The backfill for Migration 3 (still part of Step 1):
```sql
-- Pseudocode: For each KDocument, iterate JSON keys and insert K1LineItems
-- Each top-level key of KDocument.data becomes the box_key of a new row.
-- Only entries whose JSON value is a number are copied; strings, booleans,
-- nulls, and nested objects/arrays are skipped by the WHERE filter below.
INSERT INTO k1_line_item (id, k_document_id, box_key, amount, created_at, updated_at)
SELECT
gen_random_uuid(),
kd.id,
je.key,
(je.value)::decimal,
-- created_at is inherited from the source KDocument; updated_at is the backfill time.
kd.created_at,
NOW()
FROM k_document kd,
jsonb_each(kd.data::jsonb) AS je(key, value)
WHERE jsonb_typeof(je.value) = 'number';
```
**Step 2: Dual-write transition period**
During the transition:
- `k1-import.service.ts` `confirmImport()` writes to **both** `KDocument.data` (JSON) and `K1LineItem` (rows)
- Read operations gradually migrate from JSON-based to K1LineItem-based
- `k1-aggregation.service.ts` switches from JSON iteration to `SELECT SUM` on K1LineItem
- Run validation queries comparing JSON-derived totals to K1LineItem-derived totals
**Step 3: K1LineItem becomes authoritative**
- New features (dashboards, tax planning, LLM queries) read only from K1LineItem
- `KDocument.data` is retained as immutable archive but no longer written to for new documents
- `CellAggregationRule.sourceCells` continues to work — the boxKey values are the same strings
- `CellMapping` evolves into or is replaced by `K1BoxDefinition`
**Should the old JSON be kept immutable?**
**Yes, permanently.** Reasons:
1. **Audit requirement** — The JSON blob is the original imported representation. Regulatory and audit standards require preserving source data in its original form.
2. **Rollback safety** — If the migration has bugs, the JSON blob is the recovery source.
3. **Storage is trivial** — A JSON blob with ~30 key-value pairs is <1 KB. Even 1,000 KDocuments = <1 MB total. There's no storage pressure to delete it.
4. **Import session already preserves extraction** — `K1ImportSession.rawExtraction` holds the pre-verification extraction. `KDocument.data` holds the post-verification snapshot. Both should survive indefinitely.
**Backward compatibility considerations:**
- The `KDocument.data` column type stays `Json` (not nullable, not removed)
- The existing `k-document-form.component.ts` UI reads from `KDocument.data` — it continues to work during transition
- The `computeForKDocument()` aggregation service works against JSON through the transition, then switches to K1LineItem queries
- No existing API contracts change — `GET /k-documents/:id` returns the same shape
**Handling the CellMapping → K1BoxDefinition transition:**
The existing `CellMapping` table (per-partnership box definitions) maps closely to the proposed `K1BoxDefinition`. The migration strategy:
- `K1BoxDefinition` absorbs the global (partnershipId = null) CellMapping records
- Per-partnership CellMapping overrides become per-partnership `K1BoxDefinition` rows (or remain as display-layer configuration separate from the data model)
- `CellMapping` fields like `isIgnored`, `isCustom` are presentation concerns that may not belong on the data-layer `K1BoxDefinition`
### Alternatives Considered
| Alternative | Pros | Cons |
|---|---|---|
| **Big-bang migration** (drop JSON, create tables, migrate in one step) | Clean; no dual-write complexity | Risk of data loss; requires full feature freeze; hard to validate |
| **Dual-write indefinitely** | Maximum safety | Permanent write overhead; divergence risk between JSON and rows |
| **Keep JSON as authoritative, add views** | No migration of writes | Doesn't solve the core query limitation; views over JSONB are slow |
| **Phased migration with immutable archive** (chosen) | Zero-downtime; incremental validation; rollback possible; preserves audit trail | Dual-write period adds complexity (bounded to weeks, not permanent) |
---
## Topic 7: Schema Design for Future LLM NL-to-SQL
### Decision
**Design tables with self-documenting names, add PostgreSQL `COMMENT ON` annotations for every table and column, use consistent naming conventions, and avoid ambiguity between similarly-named entities.**
### Rationale
LLMs generating SQL (via text-to-SQL or NL-to-SQL) work by receiving the schema as context and mapping natural language to table/column references. The schema itself is the prompt. Research from the Spider benchmark (Yale), BIRD benchmark, and production NL-to-SQL systems (e.g., Vanna.ai, DataHerald) identifies these factors as most impactful:
**1. Naming conventions that LLMs parse correctly:**
| Current Name | Problem | Proposed Name | Why Better |
|---|---|---|---|
| `KDocument` | "K" is ambiguous to LLMs | `k1_document` | Explicitly says "K-1" |
| `KDocument.data` | "data" is the most generic possible name | `k1_document.raw_data_json` | Describes what it holds |
| `K1LineItem.amount` | Could be confused with Distribution.amount | `k1_line_item.reported_amount` | Disambiguates |
| `CellMapping` | "Cell" is a spreadsheet term, not a tax term | `k1_box_definition` | Domain-specific |
| `CellAggregationRule` | LLMs may not connect "cell" to K-1 boxes | `k1_aggregation_rule` | Clearer context |
**Naming conventions to adopt:**
- `snake_case` for all table and column names (PostgreSQL convention; LLMs are trained on more snake_case SQL than camelCase)
- Prefix K-1-specific tables with `k1_` to create a namespace
- Use `_id` suffix for all foreign keys
- Avoid abbreviations (`partnership_id` not `ptnr_id`)
- Use `_at` suffix for timestamps (`created_at`, `updated_at`)
- Use descriptive names over short names (`tax_year` not `yr`, `filing_status` not `status`)
**2. PostgreSQL COMMENT annotations:**
```sql
COMMENT ON TABLE k1_line_item IS 'Individual financial line item from an IRS Schedule K-1 (Form 1065). One row per box number per K-1 document.';
COMMENT ON COLUMN k1_line_item.box_key IS 'IRS K-1 box identifier such as "1" for ordinary income, "9a" for long-term capital gains, or "20-A" for other information code A.';
COMMENT ON COLUMN k1_line_item.reported_amount IS 'Dollar amount reported on this K-1 line item, in the partnership base currency. Negative values represent losses.';
COMMENT ON TABLE k1_box_definition IS 'Reference table of IRS Schedule K-1 box definitions. Maps box identifiers to human-readable labels and categories.';
```
LLM NL-to-SQL systems extract these comments as schema context. A model asked "what is total ordinary income?" can map "ordinary income" → `k1_box_definition.label = 'Ordinary business income (loss)'` → `box_key = '1'` → join to `k1_line_item`.
**3. Avoiding ambiguity:**
Current pain points for LLM-generated SQL:
- `Distribution.amount` vs `K1LineItem.amount` — an LLM asked "total distributions" might query the wrong table. Solution: `k1_line_item.reported_amount` vs `distribution.distribution_amount`.
- `Partnership` has `distributions`, `kDocuments`, `valuations` — naming all FK columns `partnership_id` is correct and expected by LLMs.
- `Entity` is overloaded (database entities, legal entities). The table comment must clarify: "A legal person or structure (trust, LLC, individual) that owns assets and receives K-1 allocations."
**4. Schema metadata table for LLM context:**
Consider a lightweight `schema_metadata` table or a markdown document that provides the LLM with:
- Table relationships in natural language
- Common query patterns with examples
- Business rules ("Box 19a distributions are allocated to entities by ownership percentage")
- Valid values for enum columns
This is cheaper than fine-tuning and more maintainable than few-shot prompts.
**5. Avoid patterns that confuse LLMs:**
| Anti-pattern | Why It Confuses LLMs | Alternative |
|---|---|---|
| JSON columns for queryable data | LLMs generate `->` / `->>` operators inconsistently | Normalized columns |
| Composite primary keys | LLMs often forget one part of the key in JOINs | Surrogate UUID PK + unique constraint |
| Polymorphic FKs (one FK, multiple target tables) | LLMs can't determine which table to JOIN | Separate FK columns |
| Generic column names (`type`, `status`, `data`, `value`) | Ambiguous across tables | Prefix with table context (`filing_status`, `box_data_type`) |
| Soft deletes (`is_deleted`) | LLMs forget the `WHERE is_deleted = false` filter | Use `end_date IS NULL` pattern (already in use for memberships) |
### Alternatives Considered
| Alternative | Pros | Cons |
|---|---|---|
| **No schema changes for LLM** | No work | LLM accuracy drops significantly with ambiguous/generic names; JSONB columns are nearly unusable for NL-to-SQL |
| **Fine-tune LLM on this schema** | Can handle any naming convention | Expensive; needs retraining on every schema change; vendor lock-in |
| **RAG over schema docs** | Flexible; schema-aware | Still limited by underlying schema quality; garbage-in-garbage-out |
| **Self-documenting schema + COMMENT annotations** (chosen) | Works with any LLM; zero runtime cost; maintainable; improves human readability too | Requires discipline to maintain comments on schema changes |
---
## Summary of Decisions
| # | Topic | Decision |
|---|---|---|
| 1 | Wide vs Normalized | Normalized fact table for Part III financial data; JSON retained for Part I/II metadata |
| 2 | EAV vs Normalized | Hybrid: typed EAV fact table (`K1LineItem`) with reference dimension (`K1BoxDefinition`); uniform `DECIMAL` value type avoids classic EAV pitfalls |
| 3 | Financial fact tables | Star-schema-inspired design with `K1LineItem` as fact, `KDocument`/`Partnership`/`Entity` as dimensions |
| 4 | Source traceability | Per-line-item provenance (page, coordinates, confidence, raw text, user-edit flag); K1ImportSession.rawExtraction as immutable full extraction archive |
| 5 | Materialized views | Event-driven materialized views for cross-entity dashboards; regular queries for single-document access |
| 6 | Migration strategy | 3-phase: additive tables → dual-write → K1LineItem authoritative; JSON blob kept immutable forever |
| 7 | LLM NL-to-SQL | Self-documenting `snake_case` names, `COMMENT ON` annotations, disambiguation of similar columns, `k1_` table prefix namespace |

75
specs/005-k1-parser-fix/plan.md

@ -1,40 +1,41 @@
# Implementation Plan: Fix K-1 PDF Parser — Position-Based Extraction
**Branch**: `005-k1-parser-fix` | **Date**: 2026-03-18 | **Spec**: [spec.md](spec.md)
**Branch**: `005-k1-parser-fix` | **Date**: 2026-03-20 | **Spec**: [spec.md](spec.md)
**Input**: Feature specification from `/specs/005-k1-parser-fix/spec.md`
**Note**: This template is filled in by the `/speckit.plan` command. See `.specify/templates/plan-template.md` for the execution workflow.
## Summary
Rewrite the K-1 PDF extractor from a broken regex-based label matcher to a position-based extraction engine using pdfjs-dist. The core approach: use `page.getTextContent()` to get all text items with (x, y) coordinates and font info, discriminate data values from template text by font, then map each data value to a K-1 form field based on position regions (bounding boxes). Supports Part III boxes 1-21 with subtype codes, Part I/II metadata, sections J/K/L/M/N, and checkboxes. Unmapped values go to a fallback list for manual user assignment.
Rewrite the K-1 PDF parser from regex-based label matching to position-based text extraction using `pdfjs-dist`. The current regex parser incorrectly matches cell numbers instead of actual data values. The new parser will use font discrimination (data fonts vs template fonts) and (x,y) coordinate mapping to bounding-box regions for each K-1 form field. This fixes extraction for all Part I/II metadata, Part III boxes 1-21 (including subtypes, multi-value fields, and SEE STMT references), checkboxes, and Sections J/K/L/M/N. The existing `PdfParseExtractor` already implements position-based extraction — this spec refines its accuracy and adds confidence scoring, unmapped item handling, and dynamic font identification.
## Technical Context
**Language/Version**: TypeScript 5.x (Node.js runtime)
**Primary Dependencies**: NestJS 11.x, pdfjs-dist 5.4.x (already installed via pdf-parse), pdf-parse 2.4.x (kept for `isDigitalK1` detection)
**Storage**: PostgreSQL via Prisma ORM (existing K1ImportSession, Document tables)
**Testing**: Jest (unit tests for extraction logic, position mapping, value parsing)
**Target Platform**: Node.js server (NestJS API), Angular 21 client (existing review UI)
**Project Type**: Web service (monorepo: api + common libs)
**Performance Goals**: < 5 seconds extraction for a single-page K-1 PDF
**Constraints**: Must preserve existing `K1Extractor` interface contract; no new npm dependencies (pdfjs-dist is already transitive)
**Scale/Scope**: Single-file parser rewrite + interface expansion in common lib; ~2 files modified, ~1 new file
**Language/Version**: TypeScript 5.x, Node.js ≥22.18.0
**Primary Dependencies**: NestJS 11+, Angular 21+, pdfjs-dist (position-based text extraction), Prisma ORM
**Storage**: PostgreSQL (via Prisma), Redis (caching), filesystem (uploaded PDFs)
**Testing**: Jest (unit + integration)
**Target Platform**: Linux server (Docker) / local dev (Windows/macOS)
**Project Type**: Web application (Nx monorepo: api + client + common + ui)
**Performance Goals**: <5 seconds for single-page K-1 extraction (SC-009)
**Constraints**: Zero data loss during extraction (SC-007); preserve existing API contract (FR-025)
**Scale/Scope**: Single-user family office; ~10-50 K-1 PDFs per tax year
## Constitution Check
_GATE: Must pass before Phase 0 research. Re-check after Phase 1 design._
| Principle | Status | Notes |
|-----------|--------|-------|
| I. Nx Monorepo Structure | PASS | Changes in `apps/api` (extractor) and `libs/common` (interfaces). No new projects. |
| II. NestJS Module Pattern | PASS | PdfParseExtractor is already a `@Injectable()` provider in K1ImportModule. Rewriting internals only. |
| III. Prisma Data Layer | PASS | No schema changes. Existing tables sufficient. |
| IV. TypeScript Strict Conventions | PASS | Will follow `noUnusedLocals`, `noUnusedParameters`, path aliases. |
| V. Simplicity First | PASS | Rewriting one file, expanding one interface. No new architectural layers. |
| VI. Interface-First Design | PASS | K1ExtractedField interface expanded first, then implementation follows. |
| Gate | Rule | Status | Notes |
|------|------|--------|-------|
| Nx boundary | Features respect project boundaries (api/client/common/ui) | ✅ PASS | Parser in `@ghostfolio/api`, interfaces in `@ghostfolio/common`, UI in `@ghostfolio/client` |
| NestJS module pattern | Module + Controller + Service structure | ✅ PASS | `K1ImportModule` already exists with proper DI |
| Prisma data layer | No direct SQL; use PrismaService | ✅ PASS | All DB access via Prisma ORM |
| TypeScript strict | No unused locals/params, path aliases | ✅ PASS | Existing codebase conventions followed |
| Simplicity first | YAGNI, minimal abstractions | ✅ PASS | Modifying existing `PdfParseExtractor`, not adding new layers |
| Interface-first design | Shared interfaces in `@ghostfolio/common` | ✅ PASS | `K1ExtractionResult`, `K1ExtractedField`, `K1UnmappedItem` already defined |
| Max 3 Nx projects per feature | api + common typical | ✅ PASS | Touches api + common only (client UI already exists, no changes needed) |
No gate violations. Proceeding to Phase 0.
**All gates pass. No violations requiring justification.**
## Project Structure
@ -47,8 +48,7 @@ specs/005-k1-parser-fix/
├── data-model.md # Phase 1 output
├── quickstart.md # Phase 1 output
├── contracts/ # Phase 1 output
│ └── extraction.md # Extractor interface contract
└── tasks.md # Phase 2 output (created by /speckit.tasks)
└── tasks.md # Phase 2 output (/speckit.tasks)
```
### Source Code (repository root)
@ -56,27 +56,26 @@ specs/005-k1-parser-fix/
```text
apps/api/src/app/k1-import/
├── extractors/
│ ├── k1-extractor.interface.ts # Unchanged
│ ├── pdf-parse-extractor.ts # REWRITE: position-based extraction
│ ├── k1-position-regions.ts # NEW: bounding box definitions for K-1 form fields
│ ├── azure-extractor.ts # Unchanged
│ └── tesseract-extractor.ts # Unchanged
├── k1-import.module.ts # Unchanged
├── k1-import.service.ts # Minor: handle new subtype field in K1ExtractedField
├── k1-import.controller.ts # Unchanged
└── ...
│ ├── k1-extractor.interface.ts # K1Extractor contract (no changes)
│ ├── k1-position-regions.ts # MODIFY: refine bounding boxes, add tolerance config
│ ├── pdf-parse-extractor.ts # MODIFY: core rewrite — font discrimination, position mapping
│ ├── azure-extractor.ts # No changes (Tier 2)
│ └── tesseract-extractor.ts # No changes (Tier 2 fallback)
├── k1-import.service.ts # Minor: add warning generation for unmapped items
├── k1-import.controller.ts # No changes
├── k1-field-mapper.service.ts # Minor: handle new confidence levels
├── k1-confidence.service.ts # MODIFY: integrate position-match confidence
└── k1-import.module.ts # No changes
libs/common/src/lib/interfaces/
└── k1-import.interface.ts # MODIFY: add subtype, fieldCategory, isCheckbox to K1ExtractedField
└── k1-import.interface.ts # Minor: add fontName/position to K1UnmappedItem if needed
tests/
└── apps/api/src/app/k1-import/
└── extractors/
└── pdf-parse-extractor.spec.ts # NEW: unit tests
prisma/
└── schema.prisma # No changes (existing schema sufficient)
```
**Structure Decision**: Minimalist approach — rewrite one extractor file, add one position-region data file, expand one interface. Follows the existing module structure with no new architectural patterns.
**Structure Decision**: Existing Nx monorepo structure is used. The core change is within `apps/api/src/app/k1-import/extractors/` — specifically `pdf-parse-extractor.ts` and `k1-position-regions.ts`. No new modules, no new Nx projects.
## Complexity Tracking
No constitution violations. Table intentionally empty.
> No violations detected. All changes fit within existing module boundaries.

21
tmp-check-users.mjs

@ -0,0 +1,21 @@
import { PrismaClient } from '@prisma/client';
// WARNING: despite the file name, this script is destructive. It deletes every
// row from the tables listed below and then prints whatever users remain.
// Run it only against a disposable development database.
const p = new PrismaClient();

try {
  // Delete all data in dependency order (users last).
  await p.access.deleteMany();
  await p.order.deleteMany();
  await p.accountBalance.deleteMany();
  await p.account.deleteMany();
  await p.symbolProfile.deleteMany();
  await p.marketData.deleteMany();
  await p.settings.deleteMany();
  await p.subscription.deleteMany();
  await p.authDevice.deleteMany();
  await p.analytics.deleteMany();
  await p.user.deleteMany();
  console.log('All users deleted.');

  // Verify the result: this should print an empty array.
  const users = await p.user.findMany({ select: { id: true, role: true } });
  console.log('USERS after delete:', JSON.stringify(users));
} finally {
  // Always release the database connection, even if a step above rejects;
  // the original error still propagates after the disconnect.
  await p.$disconnect();
}

18
tools/test-k1-parse.mjs

@ -192,14 +192,16 @@ function assignItemsToRegions(items, regions) {
// 1. Checkboxes (closest-center assignment)
const checkboxRegions = K1_POSITION_REGIONS.filter(r => r.valueType === 'checkbox');
const cbAssignments = assignItemsToRegions(dataItems, checkboxRegions);
const checkedRegionIds = new Set();
for (const [region, item] of cbAssignments) {
const isChecked = ['X', '✓', '✗'].includes(item.text.toUpperCase());
if (!isChecked) continue;
checkedRegionIds.add(region.fieldId);
fields.push({
fieldId: region.fieldId,
boxNumber: region.boxNumber,
label: region.label,
rawValue: 'X',
rawValue: 'true',
numericValue: null,
fieldCategory: 'CHECKBOX',
isCheckbox: true,
@ -209,6 +211,20 @@ for (const [region, item] of cbAssignments) {
if (region.fieldId === 'FINAL_K1') metadata.isFinal = true;
if (region.fieldId === 'AMENDED_K1') metadata.isAmended = true;
}
// Emit false for unchecked checkbox regions
for (const region of checkboxRegions) {
if (checkedRegionIds.has(region.fieldId)) continue;
fields.push({
fieldId: region.fieldId,
boxNumber: region.boxNumber,
label: region.label,
rawValue: 'false',
numericValue: null,
fieldCategory: 'CHECKBOX',
isCheckbox: true,
subtype: null
});
}
// 2. Part III — subtype regions first, then simple
const partIIIRegions = K1_POSITION_REGIONS.filter(

Loading…
Cancel
Save