From 41659a9c5b197884f0a33a678d09c162b56aeb3d Mon Sep 17 00:00:00 2001 From: Robert Patch Date: Wed, 18 Mar 2026 13:20:54 -0700 Subject: [PATCH] fix: recalibrate K-1 position regions from actual PDF coordinates - Rewrote k1-position-regions.ts with all coordinates measured from actual e-filed K-1 PDF via pdfjs-dist text position extraction - Fixed all y-coordinate offsets (header, Part I/II, sections J-N, Part III) - Added missing regions: J_EXCHANGE, BOX_22, BOX_23, H2_DE_TIN - Set clean column boundaries (left values x=370-445, right code x=445-510, right values x=510-600) to prevent cross-column matching - Reordered extraction: checkboxes now run before Part III to prevent BOX_16_K3 checkbox X from being grabbed as a BOX_16 value - Fixed stale fieldId refs: D_PARTNER_EIN->E_TIN, E_NAME->F_NAME_ADDR - Removed SSL from client serve config for local HTTP development - Changed API rootUrl from https to http for dev environment - Added tools/extract-k1-positions.mjs utility for PDF coordinate dumps --- .../extractors/k1-position-regions.ts | 814 +++++++++--------- .../extractors/pdf-parse-extractor.ts | 12 +- apps/api/src/environments/environment.ts | 2 +- apps/client/project.json | 5 +- tools/extract-k1-positions.mjs | 74 ++ 5 files changed, 512 insertions(+), 395 deletions(-) create mode 100644 tools/extract-k1-positions.mjs diff --git a/apps/api/src/app/k1-import/extractors/k1-position-regions.ts b/apps/api/src/app/k1-import/extractors/k1-position-regions.ts index d0a8ee315..eae8aedf4 100644 --- a/apps/api/src/app/k1-import/extractors/k1-position-regions.ts +++ b/apps/api/src/app/k1-import/extractors/k1-position-regions.ts @@ -1,16 +1,20 @@ -/** +/** * K-1 Form Position Region Definitions * * Defines bounding box regions for all K-1 (Form 1065) form fields. * Coordinates are in PDF points (1 pt = 1/72 inch), origin bottom-left. - * Page size: 612 × 792 pts (US Letter). + * Page size: 612 x 792 pts (US Letter). 
+ * + * Regions calibrated from actual e-filed K-1 PDF text extraction via pdfjs-dist. + * +/-15pt POSITION_TOLERANCE is applied at match time on top of the defined bounds. * - * Regions verified from actual e-filed K-1 PDF extraction. - * ±15pt tolerance recommended for matching. + * Layout reference (left & right columns): + * Left: box numbers x~316-318, labels x~334, values x~370-445 + * Right: box numbers x~453, labels x~471, code x~445-510, values x~510-600 */ export interface K1PositionRegion { - /** Unique identifier (e.g., "BOX_1", "J_PROFIT_BEGIN", "FINAL_K1") */ + /** Unique identifier (e.g., 'BOX_1', 'J_PROFIT_BEGIN', 'FINAL_K1') */ fieldId: string; /** K-1 box number for Part III fields; section identifier for others */ @@ -49,6 +53,7 @@ export interface K1PositionRegion { // ============================================================================ // Header Regions (5) +// Verified: FINAL_K1 'X' at (324.3, 746.2), TAX_YEAR '20'+'25' at (236.8/262.1, 727.7) // ============================================================================ const HEADER_REGIONS: K1PositionRegion[] = [ { @@ -57,10 +62,10 @@ const HEADER_REGIONS: K1PositionRegion[] = [ label: 'Tax Year', fieldCategory: 'METADATA', valueType: 'text', - xMin: 245, - xMax: 310, - yMin: 765, - yMax: 795, + xMin: 230, + xMax: 275, + yMin: 720, + yMax: 738, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -71,10 +76,10 @@ const HEADER_REGIONS: K1PositionRegion[] = [ label: 'Tax Year Beginning', fieldCategory: 'METADATA', valueType: 'text', - xMin: 120, + xMin: 135, xMax: 200, - yMin: 748, - yMax: 772, + yMin: 679, + yMax: 695, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -85,10 +90,10 @@ const HEADER_REGIONS: K1PositionRegion[] = [ label: 'Tax Year Ending', fieldCategory: 'METADATA', valueType: 'text', - xMin: 310, - xMax: 450, - yMin: 748, - yMax: 772, + xMin: 240, + xMax: 300, + yMin: 679, + yMax: 695, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -99,10 
+104,10 @@ const HEADER_REGIONS: K1PositionRegion[] = [ label: 'Final K-1', fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 309, - xMax: 340, - yMin: 731, - yMax: 761, + xMin: 318, + xMax: 338, + yMin: 739, + yMax: 753, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -113,10 +118,10 @@ const HEADER_REGIONS: K1PositionRegion[] = [ label: 'Amended K-1', fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 379, - xMax: 410, - yMin: 731, - yMax: 761, + xMin: 398, + xMax: 418, + yMin: 739, + yMax: 753, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -124,61 +129,63 @@ const HEADER_REGIONS: K1PositionRegion[] = [ ]; // ============================================================================ -// Part I — Information About the Partnership (4) +// Part I -- Information About the Partnership (4) +// Verified: C_IRS_CENTER 'E-FILE' at (185.4, 553.7) +// TMPL: A at y=626, B at y=602, C at y=554.5, D at y=543 // ============================================================================ const PART_I_REGIONS: K1PositionRegion[] = [ { fieldId: 'A_EIN', - boxNumber: 'A_EIN', + boxNumber: 'A', label: "Partnership's EIN", fieldCategory: 'METADATA', valueType: 'text', - xMin: 30, - xMax: 200, - yMin: 700, - yMax: 735, + xMin: 58, + xMax: 190, + yMin: 615, + yMax: 635, hasSubtype: false, subtypeXMin: null, subtypeXMax: null }, { fieldId: 'B_NAME', - boxNumber: 'B_NAME', - label: "Partnership's Name/Address", + boxNumber: 'B', + label: "Partnership's name, address, city, state, and ZIP code", fieldCategory: 'METADATA', valueType: 'text', - xMin: 30, + xMin: 58, xMax: 290, - yMin: 650, - yMax: 705, + yMin: 570, + yMax: 613, hasSubtype: false, subtypeXMin: null, subtypeXMax: null }, { fieldId: 'C_IRS_CENTER', - boxNumber: 'C_IRS_CENTER', - label: 'IRS Center', + boxNumber: 'C', + label: 'IRS center where partnership filed return', fieldCategory: 'METADATA', valueType: 'text', - xMin: 30, + xMin: 170, xMax: 290, - yMin: 610, - yMax: 655, + yMin: 
546, + yMax: 562, hasSubtype: false, subtypeXMin: null, subtypeXMax: null }, { - fieldId: 'D1_PUBLIC_TRADED', - boxNumber: 'D1_PUBLIC_TRADED', - label: 'Publicly Traded Partnership', + fieldId: 'D_PTP', + boxNumber: 'D', + label: 'Check if this is a publicly traded partnership', fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 225, - xMax: 290, - yMin: 610, - yMax: 640, + xMin: 250, + xMax: 310, + yMin: 536, + yMax: 550, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -186,47 +193,37 @@ const PART_I_REGIONS: K1PositionRegion[] = [ ]; // ============================================================================ -// Part II — Information About the Partner (12) +// Part II -- Information About the Partner (10) +// Verified: G_LIMITED 'X' at (180.3, 446.6), H1_DOMESTIC 'X' at (58.0, 422.9), +// H2_DE 'X' at (57.9, 410.5) +// TMPL: E at y=518, F at y=494, G at y=447, H1 at y=422, H2 at y=411, +// I1 at y=386, I2 at y=374 // ============================================================================ const PART_II_REGIONS: K1PositionRegion[] = [ { - fieldId: 'D_PARTNER_EIN', - boxNumber: 'D_PARTNER_EIN', - label: "Partner's EIN/SSN", - fieldCategory: 'METADATA', - valueType: 'text', - xMin: 30, - xMax: 200, - yMin: 575, - yMax: 610, - hasSubtype: false, - subtypeXMin: null, - subtypeXMax: null - }, - { - fieldId: 'E_NAME', - boxNumber: 'E_NAME', - label: "Partner's Name", + fieldId: 'E_TIN', + boxNumber: 'E', + label: "Partner's identifying number", fieldCategory: 'METADATA', valueType: 'text', - xMin: 30, - xMax: 290, - yMin: 535, - yMax: 580, + xMin: 58, + xMax: 190, + yMin: 510, + yMax: 526, hasSubtype: false, subtypeXMin: null, subtypeXMax: null }, { - fieldId: 'F_ADDR', - boxNumber: 'F_ADDR', - label: "Partner's Address", + fieldId: 'F_NAME_ADDR', + boxNumber: 'F', + label: "Partner's name, address, city, state, and ZIP code", fieldCategory: 'METADATA', valueType: 'text', - xMin: 30, + xMin: 58, xMax: 290, - yMin: 490, - yMax: 540, + yMin: 460, 
+ yMax: 510, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -234,13 +231,13 @@ const PART_II_REGIONS: K1PositionRegion[] = [ { fieldId: 'G_GENERAL', boxNumber: 'G_GENERAL', - label: 'General Partner', + label: 'General partner or LLC member-manager', fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 100, - xMax: 165, - yMin: 450, - yMax: 480, + xMin: 152, + xMax: 178, + yMin: 439, + yMax: 454, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -248,13 +245,13 @@ const PART_II_REGIONS: K1PositionRegion[] = [ { fieldId: 'G_LIMITED', boxNumber: 'G_LIMITED', - label: 'Limited Partner', + label: 'Limited partner or other LLC member', fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 165, - xMax: 230, - yMin: 432, - yMax: 462, + xMin: 178, + xMax: 202, + yMin: 439, + yMax: 454, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -262,97 +259,83 @@ const PART_II_REGIONS: K1PositionRegion[] = [ { fieldId: 'H1_DOMESTIC', boxNumber: 'H1_DOMESTIC', - label: 'Domestic Partner', + label: 'Domestic partner', fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 43, - xMax: 110, - yMin: 408, - yMax: 438, + xMin: 50, + xMax: 70, + yMin: 416, + yMax: 430, hasSubtype: false, subtypeXMin: null, subtypeXMax: null }, { - fieldId: 'H2_FOREIGN', - boxNumber: 'H2_FOREIGN', - label: 'Foreign Partner', + fieldId: 'H1_FOREIGN', + boxNumber: 'H1_FOREIGN', + label: 'Foreign partner', fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 100, - xMax: 165, - yMin: 408, - yMax: 438, + xMin: 178, + xMax: 198, + yMin: 416, + yMax: 430, hasSubtype: false, subtypeXMin: null, subtypeXMax: null }, { - fieldId: 'I1_DISREGARDED', - boxNumber: 'I1_DISREGARDED', - label: 'Disregarded Entity', - fieldCategory: 'METADATA', - valueType: 'text', - xMin: 30, - xMax: 290, - yMin: 380, - yMax: 415, - hasSubtype: false, - subtypeXMin: null, - subtypeXMax: null - }, - { - fieldId: 'I2_ENTITY_EIN', - boxNumber: 'I2_ENTITY_EIN', - label: 'Disregarded Entity EIN', - 
fieldCategory: 'METADATA', - valueType: 'text', - xMin: 30, - xMax: 290, - yMin: 355, - yMax: 385, + fieldId: 'H2_DE', + boxNumber: 'H2', + label: 'If the partner is a disregarded entity (DE)', + fieldCategory: 'CHECKBOX', + valueType: 'checkbox', + xMin: 50, + xMax: 70, + yMin: 404, + yMax: 418, hasSubtype: false, subtypeXMin: null, subtypeXMax: null }, { - fieldId: 'F1_PROFIT_PCT', - boxNumber: 'F1_PROFIT_PCT', - label: 'What type of entity', + fieldId: 'I1_ENTITY_TYPE', + boxNumber: 'I1', + label: 'What type of entity is this partner?', fieldCategory: 'METADATA', valueType: 'text', - xMin: 30, + xMin: 165, xMax: 290, - yMin: 455, - yMax: 495, + yMin: 379, + yMax: 393, hasSubtype: false, subtypeXMin: null, subtypeXMax: null }, { - fieldId: 'PARTNER_RETIRE', - boxNumber: 'PARTNER_RETIRE', - label: 'Partner Retirement', + fieldId: 'I2_RETIREMENT', + boxNumber: 'I2', + label: 'If this partner is a retirement plan (IRA/SEP/Keogh/etc.), check here', fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 225, - xMax: 290, - yMin: 455, - yMax: 485, + xMin: 270, + xMax: 295, + yMin: 367, + yMax: 381, hasSubtype: false, subtypeXMin: null, subtypeXMax: null }, { - fieldId: 'ENTITY_TYPE', - boxNumber: 'ENTITY_TYPE', - label: 'Entity Type', + fieldId: 'H2_DE_TIN', + boxNumber: 'H2_TIN', + label: 'Disregarded entity TIN', fieldCategory: 'METADATA', valueType: 'text', - xMin: 30, - xMax: 290, - yMin: 460, - yMax: 500, + xMin: 70, + xMax: 140, + yMin: 391, + yMax: 405, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -360,8 +343,11 @@ const PART_II_REGIONS: K1PositionRegion[] = [ ]; // ============================================================================ -// Section J — Partner's Share of Profit, Loss, and Capital (7) -// Verified: J_PROFIT_BEGIN at (139.1, 339.1), J_PROFIT_END at (250.1, 339.1) +// Section J -- Partner's Share of Profit, Loss, and Capital (8) +// Verified: J_PROFIT_BEGIN '3.032900' at (139.1, 339.1), +// J_PROFIT_END '0.000000' at (250.1, 
339.1), +// J_CAPITAL_BEGIN '3.032900' at (139.1, 314.2) +// TMPL: Profit at y=338, Loss at y=326, Capital at y=314, Sale/Exchange at y=290 // ============================================================================ const SECTION_J_REGIONS: K1PositionRegion[] = [ { @@ -370,10 +356,10 @@ const SECTION_J_REGIONS: K1PositionRegion[] = [ label: 'Profit Beginning %', fieldCategory: 'SECTION_J', valueType: 'percentage', - xMin: 124, - xMax: 200, - yMin: 324, - yMax: 354, + xMin: 130, + xMax: 185, + yMin: 331, + yMax: 347, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -384,10 +370,10 @@ const SECTION_J_REGIONS: K1PositionRegion[] = [ label: 'Profit Ending %', fieldCategory: 'SECTION_J', valueType: 'percentage', - xMin: 235, - xMax: 305, - yMin: 324, - yMax: 354, + xMin: 240, + xMax: 295, + yMin: 331, + yMax: 347, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -398,10 +384,10 @@ const SECTION_J_REGIONS: K1PositionRegion[] = [ label: 'Loss Beginning %', fieldCategory: 'SECTION_J', valueType: 'percentage', - xMin: 124, - xMax: 200, - yMin: 308, - yMax: 338, + xMin: 130, + xMax: 185, + yMin: 318, + yMax: 334, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -412,10 +398,10 @@ const SECTION_J_REGIONS: K1PositionRegion[] = [ label: 'Loss Ending %', fieldCategory: 'SECTION_J', valueType: 'percentage', - xMin: 235, - xMax: 305, - yMin: 308, - yMax: 338, + xMin: 240, + xMax: 295, + yMin: 318, + yMax: 334, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -426,9 +412,9 @@ const SECTION_J_REGIONS: K1PositionRegion[] = [ label: 'Capital Beginning %', fieldCategory: 'SECTION_J', valueType: 'percentage', - xMin: 124, - xMax: 200, - yMin: 292, + xMin: 130, + xMax: 185, + yMin: 306, yMax: 322, hasSubtype: false, subtypeXMin: null, @@ -440,24 +426,38 @@ const SECTION_J_REGIONS: K1PositionRegion[] = [ label: 'Capital Ending %', fieldCategory: 'SECTION_J', valueType: 'percentage', - xMin: 235, - xMax: 305, - yMin: 292, + xMin: 240, + xMax: 
295, + yMin: 306, yMax: 322, hasSubtype: false, subtypeXMin: null, subtypeXMax: null }, { - fieldId: 'J_DECREASE_SALE', - boxNumber: 'J_DECREASE_SALE', - label: 'Decrease due to Sale', - fieldCategory: 'SECTION_J', + fieldId: 'J_SALE', + boxNumber: 'J_SALE', + label: 'Check if decrease is due to sale', + fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 124, - xMax: 200, - yMin: 276, - yMax: 306, + xMin: 56, + xMax: 72, + yMin: 283, + yMax: 297, + hasSubtype: false, + subtypeXMin: null, + subtypeXMax: null + }, + { + fieldId: 'J_EXCHANGE', + boxNumber: 'J_EXCHANGE', + label: 'Exchange of partnership interest', + fieldCategory: 'CHECKBOX', + valueType: 'checkbox', + xMin: 100, + xMax: 116, + yMin: 283, + yMax: 297, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -465,8 +465,11 @@ const SECTION_J_REGIONS: K1PositionRegion[] = [ ]; // ============================================================================ -// Section K — Partner's Share of Liabilities (8) -// Verified: K_NONRECOURSE_BEGIN at (180.8, 254.5), K2_CHECKBOX at (294.9, 205.8) +// Section K -- Partner's Share of Liabilities (8) +// Verified: K_NONRECOURSE_BEGIN '498,211' at (180.8, 254.5), +// K2_CHECKBOX 'X' at (294.9, 205.8) +// TMPL: Nonrecourse at y=254, Qual nonrecourse at y=230-238, Recourse at y=218.5, +// K2 at y=207, K3 at y=186-195 // ============================================================================ const SECTION_K_REGIONS: K1PositionRegion[] = [ { @@ -475,10 +478,10 @@ const SECTION_K_REGIONS: K1PositionRegion[] = [ label: 'Nonrecourse Beginning', fieldCategory: 'SECTION_K', valueType: 'numeric', - xMin: 155, - xMax: 235, - yMin: 240, - yMax: 270, + xMin: 135, + xMax: 220, + yMin: 247, + yMax: 262, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -489,10 +492,10 @@ const SECTION_K_REGIONS: K1PositionRegion[] = [ label: 'Nonrecourse Ending', fieldCategory: 'SECTION_K', valueType: 'numeric', - xMin: 235, - xMax: 310, - yMin: 240, - yMax: 270, + xMin: 222, 
+ xMax: 305, + yMin: 247, + yMax: 262, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -500,13 +503,13 @@ const SECTION_K_REGIONS: K1PositionRegion[] = [ { fieldId: 'K_QUAL_NONRECOURSE_BEGIN', boxNumber: 'K_QUAL_NONRECOURSE_BEGIN', - label: 'Qualified Nonrecourse Beginning', + label: 'Qualified nonrecourse financing Beginning', fieldCategory: 'SECTION_K', valueType: 'numeric', - xMin: 155, - xMax: 235, - yMin: 224, - yMax: 254, + xMin: 135, + xMax: 220, + yMin: 222, + yMax: 245, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -514,13 +517,13 @@ const SECTION_K_REGIONS: K1PositionRegion[] = [ { fieldId: 'K_QUAL_NONRECOURSE_END', boxNumber: 'K_QUAL_NONRECOURSE_END', - label: 'Qualified Nonrecourse Ending', + label: 'Qualified nonrecourse financing Ending', fieldCategory: 'SECTION_K', valueType: 'numeric', - xMin: 235, - xMax: 310, - yMin: 224, - yMax: 254, + xMin: 222, + xMax: 305, + yMin: 222, + yMax: 245, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -531,10 +534,10 @@ const SECTION_K_REGIONS: K1PositionRegion[] = [ label: 'Recourse Beginning', fieldCategory: 'SECTION_K', valueType: 'numeric', - xMin: 155, - xMax: 235, - yMin: 208, - yMax: 238, + xMin: 135, + xMax: 220, + yMin: 211, + yMax: 226, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -545,38 +548,38 @@ const SECTION_K_REGIONS: K1PositionRegion[] = [ label: 'Recourse Ending', fieldCategory: 'SECTION_K', valueType: 'numeric', - xMin: 235, - xMax: 310, - yMin: 208, - yMax: 238, + xMin: 222, + xMax: 305, + yMin: 211, + yMax: 226, hasSubtype: false, subtypeXMin: null, subtypeXMax: null }, { fieldId: 'K2_CHECKBOX', - boxNumber: 'K2_CHECKBOX', - label: 'K-2 Attached', + boxNumber: 'K2', + label: 'K1 includes liability amounts from lower-tier partnerships', fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 280, - xMax: 310, - yMin: 191, - yMax: 221, + xMin: 288, + xMax: 305, + yMin: 198, + yMax: 213, hasSubtype: false, subtypeXMin: null, subtypeXMax: null }, { 
fieldId: 'K3_CHECKBOX', - boxNumber: 'K3_CHECKBOX', - label: 'K-3 Attached', + boxNumber: 'K3', + label: 'Liability subject to guarantees or payment obligations', fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 280, - xMax: 310, - yMin: 176, - yMax: 206, + xMin: 278, + xMax: 305, + yMin: 179, + yMax: 193, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -584,21 +587,23 @@ const SECTION_K_REGIONS: K1PositionRegion[] = [ ]; // ============================================================================ -// Section L — Partner's Capital Account Analysis (6) -// Verified: L_BEG_CAPITAL at (257.8, 157.4), L_CURR_YR_INCOME at (259.3, 133.7), -// L_WITHDRAWALS at (257.8, 109.4) +// Section L -- Partner's Capital Account Analysis (6) +// Verified: L_BEG_CAPITAL '4,903,568' at (257.8, 157.4), +// L_CURR_YR_INCOME '(409,811)' at (259.3, 133.7), +// L_WITHDRAWALS '4,493,757' at (257.8, 109.4) +// Values right-aligned after $ at x~189.5, data at x~257-260 // ============================================================================ const SECTION_L_REGIONS: K1PositionRegion[] = [ { fieldId: 'L_BEG_CAPITAL', boxNumber: 'L_BEG_CAPITAL', - label: 'Beginning Capital Account', + label: 'Beginning capital account', fieldCategory: 'SECTION_L', valueType: 'numeric', - xMin: 220, - xMax: 306, - yMin: 142, - yMax: 172, + xMin: 190, + xMax: 305, + yMin: 150, + yMax: 165, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -606,13 +611,13 @@ const SECTION_L_REGIONS: K1PositionRegion[] = [ { fieldId: 'L_CONTRIBUTED', boxNumber: 'L_CONTRIBUTED', - label: 'Capital Contributed', + label: 'Capital contributed during the year', fieldCategory: 'SECTION_L', valueType: 'numeric', - xMin: 220, - xMax: 306, - yMin: 126, - yMax: 156, + xMin: 190, + xMax: 305, + yMin: 138, + yMax: 153, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -620,13 +625,13 @@ const SECTION_L_REGIONS: K1PositionRegion[] = [ { fieldId: 'L_CURR_YR_INCOME', boxNumber: 'L_CURR_YR_INCOME', - 
label: 'Current Year Net Income (Loss)', + label: 'Current year net income (loss)', fieldCategory: 'SECTION_L', valueType: 'numeric', - xMin: 220, - xMax: 306, - yMin: 119, - yMax: 149, + xMin: 190, + xMax: 305, + yMin: 126, + yMax: 141, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -634,13 +639,13 @@ const SECTION_L_REGIONS: K1PositionRegion[] = [ { fieldId: 'L_OTHER', boxNumber: 'L_OTHER', - label: 'Other Increase (Decrease)', + label: 'Other increase (decrease)', fieldCategory: 'SECTION_L', valueType: 'numeric', - xMin: 220, - xMax: 306, - yMin: 103, - yMax: 133, + xMin: 190, + xMax: 305, + yMin: 114, + yMax: 129, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -648,13 +653,13 @@ const SECTION_L_REGIONS: K1PositionRegion[] = [ { fieldId: 'L_WITHDRAWALS', boxNumber: 'L_WITHDRAWALS', - label: 'Withdrawals & Distributions', + label: 'Withdrawals and distributions', fieldCategory: 'SECTION_L', valueType: 'numeric', - xMin: 220, - xMax: 306, - yMin: 95, - yMax: 125, + xMin: 190, + xMax: 305, + yMin: 102, + yMax: 117, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -662,13 +667,13 @@ const SECTION_L_REGIONS: K1PositionRegion[] = [ { fieldId: 'L_END_CAPITAL', boxNumber: 'L_END_CAPITAL', - label: 'Ending Capital Account', + label: 'Ending capital account', fieldCategory: 'SECTION_L', valueType: 'numeric', - xMin: 220, - xMax: 306, - yMin: 83, - yMax: 113, + xMin: 190, + xMax: 305, + yMin: 90, + yMax: 106, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -676,20 +681,21 @@ const SECTION_L_REGIONS: K1PositionRegion[] = [ ]; // ============================================================================ -// Section M — Contributed Property (2) -// Verified: M_NO at (101.2, 74.2) +// Section M -- Contributed Property with Built-in Gain/Loss (2) +// Verified: M_NO 'X' at (101.2, 74.2) +// TMPL: 'Yes' at x=72, 'No' at x=115.2, both at y=74 // ============================================================================ const 
SECTION_M_REGIONS: K1PositionRegion[] = [ { fieldId: 'M_YES', boxNumber: 'M_YES', - label: 'Contributed Property: Yes', - fieldCategory: 'SECTION_M', + label: 'Did the partner contribute property with a built-in gain (loss)? Yes', + fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 50, - xMax: 85, - yMin: 74, - yMax: 104, + xMin: 58, + xMax: 97, + yMin: 67, + yMax: 81, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -697,13 +703,13 @@ const SECTION_M_REGIONS: K1PositionRegion[] = [ { fieldId: 'M_NO', boxNumber: 'M_NO', - label: 'Contributed Property: No', - fieldCategory: 'SECTION_M', + label: 'Did the partner contribute property with a built-in gain (loss)? No', + fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 86, - xMax: 120, - yMin: 59, - yMax: 89, + xMin: 97, + xMax: 130, + yMin: 67, + yMax: 81, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -711,20 +717,21 @@ const SECTION_M_REGIONS: K1PositionRegion[] = [ ]; // ============================================================================ -// Section N — Net Unrecognized 704(c) (2) -// Verified: N_BEGINNING at (271.5, 49.7), N_ENDING at (92.1, 2.8) +// Section N -- Net Unrecognized Section 704(c) Gain or (Loss) (2) +// Verified: N_BEGINNING '(5,373)' at (271.5, 49.7) +// Values right-aligned after $ at x~189.1 // ============================================================================ const SECTION_N_REGIONS: K1PositionRegion[] = [ { fieldId: 'N_BEGINNING', boxNumber: 'N_BEGINNING', - label: 'Net 704(c) Beginning', + label: "Partner's Share of Net Unrecognized Section 704(c) Gain or (Loss) Beginning", fieldCategory: 'SECTION_N', valueType: 'numeric', - xMin: 220, - xMax: 306, - yMin: 35, - yMax: 65, + xMin: 190, + xMax: 305, + yMin: 42, + yMax: 58, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -732,13 +739,13 @@ const SECTION_N_REGIONS: K1PositionRegion[] = [ { fieldId: 'N_ENDING', boxNumber: 'N_ENDING', - label: 'Net 704(c) Ending', + label: "Partner's 
Share of Net Unrecognized Section 704(c) Gain or (Loss) Ending", fieldCategory: 'SECTION_N', valueType: 'numeric', - xMin: 55, - xMax: 140, - yMin: 0, - yMax: 20, + xMin: 190, + xMax: 305, + yMin: 31, + yMax: 47, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -746,9 +753,14 @@ const SECTION_N_REGIONS: K1PositionRegion[] = [ ]; // ============================================================================ -// Part III — Partner's Share of Current Year Income, Deductions, Credits, etc. -// Left Column: Boxes 1-13 (19 regions including sub-boxes) -// Verified: BOX_11 at (314.2/403.9, 314.4), BOX_19 at (455.2/530.6, 422-423) +// Part III -- Left Column: Boxes 1-13 +// +// Row spacing: 24pt. Label y-positions measured from template text. +// Value column: x=370-445 (between label text and right-column boundary). +// Subtype code column (boxes 11-13): x=305-370. +// +// Verified: BOX_11 subtype 'ZZ*' at (314.2, 314.4), +// BOX_11 value '(409,615)' at (403.9, 314.4) // ============================================================================ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ { @@ -758,9 +770,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 683, - yMax: 713, + xMax: 445, + yMin: 696, + yMax: 720, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -772,9 +784,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 660, - yMax: 690, + xMax: 445, + yMin: 672, + yMax: 696, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -786,9 +798,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 637, - yMax: 667, + xMax: 445, + yMin: 648, + yMax: 672, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -800,9 +812,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ 
fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 614, - yMax: 644, + xMax: 445, + yMin: 624, + yMax: 648, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -814,9 +826,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 591, - yMax: 621, + xMax: 445, + yMin: 600, + yMax: 624, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -828,9 +840,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 568, - yMax: 598, + xMax: 445, + yMin: 576, + yMax: 600, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -842,9 +854,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 545, - yMax: 575, + xMax: 445, + yMin: 552, + yMax: 576, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -856,8 +868,8 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 522, + xMax: 445, + yMin: 528, yMax: 552, hasSubtype: false, subtypeXMin: null, @@ -870,9 +882,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 499, - yMax: 529, + xMax: 445, + yMin: 504, + yMax: 528, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -884,9 +896,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 476, - yMax: 506, + xMax: 445, + yMin: 480, + yMax: 504, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -898,9 +910,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 453, - yMax: 483, + xMax: 445, + yMin: 456, + yMax: 480, hasSubtype: false, subtypeXMin: 
null, subtypeXMax: null @@ -912,9 +924,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 430, - yMax: 460, + xMax: 445, + yMin: 432, + yMax: 456, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -926,9 +938,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 407, - yMax: 437, + xMax: 445, + yMin: 408, + yMax: 432, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -940,9 +952,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, + xMax: 445, yMin: 384, - yMax: 414, + yMax: 408, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -954,9 +966,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 361, - yMax: 391, + xMax: 445, + yMin: 360, + yMax: 384, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -968,9 +980,9 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 338, - yMax: 368, + xMax: 445, + yMin: 336, + yMax: 360, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -982,12 +994,12 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 300, - yMax: 330, + xMax: 445, + yMin: 288, + yMax: 336, hasSubtype: true, - subtypeXMin: 300, - subtypeXMax: 365 + subtypeXMin: 305, + subtypeXMax: 370 }, { fieldId: 'BOX_12', @@ -996,12 +1008,12 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 277, - yMax: 307, + xMax: 445, + yMin: 264, + yMax: 288, hasSubtype: true, - subtypeXMin: 300, - subtypeXMax: 365 + subtypeXMin: 305, + subtypeXMax: 370 }, { fieldId: 
'BOX_13', @@ -1010,19 +1022,25 @@ const PART_III_LEFT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 370, - xMax: 455, - yMin: 245, - yMax: 275, + xMax: 445, + yMin: 240, + yMax: 264, hasSubtype: true, - subtypeXMin: 300, - subtypeXMax: 365 + subtypeXMin: 305, + subtypeXMax: 370 } ]; // ============================================================================ -// Part III Right Column: Boxes 14-21 (8 regions) -// Verified: BOX_16_K3 at (563.3, 603.8), BOX_19 at (455.2/530.6, 422-423), -// BOX_21 at (456.4/555.6, 266-267) +// Part III -- Right Column: Boxes 14-23 +// +// Code column: x=445-510 (just right of the left column boundary). +// Value column: x=510-600. +// +// Verified: BOX_16_K3 'X' at (563.3, 603.8), +// BOX_19 subtype 'A' at (455.2, 423.2), value '4,493,757' at (530.6, 422.0), +// BOX_20 subtypes A/B/V/* at x~455-456, values at x~525-526, +// BOX_21 subtype '*' at (456.4, 267.1), value '196' at (555.6, 266.1) // ============================================================================ const PART_III_RIGHT_REGIONS: K1PositionRegion[] = [ { @@ -1032,12 +1050,12 @@ const PART_III_RIGHT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 510, - xMax: 595, - yMin: 683, - yMax: 713, + xMax: 600, + yMin: 672, + yMax: 720, hasSubtype: true, - subtypeXMin: 440, - subtypeXMax: 505 + subtypeXMin: 445, + subtypeXMax: 510 }, { fieldId: 'BOX_15', @@ -1046,37 +1064,37 @@ const PART_III_RIGHT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 510, - xMax: 595, - yMin: 650, - yMax: 680, + xMax: 600, + yMin: 624, + yMax: 672, hasSubtype: true, - subtypeXMin: 440, - subtypeXMax: 505 + subtypeXMin: 445, + subtypeXMax: 510 }, { fieldId: 'BOX_16', boxNumber: '16', - label: 'Foreign transactions', + label: 'Schedule K-3 is attached if checked', fieldCategory: 'PART_III', valueType: 'numeric', xMin: 510, - xMax: 595, - yMin: 589, - yMax: 619, + xMax: 600, + 
yMin: 600, + yMax: 624, hasSubtype: true, - subtypeXMin: 440, - subtypeXMax: 505 + subtypeXMin: 445, + subtypeXMax: 510 }, { fieldId: 'BOX_16_K3', boxNumber: '16_K3', - label: 'Schedule K-2/K-3 attached', + label: 'Schedule K-3 is attached', fieldCategory: 'CHECKBOX', valueType: 'checkbox', - xMin: 548, - xMax: 580, - yMin: 589, - yMax: 619, + xMin: 556, + xMax: 575, + yMin: 596, + yMax: 612, hasSubtype: false, subtypeXMin: null, subtypeXMax: null @@ -1088,12 +1106,12 @@ const PART_III_RIGHT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 510, - xMax: 595, - yMin: 510, - yMax: 540, + xMax: 600, + yMin: 530, + yMax: 600, hasSubtype: true, - subtypeXMin: 440, - subtypeXMax: 505 + subtypeXMin: 445, + subtypeXMax: 510 }, { fieldId: 'BOX_18', @@ -1102,12 +1120,12 @@ const PART_III_RIGHT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 510, - xMax: 595, - yMin: 460, - yMax: 490, + xMax: 600, + yMin: 444, + yMax: 530, hasSubtype: true, - subtypeXMin: 440, - subtypeXMax: 505 + subtypeXMin: 445, + subtypeXMax: 510 }, { fieldId: 'BOX_19', @@ -1116,12 +1134,12 @@ const PART_III_RIGHT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 510, - xMax: 595, - yMin: 395, - yMax: 445, + xMax: 600, + yMin: 396, + yMax: 444, hasSubtype: true, - subtypeXMin: 440, - subtypeXMax: 505 + subtypeXMin: 445, + subtypeXMax: 510 }, { fieldId: 'BOX_20', @@ -1130,12 +1148,12 @@ const PART_III_RIGHT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 510, - xMax: 595, - yMin: 275, - yMax: 395, + xMax: 600, + yMin: 284, + yMax: 396, hasSubtype: true, - subtypeXMin: 440, - subtypeXMax: 505 + subtypeXMin: 445, + subtypeXMax: 510 }, { fieldId: 'BOX_21', @@ -1144,12 +1162,40 @@ const PART_III_RIGHT_REGIONS: K1PositionRegion[] = [ fieldCategory: 'PART_III', valueType: 'numeric', xMin: 510, - xMax: 595, - yMin: 245, - yMax: 280, + xMax: 600, + yMin: 240, + 
yMax: 284, hasSubtype: true, - subtypeXMin: 440, - subtypeXMax: 505 + subtypeXMin: 445, + subtypeXMax: 510 + }, + { + fieldId: 'BOX_22', + boxNumber: '22', + label: 'More than one activity for at-risk purposes', + fieldCategory: 'CHECKBOX', + valueType: 'checkbox', + xMin: 525, + xMax: 590, + yMin: 176, + yMax: 190, + hasSubtype: false, + subtypeXMin: null, + subtypeXMax: null + }, + { + fieldId: 'BOX_23', + boxNumber: '23', + label: 'More than one activity for passive activity purposes', + fieldCategory: 'CHECKBOX', + valueType: 'checkbox', + xMin: 525, + xMax: 590, + yMin: 164, + yMax: 178, + hasSubtype: false, + subtypeXMin: null, + subtypeXMax: null } ]; diff --git a/apps/api/src/app/k1-import/extractors/pdf-parse-extractor.ts b/apps/api/src/app/k1-import/extractors/pdf-parse-extractor.ts index 1d1abce0d..835c6aec3 100644 --- a/apps/api/src/app/k1-import/extractors/pdf-parse-extractor.ts +++ b/apps/api/src/app/k1-import/extractors/pdf-parse-extractor.ts @@ -127,6 +127,10 @@ export class PdfParseExtractor implements K1Extractor { const fields: K1ExtractedField[] = []; const metadata = this.initMetadata(); + // Checkboxes first — consume "X" marks before Part III so the + // BOX_16_K3 checkbox doesn't get grabbed as a BOX_16 value. 
+ this.extractCheckboxes(dataItems, fields, metadata); + // T007-T010 (US1): Part III extraction this.extractPartIII(dataItems, fields); @@ -136,9 +140,6 @@ export class PdfParseExtractor implements K1Extractor { // T015-T018 (US3): Sections J/K/L/M/N this.extractSections(dataItems, fields); - // T019-T020 (US4): Checkbox detection - this.extractCheckboxes(dataItems, fields, metadata); - // T021 (US5): Unmapped items const unmappedItems = this.collectUnmappedItems(dataItems); @@ -543,9 +544,8 @@ export class PdfParseExtractor implements K1Extractor { this.extractTextMetadata(dataItems, 'C_IRS_CENTER', metadata, null); // Part II: Partner info - this.extractTextMetadata(dataItems, 'D_PARTNER_EIN', metadata, 'partnerEin'); - this.extractTextMetadata(dataItems, 'E_NAME', metadata, 'partnerName'); - this.extractTextMetadata(dataItems, 'F_ADDR', metadata, null); + this.extractTextMetadata(dataItems, 'E_TIN', metadata, 'partnerEin'); + this.extractTextMetadata(dataItems, 'F_NAME_ADDR', metadata, 'partnerName'); // Extract remaining metadata text fields into the fields array const metadataRegions = K1_POSITION_REGIONS.filter( diff --git a/apps/api/src/environments/environment.ts b/apps/api/src/environments/environment.ts index 054766460..0ae43ff44 100644 --- a/apps/api/src/environments/environment.ts +++ b/apps/api/src/environments/environment.ts @@ -2,6 +2,6 @@ import { DEFAULT_HOST } from '@ghostfolio/common/config'; export const environment = { production: false, - rootUrl: `https://${DEFAULT_HOST}:4200`, + rootUrl: `http://${DEFAULT_HOST}:4200`, version: 'dev' }; diff --git a/apps/client/project.json b/apps/client/project.json index 38887ca8a..269cca983 100644 --- a/apps/client/project.json +++ b/apps/client/project.json @@ -215,10 +215,7 @@ "executor": "@nx/angular:dev-server", "options": { "buildTarget": "client:build", - "proxyConfig": "apps/client/proxy.conf.json", - "ssl": true, - "sslCert": "apps/client/localhost.cert", - "sslKey": "apps/client/localhost.pem" + 
"proxyConfig": "apps/client/proxy.conf.json" }, "configurations": { "development-ca": { diff --git a/tools/extract-k1-positions.mjs b/tools/extract-k1-positions.mjs new file mode 100644 index 000000000..d411b4e17 --- /dev/null +++ b/tools/extract-k1-positions.mjs @@ -0,0 +1,74 @@ +/** + * Utility to extract all text items with their (x, y) positions from a K-1 PDF. + * This dumps every text item with coordinates so we can calibrate position regions. + * + * Usage: node tools/extract-k1-positions.mjs path/to/k1.pdf + */ +import { readFileSync } from 'fs'; +import { resolve } from 'path'; + +// Dynamic import of pdfjs-dist legacy build +const { getDocument, GlobalWorkerOptions } = await import( + 'pdfjs-dist/legacy/build/pdf.mjs' +); + +const workerPath = + 'file:///' + + resolve('node_modules/pdfjs-dist/legacy/build/pdf.worker.mjs').replace( + /\\/g, + '/' + ); +GlobalWorkerOptions.workerSrc = workerPath; + +const pdfPath = process.argv[2]; +if (!pdfPath) { + console.error('Usage: node tools/extract-k1-positions.mjs path/to/k1.pdf'); + process.exit(1); +} + +const buffer = readFileSync(pdfPath); +const loadingTask = getDocument({ + data: new Uint8Array(buffer), + standardFontDataUrl: resolve('node_modules/pdfjs-dist/standard_fonts') + '/', + cMapUrl: resolve('node_modules/pdfjs-dist/cmaps') + '/', + cMapPacked: true, + isEvalSupported: false, + disableFontFace: true +}); + +const pdfDoc = await loadingTask.promise; +console.log(`Pages: ${pdfDoc.numPages}`); + +for (let pageNum = 1; pageNum <= Math.min(pdfDoc.numPages, 2); pageNum++) { + console.log(`\n=== PAGE ${pageNum} ===\n`); + const page = await pdfDoc.getPage(pageNum); + const textContent = await page.getTextContent({ includeMarkedContent: false }); + + const items = textContent.items; + const styles = textContent.styles; + + // Sort by y descending (top of page first), then x ascending + const sorted = [...items].sort((a, b) => { + const dy = b.transform[5] - a.transform[5]; + if (Math.abs(dy) > 2) return dy; + return a.transform[4] -
b.transform[4]; + }); + + for (const item of sorted) { + const text = item.str.trim(); + if (!text) continue; + + const x = Math.round(item.transform[4] * 10) / 10; + const y = Math.round(item.transform[5] * 10) / 10; + const style = styles[item.fontName] || {}; + const fontFamily = style.fontFamily || 'unknown'; + const isData = fontFamily.toLowerCase() !== 'serif'; + + console.log( + `${isData ? 'DATA' : 'TMPL'} | x=${String(x).padStart(7)} | y=${String(y).padStart(7)} | font=${fontFamily.padEnd(15)} | "${text}"` + ); + } +} + +await pdfDoc.destroy(); +console.log('\nDone.');