UNPKG

@flatfile/plugin-xlsx-extractor

Version:

A plugin for parsing xlsx files in Flatfile.

1 lines 40 kB
{"version":3,"sources":["../src/merged-cells.ts","../src/utils.ts","../src/parser.ts","../src/index.ts"],"names":["categorizeMergedCell","merge","isSingleRow","isSingleColumn","processMergedCells","sheet","options","processedSheet","merges","processTreatments","treatments","processVectorTreatments","vector","vectorOptions","vectorMerges","treatment","topLeftCellAddress","i","topLeftCell","applyToAllCells","applyToTopLeftCell","coalesceCells","concatenateCells","r","c","cellAddress","separator","values","cell","firstCellAddress","prependNonUniqueHeaderColumns","headers","counts","value","cleanValue","count","isNullOrWhitespace","trimTrailingEmptyCells","row","lastNonNullIndex","cascadeRowValues","rows","result","columnCount","col","lastValue","isLikelyHeaderRow","textCount","nonEmptyCount","cascadeHeaderValues","headerRows","rowsToProcess","likelyHeaderRows","rowIndex","resultRowIndex","parseBuffer","buffer","workbook","h","e","sheetName","sheetCapture","convertSheet","acc","rawNumbers","raw","headerDetectionOptions","headerSelectionEnabled","skipEmptyLines","debug","shouldCascadeRowValues","shouldCascadeHeaderValues","rowsToSearch","getHeaders","rawRows","excelColumnLetters","dataRows","headerCandidateRows","headerDetectionData","rowsToSkip","detectedHeaderRow","cleanedDetectedHeader","finalDataRows","finalHeaders","uniqueHeaders","flatfileData","index","headerName","sheetMetadata","ExcelExtractor","Extractor","excelParser","xlsxExtractorPlugin"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;AAgBA,SAASA,EACPC,CACiD,CAAA,CACjD,IAAMC,CAAcD,CAAAA,CAAAA,CAAM,EAAE,CAAMA,GAAAA,CAAAA,CAAM,EAAE,CACpCE,CAAAA,CAAAA,CAAiBF,EAAM,CAAE,CAAA,CAAA,GAAMA,EAAM,CAAE,CAAA,CAAA,CAE7C,OAAIC,CAAe,EAAA,CAACC,EACX,eACE,CAAA,CAACD,GAAeC,CAClB,CAAA,YAAA,CAEA,cAEX,CAQO,SAASC,EACdC,CACAC,CAAAA,CAAAA,CACgB,CAChB,GAAI,CAACD,EAAM,SAAS,CAAA,EAAK,CAACC,CAAS,EAAA,iBAAA,CACjC,OAAOD,CAIT,CAAA,IAAME,EAAiB,IAAK,CAAA,KAAA,CAAM,KAAK,SAAUF,CAAAA,CAAK,CAAC,CACjDG,CAAAA,CAAAA,CAAS,CAAC,GAAIH,CAAAA,CAAM,SAAS,CAAK,EAAA,EAAG,CAG3C,CAAA,OAAAI,EAAkBF,CAAgBC,CAAAA,CAAAA,CAAQF,EAAS,CACjD,YAAA,CACA,gBACF,CAAC,CAAA,CAGDG,EAAkBF,CAAgBC,CAAAA,CAAAA,CAAQF,EAAS,CAAC,aAAa,CAAC,CAGlEG,CAAAA,CAAAA,CAAkBF,EAAgBC,CAAQF,CAAAA,CAAAA,CAAS,CAAC,UAAU,CAAC,EAG/DC,CAAe,CAAA,SAAS,EAAI,EAAC,CAEtBA,CACT,CASA,SAASE,EACPJ,CACAG,CAAAA,CAAAA,CACAF,EACAI,CACM,CAAA,CAENC,EAAwBN,CAAOG,CAAAA,CAAAA,CAAQF,EAAS,cAAgBI,CAAAA,CAAU,EAG1EC,CAAwBN,CAAAA,CAAAA,CAAOG,EAAQF,CAAS,CAAA,YAAA,CAAcI,CAAU,CAGxEC,CAAAA,CAAAA,CAAwBN,EAAOG,CAAQF,CAAAA,CAAAA,CAAS,gBAAiBI,CAAU,EAC7E,CAUA,SAASC,CAAAA,CACPN,EACAG,CACAF,CAAAA,CAAAA,CACAM,EACAF,CACM,CAAA,CACN,IAAMG,CAAgBP,CAAAA,CAAAA,CAAQ,oBAAoBM,CAAM,CAAA,CACxD,GAAI,CAACC,CAAAA,EAAiB,CAACH,CAAW,CAAA,QAAA,CAASG,EAAc,SAAS,CAAA,CAChE,OAIF,IAAMC,CAAAA,CAAeN,EAAO,MACzBP,CAAAA,CAAAA,EAAUD,EAAqBC,CAAK,CAAA,GAAMW,CAC7C,CAEA,CAAA,IAAA,IAAWX,KAASa,CAAc,CAAA,CAChC,IAAMC,CAAYF,CAAAA,CAAAA,CAAc,UAG1BG,CAA0B,CAAAC,YAAA,CAAA,KAAA,CAAM,YAAY,CAChD,CAAA,CAAGhB,EAAM,CAAE,CAAA,CAAA,CACX,EAAGA,CAAM,CAAA,CAAA,CAAE,CACb,CAAC,CACKiB,CAAAA,CAAAA,CAAcb,EAAMW,CAAkB,CAAA,CAEvCE,IAKDH,CAAc,GAAA,YAAA,CAChBI,EAAgBd,CAAOJ,CAAAA,CAAAA,CAAOiB,CAAW,CAChCH,CAAAA,CAAAA,GAAc,iBACvBK,CAAmBf,CAAAA,CAAAA,CAAOJ,CAAK,CAE/Bc,CAAAA,CAAAA,GAAc,aACbH,CAAW,GAAA,YAAA,EAAgBA,IAAW,eAEvCS,CAAAA,CAAAA,CAAAA,CAAchB,EAAOJ,CAAOW,CAAAA,CAAM,EAElCG,CAAc,GAAA,aAAA,GACbH,IAAW,YAAgBA,EAAAA,CAAAA,GAAW,kBAEvCU,CAAiBjB,CAAAA,CAAAA,CAAOJ,EAAOW,CAAQC,CAAAA,CAAAA,CAAc,WAAa,GAAG,CAAA,EAEzE,CACF,CAQA,SAASM,EACPd,CACAJ,CAAAA,CAAAA,CACAiB,EACM,CACN,IAAA,IAASK,EAAItB,CAAM,CAAA,CAAA,CAAE,EAAGsB,CAAKtB,EAAAA,CAAAA,CAAM,EAAE,CAAGsB,CAAAA,CAAAA,EAAAA,CACtC,QAASC,CAAIvB,CAAAA,CAAAA,CAAM,EAAE,CAAGuB,CAAAA,CAAAA,EAAKvB,EAAM,CAAE,CAAA,CAAA,CAAGuB,IAAK,CAC3C,IAAMC,EAAmBR,YAAM,CAAA,KAAA,CAAA,WAAA,CAAY,CAAE,CAAAM,CAAAA,CAAAA,CAAG,EAAAC,CAAE,CAAC,EACnDnB,CAAMoB,CAAAA,CAAW,EAAI,CAAE,GAAGP,CAAY,EACxC,CAEJ,CAQA,SAASE,CAAAA,CACPf,CACAJ,CAAAA,CAAAA,CACM,CACN,IAAA,IAASsB,EAAItB,CAAM,CAAA,CAAA,CAAE,EAAGsB,CAAKtB,EAAAA,CAAAA,CAAM,EAAE,CAAGsB,CAAAA,CAAAA,EAAAA,CACtC,QAASC,CAAIvB,CAAAA,CAAAA,CAAM,EAAE,CAAGuB,CAAAA,CAAAA,EAAKvB,EAAM,CAAE,CAAA,CAAA,CAAGuB,IACtC,GAAID,CAAAA,GAAMtB,EAAM,CAAE,CAAA,CAAA,EAAKuB,IAAMvB,CAAM,CAAA,CAAA,CAAE,EAAG,CACtC,IAAMwB,EAAmBR,YAAM,CAAA,KAAA,CAAA,WAAA,CAAY,CAAE,CAAAM,CAAAA,CAAAA,CAAG,EAAAC,CAAE,CAAC,EACnDnB,CAAMoB,CAAAA,CAAW,EAAI,CAAE,CAAA,CAAG,IAAK,CAAG,CAAA,EAAG,EACvC,CAGN,CAQA,SAASJ,CACPhB,CAAAA,CAAAA,CACAJ,EACAW,CACM,CAAA,CACN,GAAIA,CAAW,GAAA,YAAA,CAEb,QAASW,CAAItB,CAAAA,CAAAA,CAAM,EAAE,CAAI,CAAA,CAAA,CAAGsB,GAAKtB,CAAM,CAAA,CAAA,CAAE,EAAGsB,CAC1C,EAAA,CAAA,IAAA,IAASC,EAAIvB,CAAM,CAAA,CAAA,CAAE,EAAGuB,CAAKvB,EAAAA,CAAAA,CAAM,EAAE,CAAGuB,CAAAA,CAAAA,EAAAA,CAAK,CAC3C,IAAMC,CAAAA,CAAmBR,mBAAM,WAAY,CAAA,CAAE,EAAAM,CAAG,CAAA,CAAA,CAAAC,CAAE,CAAC,CAAA,CACnD,OAAOnB,CAAAA,CAAMoB,CAAW,EAC1B,SAEOb,CAAW,GAAA,eAAA,CAEpB,QAASW,CAAItB,CAAAA,CAAAA,CAAM,EAAE,CAAGsB,CAAAA,CAAAA,EAAKtB,EAAM,CAAE,CAAA,CAAA,CAAGsB,IACtC,IAASC,IAAAA,CAAAA,CAAIvB,EAAM,CAAE,CAAA,CAAA,CAAI,EAAGuB,CAAKvB,EAAAA,CAAAA,CAAM,EAAE,CAAGuB,CAAAA,CAAAA,EAAAA,CAAK,CAC/C,IAAMC,CAAAA,CAAmBR,mBAAM,WAAY,CAAA,CAAE,EAAAM,CAAG,CAAA,CAAA,CAAAC,CAAE,CAAC,CAAA,CACnD,OAAOnB,CAAMoB,CAAAA,CAAW,EAC1B,CAGN,CASA,SAASH,CACPjB,CAAAA,CAAAA,CACAJ,EACAW,CACAc,CAAAA,CAAAA,CACM,CACN,GAAId,CAAAA,GAAW,aAEb,IAASY,IAAAA,CAAAA,CAAIvB,EAAM,CAAE,CAAA,CAAA,CAAGuB,GAAKvB,CAAM,CAAA,CAAA,CAAE,EAAGuB,CAAK,EAAA,CAAA,CAC3C,IAAMG,CAAmB,CAAA,GAGzB,IAASJ,IAAAA,CAAAA,CAAItB,EAAM,CAAE,CAAA,CAAA,CAAGsB,GAAKtB,CAAM,CAAA,CAAA,CAAE,EAAGsB,CAAK,EAAA,CAAA,CAC3C,IAAME,CAAmB,CAAAR,YAAA,CAAA,KAAA,CAAM,YAAY,CAAE,CAAA,CAAAM,EAAG,CAAAC,CAAAA,CAAE,CAAC,CAC7CI,CAAAA,CAAAA,CAAOvB,EAAMoB,CAAW,CAAA,CAE1BG,GAAQA,CAAK,CAAA,CAAA,GAAM,MAAQA,CAAK,CAAA,CAAA,GAAM,QAAaA,CAAK,CAAA,CAAA,GAAM,IAChED,CAAO,CAAA,IAAA,CAAK,OAAOC,CAAK,CAAA,CAAC,CAAC,EAE9B,CAGA,GAAID,CAAO,CAAA,MAAA,CAAS,EAAG,CACrB,IAAME,EAAwBZ,YAAM,CAAA,KAAA,CAAA,WAAA,CAAY,CAAE,CAAGhB,CAAAA,CAAAA,CAAM,EAAE,CAAG,CAAA,CAAA,CAAAuB,CAAE,CAAC,CAAA,CACnEnB,EAAMwB,CAAgB,CAAA,CAAI,CACxB,CAAG,CAAA,GAAA,CACH,EAAGF,CAAO,CAAA,IAAA,CAAKD,CAAS,CACxB,CAAA,CAAA,CAAGC,EAAO,IAAKD,CAAAA,CAAS,EACxB,CAAGC,CAAAA,CAAAA,CAAO,KAAKD,CAAS,CAAA,CACxB,EAAG,CAAMC,GAAAA,EAAAA,CAAAA,CAAO,KAAKD,CAAS,CAAC,MACjC,CAGA,CAAA,IAAA,IAASH,EAAItB,CAAM,CAAA,CAAA,CAAE,EAAI,CAAGsB,CAAAA,CAAAA,EAAKtB,EAAM,CAAE,CAAA,CAAA,CAAGsB,IAAK,CAC/C,IAAME,EAAmBR,YAAM,CAAA,KAAA,CAAA,WAAA,CAAY,CAAE,CAAAM,CAAAA,CAAAA,CAAG,EAAAC,CAAE,CAAC,EACnD,OAAOnB,CAAAA,CAAMoB,CAAW,EAC1B,CACF,CACF,CACSb,KAAAA,GAAAA,CAAAA,GAAW,eAEpB,CAAA,IAAA,IAAS,CAAIX,CAAAA,CAAAA,CAAM,EAAE,CAAG,CAAA,CAAA,EAAKA,EAAM,CAAE,CAAA,CAAA,CAAG,IAAK,CAC3C,IAAM0B,EAAmB,EAAC,CAG1B,QAASH,CAAIvB,CAAAA,CAAAA,CAAM,EAAE,CAAGuB,CAAAA,CAAAA,EAAKvB,EAAM,CAAE,CAAA,CAAA,CAAGuB,IAAK,CAC3C,IAAMC,EAAmBR,YAAM,CAAA,KAAA,CAAA,WAAA,CAAY,CAAE,CAAG,CAAA,CAAA,CAAAO,CAAE,CAAC,CAAA,CAC7CI,EAAOvB,CAAMoB,CAAAA,CAAW,EAE1BG,CAAQA,EAAAA,CAAAA,CAAK,IAAM,IAAQA,EAAAA,CAAAA,CAAK,IAAM,KAAaA,CAAAA,EAAAA,CAAAA,CAAK,IAAM,EAChED,EAAAA,CAAAA,CAAO,KAAK,MAAOC,CAAAA,CAAAA,CAAK,CAAC,CAAC,EAE9B,CAGA,GAAID,CAAAA,CAAO,OAAS,CAAG,CAAA,CACrB,IAAME,CAAwB,CAAAZ,YAAA,CAAA,KAAA,CAAM,YAAY,CAAE,CAAA,CAAG,EAAGhB,CAAM,CAAA,CAAA,CAAE,CAAE,CAAC,CAAA,CACnEI,EAAMwB,CAAgB,CAAA,CAAI,CACxB,CAAG,CAAA,GAAA,CACH,EAAGF,CAAO,CAAA,IAAA,CAAKD,CAAS,CACxB,CAAA,CAAA,CAAGC,EAAO,IAAKD,CAAAA,CAAS,EACxB,CAAGC,CAAAA,CAAAA,CAAO,KAAKD,CAAS,CAAA,CACxB,CAAG,CAAA,CAAA,GAAA,EAAMC,CAAO,CAAA,IAAA,CAAKD,CAAS,CAAC,CAAA,IAAA,CACjC,EAGA,IAASF,IAAAA,CAAAA,CAAIvB,EAAM,CAAE,CAAA,CAAA,CAAI,EAAGuB,CAAKvB,EAAAA,CAAAA,CAAM,EAAE,CAAGuB,CAAAA,CAAAA,EAAAA,CAAK,CAC/C,IAAMC,CAAAA,CAAmBR,mBAAM,WAAY,CAAA,CAAE,EAAG,CAAAO,CAAAA,CAAE,CAAC,CACnD,CAAA,OAAOnB,EAAMoB,CAAW,EAC1B,CACF,CACF,CAEJ,CCzSO,SAASK,CAAAA,CAA8BC,EAA6B,CACzE,IAAMC,EAAS,IAAI,GAAA,CAEnB,OAAOD,CAAQ,CAAA,GAAA,CAAKE,GAAU,CAC5B,IAAMC,GAAcD,CAAS,EAAA,OAAA,EAAS,QAAQ,GAAK,CAAA,EAAE,EAC/CE,CAAQH,CAAAA,CAAAA,CAAO,IAAIE,CAAU,CAAA,EAAK,EACxC,OAAAF,CAAAA,CAAO,IAAIE,CAAYC,CAAAA,CAAAA,CAAQ,CAAC,CAEzBA,CAAAA,CAAAA,CAAQ,GAAGD,CAAU,CAAA,CAAA,EAAIC,CAAK,CAAKD,CAAAA,CAAAA,CAC5C,CAAC,CACH,CAEO,IAAME,CAAsBH,CAAAA,CAAAA,EACjCA,IAAU,IAAS,EAAA,OAAOA,GAAU,QAAYA,EAAAA,CAAAA,CAAM,MAAW,GAAA,EAAA,CAEtDI,EAA0BC,CAA4B,EAAA,CACjE,IAAIC,CAAAA,CAAmB,CACvB,CAAA,IAAA,IAAStB,EAAI,CAAGA,CAAAA,CAAAA,CAAIqB,EAAI,MAAQrB,CAAAA,CAAAA,EAAAA,CACzBmB,EAAmBE,CAAIrB,CAAAA,CAAC,CAAC,CAC5BsB,GAAAA,CAAAA,CAAmBtB,GAGvB,OAAOqB,CAAAA,CAAI,MAAM,CAAGC,CAAAA,CAAAA,CAAmB,CAAC,CAC1C,CAAA,CAOaC,EAAoBC,CAA2B,EAAA,CAC1D,GAAI,CAACA,CAAAA,EAAQA,EAAK,MAAW,GAAA,CAAA,CAAG,OAAOA,CAEvC,CAAA,IAAMC,EAAS,CAAC,GAAGD,CAAI,CACjBE,CAAAA,CAAAA,CAAc,KAAK,GAAI,CAAA,GAAGF,EAAK,GAAKH,CAAAA,CAAAA,EAAQA,EAAI,MAAM,CAAC,EAG7D,IAASM,IAAAA,CAAAA,CAAM,EAAGA,CAAMD,CAAAA,CAAAA,CAAaC,IAAO,CAC1C,IAAIC,EAAY,IAGhB,CAAA,IAAA,IAASP,EAAM,CAAGA,CAAAA,CAAAA,CAAMI,EAAO,MAAQJ,CAAAA,CAAAA,EAAAA,CAAO,CAE5C,GAAII,CAAAA,CAAOJ,CAAG,CAAE,CAAA,KAAA,CAAMF,CAAkB,CAAG,CAAA,CACzCS,EAAY,IACZ,CAAA,QACF,CAGIT,CAAmBM,CAAAA,CAAAA,CAAOJ,CAAG,CAAEM,CAAAA,CAAG,CAAC,CAAKC,EAAAA,CAAAA,GAAc,KACxDH,CAAOJ,CAAAA,CAAG,EAAEM,CAAG,CAAA,CAAIC,EAGXT,CAAmBM,CAAAA,CAAAA,CAAOJ,CAAG,CAAEM,CAAAA,CAAG,CAAC,CAC3CC,GAAAA,CAAAA,CAAYH,EAAOJ,CAAG,CAAA,CAAEM,CAAG,CAE/B,EAAA,CACF,CAEA,OAAOF,CACT,EAOaI,CAAqBR,CAAAA,CAAAA,EAAwB,CACxD,GAAI,CAACA,GAAOA,CAAI,CAAA,MAAA,GAAW,EAAG,OAAO,CAAA,CAAA,CAGhBA,CAAAA,CAAI,OACtBV,CACC,EAAA,OAAOA,GAAS,QACf,EAAA,OAAOA,GAAS,QAAY,EAAA,CAAC,MAAM,MAAOA,CAAAA,CAAI,CAAC,CAAKA,EAAAA,CAAAA,CAAK,MAAW,GAAA,EACzE,EAAE,MAGImB,KAAAA,CAAAA,CAAYT,EAAI,MACnBV,CAAAA,CAAAA,EACC,OAAOA,CAAS,EAAA,QAAA,EAAYA,EAAK,IAAK,EAAA,GAAM,IAAM,KAAM,CAAA,MAAA,CAAOA,CAAI,CAAC,CACxE,EAAE,MAGIoB,CAAAA,CAAAA,CAAgBV,EAAI,MAAQV,CAAAA,CAAAA,EAAS,CAACQ,CAAmBR,CAAAA,CAAI,CAAC,CAAE,CAAA,OAGtE,OAAIoB,CAAgB,CAAA,CAAA,CAAU,GAGvBA,CAAgB,CAAA,CAAA,EAAKD,EAAYC,CAAiB,EAAA,EAC3D,EAOaC,CAAuBC,CAAAA,CAAAA,EAAiC,CACnE,GAAI,CAACA,CAAcA,EAAAA,CAAAA,CAAW,MAAW,GAAA,CAAA,CAAG,OAAOA,CAGnD,CAAA,IAAMC,EAAgBD,CAChBR,CAAAA,CAAAA,CAAS,CAAC,GAAGS,CAAa,EAG1BC,CAAmBD,CAAAA,CAAAA,CAAc,OAAOL,CAAiB,CAAA,CAG/D,GAAIM,CAAiB,CAAA,MAAA,GAAW,EAAG,OAAOD,CAAAA,CAG1C,QAASE,CAAW,CAAA,CAAA,CAAGA,EAAWD,CAAiB,CAAA,MAAA,CAAQC,IAAY,CACrE,IAAIR,EAAY,IACVP,CAAAA,CAAAA,CAAMc,EAAiBC,CAAQ,CAAA,CAGrC,QAAST,CAAM,CAAA,CAAA,CAAGA,EAAMN,CAAI,CAAA,MAAA,CAAQM,IAAO,CAEzC,GAAIR,EAAmBE,CAAIM,CAAAA,CAAG,CAAC,CAAKC,EAAAA,CAAAA,GAAc,KAAM,CAEtD,IAAMS,EAAiBH,CAAc,CAAA,SAAA,CAAW5B,GAAMA,CAAMe,GAAAA,CAAG,EAC3DgB,CAAkB,EAAA,CAAA,GACpBZ,EAAOY,CAAc,CAAA,CAAEV,CAAG,CAAIC,CAAAA,CAAAA,EAElC,MAEUT,CAAmBE,CAAAA,CAAAA,CAAIM,CAAG,CAAC,CAAA,GACnCC,EAAYP,CAAIM,CAAAA,CAAG,GAGCO,CAAc,CAAA,KAAA,CACjC5B,GAAMqB,CAAOrB,EAAAA,CAAAA,CAAE,QAAUa,CAAmBb,CAAAA,CAAAA,CAAEqB,CAAG,CAAC,CACrD,IAEEC,CAAY,CAAA,IAAA,EAEhB,CACF,CAEA,OAAOH,CACT,EClHA,eAAsBa,CAAAA,CACpBC,EACAlD,CAC0B,CAAA,CAC1B,IAAImD,CACJ,CAAA,GAAI,CACFA,CAAgB,CAAAC,YAAA,CAAA,IAAA,CAAKF,EAAQ,CAC3B,IAAA,CAAM,SACN,SAAW,CAAA,CAAA,CAAA,CACX,MAAO,CACP,CAAA,CAAA,MAAA,CAAQlD,GAAS,MAAU,EAAA,KAAA,CAAA,CAG3B,IAAK,CACP,CAAA,CAAC,EACH,CAASqD,MAAAA,CAAAA,CAAG,CAIV,GAAIA,CAAAA,CAAE,OAAS,qBACb,CAAA,MAAIrD,GAAS,KACX,EAAA,OAAA,CAAQ,IACN,8DACF,CAAA,CAEI,IAAI,KAAM,CAAA,iCAAiC,EAInDmD,CAAgB,CAAAC,YAAA,CAAA,IAAA,CAAKF,EAAQ,CAC3B,IAAA,CAAM,SACN,SAAW,CAAA,CAAA,CAAA,CACX,MAAO,CACP,CAAA,CAAA,MAAA,CAAQlD,GAAS,MAAU,EAAA,KAAA,CAC7B,CAAC,EACH,CAGA,GAAIA,CAAS,EAAA,iBAAA,CACX,QAAWsD,CAAaH,IAAAA,CAAAA,CAAS,WAAY,CAC3C,IAAMpD,EAAQoD,CAAS,CAAA,MAAA,CAAOG,CAAS,CACvCH,CAAAA,CAAAA,CAAS,OAAOG,CAAS,CAAA,CAAIxD,EAAmBC,CAAOC,CAAAA,CAAO,EAChE,CA4BF,OAAA,CAxBwB,MAAM,OAAQ,CAAA,GAAA,CACpCmD,EAAS,UAAW,CAAA,GAAA,CAAI,MAAOG,CAAAA,EAAc,CAC3C,IAAMvD,EAAQoD,CAAS,CAAA,MAAA,CAAOG,CAAS,CACjCC,CAAAA,CAAAA,CAAe,MAAMC,CAAa,CAAA,CACtC,MAAAzD,CACA,CAAA,SAAA,CAAAuD,EACA,UAAYtD,CAAAA,CAAAA,EAAS,YAAc,CACnC,CAAA,CAAA,GAAA,CAAKA,GAAS,GAAO,EAAA,CAAA,CAAA,CACrB,uBAAwBA,CAAS,EAAA,sBAAA,EAA0B,CACzD,SAAW,CAAA,SACb,EACA,sBAAwBA,CAAAA,CAAAA,EAAS,wBAA0B,CAC3D,CAAA,CAAA,cAAA,CAAgBA,GAAS,cAAkB,EAAA,CAAA,CAAA,CAC3C,MAAOA,CAAS,EAAA,KAAA,CAChB,iBAAkBA,CAAS,EAAA,gBAAA,CAC3B,aAAcA,CAAS,EAAA,YAAA,EAAgB,GACvC,mBAAqBA,CAAAA,CAAAA,EAAS,oBAC9B,UAAYA,CAAAA,CAAAA,EAAS,UACvB,CAAC,CAAA,CACD,OAAO,CAACsD,CAAAA,CAAWC,CAAY,CACjC,CAAC,CACH,CAGuB,EAAA,MAAA,CAAO,CAACE,CAAK,CAAA,CAACH,EAAWC,CAAY,CAAA,IACtDA,IACFE,CAAIH,CAAAA,CAAmB,EAAIC,CAEtBE,CAAAA,CAAAA,CAAAA,CAAAA,CACN,EAAqB,CAC1B,CAsBA,eAAeD,CAAAA,CAAa,CAC1B,KAAAzD,CAAAA,CAAAA,CACA,UAAAuD,CACA,CAAA,UAAA,CAAAI,EACA,GAAAC,CAAAA,CAAAA,CACA,uBAAAC,CACA,CAAA,sBAAA,CAAAC,EACA,cAAAC,CAAAA,CAAAA,CACA,MAAAC,CACA,CAAA,gBAAA,CAAkBC,EAClB,mBAAqBC,CAAAA,CAAAA,CACrB,aAAAC,CACA,CAAA,UAAA,CAAAC,CACF,CAAwD,CAAA,CAEtD,IAAMC,CAAe,CAAAhB,YAAA,CAAA,KAAA,CAAM,cAAmCrD,CAAO,CAAA,CACnE,OAAQ,GACR,CAAA,MAAA,CAAQ,KACR,UAAA2D,CAAAA,CAAAA,CACA,IAAAC,CACA,CAAA,SAAA,CAAWE,GAA0B,CAACC,CACxC,CAAC,CAED,CAAA,GAAIM,EAAQ,MAAW,GAAA,CAAA,CACrB,OAIF,IAAMC,CAAAA,CAAqB,OAAO,IAAKD,CAAAA,CAAAA,CAAQ,CAAC,CAAC,CAAA,CAG7CE,EAAWF,CAAQ,CAAA,GAAA,CAAKpC,GAAQ,MAAO,CAAA,MAAA,CAAOA,CAAG,CAAC,CAAA,CAGtD,KACEsC,CAAS,CAAA,MAAA,CAAS,GAClBA,CAASA,CAAAA,CAAAA,CAAS,OAAS,CAAC,CAAA,CAAE,MAAMxC,CAAkB,CAAA,EAEtDwC,EAAS,GAAI,EAAA,CAGf,GAAIA,CAAS,CAAA,MAAA,GAAW,EAAG,CACrBP,CAAAA,EACF,QAAQ,GAAI,CAAA,CAAA,uBAAA,EAA0BT,CAAS,CAAG,CAAA,CAAA,CAAA,CAEpD,MACF,CAGA,IAAIiB,EAAsB,CAAC,GAAGD,EAAS,KAAM,CAAA,CAAA,CAAGJ,CAAY,CAAC,CAAA,CAGzDD,CACFM,GAAAA,CAAAA,CAAsB5B,CAAoB4B,CAAAA,CAAmB,EACzDR,CACF,EAAA,OAAA,CAAQ,IAAI,CAAmCT,gCAAAA,EAAAA,CAAS,GAAG,CAK/D,CAAA,CAAA,IAAMkB,EAAsBD,CAAoB,CAAA,GAAA,CAAKvC,GACnDA,CAAI,CAAA,GAAA,CAAKV,GAAUA,CAAS,GAAA,IAAA,CAAO,GAAK,MAAOA,CAAAA,CAAI,CAAE,CACvD,CAAA,CAGM,CAAE,SAAWmD,CAAAA,CAAAA,CAAY,OAAQC,CAAkB,CAAA,CAAI,MAAMP,CACjEP,CAAAA,CAAAA,CACAY,CACF,CAEIT,CAAAA,CAAAA,GACF,QAAQ,GAAI,CAAA,CAAA,gBAAA,EAAmBU,CAAU,CAAe,aAAA,CAAA,CAAA,CACxD,QAAQ,GAAI,CAAA,6BAAA,CAA+BC,CAAiB,CAI9D,CAAA,CAAA,IAAMC,EAAwB5C,CAAuB2C,CAAAA,CAAiB,EAElEX,CACF,EAAA,OAAA,CAAQ,IAAI,iCAAmCY,CAAAA,CAAqB,EAItE,IAAIC,CAAAA,CAAgBN,EAOpB,GALKT,CAAAA,GAEHe,EAAgBN,CAAS,CAAA,KAAA,CAAMG,CAAU,CAGvCG,CAAAA,CAAAA,CAAAA,CAAc,SAAW,CAAG,CAAA,CAC1Bb,GACF,OAAQ,CAAA,GAAA,CACN,6DAA6DT,CAAS,CAAA,CAAA,CACxE,EAEF,MACF,CAGIU,IACFY,CAAgB1C,CAAAA,CAAAA,CAAiB0C,CAAa,CAC1Cb,CAAAA,CAAAA,EACF,QAAQ,GAAI,CAAA,CAAA,oCAAA,EAAuCT,CAAS,CAAG,CAAA,CAAA,CAAA,CAAA,CAKnE,IAAIuB,CAAAA,CAEAhB,CAEFgB,CAAAA,CAAAA,CAAeR,EAAmB,KAAM,CAAA,CAAA,CAAGM,EAAsB,MAAM,CAAA,CAGvEE,EAAeF,CAIjB,CAAA,IAAMG,EAAgBtD,CAA8BqD,CAAAA,CAAY,EAKhE,GAJId,CAAAA,EACF,QAAQ,GAAI,CAAA,sBAAA,CAAwBe,CAAa,CAG/CA,CAAAA,CAAAA,CAAc,SAAW,CAAG,CAAA,CAC1Bf,GACF,OAAQ,CAAA,GAAA,CAAI,+BAA+BT,CAAS,CAAA,CAAA,CAAG,EAEzD,MACF,CAGA,IAAMyB,CAAeH,CAAAA,CAAAA,CAAc,IAAK5C,CACtCA,EAAAA,CAAAA,CAAI,OAAO,CAACyB,CAAAA,CAAK9B,EAAOqD,CAAU,GAAA,CAChC,IAAMC,CAAaH,CAAAA,CAAAA,CAAcE,CAAK,CACtC,CAAA,OAAIC,IACFxB,CAAIwB,CAAAA,CAAU,EAAI,CAAE,KAAA,CAAAtD,CAAM,CAErB8B,CAAAA,CAAAA,CACT,EAAG,EAAE,CACP,CAGIyB,CAAAA,CAAAA,CACJ,OAAIrB,CACFqB,GAAAA,CAAAA,CAAgB,CACd,UAAY,CAAA,CAACT,CAAU,CACzB,CAAA,CAAA,CAGK,CACL,OAASK,CAAAA,CAAAA,CACT,KAAMC,CACN,CAAA,QAAA,CAAUG,CACZ,CACF,KC3OaC,CAAkBnF,CAAAA,CAAAA,EACtBoF,wBACL,kCACA,CAAA,OAAA,CACAnC,EACAjD,CACF,CAAA,CAGWqF,EAAcpC,CAAAA,CAAAA,CAKdqC,EAAsBH,CAAAA","file":"index.cjs","sourcesContent":["import * as XLSX from 'xlsx'\nimport { ExcelExtractorOptions } from '.'\n\n/**\n * Represents a merged cell range\n */\ninterface MergedCellRange {\n s: { r: number; c: number } // start row and column\n e: { r: number; c: number } // end row and column\n}\n\n/**\n * Categorizes a merged cell range as across columns, across rows, or across a range\n * @param merge The merged cell range to categorize\n * @returns The category of the merged cell range\n */\nfunction categorizeMergedCell(\n merge: MergedCellRange\n): 'acrossColumns' | 'acrossRows' | 'acrossRanges' {\n const isSingleRow = merge.s.r === merge.e.r\n const isSingleColumn = merge.s.c === merge.e.c\n\n if (isSingleRow && !isSingleColumn) {\n return 'acrossColumns'\n } else if (!isSingleRow && isSingleColumn) {\n return 'acrossRows'\n } else {\n return 'acrossRanges'\n }\n}\n\n/**\n * Processes merged cells in a worksheet according to the specified options\n * @param sheet The worksheet to process\n * @param options The options for handling merged cells\n * @returns The processed worksheet\n */\nexport function processMergedCells(\n sheet: XLSX.WorkSheet,\n options?: ExcelExtractorOptions\n): XLSX.WorkSheet {\n if (!sheet['!merges'] || !options?.mergedCellOptions) {\n return sheet\n }\n\n // Create a deep copy of the sheet to avoid modifying the original\n const processedSheet = JSON.parse(JSON.stringify(sheet)) as XLSX.WorkSheet\n const merges = [...(sheet['!merges'] || [])] as MergedCellRange[]\n\n // First, process 'applyToAll' and 'applyToTopLeft' treatments for all vectors\n processTreatments(processedSheet, merges, options, [\n 'applyToAll',\n 'applyToTopLeft',\n ])\n\n // Then, process 'concatenate' treatments\n processTreatments(processedSheet, merges, options, ['concatenate'])\n\n // Finally, process 'coalesce' treatments\n processTreatments(processedSheet, merges, options, ['coalesce'])\n\n // Remove the merges that have been processed\n processedSheet['!merges'] = []\n\n return processedSheet\n}\n\n/**\n * Processes merged cells with the specified treatments\n * @param sheet The worksheet to process\n * @param merges The merged cell ranges to process\n * @param options The options for handling merged cells\n * @param treatments The treatments to process\n */\nfunction processTreatments(\n sheet: XLSX.WorkSheet,\n merges: MergedCellRange[],\n options: ExcelExtractorOptions,\n treatments: ('applyToAll' | 'applyToTopLeft' | 'coalesce' | 'concatenate')[]\n): void {\n // Process across ranges first\n processVectorTreatments(sheet, merges, options, 'acrossRanges', treatments)\n\n // Then process across rows\n processVectorTreatments(sheet, merges, options, 'acrossRows', treatments)\n\n // Finally process across columns\n processVectorTreatments(sheet, merges, options, 'acrossColumns', treatments)\n}\n\n/**\n * Processes merged cells for a specific vector with the specified treatments\n * @param sheet The worksheet to process\n * @param merges The merged cell ranges to process\n * @param options The options for handling merged cells\n * @param vector The vector to process\n * @param treatments The treatments to process\n */\nfunction processVectorTreatments(\n sheet: XLSX.WorkSheet,\n merges: MergedCellRange[],\n options: ExcelExtractorOptions,\n vector: 'acrossColumns' | 'acrossRows' | 'acrossRanges',\n treatments: ('applyToAll' | 'applyToTopLeft' | 'coalesce' | 'concatenate')[]\n): void {\n const vectorOptions = options.mergedCellOptions?.[vector]\n if (!vectorOptions || !treatments.includes(vectorOptions.treatment)) {\n return\n }\n\n // Filter merges by vector\n const vectorMerges = merges.filter(\n (merge) => categorizeMergedCell(merge) === vector\n )\n\n for (const merge of vectorMerges) {\n const treatment = vectorOptions.treatment\n\n // Get the cell address of the top-left cell in the merge\n const topLeftCellAddress = XLSX.utils.encode_cell({\n r: merge.s.r,\n c: merge.s.c,\n })\n const topLeftCell = sheet[topLeftCellAddress]\n\n if (!topLeftCell) {\n continue\n }\n\n // Apply the treatment based on the vector and treatment type\n if (treatment === 'applyToAll') {\n applyToAllCells(sheet, merge, topLeftCell)\n } else if (treatment === 'applyToTopLeft') {\n applyToTopLeftCell(sheet, merge)\n } else if (\n treatment === 'coalesce' &&\n (vector === 'acrossRows' || vector === 'acrossColumns')\n ) {\n coalesceCells(sheet, merge, vector)\n } else if (\n treatment === 'concatenate' &&\n (vector === 'acrossRows' || vector === 'acrossColumns')\n ) {\n concatenateCells(sheet, merge, vector, vectorOptions.separator || ',')\n }\n }\n}\n\n/**\n * Applies the value of the top-left cell to all cells in the merged range\n * @param sheet The worksheet to process\n * @param merge The merged cell range to process\n * @param topLeftCell The top-left cell in the merged range\n */\nfunction applyToAllCells(\n sheet: XLSX.WorkSheet,\n merge: MergedCellRange,\n topLeftCell: XLSX.CellObject\n): void {\n for (let r = merge.s.r; r <= merge.e.r; r++) {\n for (let c = merge.s.c; c <= merge.e.c; c++) {\n const cellAddress = XLSX.utils.encode_cell({ r, c })\n sheet[cellAddress] = { ...topLeftCell }\n }\n }\n}\n\n/**\n * Applies the value of the top-left cell to only the top-left cell in the merged range\n * and clears all other cells in the range\n * @param sheet The worksheet to process\n * @param merge The merged cell range to process\n */\nfunction applyToTopLeftCell(\n sheet: XLSX.WorkSheet,\n merge: MergedCellRange\n): void {\n for (let r = merge.s.r; r <= merge.e.r; r++) {\n for (let c = merge.s.c; c <= merge.e.c; c++) {\n if (r !== merge.s.r || c !== merge.s.c) {\n const cellAddress = XLSX.utils.encode_cell({ r, c })\n sheet[cellAddress] = { t: 's', v: '' }\n }\n }\n }\n}\n\n/**\n * Coalesces cells in the merged range\n * @param sheet The worksheet to process\n * @param merge The merged cell range to process\n * @param vector The vector to process\n */\nfunction coalesceCells(\n sheet: XLSX.WorkSheet,\n merge: MergedCellRange,\n vector: 'acrossRows' | 'acrossColumns'\n): void {\n if (vector === 'acrossRows') {\n // Keep only the first row and remove all other rows\n for (let r = merge.s.r + 1; r <= merge.e.r; r++) {\n for (let c = merge.s.c; c <= merge.e.c; c++) {\n const cellAddress = XLSX.utils.encode_cell({ r, c })\n delete sheet[cellAddress]\n }\n }\n } else if (vector === 'acrossColumns') {\n // Keep only the first column and remove all other columns\n for (let r = merge.s.r; r <= merge.e.r; r++) {\n for (let c = merge.s.c + 1; c <= merge.e.c; c++) {\n const cellAddress = XLSX.utils.encode_cell({ r, c })\n delete sheet[cellAddress]\n }\n }\n }\n}\n\n/**\n * Concatenates cells in the merged range\n * @param sheet The worksheet to process\n * @param merge The merged cell range to process\n * @param vector The vector to process\n * @param separator The separator to use for concatenation\n */\nfunction concatenateCells(\n sheet: XLSX.WorkSheet,\n merge: MergedCellRange,\n vector: 'acrossRows' | 'acrossColumns',\n separator: string\n): void {\n if (vector === 'acrossRows') {\n // For each column in the merge\n for (let c = merge.s.c; c <= merge.e.c; c++) {\n const values: string[] = []\n\n // Collect non-empty values from all rows in this column\n for (let r = merge.s.r; r <= merge.e.r; r++) {\n const cellAddress = XLSX.utils.encode_cell({ r, c })\n const cell = sheet[cellAddress]\n\n if (cell && cell.v !== null && cell.v !== undefined && cell.v !== '') {\n values.push(String(cell.v))\n }\n }\n\n // Set the concatenated value to the first row and clear other rows\n if (values.length > 0) {\n const firstCellAddress = XLSX.utils.encode_cell({ r: merge.s.r, c })\n sheet[firstCellAddress] = {\n t: 's',\n v: values.join(separator),\n w: values.join(separator),\n h: values.join(separator),\n r: `<t>${values.join(separator)}</t>`,\n }\n\n // Clear other rows\n for (let r = merge.s.r + 1; r <= merge.e.r; r++) {\n const cellAddress = XLSX.utils.encode_cell({ r, c })\n delete sheet[cellAddress]\n }\n }\n }\n } else if (vector === 'acrossColumns') {\n // For each row in the merge\n for (let r = merge.s.r; r <= merge.e.r; r++) {\n const values: string[] = []\n\n // Collect non-empty values from all columns in this row\n for (let c = merge.s.c; c <= merge.e.c; c++) {\n const cellAddress = XLSX.utils.encode_cell({ r, c })\n const cell = sheet[cellAddress]\n\n if (cell && cell.v !== null && cell.v !== undefined && cell.v !== '') {\n values.push(String(cell.v))\n }\n }\n\n // Set the concatenated value to the first column and clear other columns\n if (values.length > 0) {\n const firstCellAddress = XLSX.utils.encode_cell({ r, c: merge.s.c })\n sheet[firstCellAddress] = {\n t: 's',\n v: values.join(separator),\n w: values.join(separator),\n h: values.join(separator),\n r: `<t>${values.join(separator)}</t>`,\n }\n\n // Clear other columns\n for (let c = merge.s.c + 1; c <= merge.e.c; c++) {\n const cellAddress = XLSX.utils.encode_cell({ r, c })\n delete sheet[cellAddress]\n }\n }\n }\n }\n}\n","export function prependNonUniqueHeaderColumns(headers: string[]): string[] {\n const counts = new Map<string, number>()\n\n return headers.map((value) => {\n const cleanValue = (value || 'empty').replace('*', '')\n const count = counts.get(cleanValue) || 0\n counts.set(cleanValue, count + 1)\n\n return count ? `${cleanValue}_${count}` : cleanValue\n })\n}\n\nexport const isNullOrWhitespace = (value: any) =>\n value === null || (typeof value === 'string' && value.trim() === '')\n\nexport const trimTrailingEmptyCells = (row: string[]): string[] => {\n let lastNonNullIndex = 0\n for (let i = 0; i < row.length; i++) {\n if (!isNullOrWhitespace(row[i])) {\n lastNonNullIndex = i\n }\n }\n return row.slice(0, lastNonNullIndex + 1)\n}\n\n/**\n * Cascades values down the dataset until a blank row, new value, or end of dataset\n * @param rows Array of rows to process\n * @returns Processed rows with cascaded values\n */\nexport const cascadeRowValues = (rows: any[][]): any[][] => {\n if (!rows || rows.length === 0) return rows\n\n const result = [...rows]\n const columnCount = Math.max(...rows.map((row) => row.length))\n\n // For each column\n for (let col = 0; col < columnCount; col++) {\n let lastValue = null\n\n // For each row\n for (let row = 0; row < result.length; row++) {\n // Skip completely blank/null rows - they reset the cascade\n if (result[row].every(isNullOrWhitespace)) {\n lastValue = null\n continue\n }\n\n // If current cell is empty and we have a last value, fill it\n if (isNullOrWhitespace(result[row][col]) && lastValue !== null) {\n result[row][col] = lastValue\n }\n // Otherwise, update the last value if the current cell is not empty\n else if (!isNullOrWhitespace(result[row][col])) {\n lastValue = result[row][col]\n }\n }\n }\n\n return result\n}\n\n/**\n * Determines if a row is likely a header row based on heuristics\n * @param row Row to evaluate\n * @returns True if the row is likely a header row\n */\nexport const isLikelyHeaderRow = (row: any[]): boolean => {\n if (!row || row.length === 0) return false\n\n // Headers typically don't have many numeric values\n const numericCount = row.filter(\n (cell) =>\n typeof cell === 'number' ||\n (typeof cell === 'string' && !isNaN(Number(cell)) && cell.trim() !== '')\n ).length\n\n // Headers typically have more text values\n const textCount = row.filter(\n (cell) =>\n typeof cell === 'string' && cell.trim() !== '' && isNaN(Number(cell))\n ).length\n\n // If more than 80% of non-empty cells are text, it's likely a header\n const nonEmptyCount = row.filter((cell) => !isNullOrWhitespace(cell)).length\n\n // Require at least 2 non-empty cells to be considered a header\n if (nonEmptyCount < 2) return false\n\n // For mixed content rows, be more strict about the text percentage\n return nonEmptyCount > 0 && textCount / nonEmptyCount >= 0.8\n}\n\n/**\n * Cascades values across the header rows until a blank column, new value, or end of dataset\n * @param headerRows Array of potential header rows to process\n * @returns Processed header rows with cascaded values\n */\nexport const cascadeHeaderValues = (headerRows: any[][]): any[][] => {\n if (!headerRows || headerRows.length === 0) return headerRows\n\n // Only process up to 5 rows maximum\n const rowsToProcess = headerRows\n const result = [...rowsToProcess]\n\n // Filter to only include rows that are likely headers\n const likelyHeaderRows = rowsToProcess.filter(isLikelyHeaderRow)\n\n // If no likely header rows, return original rows\n if (likelyHeaderRows.length === 0) return rowsToProcess\n\n // For each likely header row\n for (let rowIndex = 0; rowIndex < likelyHeaderRows.length; rowIndex++) {\n let lastValue = null\n const row = likelyHeaderRows[rowIndex]\n\n // For each column in the row\n for (let col = 0; col < row.length; col++) {\n // If current cell is empty and we have a last value, fill it\n if (isNullOrWhitespace(row[col]) && lastValue !== null) {\n // Find the corresponding row in the result\n const resultRowIndex = rowsToProcess.findIndex((r) => r === row)\n if (resultRowIndex >= 0) {\n result[resultRowIndex][col] = lastValue\n }\n }\n // Otherwise, update the last value if the current cell is not empty\n else if (!isNullOrWhitespace(row[col])) {\n lastValue = row[col]\n }\n // If we encounter a completely blank/null column, reset the cascade\n const isColumnEmpty = rowsToProcess.every(\n (r) => col >= r.length || isNullOrWhitespace(r[col])\n )\n if (isColumnEmpty) {\n lastValue = null\n }\n }\n }\n\n return result\n}\n","import type { SheetCapture, WorkbookCapture } from '@flatfile/util-extractor'\nimport * as XLSX from 'xlsx'\nimport type { ExcelExtractorOptions } from '.'\nimport { processMergedCells } from './merged-cells'\nimport {\n cascadeHeaderValues,\n cascadeRowValues,\n isNullOrWhitespace,\n prependNonUniqueHeaderColumns,\n trimTrailingEmptyCells,\n} from './utils'\nimport {\n GetHeadersOptions,\n GetHeadersResult,\n ROWS_TO_SEARCH_FOR_HEADER,\n} from '../constants/headerDetection.const'\n\ntype ParseBufferOptions = Omit<\n ExcelExtractorOptions,\n 'chunkSize' | 'parallel'\n> & {\n readonly headerSelectionEnabled?: boolean\n getHeaders: (options: any, data: string[][]) => Promise<GetHeadersResult>\n rowsToSearch?: number\n}\ntype ProcessedSheet = [PropertyKey, SheetCapture]\n\nexport async function parseBuffer(\n buffer: Buffer,\n options?: ParseBufferOptions\n): Promise<WorkbookCapture> {\n let workbook: XLSX.WorkBook\n try {\n workbook = XLSX.read(buffer, {\n type: 'buffer',\n cellDates: true,\n dense: true,\n dateNF: options?.dateNF || undefined,\n // SheetJS intends the 'WTF' option to be used for development purposes only.\n // We use it here to specifically capture the ERR_STRING_TOO_LONG error.\n WTF: true,\n })\n } catch (e) {\n // catch the error if the file is too large to parse, and throw a more helpful error.\n // ref: https://docs.sheetjs.com/docs/miscellany/errors/#invalid-string-length-or-err_string_too_long\n // i.e. 'Cannot create a string longer than 0x1fffffe8 characters'\n if (e.code === 'ERR_STRING_TOO_LONG') {\n if (options?.debug) {\n console.log(\n 'File is too large to parse. Try converting this file to CSV.'\n )\n }\n throw new Error('plugins.extraction.fileTooLarge')\n }\n\n // Try reading the file again without the 'WTF' option.\n workbook = XLSX.read(buffer, {\n type: 'buffer',\n cellDates: true,\n dense: true,\n dateNF: options?.dateNF || undefined,\n })\n }\n\n // Process merged cells if options are provided\n if (options?.mergedCellOptions) {\n for (const sheetName of workbook.SheetNames) {\n const sheet = workbook.Sheets[sheetName]\n workbook.Sheets[sheetName] = processMergedCells(sheet, options)\n }\n }\n\n // Process each sheet\n const processedSheets = await Promise.all(\n workbook.SheetNames.map(async (sheetName) => {\n const sheet = workbook.Sheets[sheetName]\n const sheetCapture = await convertSheet({\n sheet,\n sheetName,\n rawNumbers: options?.rawNumbers ?? false,\n raw: options?.raw ?? false,\n headerDetectionOptions: options?.headerDetectionOptions ?? {\n algorithm: 'default',\n },\n headerSelectionEnabled: options?.headerSelectionEnabled ?? false,\n skipEmptyLines: options?.skipEmptyLines ?? false,\n debug: options?.debug,\n cascadeRowValues: options?.cascadeRowValues,\n rowsToSearch: options?.rowsToSearch ?? ROWS_TO_SEARCH_FOR_HEADER,\n cascadeHeaderValues: options?.cascadeHeaderValues,\n getHeaders: options?.getHeaders,\n })\n return [sheetName, sheetCapture] as ProcessedSheet\n })\n )\n\n // Filter out undefined sheets and convert to an object\n return processedSheets.reduce((acc, [sheetName, sheetCapture]) => {\n if (sheetCapture) {\n acc[sheetName as string] = sheetCapture\n }\n return acc\n }, {} as WorkbookCapture)\n}\n\ntype ConvertSheetArgs = {\n sheet: XLSX.WorkSheet\n sheetName: string\n rawNumbers: boolean\n raw: boolean\n headerDetectionOptions: GetHeadersOptions\n headerSelectionEnabled: boolean\n skipEmptyLines: boolean\n debug?: boolean\n cascadeRowValues?: boolean\n cascadeHeaderValues?: boolean\n rowsToSearch?: number\n getHeaders: (options: any, data: string[][]) => Promise<GetHeadersResult>\n}\n\n/**\n * Convert a template sheet using a special template format\n *\n * @param sheet\n */\nasync function convertSheet({\n sheet,\n sheetName,\n rawNumbers,\n raw,\n headerDetectionOptions,\n headerSelectionEnabled,\n skipEmptyLines,\n debug,\n cascadeRowValues: shouldCascadeRowValues,\n cascadeHeaderValues: shouldCascadeHeaderValues,\n rowsToSearch,\n getHeaders,\n}: ConvertSheetArgs): Promise<SheetCapture | undefined> {\n // Step 1: Extract raw data from Excel sheet\n const rawRows = XLSX.utils.sheet_to_json<Record<string, any>>(sheet, {\n header: 'A',\n defval: null,\n rawNumbers,\n raw,\n blankrows: headerSelectionEnabled || !skipEmptyLines,\n })\n\n if (rawRows.length === 0) {\n return\n }\n\n // Extract Excel column letters (A, B, C, etc.)\n const excelColumnLetters = Object.keys(rawRows[0])\n\n // Convert to array of arrays for easier processing\n let dataRows = rawRows.map((row) => Object.values(row))\n\n // Remove trailing empty rows\n while (\n dataRows.length > 0 &&\n dataRows[dataRows.length - 1].every(isNullOrWhitespace)\n ) {\n dataRows.pop()\n }\n\n if (dataRows.length === 0) {\n if (debug) {\n console.log(`No data rows found in '${sheetName}'`)\n }\n return\n }\n\n // Step 2: Prepare data for header detection\n let headerCandidateRows = [...dataRows.slice(0, rowsToSearch)]\n\n // Apply header cascading if enabled\n if (shouldCascadeHeaderValues) {\n headerCandidateRows = cascadeHeaderValues(headerCandidateRows)\n if (debug) {\n console.log(`Applied cascadeHeaderValues to '${sheetName}'`)\n }\n }\n\n // Convert nulls to empty strings for header detection\n const headerDetectionData = headerCandidateRows.map((row) =>\n row.map((cell) => (cell === null ? '' : String(cell)))\n )\n\n // Step 3: Detect headers\n const { headerRow: rowsToSkip, header: detectedHeaderRow } = await getHeaders(\n headerDetectionOptions,\n headerDetectionData\n )\n\n if (debug) {\n console.log(`@debug Detected ${rowsToSkip} rows to skip`)\n console.log(`@debug Detected header row:`, detectedHeaderRow)\n }\n\n // Remove trailing empty cells from detected header\n const cleanedDetectedHeader = trimTrailingEmptyCells(detectedHeaderRow)\n\n if (debug) {\n console.log(`@debug Cleaned detected header:`, cleanedDetectedHeader)\n }\n\n // Step 4: Process data rows based on header selection mode\n let finalDataRows = dataRows\n\n if (!headerSelectionEnabled) {\n // In normal mode, remove the header rows from data\n finalDataRows = dataRows.slice(rowsToSkip)\n }\n\n if (finalDataRows.length === 0) {\n if (debug) {\n console.log(\n `@debug No data rows remaining after header processing in '${sheetName}'`\n )\n }\n return\n }\n\n // Apply row value cascading if enabled\n if (shouldCascadeRowValues) {\n finalDataRows = cascadeRowValues(finalDataRows)\n if (debug) {\n console.log(`@debug Applied cascadeRowValues to '${sheetName}'`)\n }\n }\n\n // Step 5: Determine final headers\n let finalHeaders: string[]\n\n if (headerSelectionEnabled) {\n // In header selection mode, use Excel column letters but limit to detected header length\n finalHeaders = excelColumnLetters.slice(0, cleanedDetectedHeader.length)\n } else {\n // In normal mode, use the detected header values\n finalHeaders = cleanedDetectedHeader\n }\n\n // Handle duplicate headers by appending numbers\n const uniqueHeaders = prependNonUniqueHeaderColumns(finalHeaders)\n if (debug) {\n console.log('@debug uniqueHeaders', uniqueHeaders)\n }\n\n if (uniqueHeaders.length === 0) {\n if (debug) {\n console.log(`@debug No headers found in '${sheetName}'`)\n }\n return\n }\n\n // Step 6: Convert data to Flatfile format\n const flatfileData = finalDataRows.map((row) =>\n row.reduce((acc, value, index) => {\n const headerName = uniqueHeaders[index]\n if (headerName) {\n acc[headerName] = { value }\n }\n return acc\n }, {})\n )\n\n // Step 7: Build metadata if needed\n let sheetMetadata: { rowHeaders: number[] } | undefined = undefined\n if (headerSelectionEnabled) {\n sheetMetadata = {\n rowHeaders: [rowsToSkip],\n }\n }\n\n return {\n headers: uniqueHeaders,\n data: flatfileData,\n metadata: sheetMetadata,\n }\n}\n","import { Extractor } from '@flatfile/util-extractor'\nimport { parseBuffer } from './parser'\nimport { GetHeadersOptions } from '../constants/headerDetection.const'\n\n/**\n * Plugin config options.\n *\n * @property {boolean} raw - if true, return raw data; if false, return formatted text.\n * @property {boolean} rawNumbers - if true, return raw numbers; if false, return formatted numbers.\n * @property {string} dateNF - the date format.\n * @property {number} chunkSize - the size of chunk to process when inserting records.\n * @property {number} parallel - the quantity of parallel process when inserting records.\n * @property {GetHeadersOptions} headerDetectionOptions - the options for header detection.\n * @property {boolean} skipEmptyLines - if true, skip empty lines; if false, include empty lines.\n * @property {boolean} debug - if true, display helpful console logs.\n * @property {object} mergedCellOptions - the options for merged cell handling.\n * @property {boolean} cascadeRowValues - if true, cascade values down the dataset until a blank row, new value, or end of dataset.\n * @property {boolean} cascadeHeaderValues - if true, cascade values across the header rows until a blank column, new value, or end of dataset.\n */\nexport interface ExcelExtractorOptions {\n readonly raw?: boolean\n readonly rawNumbers?: boolean\n readonly dateNF?: string\n readonly headerDetectionOptions?: GetHeadersOptions\n readonly skipEmptyLines?: boolean\n readonly chunkSize?: number\n readonly parallel?: number\n readonly debug?: boolean\n readonly mergedCellOptions?: {\n acrossColumns?: {\n treatment: 'applyToAll' | 'applyToTopLeft' | 'coalesce' | 'concatenate'\n separator?: string\n }\n acrossRows?: {\n treatment: 'applyToAll' | 'applyToTopLeft' | 'coalesce' | 'concatenate'\n separator?: string\n }\n acrossRanges?: {\n treatment: 'applyToAll' | 'applyToTopLeft'\n }\n }\n readonly cascadeRowValues?: boolean\n readonly cascadeHeaderValues?: boolean\n}\n\nexport const ExcelExtractor = (options?: ExcelExtractorOptions) => {\n return Extractor(\n /\\.(xlsx?|xlsm|xlsb|xltx?|xltm)$/i,\n 'excel',\n parseBuffer,\n options\n )\n}\n\nexport const excelParser = parseBuffer\n\n/*\n * @deprecated use `ExcelExtractor` instead\n */\nexport const xlsxExtractorPlugin = ExcelExtractor\n"]}