UNPKG

@flatfile/plugin-delimiter-extractor

Version:

A plugin for parsing .delimiter files in Flatfile.

1 lines 26 kB
{"version":3,"sources":["../src/header.detection.ts","../src/parser.ts","../src/index.ts"],"names":["ROWS_TO_SEARCH_FOR_HEADER","indexToLetters","index","letters","result","headersToLetters","headers","_","Headerizer","options","ExplicitHeaders","SpecificRows","DataRowAndSubHeaderDetection","OriginalDetector","countNonEmptyCells","row","cell","likelyContainsData","dataStream","currentRow","skip","header","detector","stream","encoding","callback","resolve","reject","error","maxRow","rows","previousRow","fuzzyHeader","fuzzySkip","i","word","parseBuffer","buffer","skipEmptyLines","fileContents","Papa","transform","value","extractValues","data","headerizer","headerStream","Readable","isNullOrWhitespace","columnHeaders","prependNonUniqueHeaderColumns","mappedRow","mapKeys","key","mapValues","metadata","record","counts","cleanValue","NativeFileTypes","DelimiterExtractor","fileExt","Extractor","delimiterParser"],"mappings":";;;;;;;;;;;;AAEO,IAAMA,CAAAA,CAA4B,GAyC5BC,CAAkBC,CAAAA,CAAAA,EAA0B,CACvD,IAAMC,CAAU,CAAA,4BAAA,CACZC,CAAS,CAAA,EAAA,CAEb,KAAOF,CAAS,EAAA,CAAA,EACdE,CAASD,CAAAA,CAAAA,CAAQD,CAAQ,CAAA,EAAE,CAAIE,CAAAA,CAAAA,CAC/BF,CAAQ,CAAA,IAAA,CAAK,KAAMA,CAAAA,CAAAA,CAAQ,EAAE,CAAA,CAAI,CAGnC,CAAA,OAAOE,CACT,CAEaC,CAAAA,CAAAA,CAAoBC,CACxBA,EAAAA,CAAAA,CAAQ,GAAI,CAAA,CAACC,CAAGL,CAAAA,CAAAA,GAAUD,EAAeC,CAAK,CAAC,CAIlCM,CAAAA,CAAAA,CAAf,KAA0B,CAC/B,WAAc,EAAA,EAGd,OAAO,MAAA,CAAOC,CAAwC,CAAA,CACpD,OAAQA,CAAAA,CAAQ,SAAW,EACzB,IAAK,iBAAA,CACH,OAAO,IAAIC,CAAgBD,CAAAA,CAAO,CACpC,CAAA,IAAK,eACH,OAAO,IAAIE,CAAaF,CAAAA,CAAO,CACjC,CAAA,IAAK,8BACH,CAAA,OAAO,IAAIG,CAA6BH,CAAAA,CAAO,CACjD,CAAA,IAAK,YACH,CAAA,MAAM,IAAI,KAAA,CAAM,iBAAiB,CACnC,CAAA,QACE,OAAO,IAAII,CAAiBJ,CAAAA,CAAO,CACvC,CACF,CACF,CAAA,CAEaK,CAAsBC,CAAAA,CAAAA,EAC1BA,CAAI,CAAA,MAAA,CAAQC,CAAS,EAAA,CAAA,EAAGA,CAAI,CAAG,CAAA,CAAA,IAAA,EAAW,GAAA,EAAE,CAAE,CAAA,MAAA,CAG1CC,CAAsBF,CAAAA,CAAAA,EAC1BA,EAAI,IACRC,CAAAA,CAAAA,EACCA,CAAS,GAAA,IAAA,EACTA,CAAK,CAAA,IAAA,EAAW,GAAA,EAAA,EAChB,CAAC,KAAM,CAAA,MAAA,CAAOA,CAAK,CAAA,IAAA,EAAM,CAAC,CAC1BA,EAAAA,CAAAA,CAAK,IAAK,EAAA,CAAE,WAAY,EAAA,GAAM,MAC9BA,EAAAA,CAAAA,CAAK,IAAK,EAAA,CAAE,aAAkB,GAAA,OAClC,CAOIH,CAAAA,CAAAA,CAAN,cAA+BL,CAAW,CAGxC,WAAA,CAAoBC,EAAyB,CAC3C,KAAA,EADkB,CAAA,IAAA,CAAA,OAAA,CAAAA,CAElB,CAAA,IAAA,CAAK,YAAeA,CAAAA,CAAAA,CAAQ,cAAgBT,EAC9C,CALQ,YAOR,CAAA,MAAM,UAAWkB,CAAAA,CAAAA,CAAwD,CACvE,IAAIC,CAAa,CAAA,CAAA,CACbC,CAAO,CAAA,CAAA,CACPC,CAAmB,CAAA,EACnBlB,CAAAA,CAAAA,CAAoB,EAGlBmB,CAAAA,CAAAA,CAAW,IAAIC,kBAAAA,CAAO,QAAS,CAAA,CACnC,UAAY,CAAA,CAAA,CAAA,CACZ,MAAO,CAACR,CAAAA,CAAKS,CAAUC,CAAAA,CAAAA,GAAa,CAClCN,CAAAA,EAAAA,CACIA,CAAc,EAAA,IAAA,CAAK,cACrBD,CAAW,CAAA,OAAA,EAETJ,CAAAA,CAAAA,CAAmBC,CAAG,CAAA,CAAID,CAAmBO,CAAAA,CAAM,CACrDA,GAAAA,CAAAA,CAASN,CACTK,CAAAA,CAAAA,CAAOD,CACPhB,CAAAA,CAAAA,CAAUE,CAAiBgB,CAAAA,CAAM,GAEnCI,CAAS,GACX,CACF,CAAC,CAED,CAAA,OAAAP,CAAW,CAAA,IAAA,CAAKI,EAAU,CAAE,GAAA,CAAK,CAAK,CAAA,CAAC,CAEhC,CAAA,IAAI,OAAQ,CAAA,CAACI,EAASC,CAAW,GAAA,CACtCL,CAAS,CAAA,EAAA,CAAG,QAAU,CAAA,IAAM,CAC1BI,CAAAA,CAAQ,CAAE,MAAA,CAAAL,CAAQ,CAAA,IAAA,CAAAD,CAAM,CAAA,OAAA,CAAAjB,CAAQ,CAAC,EACnC,CAAC,CAAA,CACDe,CAAW,CAAA,EAAA,CAAG,OAAS,CAAA,IAAM,CAC3BQ,CAAAA,CAAQ,CAAE,MAAAL,CAAAA,CAAAA,CAAQ,IAAAD,CAAAA,CAAAA,CAAM,OAAAjB,CAAAA,CAAQ,CAAC,EACnC,CAAC,CACDe,CAAAA,CAAAA,CAAW,EAAG,CAAA,OAAA,CAAUU,CAAU,EAAA,CAChCD,CAAOC,CAAAA,CAAK,EACd,CAAC,EACH,CAAC,CACH,CACF,CAIMlB,CAAAA,CAAAA,CAAN,cAA8BF,CAAW,CAEvC,WAA6BC,CAAAA,CAAAA,CAAiC,CAC5D,KAAA,EAD2B,CAAA,IAAA,CAAA,OAAA,CAAAA,EAGvB,GAACA,CAAAA,CAAAA,CAAQ,OAAWA,EAAAA,CAAAA,CAAQ,OAAQ,CAAA,MAAA,GAAW,CACjD,CAAA,MAAM,IAAI,KAAM,CAAA,8CAA8C,CAElE,CAPA,OASA,CAAA,MAAM,UAAWS,CAAAA,CAAAA,CAAwD,CACvE,IAAMf,CAAUE,CAAAA,CAAAA,CAAiB,IAAK,CAAA,OAAA,CAAQ,OAAO,CAAA,CACrD,OAAO,CACL,MAAA,CAAQ,IAAK,CAAA,OAAA,CAAQ,OACrB,CAAA,IAAA,CAAM,IAAK,CAAA,OAAA,CAAQ,MAAQ,CAC3B,CAAA,OAAA,CAAAF,CACF,CACF,CACF,CAAA,CAKMQ,CAAN,CAAA,cAA2BH,CAAW,CACpC,WAAA,CAA6BC,CAA8B,CAAA,CACzD,KAAM,EAAA,CADqB,IAAAA,CAAAA,OAAAA,CAAAA,CAAAA,CAGvB,GAACA,CAAAA,CAAAA,CAAQ,UAAcA,EAAAA,CAAAA,CAAQ,UAAW,CAAA,MAAA,GAAW,CACvD,CAAA,MAAM,IAAI,KAAM,CAAA,+CAA+C,CAEnE,CAEA,MAAM,UAAA,CAAWS,CAAwD,CAAA,CACvE,IAAIC,CAAa,CAAA,CAAA,CACbU,CAAS,CAAA,IAAA,CAAK,GAAI,CAAA,GAAG,IAAK,CAAA,OAAA,CAAQ,UAAU,CAC5CR,CAAAA,CAAAA,CAAmB,EAAC,CACpBlB,CAAoB,CAAA,EAElBmB,CAAAA,CAAAA,CAAW,IAAIC,kBAAAA,CAAO,QAAS,CAAA,CACnC,UAAY,CAAA,CAAA,CAAA,CACZ,KAAO,CAAA,CAACR,EAAKS,CAAUC,CAAAA,CAAAA,GAAa,CAClC,GAAIN,CAAaU,CAAAA,CAAAA,CACfX,CAAW,CAAA,OAAA,WACF,IAAK,CAAA,OAAA,CAAQ,UAAW,CAAA,QAAA,CAASC,CAAU,CAAA,CACpD,GAAIE,CAAAA,CAAO,SAAW,CAEpBA,CAAAA,CAAAA,CAASN,CACTZ,CAAAA,CAAAA,CAAUE,CAAiBgB,CAAAA,CAAM,CAEjC,CAAA,KAAA,IAAA,IAAS,CAAI,CAAA,CAAA,CAAG,CAAIA,CAAAA,CAAAA,CAAO,MAAQ,CAAA,CAAA,EAAA,CAC7BA,CAAO,CAAA,CAAC,IAAM,EAChBA,CAAAA,CAAAA,CAAO,CAAC,CAAA,CAAIN,CAAI,CAAA,CAAC,CAAE,CAAA,IAAA,GAEnBM,CAAO,CAAA,CAAC,CAAI,CAAA,CAAA,EAAGA,CAAO,CAAA,CAAC,CAAE,CAAA,IAAA,EAAM,CAAIN,CAAAA,EAAAA,CAAAA,CAAI,CAAC,CAAA,CAAE,IAAK,EAAC,CAElDZ,CAAAA,CAAAA,CAAAA,CAAQ,CAAC,CAAA,CAAIF,CAAe,CAAA,CAAC,CAInCkB,CAAAA,CAAAA,EAAAA,CACAM,CAAS,GACX,CACF,CAAC,CAAA,CAEDP,CAAW,CAAA,IAAA,CAAKI,CAAU,CAAA,CAAE,GAAK,CAAA,CAAA,CAAK,CAAC,CAGvC,CAAA,IAAMF,CAAO,CAAA,IAAA,CAAK,OAAQ,CAAA,IAAA,EAAQS,CAAS,CAAA,CAAA,CAG3C,OAAO,IAAI,OAAA,CAAQ,CAACH,CAAAA,CAASC,CAAW,GAAA,CACtCL,CAAS,CAAA,EAAA,CAAG,QAAU,CAAA,IAAM,CAC1BI,CAAAA,CAAQ,CAAE,MAAA,CAAAL,CAAQ,CAAA,IAAA,CAAAD,EAAM,OAAAjB,CAAAA,CAAQ,CAAC,EACnC,CAAC,CAAA,CACDe,CAAW,CAAA,EAAA,CAAG,QAAS,IAAM,CAC3BQ,CAAQ,CAAA,CAAE,MAAAL,CAAAA,CAAAA,CAAQ,IAAAD,CAAAA,CAAAA,CAAM,QAAAjB,CAAQ,CAAC,EACnC,CAAC,CACDe,CAAAA,CAAAA,CAAW,EAAG,CAAA,OAAA,CAAUU,GAAU,CAChCD,CAAAA,CAAOC,CAAK,EACd,CAAC,EACH,CAAC,CACH,CACF,CASMhB,CAAAA,CAAAA,CAAN,cAA2CJ,CAAW,CAGpD,WAAA,CAAoBC,CAA8C,CAAA,CAChE,OADkB,CAAA,IAAA,CAAA,OAAA,CAAAA,CAElB,CAAA,IAAA,CAAK,YAAeA,CAAAA,CAAAA,CAAQ,YAAgBT,EAAAA,EAC9C,CALQ,YAOR,CAAA,MAAM,UAAWkB,CAAAA,CAAAA,CAAwD,CACvE,IAAIC,CAAa,CAAA,CAAA,CACbC,CAAO,CAAA,CAAA,CACPC,CAAmB,CAAA,EACjBS,CAAAA,CAAAA,CAAmB,EAAC,CACtB3B,EAAoB,EAAC,CAGnBmB,CAAW,CAAA,IAAIC,kBAAO,CAAA,QAAA,CAAS,CACnC,UAAA,CAAY,GACZ,KAAO,CAAA,CAACR,CAAKS,CAAAA,CAAAA,CAAUC,CAAa,GAAA,CAalC,GAZAN,CAAAA,EAAAA,CACIA,GAAc,IAAK,CAAA,YAAA,EACrBD,CAAW,CAAA,OAAA,EAEbY,CAAAA,CAAAA,CAAK,IAAKf,CAAAA,CAAG,CAETD,CAAAA,CAAAA,CAAmBC,CAAG,CAAA,CAAID,CAAmBO,CAAAA,CAAM,CACrDA,GAAAA,CAAAA,CAASN,EACTK,CAAOD,CAAAA,CAAAA,CACPhB,CAAUE,CAAAA,CAAAA,CAAiBgB,CAAM,CAAA,CAAA,CAG/BJ,CAAmBF,CAAAA,CAAG,EAAG,CAE3B,IAAMgB,CAAcD,CAAAA,CAAAA,CAAKA,CAAK,CAAA,MAAA,CAAS,CAAC,CAAA,CAEtCC,GACAjB,CAAmBO,CAAAA,CAAM,CAAMP,GAAAA,CAAAA,CAAmBiB,CAAW,CAAA,EAC7D,CAACd,CAAAA,CAAmBc,CAAW,CAAA,GAG/BV,CAASU,CAAAA,CAAAA,CACTX,CAAOD,CAAAA,CAAAA,CAAa,CACpBhB,CAAAA,CAAAA,CAAUE,EAAiBgB,CAAM,CAAA,EAErC,CAEAI,CAAAA,GACF,CACF,CAAC,CAAA,CAEDP,EAAW,IAAKI,CAAAA,CAAAA,CAAU,CAAE,GAAA,CAAK,CAAK,CAAA,CAAC,CAEvC,CAAA,MAAM,IAAI,OAAc,CAAA,CAACI,CAASC,CAAAA,CAAAA,GAAW,CAC3CL,CAAAA,CAAS,EAAG,CAAA,QAAA,CAAU,IAAM,CAC1BI,CAAQ,GACV,CAAC,CAAA,CACDR,CAAW,CAAA,EAAA,CAAG,QAAS,IAAM,CAC3BQ,CAAQ,GACV,CAAC,CAAA,CACDR,CAAW,CAAA,EAAA,CAAG,QAAUU,CAAU,EAAA,CAChCD,CAAOC,CAAAA,CAAK,EACd,CAAC,EACH,CAAC,EAED,IAAII,CAAAA,CACAC,CAGJ,CAAA,IAAA,IAASC,CAAId,CAAAA,CAAAA,CAAMc,CAAIJ,CAAAA,CAAAA,CAAK,MAAQI,CAAAA,CAAAA,EAAAA,CAAK,CACvC,IAAMnB,CAAMe,CAAAA,CAAAA,CAAKI,CAAC,CAAA,CACdpB,EAAmBO,CAAM,CAAA,GAAMP,CAAmBC,CAAAA,CAAG,CAClCM,EAAAA,CAAAA,CAAO,MAAO,CAAA,CAACL,EAAMd,CACxBa,GAAAA,CAAAA,CAAAA,CAAIb,CAAK,CAAA,EAAG,IAAK,EAAA,EAAK,EAEnC,EAAA,KAAA,CAAM,KAAK,CACX,CAAA,KAAA,CAAOiC,CAASnB,EAAAA,CAAAA,CAAK,WAAY,EAAA,CAAE,QAASmB,CAAAA,CAAAA,CAAK,WAAY,EAAC,CAAC,CACnE,CAEgB,CAAA,MAAA,CAASd,CAAO,CAAA,MAAA,CAAS,KACxCW,CAAcjB,CAAAA,CAAAA,CACdkB,CAAYC,CAAAA,CAAAA,CAAI,CAChB/B,CAAAA,CAAAA,CAAUE,CAAiB2B,CAAAA,CAAW,GAG5C,CAEA,OAAO,CAAE,MAAA,CAAQA,CAAeX,EAAAA,CAAAA,CAAQ,IAAMY,CAAAA,CAAAA,EAAab,EAAM,OAAAjB,CAAAA,CAAQ,CAC3E,CACF,CC3TA,CAAA,eAAsBiC,CACpBC,CAAAA,CAAAA,CACA5B,CAC0B,CAAA,CAC1B,GAAI,CACF,IAAM6B,CAAAA,CAAiB7B,CAAS,EAAA,sBAAA,CAC5B,GACCA,CAAS,EAAA,cAAA,EAAkB,CAC1B8B,CAAAA,CAAAA,CAAAA,CAAeF,CAAO,CAAA,QAAA,CAAS,MAAM,CAAA,CAqBvCP,EApBiDU,kBAAK,CAAA,KAAA,CACxDD,CACA,CAAA,CACE,SAAW9B,CAAAA,CAAAA,CAAQ,SACnB,CAAA,iBAAA,CAAmBA,EAAQ,eAAmB,EAAA,CAC5C,GACA,CAAA,GAAA,CACA,GACA,CAAA,GAAA,CACA,GACA,CAAA,GAAA,CACA,IACA,GACF,CAAA,CACA,aAAeA,CAAAA,CAAAA,EAAS,aAAiB,EAAA,CAAA,CAAA,CACzC,MAAQ,CAAA,CAAA,CAAA,CACR,eAAA6B,CACF,CACF,CAEmB,CAAA,IAAA,CACnB,GAAI,CAACR,CAAQ,EAAA,CAACA,EAAK,MACjB,CAAA,OAAA,OAAA,CAAQ,GAAI,CAAA,2BAA2B,CAChC,CAAA,EAET,CAAA,IAAMW,EAAYhC,CAAS,EAAA,SAAA,GAAeiC,CAAUA,EAAAA,CAAAA,CAAAA,CAE9CC,CAAiBC,CAAAA,CAAAA,EACrBA,CAAK,CAAA,GAAA,CAAK7B,CAAQ,EAAA,MAAA,CAAO,MAAOA,CAAAA,CAAG,CAAE,CAAA,MAAA,CAAQ2B,CAAUA,EAAAA,CAAAA,GAAU,IAAI,CAAC,CAAA,CAClEG,CAAarC,CAAAA,CAAAA,CAAW,MAC5BC,CAAAA,CAAAA,CAAQ,sBAA0B,EAAA,CAChC,UAAW,SACb,CACF,CACMqC,CAAAA,CAAAA,CAAeC,UAAS,CAAA,IAAA,CAAKJ,CAAcb,CAAAA,CAAI,CAAC,CAChD,CAAA,CAAE,MAAAT,CAAAA,CAAAA,CAAQ,IAAAD,CAAAA,CAAAA,CAAM,OAAAjB,CAAAA,CAAQ,CAAI,CAAA,MAAM0C,CAAW,CAAA,UAAA,CAAWC,CAAY,CAAA,CAK1E,GAHKrC,CAAAA,EAAS,wBAAwBqB,CAAK,CAAA,MAAA,CAAO,CAAGV,CAAAA,CAAI,CAGrDU,CAAAA,CAAAA,CAAK,MAAW,GAAA,CAAA,CAClB,OAGF,KACEA,CAAAA,CAAK,MAAS,CAAA,CAAA,EACd,MAAO,CAAA,MAAA,CAAOA,CAAKA,CAAAA,CAAAA,CAAK,OAAS,CAAC,CAAC,CAAE,CAAA,KAAA,CAAMkB,CAAkB,CAAA,EAE7DlB,CAAK,CAAA,GAAA,EAGP,CAAA,IAAMmB,CAAgBxC,CAAAA,CAAAA,EAAS,sBAAyBN,CAAAA,CAAAA,CAAUkB,CAE5Df,CAAAA,CAAAA,CAAU4C,EAA8BD,CAAa,CAAA,CAErDL,CAA8Bd,CAAAA,CAAAA,CACjC,MAAQf,CAAAA,CAAAA,EACFuB,CAME,CAAA,CALS,OAAO,MAAOvB,CAAAA,CAAG,CAAE,CAAA,KAAA,CACjCuB,CAAmB,GAAA,QAAA,CACfU,CACCN,CAAAA,CAAAA,EAAUA,IAAU,EAC3B,CAAA,CAL4B,CAO7B,CAAA,CAAA,CACA,GAAK3B,CAAAA,CAAAA,EAAQ,CACZ,IAAMoC,CAAYC,CAAAA,cAAAA,CAAQrC,CAAMsC,CAAAA,CAAAA,EAAQ/C,CAAQ+C,CAAAA,CAAG,CAAC,CAAA,CACpD,OAAOC,gBAAUH,CAAAA,CAAAA,CAAYT,CAAW,GAAA,CACtC,KAAOD,CAAAA,CAAAA,CAAUC,CAAK,CACxB,EAAE,CACJ,CAAC,CAECa,CAAAA,CAAAA,CAEJ,OAAI9C,CAAAA,EAAS,sBACX8C,GAAAA,CAAAA,CAAW,CACT,UAAY,CAAA,CAACnC,CAAI,CACnB,CAIK,CAAA,CAAA,CACL,CAFgB,QAEN,EAAG,CACX,OAAAd,CAAAA,CAAAA,CACA,IAAAsC,CAAAA,CAAAA,CACA,QAAAW,CAAAA,CACF,CACF,CACF,CAAA,MAAS3B,CAAO,CAAA,CACd,MAAQ,OAAA,CAAA,GAAA,CAAI,oBAAsBA,CAAAA,CAAK,EACjCA,CACR,CACF,CAEA,SAASsB,CAA8BM,CAAAA,CAAAA,CAA4B,CACjE,IAAMC,EAAiC,EAAC,CAClCrD,CAAmB,CAAA,EACzB,CAAA,IAAA,GAAW,CAACiD,CAAAA,CAAKX,CAAK,CAAA,GAAK,MAAO,CAAA,OAAA,CAAQc,CAAM,CAAA,CAAG,CACjD,IAAME,EAAahB,CAAO,EAAA,QAAA,EAAW,CAAA,OAAA,CAAQ,GAAK,CAAA,EAAE,CAChDgB,CAAAA,CAAAA,EAAcD,EAAOf,CAAK,CAAA,EAC5BtC,CAAOiD,CAAAA,CAAG,CAAI,CAAA,CAAA,EAAGK,CAAU,CAAA,CAAA,EAAID,EAAOf,CAAK,CAAC,CAC5Ce,CAAAA,CAAAA,CAAAA,CAAOf,CAAK,CAAA,EAAA,GAEZtC,CAAOiD,CAAAA,CAAG,CAAIK,CAAAA,CAAAA,CACdD,CAAOf,CAAAA,CAAK,CAAI,CAAA,CAAA,EAEpB,CAEA,OAAOtC,CACT,CAEA,IAAM4C,CAAsBN,CAAAA,CAAAA,EAC1BA,CAAU,GAAA,IAAA,EAAS,OAAOA,CAAAA,EAAU,UAAYA,CAAM,CAAA,IAAA,EAAW,GAAA,EAAA,CChIvDiB,IAAAA,CAAAA,CAAAA,CAAAA,CAAAA,GACVA,CAAA,CAAA,GAAA,CAAM,MACNA,CAAA,CAAA,GAAA,CAAM,KACNA,CAAAA,CAAAA,CAAA,GAAM,CAAA,KAAA,CAHIA,CAAA,CAAA,EAAA,CAAA,EAAA,EAAA,CAAA,CAoBCC,EAAqB,CAChCC,CAAAA,CACApD,CACG,GAAA,CACH,GAAI,MAAA,CAAO,MAAOkD,CAAAA,CAAe,EAAE,QAASE,CAAAA,CAA0B,CACpE,CAAA,MAAM,IAAI,KAAA,CACR,CAAGA,EAAAA,CAAO,sEACZ,CAGF,CAAA,OAAOC,uBAAUD,CAAAA,CAAAA,CAAS,WAAazB,CAAAA,CAAAA,CAAa3B,CAAO,CAC7D,EAEasD,CAAkB3B,CAAAA","file":"index.cjs","sourcesContent":["import stream from 'stream'\n\nexport const ROWS_TO_SEARCH_FOR_HEADER = 10\n\ninterface DefaultOptions {\n algorithm: 'default'\n rowsToSearch?: number\n}\n\ninterface ExplicitHeadersOptions {\n algorithm: 'explicitHeaders'\n headers: string[]\n skip?: number\n}\n\ninterface SpecificRowsOptions {\n algorithm: 'specificRows'\n rowNumbers: number[]\n skip?: number\n}\n\ninterface DataRowAndSubHeaderDetectionOptions {\n algorithm: 'dataRowAndSubHeaderDetection'\n rowsToSearch?: number\n}\n\ninterface NewfangledOptions {\n algorithm: 'newfangled'\n}\n\nexport type GetHeadersOptions =\n | DefaultOptions\n | ExplicitHeadersOptions\n | SpecificRowsOptions\n | DataRowAndSubHeaderDetectionOptions\n | NewfangledOptions\n\ninterface GetHeadersResult {\n header: string[]\n skip: number\n letters: string[]\n}\n\nexport const indexToLetters = (index: number): string => {\n const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'\n let result = ''\n\n while (index >= 0) {\n result = letters[index % 26] + result\n index = Math.floor(index / 26) - 1\n }\n\n return result\n}\n\nexport const headersToLetters = (headers: string[]): string[] => {\n return headers.map((_, index) => indexToLetters(index))\n}\n\n// Takes a datastream (representing a CSV) and returns the header row and the number of rows to skip\nexport abstract class Headerizer {\n constructor() {}\n abstract getHeaders(dataStream: stream.Readable): Promise<GetHeadersResult>\n\n static create(options: GetHeadersOptions): Headerizer {\n switch (options.algorithm) {\n case 'explicitHeaders':\n return new ExplicitHeaders(options)\n case 'specificRows':\n return new SpecificRows(options)\n case 'dataRowAndSubHeaderDetection':\n return new DataRowAndSubHeaderDetection(options)\n case 'newfangled':\n throw new Error('Not implemented')\n default:\n return new OriginalDetector(options)\n }\n }\n}\n\nexport const countNonEmptyCells = (row: string[]): number => {\n return row.filter((cell) => `${cell}`.trim() !== '').length\n}\n\nexport const likelyContainsData = (row: string[]): boolean => {\n return row.some(\n (cell) =>\n cell === null ||\n cell.trim() === '' ||\n !isNaN(Number(cell.trim())) ||\n cell.trim().toLowerCase() === 'true' ||\n cell.trim().toLowerCase() === 'false'\n )\n}\n\n// This is the original / default implementation of detectHeader.\n// It looks at the first `rowsToSearch` rows and takes the row\n// with the most non-empty cells as the header, preferring the earliest\n// such row in the case of a tie.\nclass OriginalDetector extends Headerizer {\n private rowsToSearch: number\n\n constructor(private options: DefaultOptions) {\n super()\n this.rowsToSearch = options.rowsToSearch || ROWS_TO_SEARCH_FOR_HEADER\n }\n\n async getHeaders(dataStream: stream.Readable): Promise<GetHeadersResult> {\n let currentRow = 0\n let skip = 0\n let header: string[] = []\n let letters: string[] = []\n\n // This is the original implementation of detectHeader\n const detector = new stream.Writable({\n objectMode: true,\n write: (row, encoding, callback) => {\n currentRow++\n if (currentRow >= this.rowsToSearch) {\n dataStream.destroy()\n }\n if (countNonEmptyCells(row) > countNonEmptyCells(header)) {\n header = row\n skip = currentRow\n letters = headersToLetters(header)\n }\n callback()\n },\n })\n\n dataStream.pipe(detector, { end: true })\n\n return new Promise((resolve, reject) => {\n detector.on('finish', () => {\n resolve({ header, skip, letters })\n })\n dataStream.on('close', () => {\n resolve({ header, skip, letters })\n })\n dataStream.on('error', (error) => {\n reject(error)\n })\n })\n }\n}\n\n// This implementation simply returns an explicit list of headers\n// it was provided with.\nclass ExplicitHeaders extends Headerizer {\n headers: string[]\n constructor(private readonly options: ExplicitHeadersOptions) {\n super()\n\n if (!options.headers || options.headers.length === 0) {\n throw new Error('ExplicitHeaders requires at least one header')\n }\n }\n\n async getHeaders(dataStream: stream.Readable): Promise<GetHeadersResult> {\n const letters = headersToLetters(this.options.headers)\n return {\n header: this.options.headers,\n skip: this.options.skip || 0,\n letters,\n }\n }\n}\n\n// This implementation looks at specific rows and combines them into a single header.\n// For example, if you knew that the header was in the third row, you could pass it\n// { rowNumbers: [2] }\nclass SpecificRows extends Headerizer {\n constructor(private readonly options: SpecificRowsOptions) {\n super()\n\n if (!options.rowNumbers || options.rowNumbers.length === 0) {\n throw new Error('SpecificRows requires at least one row number')\n }\n }\n\n async getHeaders(dataStream: stream.Readable): Promise<GetHeadersResult> {\n let currentRow = 0\n let maxRow = Math.max(...this.options.rowNumbers)\n let header: string[] = []\n let letters: string[] = []\n\n const detector = new stream.Writable({\n objectMode: true,\n write: (row, encoding, callback) => {\n if (currentRow > maxRow) {\n dataStream.destroy()\n } else if (this.options.rowNumbers.includes(currentRow)) {\n if (header.length === 0) {\n // This is the first header row we've seen, so just remember it\n header = row\n letters = headersToLetters(header)\n } else {\n for (let i = 0; i < header.length; i++) {\n if (header[i] === '') {\n header[i] = row[i].trim()\n } else {\n header[i] = `${header[i].trim()} ${row[i].trim()}`\n }\n letters[i] = indexToLetters(i)\n }\n }\n }\n currentRow++\n callback()\n },\n })\n\n dataStream.pipe(detector, { end: true })\n\n // If we have an explicit skip, use it, otherwise skip past the last header row\n const skip = this.options.skip ?? maxRow + 1\n\n // TODO: this logic is duplicated, factor it out?\n return new Promise((resolve, reject) => {\n detector.on('finish', () => {\n resolve({ header, skip, letters })\n })\n dataStream.on('close', () => {\n resolve({ header, skip, letters })\n })\n dataStream.on('error', (error) => {\n reject(error)\n })\n })\n }\n}\n\n// This implementation attempts to detect the first data row and select the previous\n// row as the header. If the data row cannot be detected due to all of the sample\n// rows being full and not castable to a number or boolean type, it also will attempt\n// to detect a sub header row by checking following rows after a header is detected\n// for significant fuzzy matching. If over half of the fields in a possible sub header\n// row fuzzy match with the originally detected header row, the sub header row becomes\n// the new header.\nclass DataRowAndSubHeaderDetection extends Headerizer {\n private rowsToSearch: number\n\n constructor(private options: DataRowAndSubHeaderDetectionOptions) {\n super()\n this.rowsToSearch = options.rowsToSearch || ROWS_TO_SEARCH_FOR_HEADER\n }\n\n async getHeaders(dataStream: stream.Readable): Promise<GetHeadersResult> {\n let currentRow = 0\n let skip = 0\n let header: string[] = []\n const rows: string[][] = []\n let letters: string[] = []\n\n // This is the original implementation of detectHeader\n const detector = new stream.Writable({\n objectMode: true,\n write: (row, encoding, callback) => {\n currentRow++\n if (currentRow >= this.rowsToSearch) {\n dataStream.destroy()\n }\n rows.push(row)\n\n if (countNonEmptyCells(row) > countNonEmptyCells(header)) {\n header = row\n skip = currentRow\n letters = headersToLetters(header)\n }\n // check if row has numeric, boolean, or empty values\n if (likelyContainsData(row)) {\n // if so, check if the row before is as long as the current header and only contains strings\n const previousRow = rows[rows.length - 2]\n if (\n previousRow &&\n countNonEmptyCells(header) === countNonEmptyCells(previousRow) &&\n !likelyContainsData(previousRow)\n ) {\n // if it is, make it the header\n header = previousRow\n skip = currentRow - 1\n letters = headersToLetters(header)\n }\n }\n\n callback()\n },\n })\n\n dataStream.pipe(detector, { end: true })\n\n await new Promise<void>((resolve, reject) => {\n detector.on('finish', () => {\n resolve()\n })\n dataStream.on('close', () => {\n resolve()\n })\n dataStream.on('error', (error) => {\n reject(error)\n })\n })\n\n let fuzzyHeader: string[] | undefined\n let fuzzySkip: number | undefined\n // check if any rows after the header fuzzy match with the\n // chosen header, indicating it's a sub header\n for (let i = skip; i < rows.length; i++) {\n const row = rows[i]\n if (countNonEmptyCells(header) === countNonEmptyCells(row)) {\n const fuzzyMatches = header.filter((cell, index) => {\n const rowCell = row[index]?.trim() ?? ''\n return rowCell\n .split(/\\s+/)\n .every((word) => cell.toLowerCase().includes(word.toLowerCase()))\n })\n\n if (fuzzyMatches.length / header.length > 0.5) {\n fuzzyHeader = row\n fuzzySkip = i + 1\n letters = headersToLetters(fuzzyHeader)\n }\n }\n }\n\n return { header: fuzzyHeader ?? header, skip: fuzzySkip ?? skip, letters }\n }\n}\n","import { Flatfile } from '@flatfile/api'\nimport { WorkbookCapture } from '@flatfile/util-extractor'\nimport Papa, { ParseResult } from 'papaparse'\nimport { mapKeys, mapValues } from 'remeda'\nimport { Readable } from 'stream'\nimport { DelimiterOptions } from '.'\nimport { Headerizer } from './header.detection'\n\ntype ParseBufferOptions = Omit<DelimiterOptions, 'chunkSize' | 'parallel'> & {\n readonly headerSelectionEnabled?: boolean\n}\n\nexport async function parseBuffer(\n buffer: Buffer,\n options: ParseBufferOptions\n): Promise<WorkbookCapture> {\n try {\n const skipEmptyLines = options?.headerSelectionEnabled\n ? false\n : (options?.skipEmptyLines ?? false)\n const fileContents = buffer.toString('utf8')\n const results: ParseResult<Record<string, string>> = Papa.parse(\n fileContents,\n {\n delimiter: options.delimiter,\n delimitersToGuess: options.guessDelimiters || [\n ',',\n '|',\n '\\t',\n ';',\n ':',\n '~',\n '^',\n '#',\n ],\n dynamicTyping: options?.dynamicTyping || false,\n header: false,\n skipEmptyLines,\n }\n )\n\n let rows = results.data\n if (!rows || !rows.length) {\n console.log('No data found in the file')\n return {} as WorkbookCapture\n }\n const transform = options?.transform || ((value) => value)\n\n const extractValues = (data: Record<string, any>[]) =>\n data.map((row) => Object.values(row).filter((value) => value !== null))\n const headerizer = Headerizer.create(\n options.headerDetectionOptions || {\n algorithm: 'default',\n }\n )\n const headerStream = Readable.from(extractValues(rows))\n const { header, skip, letters } = await headerizer.getHeaders(headerStream)\n\n if (!options?.headerSelectionEnabled) rows.splice(0, skip)\n\n // return if there are no rows\n if (rows.length === 0) {\n return\n }\n\n while (\n rows.length > 0 &&\n Object.values(rows[rows.length - 1]).every(isNullOrWhitespace)\n ) {\n rows.pop()\n }\n\n const columnHeaders = options?.headerSelectionEnabled ? letters : header\n\n const headers = prependNonUniqueHeaderColumns(columnHeaders)\n\n const data: Flatfile.RecordData[] = rows\n .filter((row) => {\n if (!skipEmptyLines) return true\n const isEmpty = Object.values(row).every(\n skipEmptyLines === 'greedy'\n ? isNullOrWhitespace\n : (value) => value === ''\n )\n return !isEmpty\n })\n .map((row) => {\n const mappedRow = mapKeys(row, (key) => headers[key])\n return mapValues(mappedRow, (value) => ({\n value: transform(value),\n })) as Flatfile.RecordData\n })\n\n let metadata: { rowHeaders: number[] } | null\n\n if (options?.headerSelectionEnabled) {\n metadata = {\n rowHeaders: [skip],\n }\n }\n\n const sheetName = 'Sheet1'\n return {\n [sheetName]: {\n headers,\n data,\n metadata,\n },\n } as WorkbookCapture\n } catch (error) {\n console.log('An error occurred:', error)\n throw error\n }\n}\n\nfunction prependNonUniqueHeaderColumns(record: string[]): string[] {\n const counts: Record<string, number> = {}\n const result: string[] = []\n for (const [key, value] of Object.entries(record)) {\n const cleanValue = value?.toString().replace('*', '')\n if (cleanValue && counts[value]) {\n result[key] = `${cleanValue}_${counts[value]}`\n counts[value]++\n } else {\n result[key] = cleanValue\n counts[value] = 1\n }\n }\n\n return result\n}\n\nconst isNullOrWhitespace = (value: any) =>\n value === null || (typeof value === 'string' && value.trim() === '')\n","import { Flatfile } from '@flatfile/api'\nimport { Extractor } from '@flatfile/util-extractor'\nimport { GetHeadersOptions } from './header.detection'\nimport { parseBuffer } from './parser'\n\nexport enum NativeFileTypes {\n CSV = 'csv',\n TSV = 'tsv',\n PSV = 'psv',\n}\n\nexport type Delimiters = ',' | '|' | '\\t' | ';' | ':' | '~' | '^' | '#'\n\nexport interface DelimiterOptions {\n readonly delimiter?: Delimiters\n readonly guessDelimiters?: Delimiters[]\n readonly dynamicTyping?: boolean\n readonly skipEmptyLines?: boolean | 'greedy'\n readonly transform?: (value: any) => Flatfile.CellValueUnion\n readonly chunkSize?: number\n readonly parallel?: number\n readonly headerDetectionOptions?: GetHeadersOptions\n readonly debug?: boolean\n}\n\nexport const DelimiterExtractor = (\n fileExt: string,\n options: DelimiterOptions\n) => {\n if (Object.values(NativeFileTypes).includes(fileExt as NativeFileTypes)) {\n throw new Error(\n `${fileExt} is a native file type and not supported by the delimiter extractor.`\n )\n }\n\n return Extractor(fileExt, 'delimiter', parseBuffer, options)\n}\n\nexport const delimiterParser = parseBuffer\n"]}