/*
 * @flatfile/plugin-delimiter-extractor
 * Version:
 * A plugin for parsing .delimiter files in Flatfile.
 * 10 lines (8 loc) • 4.53 kB
 * JavaScript
 */
import { Extractor } from '@flatfile/util-extractor';
import L from 'papaparse';
import { mapKeys, mapValues } from 'remeda';
import E, { Readable } from 'stream';
/** Number of leading rows scanned for a header when `rowsToSearch` is not configured. */
const N = 10;

/**
 * Renders any cell as text; `null` and `undefined` become the empty string.
 * Cells may be numbers or booleans (e.g. when `dynamicTyping` is enabled), so
 * string methods must never be called on them directly.
 */
const toText = (cell) => (cell == null ? "" : String(cell));

/**
 * Converts a zero-based column index into a spreadsheet-style letter label
 * (0 -> "A", 25 -> "Z", 26 -> "AA", ...).
 *
 * @param {number} s - Zero-based column index.
 * @returns {string} The column label; empty string for a negative index.
 */
const v = (s) => {
  const alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  let label = "";
  for (let index = s; index >= 0; index = Math.floor(index / 26) - 1) {
    label = alphabet[index % 26] + label;
  }
  return label;
};

/**
 * Builds the list of column letters ("A", "B", ...) for a row, one per cell.
 *
 * @param {Array} s - A row; only its length matters.
 * @returns {string[]} Column letters, same length as the row.
 */
const y = (s) => s.map((_cell, index) => v(index));

/**
 * Factory base class for header-detection strategies. Every strategy exposes
 * `async getHeaders(stream)` resolving to `{ header, skip, letters }`.
 */
class h {
  /**
   * Instantiates the strategy named by `t.algorithm`; unknown names fall back
   * to the default (densest-row) strategy.
   *
   * @param {{algorithm: string}} t - Header detection options.
   * @throws {Error} For the reserved, unimplemented "newfangled" algorithm,
   *   or when the chosen strategy rejects its options.
   */
  static create(t) {
    switch (t.algorithm) {
      case "explicitHeaders":
        return new k(t);
      case "specificRows":
        return new D(t);
      case "dataRowAndSubHeaderDetection":
        return new x(t);
      case "newfangled":
        throw new Error("Not implemented");
      default:
        return new S(t);
    }
  }
}

/**
 * Counts the cells of a row that contain something other than whitespace.
 * `null` and `undefined` cells count as empty.
 *
 * @param {Array} s - A row of cells.
 * @returns {number} Number of non-blank cells.
 */
const f = (s) => s.filter((cell) => toText(cell).trim() !== "").length;

/**
 * Tells whether a row looks like data rather than a header: true when any
 * cell is blank, numeric, or a boolean literal ("true"/"false", any case).
 * Non-string cells are coerced to text first so they cannot throw.
 *
 * @param {Array} s - A row of cells.
 * @returns {boolean} True when at least one cell is data-like.
 */
const T = (s) =>
  s.some((cell) => {
    const text = toText(cell).trim();
    // Number("") is 0, so the blank case is covered by both checks.
    if (text === "" || !Number.isNaN(Number(text))) {
      return true;
    }
    const lowered = text.toLowerCase();
    return lowered === "true" || lowered === "false";
  });

/**
 * Pipes `source` into an object-mode sink that hands each row to `onRow`.
 * Settles when the sink finishes or when the source closes (which is how an
 * early `source.destroy()` from inside `onRow` ends the scan), whichever
 * happens first. An exception thrown by `onRow` rejects the promise.
 *
 * @param {import('stream').Readable} source - Object-mode stream of rows.
 * @param {(row: Array) => void} onRow - Synchronous per-row handler.
 * @returns {Promise<void>}
 */
const drainRows = (source, onRow) => {
  const sink = new E.Writable({
    objectMode: true,
    write: (row, _encoding, done) => {
      let failure;
      try {
        onRow(row);
      } catch (error) {
        failure = error;
      }
      done(failure);
    },
  });
  source.pipe(sink, { end: true });
  return new Promise((resolve, reject) => {
    sink.on("finish", () => resolve());
    sink.on("error", reject);
    source.on("close", () => resolve());
    source.on("error", reject);
  });
};

/**
 * Default strategy: within the first `rowsToSearch` rows, the first row with
 * the highest number of non-blank cells is taken as the header.
 */
class S extends h {
  rowsToSearch;

  constructor(e) {
    super();
    this.options = e;
    this.rowsToSearch = e.rowsToSearch || N;
  }

  /**
   * @param {import('stream').Readable} e - Object-mode stream of rows.
   * @returns {Promise<{header: Array, skip: number, letters: string[]}>}
   *   `skip` is the 1-based position of the header row, i.e. the number of
   *   leading rows to drop so that data starts right after the header.
   */
  async getHeaders(e) {
    let rowCount = 0;
    let skip = 0;
    let header = [];
    let letters = [];
    await drainRows(e, (row) => {
      rowCount++;
      // Stop reading once the search window is exhausted; the current row is
      // still evaluated below.
      if (rowCount >= this.rowsToSearch) {
        e.destroy();
      }
      if (f(row) > f(header)) {
        header = row;
        skip = rowCount;
        letters = y(header);
      }
    });
    return { header, skip, letters };
  }
}

/** Strategy that uses caller-supplied headers and never reads the stream. */
class k extends h {
  /**
   * @param {{headers: string[], skip?: number}} e
   * @throws {Error} When no headers are supplied.
   */
  constructor(e) {
    super();
    this.options = e;
    if (!e.headers || e.headers.length === 0) {
      throw new Error("ExplicitHeaders requires at least one header");
    }
  }

  /**
   * @returns {Promise<{header: string[], skip: number, letters: string[]}>}
   */
  async getHeaders(e) {
    const letters = y(this.options.headers);
    return {
      header: this.options.headers,
      skip: this.options.skip || 0,
      letters,
    };
  }
}

/**
 * Strategy that builds the header from specific zero-based row numbers. When
 * several rows are listed, their cells are merged column by column into
 * "top sub" labels.
 */
class D extends h {
  /**
   * @param {{rowNumbers: number[], skip?: number}} e
   * @throws {Error} When no row numbers are supplied.
   */
  constructor(e) {
    super();
    this.options = e;
    if (!e.rowNumbers || e.rowNumbers.length === 0) {
      throw new Error("SpecificRows requires at least one row number");
    }
  }

  /**
   * @param {import('stream').Readable} e - Object-mode stream of rows.
   * @returns {Promise<{header: Array, skip: number, letters: string[]}>}
   *   `skip` defaults to one past the last header row unless `options.skip`
   *   is set.
   */
  async getHeaders(e) {
    const { rowNumbers } = this.options;
    const lastHeaderRow = Math.max(...rowNumbers);
    let rowIndex = 0;
    let header = [];
    let letters = [];
    await drainRows(e, (row) => {
      if (rowIndex > lastHeaderRow) {
        // Every requested row has been seen; stop reading.
        e.destroy();
      } else if (rowNumbers.includes(rowIndex)) {
        if (header.length === 0) {
          // Copy so that merging below never mutates the caller's row.
          header = [...row];
          letters = y(header);
        } else {
          for (let column = 0; column < header.length; column++) {
            // toText guards against short rows and non-string cells.
            const top = toText(header[column]).trim();
            const sub = toText(row[column]).trim();
            header[column] = top === "" ? sub : `${top} ${sub}`;
            letters[column] = v(column);
          }
        }
      }
      rowIndex++;
    });
    return {
      header,
      skip: this.options.skip ?? lastHeaderRow + 1,
      letters,
    };
  }
}

/**
 * Strategy that picks the densest row like the default one, but prefers the
 * row immediately above the first data-like row, and then looks for a
 * sub-header row whose words mostly appear in the detected header.
 */
class x extends h {
  rowsToSearch;

  constructor(e) {
    super();
    this.options = e;
    this.rowsToSearch = e.rowsToSearch || N;
  }

  /**
   * @param {import('stream').Readable} e - Object-mode stream of rows.
   * @returns {Promise<{header: Array, skip: number, letters: string[]}>}
   */
  async getHeaders(e) {
    let rowCount = 0;
    let skip = 0;
    let header = [];
    let letters = [];
    const rows = [];
    await drainRows(e, (row) => {
      rowCount++;
      if (rowCount >= this.rowsToSearch) {
        e.destroy();
      }
      rows.push(row);
      if (f(row) > f(header)) {
        header = row;
        skip = rowCount;
        letters = y(header);
      }
      if (T(row)) {
        // A data-like row directly below an equally wide, non-data row means
        // that previous row is the real header.
        const previous = rows[rows.length - 2];
        if (previous && f(header) === f(previous) && !T(previous)) {
          header = previous;
          skip = rowCount - 1;
          letters = y(header);
        }
      }
    });

    let subHeader;
    let subHeaderSkip;
    for (let index = skip; index < rows.length; index++) {
      const candidate = rows[index];
      if (f(header) !== f(candidate)) {
        continue;
      }
      // A column matches when every word of the candidate cell occurs in the
      // header cell (case-insensitive); a blank candidate cell always matches.
      const matching = header.filter((name, column) => {
        const haystack = toText(name).toLowerCase();
        return toText(candidate[column])
          .trim()
          .split(/\s+/)
          .every((word) => haystack.includes(word.toLowerCase()));
      }).length;
      if (matching / header.length > 0.5) {
        subHeader = candidate;
        subHeaderSkip = index + 1;
        letters = y(subHeader);
      }
    }
    return {
      header: subHeader ?? header,
      skip: subHeaderSkip ?? skip,
      letters,
    };
  }
}

/**
 * Parses a delimited-text buffer into a single-sheet workbook capture.
 *
 * @param {Buffer} s - Raw file contents; decoded as UTF-8.
 * @param {object} [t] - Parser options: `delimiter`, `guessDelimiters`,
 *   `dynamicTyping`, `skipEmptyLines` (boolean or "greedy"), `transform`,
 *   `headerDetectionOptions`, `headerSelectionEnabled`. May be omitted.
 * @returns {Promise<object|undefined>} `{ Sheet1: { headers, data, metadata } }`;
 *   `{}` when the file has no rows; `undefined` when no rows remain after the
 *   header rows are skipped.
 * @throws Re-throws any parsing/detection error after logging it.
 */
async function C(s, t) {
  try {
    // Tolerate a missing options argument instead of failing on `t.delimiter`.
    const options = t ?? {};
    const headerSelectionEnabled = Boolean(options.headerSelectionEnabled);
    // With header selection every row must be kept so row indexes stay stable.
    const skipEmptyLines = headerSelectionEnabled
      ? false
      : options.skipEmptyLines ?? false;

    const rows = L.parse(s.toString("utf8"), {
      delimiter: options.delimiter,
      // NOTE(review): the third entry is the single whitespace character found
      // in the bundled source — confirm whether a tab ("\t") was intended.
      delimitersToGuess: options.guessDelimiters || [",", "|", " ", ";", ":", "~", "^", "#"],
      dynamicTyping: options.dynamicTyping || false,
      header: false,
      skipEmptyLines,
    }).data;

    if (!rows || !rows.length) {
      console.log("No data found in the file");
      return {};
    }

    const transform = options.transform || ((value) => value);
    const detector = h.create(
      options.headerDetectionOptions || { algorithm: "default" }
    );
    // Header detection works on copies of the rows with null cells removed.
    const detectionRows = rows.map((row) =>
      Object.values(row).filter((cell) => cell !== null)
    );
    const { header, skip, letters } = await detector.getHeaders(
      Readable.from(detectionRows)
    );

    if (!headerSelectionEnabled) {
      rows.splice(0, skip);
    }
    // NOTE(review): returns undefined here but {} for an empty file above —
    // kept as-is; confirm which shape the caller expects.
    if (rows.length === 0) {
      return;
    }

    // Drop trailing rows that contain only blank cells.
    while (rows.length > 0 && Object.values(rows[rows.length - 1]).every(z)) {
      rows.pop();
    }

    const headers = M(headerSelectionEnabled ? letters : header);
    const isBlankCell = skipEmptyLines === "greedy" ? z : (cell) => cell === "";
    const data = rows
      .filter(
        (row) => !skipEmptyLines || !Object.values(row).every(isBlankCell)
      )
      .map((row) => {
        const keyed = mapKeys(row, (key) => headers[key]);
        return mapValues(keyed, (value) => ({ value: transform(value) }));
      });
    const metadata = headerSelectionEnabled ? { rowHeaders: [skip] } : undefined;

    return { ["Sheet1"]: { headers, data, metadata } };
  } catch (e) {
    console.log("An error occurred:", e);
    throw e;
  }
}

/**
 * Normalizes header names: removes the first "*" from each name and makes
 * repeated names unique by appending `_1`, `_2`, ... to later occurrences.
 * Duplicates are tracked by the cleaned name in a Map, so "Name*" and "Name"
 * are recognized as the same column and names such as "constructor" cannot
 * collide with inherited object properties. Blank headers pass through as-is.
 *
 * @param {Array} s - Raw header cells.
 * @returns {Array<string|undefined>} Cleaned, de-duplicated header names.
 */
function M(s) {
  const occurrences = new Map();
  const result = [];
  for (const [index, raw] of Object.entries(s)) {
    const clean = raw?.toString().replace("*", "");
    if (!clean) {
      result[index] = clean;
      continue;
    }
    const seen = occurrences.get(clean) ?? 0;
    result[index] = seen > 0 ? `${clean}_${seen}` : clean;
    occurrences.set(clean, seen + 1);
  }
  return result;
}

/** True for `null` and for strings containing only whitespace. */
const z = (s) => s === null || (typeof s === "string" && s.trim() === "");

/** File extensions handled natively, which this extractor refuses to take over. */
const G = { CSV: "csv", TSV: "tsv", PSV: "psv" };

/**
 * Creates a delimiter extractor for the given file extension.
 *
 * @param {string} s - File extension to handle.
 * @param {object} [t] - Options forwarded to the extractor/parser.
 * @throws {Error} When `s` is one of the native file types in `G`.
 */
const X = (s, t) => {
  if (Object.values(G).includes(s)) {
    throw new Error(
      `${s} is a native file type and not supported by the delimiter extractor.`
    );
  }
  return Extractor(s, "delimiter", C, t);
};

/** Public alias of the buffer parser. */
const Y = C;
export { X as DelimiterExtractor, G as NativeFileTypes, Y as delimiterParser };
//# sourceMappingURL=index.js.map
//# sourceMappingURL=index.js.map