@flatfile/util-extractor
Version:
A library containing common utilities and helpers for extractors.
1 lines • 15.1 kB
Source Map (JSON)
{"version":3,"sources":["../src/index.ts"],"names":["api","FlatfileClient","WORKBOOK_CREATION_DELAY","Extractor","fileExt","extractorType","parseBuffer","options","listener","event","fileId","spaceId","file","jobs","Flatfile","jobId","chunkSize","parallel","debug","tick","progress","info","entitlements","headerSelectionEnabled","e","sourceEditorEnabled","buffer","getFileBuffer","getHeaders","data","headers","capture","workbook","createWorkbook","resolve","sheet","createAllRecords","normalizeRecordKeys","_progress","part","totalParts","updateSheetMetadata","environmentId","workbookCapture","workbookConfig","getWorkbookConfig","name","sheets","sheetName","getSheetConfig","descriptions","normalizeSheetConfig","slugify","keysToFields","normalizeKey","key","record","normalizedRecord","keys","metadata"],"mappings":";;;;;;IAQMA,CAAM,CAAA,IAAIC,kBAEVC,CAAAA,CAAAA,CAA0B,IAEnBC,CAAY,CAAA,CACvBC,EACAC,CACAC,CAAAA,CAAAA,CAIAC,IAEQC,CAA+B,EAAA,CACrCA,EAAS,EAAG,CAAA,cAAA,CAAgB,MAAOC,CAAU,EAAA,CAC3C,GAAM,CAAE,MAAA,CAAAC,EAAQ,OAAAC,CAAAA,CAAQ,CAAIF,CAAAA,CAAAA,CAAM,QAC5B,CAAE,IAAA,CAAMG,CAAK,CAAI,CAAA,MAAMZ,EAAI,KAAM,CAAA,GAAA,CAAIU,CAAM,CAAA,CASjD,GARIE,CAAK,CAAA,IAAA,GAAS,UAId,OAAOR,CAAAA,EAAY,UAAY,CAACQ,CAAAA,CAAK,IAAK,CAAA,QAAA,CAASR,CAAO,CAI1DA,EAAAA,CAAAA,YAAmB,QAAU,CAACA,CAAAA,CAAQ,KAAKQ,CAAK,CAAA,IAAI,EACtD,OAEF,IAAMC,EAAO,MAAMb,CAAAA,CAAI,KAAK,MAAO,CAAA,CACjC,KAAMc,YAAS,CAAA,OAAA,CAAQ,IACvB,CAAA,SAAA,CAAW,kBAAkBT,CAAa,CAAA,CAAA,CAC1C,OAAQS,YAAS,CAAA,SAAA,CAAU,MAC3B,MAAQJ,CAAAA,CACV,CAAC,CAAA,CACD,MAAMV,CAAI,CAAA,IAAA,CAAK,QAAQa,CAAK,CAAA,IAAA,CAAK,EAAE,EACrC,CAAC,CACDL,CAAAA,CAAAA,CAAS,GACP,WACA,CAAA,CAAE,UAAW,CAAkBH,eAAAA,EAAAA,CAAa,EAAG,CAC/C,CAAA,MAAOI,GAAU,CACf,GAAM,CAAE,MAAAC,CAAAA,CAAAA,CAAQ,MAAAK,CAAO,CAAA,OAAA,CAAAJ,CAAQ,CAAIF,CAAAA,CAAAA,CAAM,OACnC,CAAA,CAAE,UAAAO,CAAW,CAAA,QAAA,CAAAC,EAAU,KAAAC,CAAAA,CAAM,EAAI,CACrC,SAAA,CAAW,GACX,CAAA,QAAA,CAAU,EACV,KAAO,CAAA,CAAA,CAAA,CACP,GAAGX,CACL,CAAA,CAEMY,EAAO,MAAOC,CAAAA,CAAkBC,CAAkB,GAAA,CACtD,MAAMrB,CAAI,CAAA,IAAA,CAAK,IAAIe,CAAO,CAAA,CAAE,SAAAK,CAAU,CAAA,IAAA,CAAAC,CAAK,CAAC,CAAA,CACxCH,GACF,OAAQ,CAAA,GAAA,CAAI,iBAAiBE,CAAQ,CAAA,QAAA,EAAWC,CAAI,CAAE,CAAA,EAE1D,CAEA,CAAA,GAAI,CACF,MAAMF,CAAAA,CAAK,EAAG,iCAAiC,CAAA,CAC/C,GAAM,CAAE,IAAA,CAAMP,CAAK,CAAA,CAAI,MAAMZ,CAAI,CAAA,KAAA,CAAM,IAAIU,CAAM,CAAA,CAE3C,CAAE,IAAMY,CAAAA,CAAa,CAAI,CAAA,MAAMtB,EAAI,YAAa,CAAA,IAAA,CAAK,CACzD,UAAYW,CAAAA,CACd,CAAC,CACKY,CAAAA,CAAAA,CAAyB,CAAC,CAACD,CAAAA,CAAa,KAC3CE,CAAMA,EAAAA,CAAAA,CAAE,MAAQ,iBACnB,CAAA,CAEMC,EAAsB,CAAC,CAACH,CAAa,CAAA,IAAA,CACxCE,GAAMA,CAAE,CAAA,GAAA,GAAQ,cACnB,CAEME,CAAAA,CAAAA,CAAS,MAAMC,4BAAclB,CAAAA,CAAK,CAGlCmB,CAAAA,CAAAA,CAAa,MAAOrB,CAAcsB,CAAAA,CAAAA,GAAqB,CAC3D,GAAI,CACF,GAAM,CAAE,IAAA,CAAMC,CAAQ,CAAA,CAAI,MAAM9B,CAAI,CAAA,KAAA,CAAM,aAAa,CACrD,OAAA,CAAS,CACP,GAAGO,CACL,EACA,IAAAsB,CAAAA,CACF,CAAC,CACD,CAAA,OAAOC,CACT,CAASN,MAAAA,CAAAA,CAAG,CACV,OAAQ,OAAA,CAAA,GAAA,CAAIA,CAAG,CAAA,CAAE,MAAO,IAAK,CAAC,EACvB,CAAE,IAAA,CAAM,EAAG,MAAQ,CAAA,EAAG,CAC/B,CACF,CAEA,CAAA,MAAML,EAAK,CAAG,CAAA,gCAAgC,EAC9C,IAAMY,CAAAA,CAAU,MAAMzB,CAAAA,CAAYoB,EAAQ,CACxC,GAAGnB,EACH,MAAAG,CAAAA,CAAAA,CACA,QAASE,CAAK,CAAA,GAAA,CACd,uBAAAW,CACA,CAAA,UAAA,CAAAK,CACF,CAAC,CAAA,CAED,MAAMT,CAAK,CAAA,CAAA,CAAG,mCAAmC,CACjD,CAAA,IAAMa,CAAW,CAAA,MAAMC,EACrBxB,CAAM,CAAA,OAAA,CAAQ,cACdG,CACAmB,CAAAA,CAAAA,CACAN,CACF,CAQA,CAAA,GAJA,MAAMzB,CAAAA,CAAI,MAAM,MAAOY,CAAAA,CAAAA,CAAK,GAAI,CAC9B,UAAA,CAAYoB,EAAS,EACvB,CAAC,CAEG,CAAA,CAACA,EAAS,MAAUA,EAAAA,CAAAA,CAAS,OAAO,MAAW,GAAA,CAAA,CACjD,MAAM,IAAI,KAAA,CAAM,6BAA6B,CAG/C,CAAA,MAAMb,EAAK,EAAI,CAAA,kCAAkC,EAEjD,MAAM,IAAI,QAASe,CAAY,EAAA,CAC7B,UAAWA,CAAAA,CAAAA,CAAShC,CAAuB,EAC7C,CAAC,EAED,IAAWiC,IAAAA,CAAAA,IAASH,EAAS,MACtBD,CAAAA,CAAAA,CAAQI,CAAM,CAAA,IAAI,GAGvB,MAAMC,2BAAAA,CACJD,EAAM,EACNJ,CAAAA,CAAAA,CAAQI,EAAM,IAAI,CAAA,CAAE,IAAK,CAAA,GAAA,CAAIE,CAAmB,CAChD,CAAA,MAAOC,EAAWC,CAAMC,CAAAA,CAAAA,GAAe,CACrC,MAAMrB,CAAAA,CACJ,KAAK,GAAI,CAAA,EAAA,CAAI,KAAK,KAAM,CAAA,EAAA,CAAK,IAAMoB,CAAOC,CAAAA,CAAAA,CAAW,CAAC,CACtD,CAAA,kCACF,EACF,CACF,EAIEjB,CACF,EAAA,MAAMkB,EAAoBT,CAAUD,CAAAA,CAAO,EAG7C,MAAM/B,CAAAA,CAAI,KAAM,CAAA,MAAA,CAAOY,EAAK,EAAI,CAAA,CAC9B,OAAQ,UACV,CAAC,EACD,MAAMZ,CAAAA,CAAI,IAAK,CAAA,QAAA,CAASe,EAAO,CAC7B,IAAA,CAAM,gDACN,OAAS,CAAA,CACP,QAAS,kCACX,CACF,CAAC,EACH,CAAA,MAASS,EAAG,CACNN,CAAAA,EACF,QAAQ,GAAI,CAAA,CAAA,iBAAA,EAAoBM,EAAE,OAAO,CAAA,CAAE,CAE7C,CAAA,MAAMxB,EAAI,KAAM,CAAA,MAAA,CAAOU,EAAQ,CAC7B,MAAA,CAAQ,QACV,CAAC,CAAA,CACD,MAAMV,CAAAA,CAAI,KAAK,IAAKe,CAAAA,CAAAA,CAAO,CACzB,IAAM,CAAA,qCAAA,CACN,QAAS,CACP,OAAA,CAASS,CAAE,CAAA,OACb,CACF,CAAC,EACH,CACF,CACF,EACF,EAGF,eAAeS,CAAAA,CACbS,EACA9B,CACA+B,CAAAA,CAAAA,CACAlB,EAC4B,CAC5B,IAAMmB,EAAiBC,CACrBjC,CAAAA,CAAAA,CAAK,KACLA,CAAK,CAAA,OAAA,CACL8B,CACAC,CAAAA,CAAAA,CACAlB,CACF,CACM,CAAA,CAAE,KAAMO,CAAS,CAAA,CAAI,MAAMhC,CAAI,CAAA,SAAA,CAAU,MAAO4C,CAAAA,CAAc,EACpE,OAAOZ,CACT,CAEA,SAASa,CAAAA,CACPC,EACAnC,CACA+B,CAAAA,CAAAA,CACAC,CACAlB,CAAAA,CAAAA,CAC+B,CAC/B,IAAMsB,CAAAA,CAAS,OAAO,OAAQJ,CAAAA,CAAe,EAAE,GAAI,CAAA,CAAC,CAACK,CAAWb,CAAAA,CAAK,IAC5Dc,CAAeD,CAAAA,CAAAA,CAAWb,EAAOV,CAAmB,CAC5D,EAED,OAAO,CACL,IAAM,CAAA,CAAA,OAAA,EAAUqB,CAAI,CACpB,CAAA,CAAA,MAAA,CAAQ,CAAC,MAAM,CAAA,CACf,QAAAnC,CACA,CAAA,aAAA,CAAA+B,CACA,CAAA,MAAA,CAAAK,EACA,UAAY,CAAA,CAACjC,aAAS,kBAAmB,CAAA,mBAAmB,CAC9D,CACF,CAEA,SAASmC,CAAAA,CACPH,EACA,CAAE,OAAA,CAAAhB,EAAS,YAAAoB,CAAAA,CAAa,EACxBzB,CACsB,CAAA,CAEtB,OAAO0B,+BAAqB,CAAA,CAC1B,KAAAL,CACA,CAAA,IAAA,CAAMM,mBAAQN,CAAI,CAAA,CAClB,OAAQO,CAAa,CAAA,CAAE,IAAMvB,CAAAA,CAAAA,CAAS,aAAAoB,CAAa,CAAC,EACpD,qBAAuBzB,CAAAA,CACzB,CAAC,CACH,CAEA,SAAS6B,CAAAA,CAAaC,EAAqB,CACzC,OAAOA,EAAI,IAAK,EAAA,CAAE,QAAQ,IAAM,CAAA,WAAW,CAAE,CAAA,OAAA,CAAQ,MAAO,UAAU,CACxE,CAEA,SAASlB,CAAAA,CAAoBmB,EAAkD,CAC7E,IAAMC,EAAmB,EAAC,CAC1B,QAAWF,CAAOC,IAAAA,CAAAA,CACZA,EAAO,cAAeD,CAAAA,CAAG,IAC3BE,CAAiBH,CAAAA,CAAAA,CAAaC,CAAG,CAAC,EAAIC,CAAOD,CAAAA,CAAG,GAGpD,OAAOE,CACT,CAEO,SAASJ,CAAAA,CAAa,CAC3B,IAAA,CAAAK,EACA,YAAAR,CAAAA,CAAAA,CAAe,EACjB,CAAA,CAGwB,CACtB,OAAOQ,CAAAA,CAAK,GAAKH,CAAAA,CAAAA,GACVA,IAAKA,CAAM,CAAA,EAAA,CAAA,CACZ,OAAOA,CAAQ,EAAA,QAAA,GACjBA,EAAM,MAAOA,CAAAA,CAAG,GAElBA,CAAMA,CAAAA,CAAAA,CAAI,MAEH,CAAA,CACL,IAAAA,CACA,CAAA,KAAA,CAAOA,EACP,WAAaL,CAAAA,CAAAA,GAAeK,CAAG,CAAA,EAAK,GACpC,IAAM,CAAA,QACR,EACD,CACH,CAEA,eAAed,CACbT,CAAAA,CAAAA,CACAW,EACe,CACf,MAAM,QAAQ,GACZX,CAAAA,CAAAA,CAAS,OAAO,GAAI,CAAA,MAAOG,GAAU,CACnC,GAAM,CAAE,QAAA,CAAAwB,CAAS,CAAIhB,CAAAA,CAAAA,CAAgBR,EAAM,IAAI,CAAA,CAC/C,MAAMnC,CAAI,CAAA,MAAA,CAAO,YAAYmC,CAAM,CAAA,EAAA,CAAI,CACrC,QAAAwB,CAAAA,CACF,CAAC,EACH,CAAC,CACH,EACF","file":"index.cjs","sourcesContent":["import { Flatfile, FlatfileClient } from '@flatfile/api'\nimport type { FlatfileListener } from '@flatfile/listener'\nimport {\n createAllRecords,\n slugify,\n normalizeSheetConfig,\n} from '@flatfile/util-common'\nimport { getFileBuffer } from '@flatfile/util-file-buffer'\nconst api = new FlatfileClient()\n\nconst WORKBOOK_CREATION_DELAY = 3_000\n\nexport const Extractor = (\n fileExt: string | RegExp,\n extractorType: string,\n parseBuffer: (\n buffer: Buffer,\n options: any\n ) => WorkbookCapture | Promise<WorkbookCapture>,\n options?: Record<string, any>\n) => {\n return (listener: FlatfileListener) => {\n listener.on('file:created', async (event) => {\n const { fileId, spaceId } = event.context\n const { data: file } = await api.files.get(fileId)\n if (file.mode === 'export') {\n return\n }\n\n if (typeof fileExt === 'string' && !file.name.endsWith(fileExt)) {\n return\n }\n\n if (fileExt instanceof RegExp && !fileExt.test(file.name)) {\n return\n }\n const jobs = await api.jobs.create({\n type: Flatfile.JobType.File,\n operation: `extract-plugin-${extractorType}`,\n status: Flatfile.JobStatus.Ready,\n source: fileId,\n })\n await api.jobs.execute(jobs.data.id)\n })\n listener.on(\n 'job:ready',\n { operation: `extract-plugin-${extractorType}` },\n async (event) => {\n const { fileId, jobId, spaceId } = event.context\n const { chunkSize, parallel, debug } = {\n chunkSize: 5_000,\n parallel: 1,\n debug: false,\n ...options,\n }\n\n const tick = async (progress: number, info?: string) => {\n await api.jobs.ack(jobId, { progress, info })\n if (debug) {\n console.log(`Job progress: ${progress}, Info: ${info}`)\n }\n }\n\n try {\n await tick(1, 'plugins.extraction.retrieveFile')\n const { data: file } = await api.files.get(fileId)\n\n const { data: entitlements } = await api.entitlements.list({\n resourceId: spaceId,\n })\n const headerSelectionEnabled = !!entitlements.find(\n (e) => e.key === 'headerSelection'\n )\n\n const sourceEditorEnabled = !!entitlements.find(\n (e) => e.key === 'sourceEditor'\n )\n\n const buffer = await getFileBuffer(event)\n\n // inject the getHeaders function into the options\n const getHeaders = async (options: any, data: string[][]) => {\n try {\n const { data: headers } = await api.files.detectHeader({\n options: {\n ...options,\n },\n data,\n })\n return headers\n } catch (e) {\n console.dir(e, { depth: null })\n return { skip: 0, header: [] }\n }\n }\n\n await tick(3, 'plugins.extraction.parseSheets')\n const capture = await parseBuffer(buffer, {\n ...options,\n fileId,\n fileExt: file.ext,\n headerSelectionEnabled,\n getHeaders,\n })\n\n await tick(5, 'plugins.extraction.createWorkbook')\n const workbook = await createWorkbook(\n event.context.environmentId,\n file,\n capture,\n sourceEditorEnabled\n )\n\n // Add workbook to file so if the extraction fails and the file is deleted, the workbook is also deleted\n // instead of being orphaned\n await api.files.update(file.id, {\n workbookId: workbook.id,\n })\n\n if (!workbook.sheets || workbook.sheets.length === 0) {\n throw new Error('plugins.extraction.noSheets')\n }\n\n await tick(10, 'plugins.extraction.addingRecords')\n\n await new Promise((resolve) => {\n setTimeout(resolve, WORKBOOK_CREATION_DELAY)\n })\n\n for (const sheet of workbook.sheets) {\n if (!capture[sheet.name]) {\n continue\n }\n await createAllRecords(\n sheet.id,\n capture[sheet.name].data.map(normalizeRecordKeys),\n async (_progress, part, totalParts) => {\n await tick(\n Math.min(99, Math.round(10 + 90 * (part / totalParts))),\n 'plugins.extraction.addingRecords'\n )\n }\n )\n }\n\n // After all records are added, update the sheet metadata\n if (headerSelectionEnabled) {\n await updateSheetMetadata(workbook, capture)\n }\n\n await api.files.update(file.id, {\n status: 'complete',\n })\n await api.jobs.complete(jobId, {\n info: 'files.uploadFile.popovers.extractionCompleted',\n outcome: {\n message: 'plugins.extraction.extractedFile',\n },\n })\n } catch (e) {\n if (debug) {\n console.log(`Extractor error: ${e.message}`)\n }\n await api.files.update(fileId, {\n status: 'failed',\n })\n await api.jobs.fail(jobId, {\n info: 'plugins.extraction.extractionFailed',\n outcome: {\n message: e.message,\n },\n })\n }\n }\n )\n }\n}\n\nasync function createWorkbook(\n environmentId: string,\n file: Flatfile.File_,\n workbookCapture: WorkbookCapture,\n sourceEditorEnabled: boolean\n): Promise<Flatfile.Workbook> {\n const workbookConfig = getWorkbookConfig(\n file.name,\n file.spaceId,\n environmentId,\n workbookCapture,\n sourceEditorEnabled\n )\n const { data: workbook } = await api.workbooks.create(workbookConfig)\n return workbook\n}\n\nfunction getWorkbookConfig(\n name: string,\n spaceId: string,\n environmentId: string,\n workbookCapture: WorkbookCapture,\n sourceEditorEnabled: boolean\n): Flatfile.CreateWorkbookConfig {\n const sheets = Object.entries(workbookCapture).map(([sheetName, sheet]) => {\n return getSheetConfig(sheetName, sheet, sourceEditorEnabled)\n })\n\n return {\n name: `[file] ${name}`,\n labels: ['file'], // we use this on the backend to add the EXTRACTED_FROM_SOURCE treatment to the workbook on previous versions of the plugin\n spaceId,\n environmentId,\n sheets,\n treatments: [Flatfile.WorkbookTreatments.ExtractedFromSource],\n }\n}\n\nfunction getSheetConfig(\n name: string,\n { headers, descriptions }: SheetCapture,\n sourceEditorEnabled: boolean\n): Flatfile.SheetConfig {\n // normalizeSheetConfig the keys to ensure they are unique and valid\n return normalizeSheetConfig({\n name,\n slug: slugify(name),\n fields: keysToFields({ keys: headers, descriptions }),\n allowAdditionalFields: sourceEditorEnabled,\n })\n}\n\nfunction normalizeKey(key: string): string {\n return key.trim().replace(/%/g, '_PERCENT_').replace(/\\$/g, '_DOLLAR_')\n}\n\nfunction normalizeRecordKeys(record: Flatfile.RecordData): Flatfile.RecordData {\n const normalizedRecord = {} as Flatfile.RecordData\n for (const key in record) {\n if (record.hasOwnProperty(key)) {\n normalizedRecord[normalizeKey(key)] = record[key]\n }\n }\n return normalizedRecord\n}\n\nexport function keysToFields({\n keys,\n descriptions = {},\n}: {\n keys: string[]\n descriptions?: Record<string, string>\n}): Flatfile.Property[] {\n return keys.map((key) => {\n if (!key) key = ''\n if (typeof key !== 'string') {\n key = String(key)\n }\n key = key.trim()\n\n return {\n key,\n label: key,\n description: descriptions?.[key] || '',\n type: 'string',\n }\n })\n}\n\nasync function updateSheetMetadata(\n workbook: Flatfile.Workbook,\n workbookCapture: WorkbookCapture\n): Promise<void> {\n await Promise.all(\n workbook.sheets.map(async (sheet) => {\n const { metadata } = workbookCapture[sheet.name]\n await api.sheets.updateSheet(sheet.id, {\n metadata,\n })\n })\n )\n}\n/**\n * Generic structure for capturing a workbook\n */\nexport type WorkbookCapture = Record<string, SheetCapture>\n\n/**\n * Generic structure for capturing a sheet\n */\nexport type SheetCapture = {\n headers: string[]\n descriptions?: Record<string, null | string> | null\n data: Flatfile.RecordData[]\n metadata?: { rowHeaders: number[] }\n}\n"]}