UNPKG

@flatfile/util-extractor

Version:

A library containing common utilities and helpers for extractors.

1 lines 15.2 kB
{"version":3,"sources":["../src/index.ts"],"names":["api","FlatfileClient","WORKBOOK_CREATION_DELAY","Extractor","fileExt","extractorType","parseBuffer","options","listener","event","fileId","spaceId","file","jobs","Flatfile","jobId","chunkSize","parallel","debug","tick","progress","info","entitlements","headerSelectionEnabled","e","sourceEditorEnabled","customRecordsPageSize","buffer","getFileBuffer","getHeaders","data","headers","capture","workbook","createWorkbook","resolve","sheet","createAllRecords","normalizeRecordKeys","_progress","part","totalParts","updateSheetMetadata","environmentId","workbookCapture","workbookConfig","getWorkbookConfig","name","sheets","sheetName","getSheetConfig","descriptions","normalizeSheetConfig","slugify","keysToFields","record","normalizedRecord","key","normalizeKey","keys","metadata"],"mappings":";;;;;;AAUA,IAAMA,CAAM,CAAA,IAAIC,mBAEVC,CAA0B,CAAA,GAAA,CAEnBC,EAAY,CACvBC,CAAAA,CACAC,EACAC,CAIAC,CAAAA,CAAAA,GAEQC,GAA+B,CACrCA,CAAAA,CAAS,GAAG,cAAgB,CAAA,MAAOC,GAAU,CAC3C,GAAM,CAAE,MAAAC,CAAAA,CAAAA,CAAQ,OAAAC,CAAAA,CAAQ,EAAIF,CAAM,CAAA,OAAA,CAC5B,CAAE,IAAMG,CAAAA,CAAK,EAAI,MAAMZ,CAAAA,CAAI,KAAM,CAAA,GAAA,CAAIU,CAAM,CASjD,CAAA,GARIE,EAAK,IAAS,GAAA,QAAA,EAId,OAAOR,CAAY,EAAA,QAAA,EAAY,CAACQ,CAAAA,CAAK,KAAK,QAASR,CAAAA,CAAO,GAI1DA,CAAmB,YAAA,MAAA,EAAU,CAACA,CAAQ,CAAA,IAAA,CAAKQ,EAAK,IAAI,CAAA,CACtD,OAEF,IAAMC,CAAAA,CAAO,MAAMb,CAAI,CAAA,IAAA,CAAK,OAAO,CACjC,IAAA,CAAMc,YAAS,CAAA,OAAA,CAAQ,KACvB,SAAW,CAAA,CAAA,eAAA,EAAkBT,CAAa,CAC1C,CAAA,CAAA,MAAA,CAAQS,aAAS,SAAU,CAAA,KAAA,CAC3B,MAAQJ,CAAAA,CACV,CAAC,CACD,CAAA,MAAMV,EAAI,IAAK,CAAA,OAAA,CAAQa,EAAK,IAAK,CAAA,EAAE,EACrC,CAAC,EACDL,CAAS,CAAA,EAAA,CACP,YACA,CAAE,SAAA,CAAW,kBAAkBH,CAAa,CAAA,CAAG,EAC/C,MAAOI,CAAAA,EAAU,CACf,GAAM,CAAE,OAAAC,CAAQ,CAAA,KAAA,CAAAK,EAAO,OAAAJ,CAAAA,CAAQ,CAAIF,CAAAA,CAAAA,CAAM,QACnC,CAAE,SAAA,CAAAO,EAAW,QAAAC,CAAAA,CAAAA,CAAU,MAAAC,CAAM,CAAA,CAAI,CACrC,SAAA,CAAW,IACX,QAAU,CAAA,CAAA,CACV,MAAO,CACP,CAAA,CAAA,GAAGX,CACL,CAEMY,CAAAA,CAAAA,CAAO,MAAOC,CAAAA,CAAkBC,IAAkB,CACtD,MAAMrB,EAAI,IAAK,CAAA,GAAA,CAAIe,EAAO,CAAE,QAAA,CAAAK,EAAU,IAAAC,CAAAA,CAAK,CAAC,CACxCH,CAAAA,CAAAA,EACF,QAAQ,GAAI,CAAA,CAAA,cAAA,EAAiBE,CAAQ,CAAWC,QAAAA,EAAAA,CAAI,CAAE,CAAA,EAE1D,EAEA,GAAI,CACF,MAAMF,CAAK,CAAA,CAAA,CAAG,iCAAiC,CAC/C,CAAA,GAAM,CAAE,IAAA,CAAMP,CAAK,CAAI,CAAA,MAAMZ,EAAI,KAAM,CAAA,GAAA,CAAIU,CAAM,CAE3C,CAAA,CAAE,IAAMY,CAAAA,CAAa,EAAI,MAAMtB,CAAAA,CAAI,aAAa,IAAK,CAAA,CACzD,WAAYW,CACd,CAAC,EACKY,CAAyB,CAAA,CAAC,CAACD,CAAa,CAAA,IAAA,CAC3CE,GAAMA,CAAE,CAAA,GAAA,GAAQ,iBACnB,CAEMC,CAAAA,CAAAA,CAAsB,CAAC,CAACH,EAAa,IACxCE,CAAAA,CAAAA,EAAMA,EAAE,GAAQ,GAAA,cACnB,EAEME,CAAwBJ,CAAAA,CAAAA,CAAa,IACxCE,CAAAA,CAAAA,EAAMA,EAAE,GAAQ,GAAA,uBACnB,EAEMG,CAAS,CAAA,MAAMC,6BAAcnB,CAAK,CAAA,CAGlCoB,CAAa,CAAA,MAAOtB,EAAcuB,CAAqB,GAAA,CAC3D,GAAI,CACF,GAAM,CAAE,IAAMC,CAAAA,CAAQ,EAAI,MAAM/B,CAAAA,CAAI,MAAM,YAAa,CAAA,CACrD,QAAS,CACP,GAAGO,CACL,CACA,CAAA,IAAA,CAAAuB,CACF,CAAC,EACD,OAAOC,CACT,OAASP,CAAG,CAAA,CACV,eAAQ,GAAIA,CAAAA,CAAAA,CAAG,CAAE,KAAA,CAAO,IAAK,CAAC,CAAA,CACvB,CAAE,IAAM,CAAA,CAAA,CAAG,OAAQ,EAAG,CAC/B,CACF,EAEA,MAAML,CAAAA,CAAK,EAAG,gCAAgC,CAAA,CAC9C,IAAMa,CAAU,CAAA,MAAM1B,EAAYqB,CAAQ,CAAA,CACxC,GAAGpB,CACH,CAAA,MAAA,CAAAG,EACA,OAASE,CAAAA,CAAAA,CAAK,IACd,sBAAAW,CAAAA,CAAAA,CACA,UAAAM,CAAAA,CACF,CAAC,CAED,CAAA,MAAMV,EAAK,CAAG,CAAA,mCAAmC,EACjD,IAAMc,CAAAA,CAAW,MAAMC,CAAAA,CACrBzB,EAAM,OAAQ,CAAA,aAAA,CACdG,EACAoB,CACAP,CAAAA,CACF,EAQA,GAJA,MAAMzB,CAAI,CAAA,KAAA,CAAM,OAAOY,CAAK,CAAA,EAAA,CAAI,CAC9B,UAAYqB,CAAAA,CAAAA,CAAS,EACvB,CAAC,CAAA,CAEG,CAACA,CAAS,CAAA,MAAA,EAAUA,EAAS,MAAO,CAAA,MAAA,GAAW,EACjD,MAAM,IAAI,MAAM,6BAA6B,CAAA,CAG/C,MAAMd,CAAAA,CAAK,GAAI,kCAAkC,CAAA,CAEjD,MAAM,IAAI,OAAA,CAASgB,GAAY,CAC7B,UAAA,CAAWA,CAASjC,CAAAA,CAAuB,EAC7C,CAAC,CAAA,CAED,QAAWkC,CAASH,IAAAA,CAAAA,CAAS,OACtBD,CAAQI,CAAAA,CAAAA,CAAM,IAAI,CAAA,EAGvB,MAAMC,2BACJD,CAAAA,CAAAA,CAAM,GACNJ,CAAQI,CAAAA,CAAAA,CAAM,IAAI,CAAE,CAAA,IAAA,CAAK,IAAIE,CAAmB,CAAA,CAChD,MAAOC,CAAWC,CAAAA,CAAAA,CAAMC,IAAe,CACrC,MAAMtB,EACJ,IAAK,CAAA,GAAA,CAAI,EAAI,CAAA,IAAA,CAAK,MAAM,EAAK,CAAA,EAAA,EAAMqB,EAAOC,CAAW,CAAA,CAAC,EACtD,kCACF,EACF,CACA,CAAA,CACE,SAAUf,CAAuB,EAAA,QAAA,EAAU,QAC7C,CACF,CAAA,CAIEH,GACF,MAAMmB,CAAAA,CAAoBT,CAAUD,CAAAA,CAAO,EAG7C,MAAMhC,CAAAA,CAAI,MAAM,MAAOY,CAAAA,CAAAA,CAAK,GAAI,CAC9B,MAAA,CAAQ,UACV,CAAC,CAAA,CACD,MAAMZ,CAAI,CAAA,IAAA,CAAK,SAASe,CAAO,CAAA,CAC7B,KAAM,+CACN,CAAA,OAAA,CAAS,CACP,OAAA,CAAS,kCACX,CACF,CAAC,EACH,CAASS,MAAAA,CAAAA,CAAG,CACNN,CACF,EAAA,OAAA,CAAQ,GAAI,CAAA,CAAA,iBAAA,EAAoBM,EAAE,OAAO,CAAA,CAAE,EAE7C,MAAMxB,CAAAA,CAAI,MAAM,MAAOU,CAAAA,CAAAA,CAAQ,CAC7B,MAAA,CAAQ,QACV,CAAC,CAAA,CACD,MAAMV,CAAI,CAAA,IAAA,CAAK,KAAKe,CAAO,CAAA,CACzB,KAAM,qCACN,CAAA,OAAA,CAAS,CACP,OAASS,CAAAA,CAAAA,CAAE,OACb,CACF,CAAC,EACH,CACF,CACF,EACF,EAGF,eAAeU,CACbS,CAAAA,CAAAA,CACA/B,EACAgC,CACAnB,CAAAA,CAAAA,CAC4B,CAC5B,IAAMoB,CAAAA,CAAiBC,CACrBlC,CAAAA,CAAAA,CAAK,KACLA,CAAK,CAAA,OAAA,CACL+B,EACAC,CACAnB,CAAAA,CACF,EACM,CAAE,IAAA,CAAMQ,CAAS,CAAA,CAAI,MAAMjC,CAAI,CAAA,SAAA,CAAU,OAAO6C,CAAc,CAAA,CACpE,OAAOZ,CACT,CAEA,SAASa,CACPC,CAAAA,CAAAA,CACApC,EACAgC,CACAC,CAAAA,CAAAA,CACAnB,EAC+B,CAC/B,IAAMuB,EAAS,MAAO,CAAA,OAAA,CAAQJ,CAAe,CAAA,CAAE,IAAI,CAAC,CAACK,EAAWb,CAAK,CAAA,GAC5Dc,EAAeD,CAAWb,CAAAA,CAAAA,CAAOX,CAAmB,CAC5D,EAED,OAAO,CACL,KAAM,CAAUsB,OAAAA,EAAAA,CAAI,GACpB,MAAQ,CAAA,CAAC,MAAM,CAAA,CACf,QAAApC,CACA,CAAA,aAAA,CAAAgC,EACA,MAAAK,CAAAA,CAAAA,CACA,WAAY,CAAClC,YAAAA,CAAS,mBAAmB,mBAAmB,CAC9D,CACF,CAEA,SAASoC,EACPH,CACA,CAAA,CAAE,QAAAhB,CAAS,CAAA,YAAA,CAAAoB,CAAa,CAAA,CACxB1B,EACsB,CAEtB,OAAO2B,gCAAqB,CAC1B,IAAA,CAAAL,EACA,IAAMM,CAAAA,kBAAAA,CAAQN,CAAI,CAAA,CAClB,OAAQO,CAAa,CAAA,CAAE,KAAMvB,CAAS,CAAA,YAAA,CAAAoB,CAAa,CAAC,CAAA,CACpD,qBAAuB1B,CAAAA,CACzB,CAAC,CACH,CAEA,SAASa,CAAoBiB,CAAAA,CAAAA,CAAkD,CAC7E,IAAMC,CAAAA,CAAmB,EACzB,CAAA,IAAA,IAAWC,KAAOF,CACZ,CAAA,MAAA,CAAO,OAAOA,CAAQE,CAAAA,CAAG,IAC3BD,CAAiBE,CAAAA,uBAAAA,CAAaD,CAAG,CAAC,EAAIF,CAAOE,CAAAA,CAAG,GAGpD,OAAOD,CACT,CAEO,SAASF,CAAAA,CAAa,CAC3B,IAAA,CAAAK,EACA,YAAAR,CAAAA,CAAAA,CAAe,EACjB,CAAA,CAGwB,CACtB,OAAOQ,CAAAA,CAAK,GAAKF,CAAAA,CAAAA,GACVA,IAAKA,CAAM,CAAA,EAAA,CAAA,CACZ,OAAOA,CAAQ,EAAA,QAAA,GACjBA,EAAM,MAAOA,CAAAA,CAAG,GAElBA,CAAMA,CAAAA,CAAAA,CAAI,MAEH,CAAA,CACL,IAAAA,CACA,CAAA,KAAA,CAAOA,EACP,WAAaN,CAAAA,CAAAA,GAAeM,CAAG,CAAA,EAAK,GACpC,IAAM,CAAA,QACR,EACD,CACH,CAEA,eAAef,CACbT,CAAAA,CAAAA,CACAW,EACe,CACf,MAAM,QAAQ,GACZX,CAAAA,CAAAA,CAAS,OAAO,GAAI,CAAA,MAAOG,GAAU,CACnC,GAAM,CAAE,QAAA,CAAAwB,CAAS,CAAIhB,CAAAA,CAAAA,CAAgBR,EAAM,IAAI,CAAA,CAC/C,MAAMpC,CAAI,CAAA,MAAA,CAAO,YAAYoC,CAAM,CAAA,EAAA,CAAI,CACrC,QAAAwB,CAAAA,CACF,CAAC,EACH,CAAC,CACH,EACF","file":"index.browser.cjs","sourcesContent":["import { Flatfile, FlatfileClient } from '@flatfile/api'\nimport type { FlatfileListener } from '@flatfile/listener'\nimport {\n createAllRecords,\n normalizeKey,\n normalizeSheetConfig,\n slugify,\n} from '@flatfile/util-common'\nimport { getFileBuffer } from '@flatfile/util-file-buffer'\n\nconst api = new FlatfileClient()\n\nconst WORKBOOK_CREATION_DELAY = 3_000\n\nexport const Extractor = (\n fileExt: string | RegExp,\n extractorType: string,\n parseBuffer: (\n buffer: Buffer,\n options: any\n ) => WorkbookCapture | Promise<WorkbookCapture>,\n options?: Record<string, any>\n) => {\n return (listener: FlatfileListener) => {\n listener.on('file:created', async (event) => {\n const { fileId, spaceId } = event.context\n const { data: file } = await api.files.get(fileId)\n if (file.mode === 'export') {\n return\n }\n\n if (typeof fileExt === 'string' && !file.name.endsWith(fileExt)) {\n return\n }\n\n if (fileExt instanceof RegExp && !fileExt.test(file.name)) {\n return\n }\n const jobs = await api.jobs.create({\n type: Flatfile.JobType.File,\n operation: `extract-plugin-${extractorType}`,\n status: Flatfile.JobStatus.Ready,\n source: fileId,\n })\n await api.jobs.execute(jobs.data.id)\n })\n listener.on(\n 'job:ready',\n { operation: `extract-plugin-${extractorType}` },\n async (event) => {\n const { fileId, jobId, spaceId } = event.context\n const { chunkSize, parallel, debug } = {\n chunkSize: 5_000,\n parallel: 1,\n debug: false,\n ...options,\n }\n\n const tick = async (progress: number, info?: string) => {\n await api.jobs.ack(jobId, { progress, info })\n if (debug) {\n console.log(`Job progress: ${progress}, Info: ${info}`)\n }\n }\n\n try {\n await tick(1, 'plugins.extraction.retrieveFile')\n const { data: file } = await api.files.get(fileId)\n\n const { data: entitlements } = await api.entitlements.list({\n resourceId: spaceId,\n })\n const headerSelectionEnabled = !!entitlements.find(\n (e) => e.key === 'headerSelection'\n )\n\n const sourceEditorEnabled = !!entitlements.find(\n (e) => e.key === 'sourceEditor'\n )\n\n const customRecordsPageSize = entitlements.find(\n (e) => e.key === 'customRecordsPageSize'\n )\n\n const buffer = await getFileBuffer(event)\n\n // inject the getHeaders function into the options\n const getHeaders = async (options: any, data: string[][]) => {\n try {\n const { data: headers } = await api.files.detectHeader({\n options: {\n ...options,\n },\n data,\n })\n return headers\n } catch (e) {\n console.dir(e, { depth: null })\n return { skip: 0, header: [] }\n }\n }\n\n await tick(3, 'plugins.extraction.parseSheets')\n const capture = await parseBuffer(buffer, {\n ...options,\n fileId,\n fileExt: file.ext,\n headerSelectionEnabled,\n getHeaders,\n })\n\n await tick(5, 'plugins.extraction.createWorkbook')\n const workbook = await createWorkbook(\n event.context.environmentId,\n file,\n capture,\n sourceEditorEnabled\n )\n\n // Add workbook to file so if the extraction fails and the file is deleted, the workbook is also deleted\n // instead of being orphaned\n await api.files.update(file.id, {\n workbookId: workbook.id,\n })\n\n if (!workbook.sheets || workbook.sheets.length === 0) {\n throw new Error('plugins.extraction.noSheets')\n }\n\n await tick(10, 'plugins.extraction.addingRecords')\n\n await new Promise((resolve) => {\n setTimeout(resolve, WORKBOOK_CREATION_DELAY)\n })\n\n for (const sheet of workbook.sheets) {\n if (!capture[sheet.name]) {\n continue\n }\n await createAllRecords(\n sheet.id,\n capture[sheet.name].data.map(normalizeRecordKeys),\n async (_progress, part, totalParts) => {\n await tick(\n Math.min(99, Math.round(10 + 90 * (part / totalParts))),\n 'plugins.extraction.addingRecords'\n )\n },\n {\n pageSize: customRecordsPageSize?.metadata?.pageSize,\n }\n )\n }\n\n // After all records are added, update the sheet metadata\n if (headerSelectionEnabled) {\n await updateSheetMetadata(workbook, capture)\n }\n\n await api.files.update(file.id, {\n status: 'complete',\n })\n await api.jobs.complete(jobId, {\n info: 'files.uploadFile.popovers.extractionCompleted',\n outcome: {\n message: 'plugins.extraction.extractedFile',\n },\n })\n } catch (e) {\n if (debug) {\n console.log(`Extractor error: ${e.message}`)\n }\n await api.files.update(fileId, {\n status: 'failed',\n })\n await api.jobs.fail(jobId, {\n info: 'plugins.extraction.extractionFailed',\n outcome: {\n message: e.message,\n },\n })\n }\n }\n )\n }\n}\n\nasync function createWorkbook(\n environmentId: string,\n file: Flatfile.File_,\n workbookCapture: WorkbookCapture,\n sourceEditorEnabled: boolean\n): Promise<Flatfile.Workbook> {\n const workbookConfig = getWorkbookConfig(\n file.name,\n file.spaceId,\n environmentId,\n workbookCapture,\n sourceEditorEnabled\n )\n const { data: workbook } = await api.workbooks.create(workbookConfig)\n return workbook\n}\n\nfunction getWorkbookConfig(\n name: string,\n spaceId: string,\n environmentId: string,\n workbookCapture: WorkbookCapture,\n sourceEditorEnabled: boolean\n): Flatfile.CreateWorkbookConfig {\n const sheets = Object.entries(workbookCapture).map(([sheetName, sheet]) => {\n return getSheetConfig(sheetName, sheet, sourceEditorEnabled)\n })\n\n return {\n name: `[file] ${name}`,\n labels: ['file'], // we use this on the backend to add the EXTRACTED_FROM_SOURCE treatment to the workbook on previous versions of the plugin\n spaceId,\n environmentId,\n sheets,\n treatments: [Flatfile.WorkbookTreatments.ExtractedFromSource],\n }\n}\n\nfunction getSheetConfig(\n name: string,\n { headers, descriptions }: SheetCapture,\n sourceEditorEnabled: boolean\n): Flatfile.SheetConfig {\n // normalizeSheetConfig the keys to ensure they are unique and valid\n return normalizeSheetConfig({\n name,\n slug: slugify(name),\n fields: keysToFields({ keys: headers, descriptions }),\n allowAdditionalFields: sourceEditorEnabled,\n })\n}\n\nfunction normalizeRecordKeys(record: Flatfile.RecordData): Flatfile.RecordData {\n const normalizedRecord = {} as Flatfile.RecordData\n for (const key in record) {\n if (Object.hasOwn(record, key)) {\n normalizedRecord[normalizeKey(key)] = record[key]\n }\n }\n return normalizedRecord\n}\n\nexport function keysToFields({\n keys,\n descriptions = {},\n}: {\n keys: string[]\n descriptions?: Record<string, string>\n}): Flatfile.Property[] {\n return keys.map((key) => {\n if (!key) key = ''\n if (typeof key !== 'string') {\n key = String(key)\n }\n key = key.trim()\n\n return {\n key,\n label: key,\n description: descriptions?.[key] || '',\n type: 'string',\n }\n })\n}\n\nasync function updateSheetMetadata(\n workbook: Flatfile.Workbook,\n workbookCapture: WorkbookCapture\n): Promise<void> {\n await Promise.all(\n workbook.sheets.map(async (sheet) => {\n const { metadata } = workbookCapture[sheet.name]\n await api.sheets.updateSheet(sheet.id, {\n metadata,\n })\n })\n )\n}\n/**\n * Generic structure for capturing a workbook\n */\nexport type WorkbookCapture = Record<string, SheetCapture>\n\n/**\n * Generic structure for capturing a sheet\n */\nexport type SheetCapture = {\n headers: string[]\n descriptions?: Record<string, null | string> | null\n data: Flatfile.RecordData[]\n metadata?: { rowHeaders: number[] }\n}\n"]}