@budibase/server
Version:
Budibase Web Server
100 lines (89 loc) • 2.78 kB
text/typescript
import { ai, LLMPromptResponse } from "@budibase/pro"
import * as automationUtils from "../../automationUtils"
import {
DocumentSourceType,
ExtractFileDataStepInputs,
ExtractFileDataStepOutputs,
} from "@budibase/types"
import { objectStore } from "@budibase/backend-core"
import { Readable } from "stream"
import fetch from "node-fetch"
/**
 * Fetches the file at `fileUrl` and passes the response body stream to
 * `llm.uploadFile`, resolving to whatever that upload call resolves to.
 *
 * @param fileUrl URL the file is downloaded from.
 * @param fileType Optional type hint. It is used as the filename extension
 *   and as the fallback content type when the response has no
 *   `content-type` header.
 * @param llm LLM client whose `uploadFile` receives the stream.
 * @returns The string resolved by `llm.uploadFile`.
 * @throws Error when the HTTP response is not OK.
 */
async function processUrlFile(
  fileUrl: string,
  fileType: string | undefined,
  llm: ai.LLM
): Promise<string> {
  const response = await fetch(fileUrl)
  if (!response.ok) {
    throw new Error(`Failed to fetch file from URL: ${response.statusText}`)
  }

  const stream = response.body as Readable
  // Prefer the content type reported by the server; fall back to the
  // caller-supplied hint when the header is missing or empty.
  const contentType = response.headers.get("content-type") || fileType
  // Only append an extension when a type was actually supplied. Interpolating
  // an undefined fileType would otherwise produce "document.undefined".
  const filename = fileType ? `document.${fileType}` : "document"
  return await llm.uploadFile(stream, filename, contentType)
}
/**
 * Opens a read stream for the attachment's `key` in the APPS object store
 * bucket and forwards it to `llm.uploadFile`.
 *
 * @param attachment Attachment descriptor; its `key`, `name` and `extension`
 *   properties are read here.
 * @param llm LLM client whose `uploadFile` receives the stream.
 * @returns The string resolved by `llm.uploadFile`.
 */
async function processAttachmentFile(
  attachment: any,
  llm: ai.LLM
): Promise<string> {
  const { APPS: appsBucket } = objectStore.ObjectStoreBuckets
  const fileStream = await objectStore.getReadStream(
    appsBucket,
    attachment.key!
  )

  const { name, extension } = attachment
  // Attachments without a name are uploaded under a generic one.
  const uploadName = name || "document"
  const uploadResult = await llm.uploadFile(fileStream, uploadName, extension)
  return uploadResult
}
/**
 * Parses the LLM response message as JSON and returns the value stored under
 * its top-level `data` key.
 *
 * @param llmResponse Response whose `message` is expected to be a JSON string
 *   shaped like `{ "data": ... }`.
 * @returns The object (or array) found under `data`.
 * @throws Error when the message is not valid JSON, or when the parsed value
 *   has no object/array under `data`. Without this check a missing `data` key
 *   would resolve to `undefined` and be reported as a successful extraction.
 */
async function parseAIResponse(
  llmResponse: LLMPromptResponse
): Promise<Record<string, any>> {
  let parsed: unknown
  try {
    parsed = JSON.parse(llmResponse.message)
  } catch (err: unknown) {
    console.error("Error parsing JSON response:", err)
    throw new Error("Could not parse AI response as valid JSON.")
  }

  // JSON.parse can legitimately return primitives or null, so the envelope
  // must be checked before reading `data` from it.
  const payload =
    typeof parsed === "object" && parsed !== null
      ? (parsed as { data?: unknown }).data
      : undefined

  if (typeof payload !== "object" || payload === null) {
    console.error("AI response is missing a usable data field:", parsed)
    throw new Error('AI response did not contain a "data" object.')
  }
  return payload as Record<string, any>
}
/**
 * Runs the extract-file-data step: uploads the input file to the LLM, prompts
 * it with the supplied schema, and returns the parsed result.
 *
 * Failures are never thrown to the caller. Any error raised while uploading,
 * prompting or parsing is logged and reported as `success: false`, with the
 * message produced by `automationUtils.getError`.
 *
 * @param inputs Step inputs; `file` and `schema` are required, and `source`
 *   must agree with the runtime type of `file` (string for URL, non-string
 *   for attachment).
 * @returns `{ data, success: true }` on success, otherwise
 *   `{ success: false, data: {}, response }`.
 */
export async function run({
  inputs,
}: {
  inputs: ExtractFileDataStepInputs
}): Promise<ExtractFileDataStepOutputs> {
  const { file, schema, source, fileType } = inputs

  if (!(file && schema)) {
    return {
      success: false,
      data: {},
      response:
        "Extract Document Data AI Step Failed: File and Schema are required.",
    }
  }

  try {
    const llm = await ai.getLLMOrThrow()
    const sourceMismatch = () =>
      new Error("Invalid file input – source and file type do not match")

    // A string file is only valid for the URL source; anything else is only
    // valid for the attachment source.
    let uploadedRef: string
    if (typeof file === "string") {
      if (source !== DocumentSourceType.URL) {
        throw sourceMismatch()
      }
      uploadedRef = await processUrlFile(file, fileType, llm)
    } else {
      if (source !== DocumentSourceType.ATTACHMENT) {
        throw sourceMismatch()
      }
      uploadedRef = await processAttachmentFile(file, llm)
    }

    const llmResponse = await llm.prompt(
      ai.extractFileData(schema, uploadedRef)
    )
    const data = await parseAIResponse(llmResponse)
    return {
      data,
      success: true,
    }
  } catch (err: any) {
    console.error("Document extraction error:", err)
    return {
      success: false,
      data: {},
      response: automationUtils.getError(err),
    }
  }
}