UNPKG

genaiscript

Version:

A CLI for GenAIScript, a generative AI scripting framework.

274 lines (265 loc) 11 kB
import { delay, uniq, uniqBy, chunk, groupBy } from "es-toolkit";
import { z } from "zod";
export { delay, uniq, uniqBy, z, chunk, groupBy };

/**
 * Classifies a piece of data into one of the supplied labels by running an
 * inner prompt and scanning the model answer for label names.
 *
 * The label whose name occurs last in the lower-cased answer wins. When no
 * label name occurs in the answer at all, the first label is returned.
 *
 * @param {string|Function} text - Data to classify. A function is awaited with
 *   the inner prompt context; anything else is passed to `_.def("DATA", text)`.
 * @param {Object<string, string>} labels - Map of label name to description.
 *   Names are trimmed and lower-cased before use.
 * @param {object} [options]
 * @param {boolean} [options.other] - Adds an extra `other` catch-all label.
 * @param {boolean} [options.explanations] - Asks for a one-sentence
 *   justification and raises `maxTokens` from 1 to 100.
 * @param {object} [options.ctx] - Prompt context to run on; falls back to the
 *   ambient `env.generator`.
 *   Every remaining option is forwarded to `ctx.runPrompt` and, being spread
 *   last, overrides the defaults set here.
 * @returns {Promise<{label: string, entropy: *, logprob: *, probPercent: *,
 *   answer: string, logprobs: (Object|undefined), usage: *}>}
 * @throws {Error} When fewer than two labels (including `other`) are available.
 */
export async function classify(text, labels, options) {
    // `ctx` is pulled out so it is not forwarded to runPrompt through `rest`.
    const { other, explanations, ctx: ctxOption, ...rest } = options || {};
    const labelled = other
        ? {
              ...labels,
              other: "This label is used when the text does not fit any of the available labels.",
          }
        : { ...labels };
    const entries = Object.entries(labelled).map(([k, v]) => [k.trim().toLowerCase(), v]);
    if (entries.length < 2)
        throw new Error("classify must have at least two label (including other)");
    const choices = entries.map(([k]) => k);
    const allChoices = uniq(choices);
    const ctx = ctxOption || env.generator;
    const res = await ctx.runPrompt(
        async (_) => {
            _.$`## Expert Classifier You are a specialized text classification system. Your task is to carefully read and classify any input text or image into one of the predefined labels below. For each label, you will find a short description. Use these descriptions to guide your decision. `.role("system");
            _.$`## Labels You must classify the data as one of the following labels. ${entries.map(([id, descr]) => `- Label '${id}': ${descr}`).join("\n")} ## Output ${explanations ? "Provide a single short sentence justification for your choice." : ""} Output the label as a single word on the last line (do not emit "Label"). `;
            _.fence(`- Label 'yes': funny - Label 'no': not funny DATA: Why did the chicken cross the road? Because moo. Output: ${explanations ? "It's a classic joke but the ending does not relate to the start of the joke." : ""} no `, { language: "example" });
            if (typeof text === "function") await text(_);
            else _.def("DATA", text);
        },
        {
            model: "classify",
            choices: choices,
            label: `classify ${choices.join(", ")}`,
            logprobs: true,
            topLogprobs: Math.min(3, choices.length),
            maxTokens: explanations ? 100 : 1,
            system: [
                "system.output_plaintext",
                "system.safety_jailbreak",
                "system.safety_harmful_content",
                "system.safety_protected_material",
            ],
            ...rest,
        }
    );

    // Pick the label mentioned last in the answer; ties and "not found" (-1
    // everywhere) resolve to the earliest label because the comparison is strict.
    const answer = res.text.toLowerCase();
    const indexes = choices.map((l) => answer.lastIndexOf(l));
    const labeli = indexes.reduce((best, index, i) => (index > indexes[best] ? i : best), 0);
    const label = entries[labeli][0];

    // The coercing global isNaN is kept on purpose: it also drops entries whose
    // logprob is undefined, which Number.isNaN would let through.
    // NOTE(review): the surviving entries are paired with labels by position
    // after filtering — confirm res.choices is ordered like the labels.
    const logprobs = res.choices
        ? Object.fromEntries(
              res.choices
                  .filter((c) => !isNaN(c?.logprob))
                  .map((c, i) => [allChoices[i], c])
          )
        : undefined;
    const logprob = logprobs?.[label];
    const usage = res.usage;
    return {
        label,
        entropy: logprob?.entropy,
        logprob: logprob?.logprob,
        probPercent: logprob?.probPercent,
        answer,
        logprobs,
        usage,
    };
}

/**
 * Registers a chat participant that re-prompts the model with `instructions`
 * for the first `repeat` times the participant is invoked.
 *
 * @param {object} [options]
 * @param {number} [options.repeat=1] - Number of improvement rounds.
 * @param {string} [options.instructions="Make it better!"] - Text appended on
 *   each round.
 * @param {object} [options.ctx] - Context to register on; falls back to the
 *   ambient `env.generator`.
 * @returns {void}
 */
export function makeItBetter(options) {
    const { repeat = 1, instructions = "Make it better!" } = options || {};
    const ctx = options?.ctx || env.generator;
    let round = 0;
    ctx.defChatParticipant((cctx) => {
        // `round` is incremented before logging, so the logged value is 1-based.
        if (round++ < repeat) {
            cctx.console.log(`make it better (round ${round})`);
            cctx.$`${instructions}`;
        }
    });
}

/**
 * Converts unstructured data to JSON matching a schema by running an inner
 * prompt with a JSON response type.
 *
 * @param {*|Function} data - Source data. A function is awaited with the inner
 *   prompt context; anything else is passed to `_.def("SOURCE", data)`.
 * @param {object} itemSchema - Schema of one item.
 * @param {object} [options]
 * @param {object} [options.ctx=env.generator] - Prompt context to run on.
 * @param {boolean} [options.multiple] - Wraps `itemSchema` in an array schema.
 * @param {string|Function} [options.instructions] - Extra instructions, either
 *   as text or as a function awaited with the inner prompt context.
 * @param {string} [options.label="cast text to schema"] - Prompt label.
 *   Every remaining option is forwarded to `ctx.runPrompt`.
 * @returns {Promise<{text: string, data: *}|{text: string, error: (string|undefined)}>}
 *   `data` when the result carries a truthy `json`, otherwise the error message.
 */
export async function cast(data, itemSchema, options) {
    const {
        ctx = env.generator,
        multiple,
        instructions,
        label = `cast text to schema`,
        ...rest
    } = options || {};
    const responseSchema = multiple
        ? {
              type: "array",
              items: itemSchema,
          }
        : itemSchema;
    const res = await ctx.runPrompt(
        async (_) => {
            if (typeof data === "function") await data(_);
            else _.def("SOURCE", data);
            _.defSchema("SCHEMA", responseSchema, { format: "json" });
            _.$`You are an expert data converter specializing in transforming unstructured text source into structured data. Convert the contents of <SOURCE> to JSON using schema <SCHEMA>. - Treat images as <SOURCE> and convert them to JSON. 
- Make sure the returned data matches the schema in <SCHEMA>.`;
            if (typeof instructions === "string") _.$`${instructions}`;
            else if (typeof instructions === "function") await instructions(_);
        },
        {
            responseType: "json",
            responseSchema,
            ...rest,
            label,
        }
    );
    const text = parsers.unfence(res.text, "json");
    return res.json ? { text, data: res.json } : { text, error: res.error?.message };
}

/**
 * Converts a PDF to markdown one page at a time. Each page is sent to the model
 * together with its extracted text, its rendered image (when available) and the
 * markdown of the two previously converted pages.
 *
 * @param {*} file - PDF reference handed to `parsers.PDF`.
 * @param {object} [options]
 * @param {object} [options.ctx=env.generator] - Prompt context to run on.
 * @param {string} [options.label="markdownify PDF"] - Label prefix; the page
 *   number is appended for each page.
 * @param {string} [options.model="ocr"] - Model used for each page prompt.
 * @param {string} [options.responseType="markdown"] - Response type of each
 *   page prompt.
 * @param {string|Function} [options.instructions] - Extra instructions, either
 *   as text or as a function awaited with the inner prompt context.
 *   Every remaining option is forwarded to both `parsers.PDF` and
 *   `ctx.runPrompt`.
 * @returns {Promise<{pages: Array, images: Array, markdowns: string[]}>}
 * @throws {Error} When a page prompt reports an error; the original error is
 *   attached as `cause`.
 */
export async function markdownifyPdf(file, options) {
    const {
        ctx = env.generator,
        label = `markdownify PDF`,
        model = "ocr",
        responseType = "markdown",
        // Not read below; destructuring it keeps it out of `rest`.
        systemSafety = true,
        instructions,
        ...rest
    } = options || {};
    const { pages, images = [] } = await parsers.PDF(file, {
        ...rest,
        renderAsImage: true,
    });
    const markdowns = [];
    // Pages are converted sequentially because each prompt depends on the
    // markdown produced for the preceding pages.
    for (let i = 0; i < pages.length; ++i) {
        const page = pages[i];
        const image = images[i];
        const res = await ctx.runPrompt(
            async (_) => {
                const previousPages = markdowns.slice(-2).join("\n\n");
                if (previousPages.length) _.def("PREVIOUS_PAGES", previousPages);
                if (page) _.def("PAGE", page);
                if (image) _.defImages(image, { autoCrop: true, greyscale: true });
                _.$`You are an expert at converting PDFs to markdown. ## Task Your task is to analyze the image and extract textual content in markdown format. The image is a screenshot of the current page in the PDF document. We used pdfjs-dist to extract the text of the current page in <PAGE>, use it to help with the conversion. The text from the previous pages is in <PREVIOUS_PAGES>, use it to ensure consistency in the conversion. ## Instructions - Ensure markdown text formatting for the extracted text is applied properly by analyzing the image. - Do not change any content in the original extracted text while applying markdown formatting and do not repeat the extracted text. - Preserve markdown text formatting if present such as horizontal lines, header levels, footers, bullet points, links/urls, or other markdown elements. - Extract source code snippets in code fences. 
- Do not omit any textual content from the markdown formatted extracted text. - Do not generate page breaks - Do not repeat the <PREVIOUS_PAGES> content. - Do not include any additional explanations or comments in the markdown formatted extracted text. `;
                // Written through the inner context `_` so the instruction is
                // part of this page's prompt rather than the ambient one.
                if (image) _.$`- For images, generate a short alt-text description.`;
                if (typeof instructions === "string") _.$`${instructions}`;
                else if (typeof instructions === "function") await instructions(_);
            },
            {
                ...rest,
                model,
                label: `${label}: page ${i + 1}`,
                responseType,
                system: ["system", "system.assistant"],
            }
        );
        if (res.error) throw new Error(res.error.message, { cause: res.error });
        markdowns.push(res.text);
    }
    return { pages, images, markdowns };
}

/**
 * Renders the files matching a glob (optionally narrowed by a grep query) as a
 * text tree, one line per path segment.
 *
 * Size, front-matter and preview metadata are attached to file entries only;
 * directory lines show just the directory name.
 *
 * @param {string} glob - Glob handed to `workspace.findFiles` or
 *   `workspace.grep`.
 * @param {object} [options]
 * @param {string[]} [options.frontmatter] - Front-matter fields to print for
 *   `.md` / `.mdx` files.
 * @param {Function} [options.preview] - Called as `preview(file, stats)`; its
 *   result is stringified and appended to the file's metadata.
 * @param {*} [options.query] - When truthy, files come from `workspace.grep`
 *   instead of `workspace.findFiles`.
 * @param {boolean} [options.size] - Prints the file size in kb.
 * @param {*} [options.ignore] - Forwarded to `workspace.findFiles`.
 *   Every remaining option is forwarded to `workspace.grep`.
 * @returns {Promise<string>} The rendered tree.
 */
export async function fileTree(glob, options) {
    const { frontmatter, preview, query, size, ignore, ...rest } = options || {};
    // File content is only needed when front matter or a preview is requested.
    const readText = !!(frontmatter || preview);
    const files = query
        ? (await workspace.grep(query, glob, { ...rest, readText })).files
        : await workspace.findFiles(glob, {
              ignore,
              readText,
          });
    const tree = await buildTree(files);
    return renderTree(tree);

    // Collects the stats and the single-line metadata string of one file.
    async function describeFile(file) {
        const { filename } = file;
        const stats = await workspace.stat(filename);
        const metadata = [];
        if (frontmatter && /\.mdx?$/i.test(filename)) {
            const fm = parsers.frontmatter(file) || {};
            metadata.push(
                ...frontmatter
                    .map((field) => [field, fm[field]])
                    .filter(([, v]) => v !== undefined)
                    .map(([k, v]) => `${k}: ${JSON.stringify(v)}`)
            );
        }
        if (preview) metadata.push(await preview(file, stats));
        return {
            metadata: metadata
                .filter((f) => f !== undefined)
                .map((s) => String(s).replace(/\n/g, " "))
                .join(", "),
            stats,
        };
    }

    // Builds the nested node list from the flat file list, splitting each
    // filename on forward and backward slashes.
    async function buildTree(files) {
        const root = [];
        for (const file of files) {
            const parts = file.filename.split(/[/\\]/);
            let currentLevel = root;
            for (let index = 0; index < parts.length; index++) {
                const part = parts[index];
                const isLeaf = index === parts.length - 1;
                let node = currentLevel.find((n) => n.filename === part);
                if (!node) {
                    // Only the final segment is the file itself; intermediate
                    // segments are directories and carry no file metadata.
                    node = isLeaf
                        ? { filename: part, ...(await describeFile(file)) }
                        : { filename: part, metadata: "", stats: undefined };
                    currentLevel.push(node);
                }
                if (!isLeaf) {
                    if (!node.children) {
                        node.children = [];
                    }
                    currentLevel = node.children;
                }
            }
        }
        return root;
    }

    // Renders one level of nodes and, recursively, their children.
    function renderTree(nodes, prefix = "") {
        return nodes
            .map((node, index) => {
                const isLast = index === nodes.length - 1;
                const newPrefix = prefix + (isLast ? " " : "│ ");
                const children = node.children?.length
                    ? renderTree(node.children, newPrefix)
                    : "";
                const meta = [
                    size && node.stats ? `${Math.ceil(node.stats.size / 1000)}kb ` : undefined,
                    node.metadata,
                ]
                    .filter((s) => !!s)
                    .join(", ");
                return `${prefix}${isLast ? "└ " : "├ "}${node.filename}${meta ? ` - ${meta}` : ""}\n${children}`;
            })
            .join("");
    }
}

/**
 * Extracts the readable content of a page by evaluating Mozilla Readability
 * inside the page, on a deep clone of its document.
 *
 * @param {object} page - Object exposing `evaluate(fn)`; the callback uses
 *   `document`, so it is presumably a browser page handle — confirm with callers.
 * @returns {Promise<*>} Whatever `Readability#parse` returns for the page.
 */
export async function parseReadableContent(page) {
    const results = await page.evaluate(async () => {
        // Loaded from the CDN at evaluation time, inside the page.
        const readability = await import("https://cdn.skypack.dev/@mozilla/readability");
        // Parsing runs on a clone so the live document is left untouched.
        const doc = document.cloneNode(true);
        return new readability.Readability(doc).parse();
    });
    return results;
}