genaiscript
Version:
A CLI for GenAIScript, a generative AI scripting framework.
274 lines (265 loc) • 11 kB
JavaScript
import { delay, uniq, uniqBy, chunk, groupBy } from "es-toolkit";
import { z } from "zod";
export { delay, uniq, uniqBy, z, chunk, groupBy };
/**
 * Classifies data into one of the provided labels by running an LLM prompt.
 *
 * @param {string|Function} text - Data to classify. A function is awaited with the
 *   inline prompt context so it can add its own content; any other value is
 *   registered as `DATA`.
 * @param {Record<string, string>} labels - Label id -> description. Ids are trimmed
 *   and lower-cased before use.
 * @param {object} [options]
 * @param {boolean} [options.other] - Adds an extra `other` label.
 * @param {boolean} [options.explanations] - Asks for a one-sentence justification
 *   (and raises `maxTokens` from 1 to 100).
 * @param {object} [options.ctx] - Prompt context; falls back to `env.generator`.
 *   Every remaining option is forwarded to `runPrompt` and may override the defaults.
 * @returns {Promise<{label: string, entropy: *, logprob: *, probPercent: *,
 *   answer: string, logprobs: (object|undefined), usage: *}>}
 * @throws {Error} When fewer than two labels (including `other`) are available.
 */
export async function classify(text, labels, options) {
    // `ctx` is extracted here so it is not forwarded to runPrompt through `rest`.
    const { other, explanations, ctx: ctxOption, ...rest } = options || {};
    const entries = Object.entries({
        ...labels,
        ...(other
            ? {
                other: "This label is used when the text does not fit any of the available labels.",
            }
            : {}),
    }).map(([k, v]) => [k.trim().toLowerCase(), v]);
    if (entries.length < 2)
        throw new Error("classify must have at least two label (including other)");
    const choices = entries.map(([k]) => k);
    // Normalizing ids can produce duplicates; logprobs are keyed by the distinct ids.
    const allChoices = [...new Set(choices)];
    const ctx = ctxOption || env.generator;
    const res = await ctx.runPrompt(async (_) => {
        _.$`## Expert Classifier
You are a specialized text classification system.
Your task is to carefully read and classify any input text or image into one
of the predefined labels below.
For each label, you will find a short description. Use these descriptions to guide your decision.
`.role("system");
        _.$`## Labels
You must classify the data as one of the following labels.
${entries.map(([id, descr]) => `- Label '${id}': ${descr}`).join("\n")}
## Output
${explanations ? "Provide a single short sentence justification for your choice." : ""}
Output the label as a single word on the last line (do not emit "Label").
`;
        _.fence(`- Label 'yes': funny
- Label 'no': not funny
DATA:
Why did the chicken cross the road? Because moo.
Output:
${explanations ? "It's a classic joke but the ending does not relate to the start of the joke." : ""}
no
`, { language: "example" });
        if (typeof text === "function")
            await text(_);
        else
            _.def("DATA", text);
    }, {
        model: "classify",
        choices: choices,
        label: `classify ${choices.join(", ")}`,
        logprobs: true,
        topLogprobs: Math.min(3, choices.length),
        maxTokens: explanations ? 100 : 1,
        system: [
            "system.output_plaintext",
            "system.safety_jailbreak",
            "system.safety_harmful_content",
            "system.safety_protected_material",
        ],
        ...rest,
    });
    const answer = res.text.toLowerCase();
    // The label mentioned last in the answer wins; when no label is found at all,
    // every position is -1 and the first label is returned.
    const indexes = choices.map((l) => answer.lastIndexOf(l));
    const labeli = indexes.reduce((best, position, i) => (position > indexes[best] ? i : best), 0);
    const label = entries[labeli][0];
    // Pair each choice with its label BEFORE dropping entries without a numeric
    // logprob, so a dropped entry cannot shift the remaining ones onto the wrong label.
    // NOTE(review): assumes res.choices follows the order of the distinct labels - confirm.
    const logprobs = res.choices
        ? Object.fromEntries(res.choices
            .map((c, i) => [allChoices[i], c])
            .filter(([, c]) => !isNaN(c?.logprob)))
        : undefined;
    const logprob = logprobs?.[label];
    const usage = res.usage;
    return {
        label,
        entropy: logprob?.entropy,
        logprob: logprob?.logprob,
        probPercent: logprob?.probPercent,
        answer,
        logprobs,
        usage,
    };
}
/**
 * Registers a chat participant that asks the model to improve its answer.
 *
 * The participant logs the round number and appends `instructions` to the chat
 * on each of its first `repeat` invocations, then does nothing.
 *
 * @param {object} [options]
 * @param {number} [options.repeat=1] - Number of improvement rounds.
 * @param {string} [options.instructions="Make it better!"] - Text sent each round.
 * @param {object} [options.ctx] - Prompt context; falls back to `env.generator`.
 * @returns {void}
 */
export function makeItBetter(options) {
    const settings = options || {};
    const { repeat = 1, instructions = "Make it better!" } = settings;
    const generator = options?.ctx || env.generator;
    // Counts every invocation of the participant, including the ignored ones.
    let invocations = 0;
    generator.defChatParticipant((chat) => {
        const completedRounds = invocations;
        invocations += 1;
        if (!(completedRounds < repeat)) {
            return;
        }
        chat.console.log(`make it better (round ${invocations})`);
        chat.$`${instructions}`;
    });
}
/**
 * Converts unstructured data to JSON matching a schema by running an LLM prompt.
 *
 * @param {*|Function} data - Source to convert. A function is awaited with the
 *   inline prompt context so it can add its own content; any other value is
 *   registered as `SOURCE`.
 * @param {object} itemSchema - Schema of one item.
 * @param {object} [options]
 * @param {object} [options.ctx] - Prompt context; defaults to `env.generator`.
 * @param {boolean} [options.multiple] - When truthy, the response schema is an
 *   array of `itemSchema`.
 * @param {string|Function} [options.instructions] - Extra instructions, as text or
 *   as a function awaited with the prompt context.
 * @param {string} [options.label="cast text to schema"] - Label of the prompt run.
 *   Remaining options are forwarded to `runPrompt`.
 * @returns {Promise<{text: string, data: *}|{text: string, error: (string|undefined)}>}
 *   `data` when the response carries a JSON value, otherwise the error message.
 */
export async function cast(data, itemSchema, options) {
    const { ctx = env.generator, multiple, instructions, label = `cast text to schema`, ...rest } = options || {};
    const responseSchema = multiple
        ? {
            type: "array",
            items: itemSchema,
        }
        : itemSchema;
    const res = await ctx.runPrompt(async (_) => {
        if (typeof data === "function")
            await data(_);
        else
            _.def("SOURCE", data);
        _.defSchema("SCHEMA", responseSchema, { format: "json" });
        _.$`You are an expert data converter specializing in transforming unstructured text source into structured data.
Convert the contents of <SOURCE> to JSON using schema <SCHEMA>.
- Treat images as <SOURCE> and convert them to JSON.
- Make sure the returned data matches the schema in <SCHEMA>.`;
        if (typeof instructions === "string")
            _.$`${instructions}`;
        else if (typeof instructions === "function")
            await instructions(_);
    }, {
        responseType: "json",
        responseSchema,
        ...rest,
        label,
    });
    const text = parsers.unfence(res.text, "json");
    // Nullish check rather than truthiness: 0, false and "" are valid JSON results
    // and must be returned as data instead of being reported as an error.
    if (res.json !== undefined && res.json !== null)
        return { text, data: res.json };
    return { text, error: res.error?.message };
}
/**
 * Converts a PDF to markdown, one page at a time, by running an LLM prompt per page.
 *
 * Each page prompt receives the extracted page text (`PAGE`), the rendered page
 * image when available, and the markdown of the two previously converted pages
 * (`PREVIOUS_PAGES`).
 *
 * @param {*} file - PDF passed to `parsers.PDF`.
 * @param {object} [options]
 * @param {object} [options.ctx] - Prompt context; defaults to `env.generator`.
 * @param {string} [options.label="markdownify PDF"] - Label prefix of each page run.
 * @param {string} [options.model="ocr"] - Model used for the page prompts.
 * @param {string} [options.responseType="markdown"] - Response type of the page prompts.
 * @param {boolean} [options.systemSafety=true] - Accepted but not used by this
 *   function; it is extracted so it is not forwarded with the remaining options.
 * @param {string|Function} [options.instructions] - Extra instructions, as text or
 *   as a function awaited with the prompt context.
 *   Remaining options are forwarded to both `parsers.PDF` and `runPrompt`.
 * @returns {Promise<{pages: Array, images: Array, markdowns: string[]}>}
 * @throws {Error} When a page prompt reports an error.
 */
export async function markdownifyPdf(file, options) {
    const { ctx = env.generator, label = `markdownify PDF`, model = "ocr", responseType = "markdown", systemSafety = true, instructions, ...rest } = options || {};
    const { pages, images = [] } = await parsers.PDF(file, {
        ...rest,
        renderAsImage: true,
    });
    const markdowns = [];
    // Pages are converted sequentially because each prompt depends on the
    // markdown produced for the preceding pages.
    for (let i = 0; i < pages.length; ++i) {
        const page = pages[i];
        const image = images[i];
        const res = await ctx.runPrompt(async (_) => {
            const previousPages = markdowns.slice(-2).join("\n\n");
            if (previousPages.length)
                _.def("PREVIOUS_PAGES", previousPages);
            if (page)
                _.def("PAGE", page);
            if (image)
                _.defImages(image, { autoCrop: true, greyscale: true });
            _.$`You are an expert at converting PDFs to markdown.
## Task
Your task is to analyze the image and extract textual content in markdown format.
The image is a screenshot of the current page in the PDF document.
We used pdfjs-dist to extract the text of the current page in <PAGE>, use it to help with the conversion.
The text from the previous pages is in <PREVIOUS_PAGES>, use it to ensure consistency in the conversion.
## Instructions
- Ensure markdown text formatting for the extracted text is applied properly by analyzing the image.
- Do not change any content in the original extracted text while applying markdown formatting and do not repeat the extracted text.
- Preserve markdown text formatting if present such as horizontal lines, header levels, footers, bullet points, links/urls, or other markdown elements.
- Extract source code snippets in code fences.
- Do not omit any textual content from the markdown formatted extracted text.
- Do not generate page breaks
- Do not repeat the <PREVIOUS_PAGES> content.
- Do not include any additional explanations or comments in the markdown formatted extracted text.
`;
            // Must target the inline prompt (`_`), not the enclosing script's `$`.
            if (image)
                _.$`- For images, generate a short alt-text description.`;
            if (typeof instructions === "string")
                _.$`${instructions}`;
            else if (typeof instructions === "function")
                await instructions(_);
        }, {
            ...rest,
            model,
            label: `${label}: page ${i + 1}`,
            responseType,
            system: ["system", "system.assistant"],
        });
        if (res.error)
            throw new Error(res.error?.message, { cause: res.error });
        markdowns.push(res.text);
    }
    return { pages, images, markdowns };
}
/**
 * Renders the files matching a glob as a text tree.
 *
 * @param {string} glob - Glob passed to `workspace.findFiles` or `workspace.grep`.
 * @param {object} [options]
 * @param {string[]} [options.frontmatter] - Frontmatter fields to show next to
 *   `.md`/`.mdx` files.
 * @param {Function} [options.preview] - `(file, stats) => value`, awaited per file;
 *   its result (unless `undefined`) is shown next to the file.
 * @param {*} [options.query] - When set, files come from `workspace.grep(query, glob)`
 *   instead of `workspace.findFiles(glob)`.
 * @param {boolean} [options.size] - Shows the file size in kb.
 * @param {*} [options.ignore] - Forwarded to `workspace.findFiles`.
 *   Remaining options are forwarded to `workspace.grep`.
 * @returns {Promise<string>} One line per tree node, each terminated by a newline.
 */
export async function fileTree(glob, options) {
    const { frontmatter, preview, query, size, ignore, ...rest } = options || {};
    // File content is only needed to read frontmatter or to build a preview.
    const readText = !!(frontmatter || preview);
    const files = query
        ? (await workspace.grep(query, glob, { ...rest, readText })).files
        : await workspace.findFiles(glob, {
            ignore,
            readText,
        });
    const tree = await buildTree(files);
    return renderTree(tree);

    // Collects the stats and the display metadata of a single file.
    async function describeFile(file) {
        const { filename } = file;
        const stats = await workspace.stat(filename);
        const metadata = [];
        if (frontmatter && /\.mdx?$/i.test(filename)) {
            const fm = parsers.frontmatter(file) || {};
            metadata.push(...frontmatter
                .map((field) => [field, fm[field]])
                .filter(([, v]) => v !== undefined)
                .map(([k, v]) => `${k}: ${JSON.stringify(v)}`));
        }
        if (preview)
            metadata.push(await preview(file, stats));
        return {
            metadata: metadata
                .filter((f) => f !== undefined)
                .map((s) => String(s))
                .map((s) => s.replace(/\n/g, " "))
                .join(", "),
            stats,
        };
    }

    // Builds the nested node list from the flat file list.
    async function buildTree(files) {
        const root = [];
        for (const file of files) {
            const parts = file.filename.split(/[/\\]/);
            let currentLevel = root;
            for (let index = 0; index < parts.length; index++) {
                const part = parts[index];
                const isLeaf = index === parts.length - 1;
                let node = currentLevel.find((n) => n.filename === part);
                if (!node) {
                    // Only the last path segment is the file itself. Directory
                    // segments carry no stats or metadata, so they never show the
                    // size/frontmatter/preview of the first file found beneath them
                    // and `preview`/`stat` run once per file.
                    node = isLeaf
                        ? { filename: part, ...(await describeFile(file)) }
                        : { filename: part, metadata: "" };
                    currentLevel.push(node);
                }
                if (!isLeaf) {
                    if (!node.children) {
                        node.children = [];
                    }
                    currentLevel = node.children;
                }
            }
        }
        return root;
    }

    // Renders one tree level and, recursively, the levels below it.
    function renderTree(nodes, prefix = "") {
        return nodes
            .map((node, index) => {
                const isLast = index === nodes.length - 1;
                const newPrefix = prefix + (isLast ? " " : "│ ");
                const children = node.children?.length
                    ? renderTree(node.children, newPrefix)
                    : "";
                const meta = [
                    // Nodes without stats (directories) have no size to report.
                    size && node.stats
                        ? `${Math.ceil(node.stats.size / 1000)}kb `
                        : undefined,
                    node.metadata,
                ]
                    .filter((s) => !!s)
                    .join(", ");
                return `${prefix}${isLast ? "└ " : "├ "}${node.filename}${meta ? ` - ${meta}` : ""}\n${children}`;
            })
            .join("");
    }
}
/**
 * Extracts the readable content of a browser page.
 *
 * The callback handed to `page.evaluate` imports `@mozilla/readability` from
 * the skypack CDN and parses a deep clone of `document`.
 *
 * @param {object} page - Object exposing an `evaluate(fn)` method.
 * @returns {Promise<*>} Whatever `page.evaluate` resolves to, i.e. the result of
 *   `Readability#parse()`.
 */
export async function parseReadableContent(page) {
    return await page.evaluate(async () => {
        const { Readability } = await import("https://cdn.skypack.dev/@mozilla/readability");
        // Work on a clone of the document rather than on the document itself.
        const snapshot = document.cloneNode(true);
        const reader = new Readability(snapshot);
        return reader.parse();
    });
}