// genaiscript
// A CLI for GenAIScript, a generative AI scripting framework.
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
import { FILES_NOT_FOUND_ERROR_CODE, GENAI_ANY_REGEX, HTTPS_REGEX, JSON5_REGEX, OUTPUT_FILENAME, TRACE_FILENAME, YAML_REGEX, GenerationStats, MarkdownTrace, PLimitPromiseQueue, YAMLStringify, applyModelOptions, ensureDotGenaiscriptPath, filePathOrUrlToWorkspaceFile, getConvertDir, hash, link, logError, logInfo, logVerbose, measure, normalizeInt, toSignal, tracePromptResult, tryReadText, unfence, writeText, resolveRuntimeHost, } from "@genaiscript/core";
import { buildProject } from "@genaiscript/core";
import { run } from "@genaiscript/api";
import { setupTraceWriting } from "@genaiscript/core";
import { dirname, join, resolve } from "node:path";
import { createCancellationController } from "@genaiscript/core";
/**
 * Converts a set of files based on a specified script, applying transformations and generating output files.
 *
 * Resolves files matching the provided patterns, filters them based on exclusion and rewrite options,
 * applies AI transformations using the specified script, and writes results to output files.
 *
 * @param scriptId - Identifier of the script to use for file conversion.
 * @param fileGlobs - Array of file paths or glob patterns identifying files to be transformed.
 * @param options - Additional configuration for the conversion process:
 * - `suffix` - Custom suffix for the output files.
 * - `rewrite` - If true, overwrites existing files instead of creating new ones with a suffix.
 * - `cancelWord` - A keyword that cancels processing if found in the result.
 * - `concurrency` - Number of files to process concurrently.
 * - `excludedFiles` - Array of file paths or glob patterns to exclude from processing.
 * - `ignoreGitIgnore` - If true, ignores .gitignore rules during file resolution.
 * - `runTrace` - If false, disables trace generation for individual files.
 * - `outputTrace` - If false, disables output trace generation for individual files.
 * - Other options passed to the transformation process.
 *
 * @returns A map of input filename to generated output text; files that errored,
 * were cancelled, or hit the cancel word are absent or `undefined`.
 *
 * @throws Error if the script is not found or no files match the given patterns.
 */
export async function convertFiles(scriptId, fileGlobs, options) {
    const { excludedFiles, rewrite, cancelWord, concurrency, runTrace, outputTrace, ...restOptions } = options || {};
    await ensureDotGenaiscriptPath();
    const runtimeHost = resolveRuntimeHost();
    const canceller = createCancellationController();
    const cancellationToken = canceller.token;
    const signal = toSignal(cancellationToken);
    applyModelOptions(options, "cli");
    const convertDir = getConvertDir(scriptId);
    const convertTrace = new MarkdownTrace({
        cancellationToken,
        dir: convertDir,
    });
    const outTraceFilename = await setupTraceWriting(convertTrace, "trace", join(convertDir, TRACE_FILENAME), { ignoreInner: true });
    const outTraceDir = dirname(outTraceFilename);
    const fail = (msg, _exitCode, _url) => {
        throw new Error(msg);
    };
    // the script id may itself be a path to a .genai.* file; load it into the project
    const toolFiles = [];
    if (GENAI_ANY_REGEX.test(scriptId))
        toolFiles.push(scriptId);
    const prj = await buildProject({
        toolFiles,
    });
    const script = prj.scripts.find((t) => t.id === scriptId ||
        (t.filename && GENAI_ANY_REGEX.test(scriptId) && resolve(t.filename) === resolve(scriptId)));
    if (!script) {
        convertTrace.error(`script ${scriptId} not found`);
        throw new Error(`script ${scriptId} not found`);
    }
    // derive the output extension from the script's declared response shape
    const { responseType, responseSchema } = script;
    const ext = responseType === "yaml"
        ? ".yaml"
        : responseType === "text"
            ? ".txt"
            : /^json/.test(responseType) || responseSchema
                ? ".json"
                : ".md";
    const suffix = options?.suffix || `.genai.${script.id}${ext}`;
    // lowercased once for the case-insensitive "already converted" check below
    // (fix: the suffix may contain uppercase, e.g. a mixed-case script id,
    // in which case the previous check against the raw suffix never matched)
    const suffixLower = suffix.toLocaleLowerCase();
    convertTrace.heading(2, `convert with ${script.id}`);
    convertTrace.itemValue(`suffix`, suffix);
    // resolve files
    // fix: optional-chain `options` for consistency with `options?.suffix` above;
    // `options || {}` at the top shows undefined options are explicitly allowed
    const applyGitIgnore = options?.ignoreGitIgnore !== true && script.ignoreGitIgnore !== true;
    const resolvedFiles = new Set();
    for (let arg of fileGlobs) {
        if (HTTPS_REGEX.test(arg)) {
            resolvedFiles.add(arg);
            continue;
        }
        const stats = await runtimeHost.statFile(arg);
        if (stats?.type === "directory")
            arg = join(arg, "**", "*");
        const ffs = await runtimeHost.findFiles(arg, {
            applyGitIgnore,
        });
        if (!ffs?.length) {
            return fail(`no files matching ${arg} under ${process.cwd()} (all files might have been ignored)`, FILES_NOT_FOUND_ERROR_CODE);
        }
        for (const file of ffs) {
            // skip outputs of a previous run unless rewriting in place
            if (!rewrite && file.toLocaleLowerCase().endsWith(suffixLower))
                continue;
            resolvedFiles.add(filePathOrUrlToWorkspaceFile(file));
        }
    }
    if (excludedFiles?.length) {
        for (const arg of excludedFiles) {
            const ffs = await runtimeHost.findFiles(arg);
            for (const f of ffs)
                resolvedFiles.delete(filePathOrUrlToWorkspaceFile(f));
        }
    }
    // processing
    const files = Array.from(resolvedFiles).map((filename) => ({ filename }));
    const stats = [];
    const usage = new GenerationStats("convert");
    const results = {};
    const p = new PLimitPromiseQueue(normalizeInt(concurrency) || 1);
    await p.mapAll(files, async (file) => {
        if (cancellationToken.isCancellationRequested)
            return;
        const outf = rewrite ? file.filename : file.filename + suffix;
        logInfo(`${file.filename} -> ${outf}`);
        // short stable hash keeps per-file trace folders unique and compact
        const fni = await hash(file.filename, { length: 7 });
        const fileOutTrace = runTrace === false ? undefined : join(outTraceDir, fni, TRACE_FILENAME);
        const fileOutOutput = outputTrace === false ? undefined : join(outTraceDir, fni, OUTPUT_FILENAME);
        const fileTrace = convertTrace.startTraceDetails(file.filename);
        if (fileOutTrace) {
            convertTrace.item(link("trace", fileOutTrace));
            logVerbose(`trace: ${fileOutTrace}`);
        }
        if (fileOutOutput) {
            convertTrace.item(link("output", fileOutOutput));
            logVerbose(`output: ${fileOutOutput}`);
        }
        const m = measure("convert");
        try {
            // apply AI transformation
            const result = await run(script.filename, file.filename, {
                label: file.filename,
                outTrace: fileOutTrace,
                outOutput: fileOutOutput,
                runTrace: false,
                outputTrace: false,
                signal,
                ...restOptions,
            });
            tracePromptResult(fileTrace, result);
            const { error, json } = result || {};
            if (error) {
                logError(error);
                fileTrace.error(undefined, error);
                return;
            }
            // fix: the destructuring above allows a nullish result, but the
            // dereferences below did not guard against it (TypeError)
            if (!result)
                return;
            if (result.status === "cancelled") {
                logVerbose(`cancelled ${file.filename}`);
                fileTrace.item(`cancelled`);
                return;
            }
            // LLM canceled
            if (cancelWord && result.text?.includes(cancelWord)) {
                logVerbose(`cancel word detected, skipping ${file.filename}`);
                fileTrace.itemValue(`cancel word detected`, cancelWord);
                return;
            }
            const end = m();
            usage.addUsage({
                total_tokens: result.usage?.total,
                prompt_tokens: result.usage?.prompt,
                completion_tokens: result.usage?.completion,
            }, end);
            if (result.usage)
                stats.push(result.usage);
            logVerbose(Object.keys(result.fileEdits || {}).join("\n"));
            // structured extraction: prefer an edit targeting the input file itself
            const fileEdit = Object.entries(result.fileEdits || {}).find(([fn]) => resolve(fn) === resolve(file.filename))?.[1];
            // fix: escape the dots so only a literal ".genai." prefix is stripped
            // (unescaped `.` matched any character)
            const suffixext = suffix.replace(/^\.genai\./i, ".");
            const fence = result.fences.find((f) => f.language === suffixext);
            let text = undefined;
            if (fileEdit?.after) {
                if (fileEdit.validation?.schemaError) {
                    logError("schema validation error");
                    logVerbose(fileEdit.validation.schemaError);
                    fileTrace.error(undefined, fileEdit.validation.schemaError);
                    return;
                }
                text = fileEdit.after;
            }
            if (text === undefined && fence) {
                if (fence.validation?.schemaError) {
                    logError("schema validation error");
                    logVerbose(fence.validation.schemaError);
                    fileTrace.error(undefined, fence.validation.schemaError);
                    return;
                }
                text = fence.content;
            }
            if (text === undefined)
                text = unfence(result.text, "markdown");
            // normalize JSON
            if (JSON5_REGEX.test(outf))
                text = JSON.stringify(json, null, 2);
            else if (YAML_REGEX.test(outf))
                text = YAMLStringify(json);
            // save file, only when the content actually changed
            const existing = await tryReadText(outf);
            if (text && existing !== text) {
                await writeText(outf, text);
            }
            results[file.filename] = text;
        }
        catch (error) {
            logError(error);
            fileTrace.error(undefined, error);
        }
        finally {
            logVerbose("");
            fileTrace.endDetails();
        }
    });
    usage.log();
    usage.trace(convertTrace);
    convertTrace.table(stats);
    logVerbose(`trace: ${outTraceFilename}`);
    // fix: the per-file results were collected but never surfaced; returning
    // them is backward compatible (the previous return value was undefined)
    return results;
}
//# sourceMappingURL=convert.js.map