UNPKG

@promptbook/node

Version:

Promptbook: Run AI apps in plain human language across multiple models and platforms

webgptorg/promptbook

1,169 lines (1,090 loc) • 460 kB

JavaScript

import colors from 'colors'; import { stat, access, constants, readFile, writeFile, readdir, mkdir, unlink } from 'fs/promises'; import { basename, join, dirname, isAbsolute, relative } from 'path'; import spaceTrim, { spaceTrim as spaceTrim$1 } from 'spacetrim'; import JSZip from 'jszip'; import { randomBytes } from 'crypto'; import { Subject } from 'rxjs'; import { forTime } from 'waitasecond'; import { parse, unparse } from 'papaparse'; import hexEncoder from 'crypto-js/enc-hex'; import sha256 from 'crypto-js/sha256'; import { SHA256 } from 'crypto-js'; import { lookup, extension } from 'mime-types'; import { spawn } from 'child_process'; import * as dotenv from 'dotenv'; // ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten /** * The version of the Book language * * @generated * @see https://github.com/webgptorg/book */ const BOOK_LANGUAGE_VERSION = '1.0.0'; /** * The version of the Promptbook engine * * @generated * @see https://github.com/webgptorg/promptbook */ const PROMPTBOOK_ENGINE_VERSION = '0.101.0-4'; /** * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine * Note: [💞] Ignore a discrepancy between file name and entity name */ /** * Returns the same value that is passed as argument. * No side effects. 
/**
 * Identity helper: hands back exactly the value it was given.
 * No side effects.
 *
 * Note: It can be useful for:
 *
 * 1) Leveling indentation
 * 2) Putting always-true or always-false conditions without getting eslint errors
 *
 * @param value any value
 * @returns the very same value
 * @private within the repository
 */
function just(value) {
    // Note: `undefined` needs no dedicated branch - returning the argument already covers it
    return value;
}

/**
 * Name for the Promptbook
 *
 * TODO: [🗽] Unite branding and make single place for it
 *
 * @public exported from `@promptbook/core`
 */
const NAME = `Promptbook`;

/**
 * Email of the responsible person
 *
 * @public exported from `@promptbook/core`
 */
const ADMIN_EMAIL = 'pavol@ptbk.io';

/**
 * Name of the responsible person for the Promptbook on GitHub
 *
 * @public exported from `@promptbook/core`
 */
const ADMIN_GITHUB_NAME = 'hejny';
// <- TODO: [🐊] Pick the best claim

/**
 * When the title is not provided, the default title is used
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;

/**
 * When the title of task is not provided, the default title is used
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_TASK_TITLE = `Task`;

/**
 * When the pipeline is flat and no name of return parameter is provided, this name is used
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_BOOK_OUTPUT_PARAMETER_NAME = 'result';

/**
 * Maximum file size limit
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB

/**
 * Threshold value that determines when a dataset is considered "big"
 * and may require special handling or optimizations
 *
 * For example, when error occurs in one item of the big dataset, it will not fail the whole pipeline
 *
 * @public exported from `@promptbook/core`
 */
const BIG_DATASET_TRESHOLD = 50;

/**
 * Placeholder text used to represent a placeholder value of failed operation
 *
 * @public exported from `@promptbook/core`
 */
const FAILED_VALUE_PLACEHOLDER = '!?';
// <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
/**
 * The maximum number of iterations for loops
 *
 * Note: Used by `validatePipeline` as a safety stop while resolving task dependencies
 *
 * @private within the repository - too low-level in comparison with other `MAX_...`
 */
const LOOP_LIMIT = 1000;
/**
 * Strings to represent various values in the context of parameter values
 *
 * Note: Each entry is a human-readable label for a value that has no useful textual form on its own
 *
 * @public exported from `@promptbook/utils`
 */
const VALUE_STRINGS = {
    empty: '(nothing; empty string)',
    null: '(no value; null)',
    undefined: '(unknown value; undefined)',
    nan: '(not a number; NaN)',
    infinity: '(infinity; ∞)',
    negativeInfinity: '(negative infinity; -∞)',
    unserializable: '(unserializable value)',
    circular: '(circular JSON)',
};
/**
 * Small number limit
 *
 * @public exported from `@promptbook/utils`
 */
const SMALL_NUMBER = 0.001;
/**
 * Short time interval to prevent race conditions in milliseconds
 *
 * @private within the repository - too low-level in comparison with other `MAX_...`
 */
const IMMEDIATE_TIME = 10;
/**
 * The maximum length of the (generated) filename
 *
 * @public exported from `@promptbook/core`
 */
const MAX_FILENAME_LENGTH = 30;
/**
 * Strategy for caching the intermediate results for knowledge sources
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_INTERMEDIATE_FILES_STRATEGY = 'HIDE_AND_KEEP'; // <- TODO: [😡] Change to 'VISIBLE'
/**
 * The maximum number of (LLM) tasks running in parallel
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
/**
 * The maximum number of attempts to execute LLM task before giving up
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_MAX_EXECUTION_ATTEMPTS = 7; // <- TODO: [🤹‍♂️]
// <- TODO: [🕝] Make also `BOOKS_DIRNAME_ALTERNATIVES`
// TODO: Just `.promptbook` in config, hardcode subfolders like `download-cache` or `execution-cache`
/**
 * Where to store the temporary downloads
 *
 * Note: When the folder does not exist, it is created recursively
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
/**
 * Where to store the scrape cache
 *
 * Note: When the folder does not exist, it is created recursively
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
/*
TODO: [🌃]
/**
 * Id of application for the wizard when using remote server
 *
 * @public exported from `@promptbook/core`
 * /
ex-port const WIZARD_APP_ID: string_app_id = 'wizard';
*/
/**
 * The name of the built pipeline collection made by CLI `ptbk make` and for lookup in `createCollectionFromDirectory`
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_PIPELINE_COLLECTION_BASE_FILENAME = `index`;
// <- TODO: [🧜‍♂️]
/**
 * Default settings for parsing and generating CSV files in Promptbook.
 *
 * Note: The object is frozen, so the shared defaults cannot be changed at runtime
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_CSV_SETTINGS = Object.freeze({
    delimiter: ',',
    quoteChar: '"',
    newline: '\n',
    skipEmptyLines: true,
});
/**
 * Controls whether verbose logging is enabled by default throughout the application.
 *
 * @public exported from `@promptbook/core`
 */
let DEFAULT_IS_VERBOSE = false;
/**
 * Controls whether auto-installation of dependencies is enabled by default.
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_IS_AUTO_INSTALLED = false;
/**
 * Default simulated duration for a task in milliseconds (used for progress reporting)
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_TASK_SIMULATED_DURATION_MS = 5 * 60 * 1000; // 5 minutes
/**
 * API request timeout in milliseconds
 * Can be overridden via API_REQUEST_TIMEOUT environment variable
 *
 * Note: In this bundle the parsed value is not bound to any name, the statement is kept only as it was emitted
 * Note: The radix is passed explicitly so that the environment value is always read as a decimal number
 *
 * @public exported from `@promptbook/core`
 */
Number.parseInt(process.env.API_REQUEST_TIMEOUT || '90000', 10);
/**
 * Switch telling whether problems found in the pipeline logic are raised as errors.
 * When true, the pipeline logic is checked for consistency.
 *
 * @private within the repository
 */
const IS_PIPELINE_LOGIC_VALIDATED = just(
    /**/
    // Note: In normal situations, we check the pipeline logic:
    true);

/**
 * Note: [💞] Ignore a discrepancy between file name and entity name
 * TODO: [🧠][🧜‍♂️] Maybe join remoteServerUrl and path into single value
 */

/**
 * Builds the URL of a pre-filled GitHub issue describing the given error
 *
 * The issue is labelled `bug`, assigned to `ADMIN_GITHUB_NAME` and its body carries
 * the error name, message, stack trace, both version constants and the current time.
 *
 * @param error the error to be reported
 * @returns `URL` object pointing to the "new issue" form of the Promptbook repository
 * @private private within the repository
 */
function getErrorReportUrl(error) {
    const title = `🐜 Error report from ${NAME}`;
    const body = spaceTrim((block) => ` \`${error.name || 'Error'}\` has occurred in the [${NAME}], please look into it @${ADMIN_GITHUB_NAME}. \`\`\` ${block(error.message || '(no error message)')} \`\`\` ## More info: - **Promptbook engine version:** ${PROMPTBOOK_ENGINE_VERSION} - **Book language version:** ${BOOK_LANGUAGE_VERSION} - **Time:** ${new Date().toISOString()} <details> <summary>Stack trace:</summary> ## Stack trace: \`\`\`stacktrace ${block(error.stack || '(empty)')} \`\`\` </details> `);

    const reportUrl = new URL(`https://github.com/webgptorg/promptbook/issues/new`);
    // Note: The query parameters are written in a fixed order so the resulting URL is stable
    const queryEntries = [
        ['labels', 'bug'],
        ['assignees', ADMIN_GITHUB_NAME],
        ['title', title],
        ['body', body],
    ];
    for (const [key, value] of queryEntries) {
        reportUrl.searchParams.set(key, value);
    }
    return reportUrl;
}

/**
 * This error type indicates that the error should not happen and its last check before crashing with some other error
 *
 * Note: The message is extended by a link to a pre-filled issue (see `getErrorReportUrl`) and by the contact email
 *
 * @public exported from `@promptbook/core`
 */
class UnexpectedError extends Error {
    constructor(message) {
        const composeMessage = (block) => ` ${block(message)} Note: This error should not happen. It's probably a bug in the pipeline collection Please report issue: ${block(getErrorReportUrl(new Error(message)).href)} Or contact us on ${ADMIN_EMAIL} `;
        super(spaceTrim$1(composeMessage));
        this.name = 'UnexpectedError';
        // Note: Restores the prototype chain so `instanceof UnexpectedError` holds even when classes are down-levelled
        Object.setPrototypeOf(this, UnexpectedError.prototype);
    }
}
/**
 * Converts a JavaScript Object Notation (JSON) string into an object.
 *
 * Note: This is wrapper around `JSON.parse()` with better error and type handling
 *
 * @param value the JSON text to parse
 * @returns the parsed value
 * @throws {Error} when the value is `undefined`, is not a string, or is not valid JSON
 * @public exported from `@promptbook/utils`
 */
function jsonParse(value) {
    if (value === undefined) {
        throw new Error(`Can not parse JSON from undefined value.`);
    }
    else if (typeof value !== 'string') {
        // Note: The offending value goes to the console, the thrown message carries only its type
        console.error('Can not parse JSON from non-string value.', { text: value });
        throw new Error(spaceTrim(` Can not parse JSON from non-string value. The value type: ${typeof value} See more in console. `));
    }
    try {
        return JSON.parse(value);
    }
    catch (error) {
        if (!(error instanceof Error)) {
            throw error;
        }
        // Note: The parser message is re-thrown together with the text which failed to parse
        throw new Error(spaceTrim((block) => ` ${block(error.message)} The expected JSON text: ${block(value)} `));
    }
}

/**
 * Orders JSON object by keys
 *
 * Note: Keys listed in `order` come first (in that order), remaining keys of `value` follow
 * Note: A key listed in `order` but missing in `value` is present in the result with `undefined` value
 * Note: Only the top level is re-ordered and copied, nested values are shared with the input
 *
 * @returns The same type of object as the input re-ordered
 * @public exported from `@promptbook/utils`
 */
function orderJson(options) {
    const { value, order } = options;
    const orderedValue = {
        ...(order === undefined ? {} : Object.fromEntries(order.map((key) => [key, undefined]))),
        ...value,
    };
    return orderedValue;
}

/**
 * Freezes the given object and all its nested objects recursively
 *
 * Note: `$` is used to indicate that this function is not a pure function - it mutates given object
 * Note: This function mutates the object and returns the original (but mutated-deep-freezed) object
 * Note: Arrays (top-level and nested) are frozen in place, no copy is made
 * Note: Values which can not be frozen (`null`, `undefined` and other primitives) are returned untouched
 * Note: Circular references are supported, every object is visited only once
 *
 * @param objectValue the value to freeze
 * @returns The same object as the input, but deeply frozen
 * @public exported from `@promptbook/utils`
 */
function $deepFreeze(objectValue) {
    return $deepFreezeVisited(objectValue, new WeakSet());
}

/**
 * Freezes one value and walks into its object-typed own properties
 *
 * @param value the value to freeze
 * @param visited objects which are already being (or have been) processed, guards against cycles
 * @returns the given value
 * @private internal helper of `$deepFreeze`
 */
function $deepFreezeVisited(value, visited) {
    const isFreezable = value !== null && (typeof value === 'object' || typeof value === 'function');
    if (!isFreezable || visited.has(value)) {
        return value;
    }
    visited.add(value);
    // Note: `getOwnPropertyNames` covers array indexes as well as non-enumerable properties
    for (const propertyName of Object.getOwnPropertyNames(value)) {
        const propertyValue = value[propertyName];
        if (propertyValue && typeof propertyValue === 'object') {
            $deepFreezeVisited(propertyValue, visited);
        }
    }
    return Object.freeze(value);
}
/**
 * TODO: [🧠] Is there a way how to meaningfully test this utility
 */
/**
 * This error type indicates that somewhere in the code non-Error object was thrown and it was wrapped into the `WrappedError`
 *
 * Note: The thrown value itself is only logged to the console (tagged `[🤮]`), the message does not contain it
 *
 * @public exported from `@promptbook/core`
 */
class WrappedError extends Error {
    constructor(whatWasThrown) {
        const consoleTag = `[🤮]`;
        // Note: Logging happens before `super(...)`, which is fine because `this` is not touched yet
        console.error(consoleTag, whatWasThrown);
        const description = spaceTrim$1(` Non-Error object was thrown Note: Look for ${consoleTag} in the console for more details Please report issue on ${ADMIN_EMAIL} `);
        super(description);
        this.name = 'WrappedError';
        Object.setPrototypeOf(this, WrappedError.prototype);
    }
}

/**
 * Helper used in catch blocks to assert that the error is an instance of `Error`
 *
 * @param whatWasThrown Any object that was thrown
 * @returns Nothing if the error is an instance of `Error`
 * @throws `WrappedError` or `UnexpectedError` if the error is not standard
 *
 * @private within the repository
 */
function assertsError(whatWasThrown) {
    // Cases 1 + 2: Already wrapped errors and unexpected errors are passed further untouched
    const isRethrown = whatWasThrown instanceof WrappedError || whatWasThrown instanceof UnexpectedError;
    if (isRethrown) {
        throw whatWasThrown;
    }
    // Case 3: Anything which is not a standard error is wrapped into `WrappedError` and thrown
    if (!(whatWasThrown instanceof Error)) {
        throw new WrappedError(whatWasThrown);
    }
    // Case 4: Standard errors are kept up to the consumer
}
/**
 * Checks if the value is [🚉] serializable as JSON
 * If not, throws an UnexpectedError with a rich error message and tracking
 *
 * - Almost all primitives are serializable BUT:
 * - `undefined` is not serializable
 * - `NaN` is not serializable
 * - Objects and arrays are serializable if all their properties are serializable
 * - Functions are not serializable
 * - Circular references are not serializable
 * - `Date` objects are not serializable
 * - `Map` and `Set` objects are not serializable
 * - `RegExp` objects are not serializable
 * - `Error` objects are not serializable
 * - `Symbol` objects are not serializable
 * - And much more...
 *
 * Note: `undefined` as a property of an object is tolerated, such property is just omitted by JSON
 *
 * @param options.value the value to check
 * @param options.name name of the value, used as a path in the error messages
 * @param options.message optional additional text appended to some of the error messages
 * @throws UnexpectedError if the value is not serializable as JSON
 * @public exported from `@promptbook/utils`
 */
function checkSerializableAsJson(options) {
    const { value, name, message } = options;

    if (value === undefined) {
        throw new UnexpectedError(`${name} is undefined`);
    }
    if (value === null || typeof value === 'boolean' || typeof value === 'string') {
        return;
    }
    if (typeof value === 'number') {
        if (Number.isNaN(value)) {
            // Note: `NaN` has its own message instead of falling into the generic "unknown type" branch
            throw new UnexpectedError(`${name} is NaN`);
        }
        return;
    }
    if (typeof value === 'symbol') {
        throw new UnexpectedError(`${name} is symbol`);
    }
    if (typeof value === 'function') {
        throw new UnexpectedError(`${name} is function`);
    }
    if (typeof value !== 'object') {
        // Note: Reached for the remaining primitive types (for example `bigint`)
        throw new UnexpectedError(spaceTrim((block) => ` \`${name}\` is unknown type Additional message for \`${name}\`: ${block(message || '(nothing)')} `));
    }

    if (Array.isArray(value)) {
        for (let i = 0; i < value.length; i++) {
            checkSerializableAsJson({ name: `${name}[${i}]`, value: value[i], message });
        }
        return;
    }

    if (value instanceof Date) {
        throw new UnexpectedError(spaceTrim((block) => ` \`${name}\` is Date Use \`string_date_iso8601\` instead Additional message for \`${name}\`: ${block(message || '(nothing)')} `));
    }
    if (value instanceof Map) {
        throw new UnexpectedError(`${name} is Map`);
    }
    if (value instanceof Set) {
        throw new UnexpectedError(`${name} is Set`);
    }
    if (value instanceof RegExp) {
        throw new UnexpectedError(`${name} is RegExp`);
    }
    if (value instanceof Error) {
        throw new UnexpectedError(spaceTrim((block) => ` \`${name}\` is unserialized Error Use function \`serializeError\` Additional message for \`${name}\`: ${block(message || '(nothing)')} `));
    }

    for (const [subName, subValue] of Object.entries(value)) {
        if (subValue === undefined) {
            // Note: undefined in object is serializable - it is just omitted
            continue;
        }
        checkSerializableAsJson({ name: `${name}.${subName}`, value: subValue, message });
    }

    try {
        // Note: Last line of defense - catches whatever the checks above did not cover
        JSON.stringify(value); // <- TODO: [0]
    }
    catch (error) {
        assertsError(error);
        throw new UnexpectedError(spaceTrim((block) => ` \`${name}\` is not serializable ${block(error.stack || error.message)} Additional message for \`${name}\`: ${block(message || '(nothing)')} `));
    }
    /*
    TODO: [0] Is there some more elegant way to check circular references?
    const seen = new Set();
    const stack = [{ value }];
    while (stack.length > 0) {
        const { value } = stack.pop()!;
        if (typeof value === 'object' && value !== null) {
            if (seen.has(value)) {
                throw new UnexpectedError(`${name} has circular reference`);
            }
            seen.add(value);
            if (Array.isArray(value)) {
                stack.push(...value.map((value) => ({ value })));
            } else {
                stack.push(...Object.values(value).map((value) => ({ value })));
            }
        }
    }
    */
}
/**
 * TODO: Can be return type more type-safe? like `asserts options.value is JsonValue`
 * TODO: [🧠][main] !!3 In-memory cache of same values to prevent multiple checks
 * Note: [🐠] This is how `checkSerializableAsJson` + `isSerializableAsJson` together can just return true/false or rich error message
 */

/**
 * Creates a deep clone of the given object
 *
 * Note: [🔂] This function is idempotent.
 * Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types.
 * Note: The clone is made by a JSON round-trip, so properties with `undefined` value are dropped
 *
 * @param objectValue The object to clone.
 * @returns A deep, writable clone of the input object.
 * @public exported from `@promptbook/utils`
 */
function deepClone(objectValue) {
    return JSON.parse(JSON.stringify(objectValue));
    /*
    TODO: [🧠] Is there a better implementation?
    > const propertyNames = Object.getOwnPropertyNames(objectValue);
    > for (const propertyName of propertyNames) {
    >     const value = (objectValue as really_any)[propertyName];
    >     if (value && typeof value === 'object') {
    >         deepClone(value);
    >     }
    > }
    > return Object.assign({}, objectValue);
    */
}
/**
 * TODO: [🧠] Is there a way how to meaningfully test this utility
 */

/**
 * Utility to export a JSON object from a function
 *
 * 1) Checks if the value is serializable as JSON
 * 2) Makes a deep clone of the object
 * 3) Orders the object properties
 * 4) Deeply freezes the cloned object
 *
 * Note: This function does not mutate the given object - the clone is made in both branches (with and without `order`),
 *       so freezing never reaches the objects owned by the caller
 *
 * @param options.name name of the value, used in the error messages
 * @param options.value the value to export
 * @param options.order optional list of keys which should come first in the exported object
 * @param options.message optional additional text for the error messages
 * @returns The same type of object as the input but read-only and re-ordered
 * @throws UnexpectedError if the value is not serializable as JSON
 * @public exported from `@promptbook/utils`
 */
function exportJson(options) {
    const { name, value, order, message } = options;
    checkSerializableAsJson({ name, value, message });
    // <- Note: checkSerializableAsJson asserts that the value is serializable as JSON
    // TODO: Fix error "Type instantiation is excessively deep and possibly infinite."
    const clonedValue = deepClone(value);
    const orderedValue = order === undefined ? clonedValue : orderJson({ value: clonedValue, order: order });
    // Note: The result of `$deepFreeze` is returned (not the local variable) so the frozen instance is always the one handed out
    return $deepFreeze(orderedValue);
}
/**
 * TODO: [🧠] Is there a way how to meaningfully test this utility
 */

/**
 * Order of keys in the pipeline JSON
 *
 * @public exported from `@promptbook/core`
 */
const ORDER_OF_PIPELINE_JSON = [
    // Note: [🍙] In this order will be pipeline serialized
    'title',
    'pipelineUrl',
    'bookVersion',
    'description',
    'formfactorName',
    'parameters',
    'tasks',
    'personas',
    'preparations',
    'knowledgeSources',
    'knowledgePieces',
    'sources', // <- TODO: [🧠] Where should the `sources` be
];

/**
 * Nonce which is used for replacing things in strings
 *
 * @private within the repository
 */
const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
/**
 * Placeholder value indicating a parameter is missing its value.
 *
 * @private within the repository
 */
const RESERVED_PARAMETER_MISSING_VALUE = `MISSING-${REPLACING_NONCE}`;

/**
 * Placeholder value indicating a parameter is restricted and cannot be used directly.
 *
 * @private within the repository
 */
const RESERVED_PARAMETER_RESTRICTED = `RESTRICTED-${REPLACING_NONCE}`;

/**
 * The names of the parameters that are reserved for special purposes
 *
 * @public exported from `@promptbook/core`
 */
const RESERVED_PARAMETER_NAMES = exportJson({
    name: 'RESERVED_PARAMETER_NAMES',
    message: `The names of the parameters that are reserved for special purposes`,
    value: [
        'content',
        'context',
        'knowledge',
        'examples',
        'modelName',
        'currentDate',
        // <- TODO: list here all command names
        // <- TODO: Add more like 'date', 'modelName',...
        // <- TODO: Add [emoji] + instructions ACRY when adding new reserved parameter
    ],
});
/**
 * Note: [💞] Ignore a discrepancy between file name and entity name
 */

/**
 * This error indicates that the promptbook in a markdown format cannot be parsed into a valid promptbook object
 *
 * @public exported from `@promptbook/core`
 */
class ParseError extends Error {
    constructor(message) {
        super(message);
        Object.setPrototypeOf(this, ParseError.prototype);
        this.name = 'ParseError';
    }
}
/**
 * TODO: Maybe split `ParseError` and `ApplyError`
 */

/**
 * This error indicates that the promptbook object has valid syntax (=can be parsed) but contains logical errors (like circular dependencies)
 *
 * @public exported from `@promptbook/core`
 */
class PipelineLogicError extends Error {
    constructor(message) {
        super(message);
        Object.setPrototypeOf(this, PipelineLogicError.prototype);
        this.name = 'PipelineLogicError';
    }
}

/**
 * Tests if given string is valid semantic version
 *
 * Accepted form is `MAJOR.MINOR.PATCH` optionally followed by `-NUMBER`;
 * anything starting with `0.0.0` and any non-string value is rejected.
 *
 * Note: There are two similar functions:
 * - `isValidSemanticVersion` *(this one)* which tests any semantic version
 * - `isValidPromptbookVersion` which tests just Promptbook versions
 *
 * @public exported from `@promptbook/utils`
 */
function isValidSemanticVersion(version) {
    const isCandidate = typeof version === 'string' && !version.startsWith('0.0.0');
    return isCandidate && /^\d+\.\d+\.\d+(-\d+)?$/i.test(version);
}

/**
 * Tests if given string is valid promptbook version
 * It looks into list of known promptbook versions.
 *
 * @see https://www.npmjs.com/package/promptbook?activeTab=versions
 * Note: When you are using for example promptbook 2.0.0 and there already is promptbook 3.0.0 it doesn't know about it.
 * Note: There are two similar functions:
 * - `isValidSemanticVersion` which tests any semantic version
 * - `isValidPromptbookVersion` *(this one)* which tests just Promptbook versions
 *
 * @public exported from `@promptbook/utils`
 */
function isValidPromptbookVersion(version) {
    // Note: These look like versions but are refused ('1.0.0' is intentionally not on the list)
    const refusedVersions = ['2.0.0', '3.0.0'];
    const isAccepted = isValidSemanticVersion(version) && !refusedVersions.includes(version);
    // <- TODO: [main] !!3 Check isValidPromptbookVersion against PROMPTBOOK_ENGINE_VERSIONS
    return isAccepted;
}

/**
 * Tests if given string is valid URL.
 *
 * Note: [🔂] This function is idempotent.
 * Note: Dataurl are considered perfectly valid.
 * Note: The `blob:` prefix is stripped and the rest is tested as an ordinary URL
 * Note: There are two similar functions:
 * - `isValidUrl` *(this one)* which tests any URL
 * - `isValidPipelineUrl` which tests just promptbook URL
 *
 * @public exported from `@promptbook/utils`
 */
function isValidUrl(url) {
    if (typeof url !== 'string') {
        return false;
    }
    const candidate = url.startsWith('blob:') ? url.replace(/^blob:/, '') : url;
    let protocol;
    try {
        protocol = new URL(candidate /* because fail is handled */).protocol;
    }
    catch (error) {
        return false;
    }
    return ['http:', 'https:', 'data:'].includes(protocol);
}
/**
 * Tests if given string is valid pipeline URL.
 *
 * On top of `isValidUrl` it requires the `http://` or `https://` scheme and refuses URLs containing `#`
 *
 * Note: There are two similar functions:
 * - `isValidUrl` which tests any URL
 * - `isValidPipelineUrl` *(this one)* which tests just pipeline URL
 *
 * @public exported from `@promptbook/utils`
 */
function isValidPipelineUrl(url) {
    if (!isValidUrl(url)) {
        return false;
    }
    if (!url.startsWith('https://') && !url.startsWith('http://') /* <- Note: [👣] */) {
        return false;
    }
    if (url.includes('#')) {
        // TODO: [🐠]
        return false;
    }
    /*
    Note: [👣][🧠] Is it secure to allow pipeline URLs on private and unsecured networks?
    if (isUrlOnPrivateNetwork(url)) {
        return false;
    }
    */
    return true;
}
/**
 * TODO: [🐠] Maybe more info why the URL is invalid
 */

/**
 * Validates PipelineJson if it is logically valid
 *
 * It checks:
 * -   if it has correct parameters dependency
 *
 * It does NOT check:
 * -   if it is valid json
 * -   if it is meaningful
 *
 * Note: When `IS_PIPELINE_LOGIC_VALIDATED` is off, `PipelineLogicError` is only logged to the console, other errors are still thrown
 *
 * @param pipeline valid or invalid PipelineJson
 * @returns the same pipeline if it is logically valid
 * @throws {PipelineLogicError} on logical error in the pipeline
 * @public exported from `@promptbook/core`
 */
function validatePipeline(pipeline) {
    if (IS_PIPELINE_LOGIC_VALIDATED) {
        validatePipeline_InnerFunction(pipeline);
    }
    else {
        try {
            validatePipeline_InnerFunction(pipeline);
        }
        catch (error) {
            if (!(error instanceof PipelineLogicError)) {
                throw error;
            }
            console.error(spaceTrim$1((block) => ` Pipeline is not valid but logic errors are temporarily disabled via \`IS_PIPELINE_LOGIC_VALIDATED\` ${block(error.message)} `));
        }
    }
    return pipeline;
}

/**
 * Performs all the checks of `validatePipeline` and throws on the first problem found
 *
 * Order of the checks:
 * 1) `pipelineUrl` and `bookVersion` (when present)
 * 2) `parameters` and `tasks` are arrays (otherwise `ParseError`)
 * 3) every parameter individually (input xor output, used, defined)
 * 4) every task individually (unique and non-reserved result name, jokers, expectations)
 * 5) all tasks can be ordered so that each one depends only on already resolved parameters
 *
 * @param pipeline valid or invalid PipelineJson
 * @throws {PipelineLogicError} on logical error in the pipeline
 * @throws {ParseError} when `parameters` or `tasks` is not an array
 * @throws {UnexpectedError} when the dependency resolution does not finish within `LOOP_LIMIT` rounds
 * @private internal function for `validatePipeline`
 */
function validatePipeline_InnerFunction(pipeline) {
    // TODO: [🧠] Maybe test if promptbook is a promise and make specific error case for that
    const pipelineIdentification = (() => {
        // Note: This is a 😐 implementation of [🚞]
        const _ = [];
        if (pipeline.sourceFile !== undefined) {
            _.push(`File: ${pipeline.sourceFile}`);
        }
        if (pipeline.pipelineUrl !== undefined) {
            _.push(`Url: ${pipeline.pipelineUrl}`);
        }
        return _.join('\n');
    })();

    if (pipeline.pipelineUrl !== undefined && !isValidPipelineUrl(pipeline.pipelineUrl)) {
        // <- Note: [🚲]
        throw new PipelineLogicError(spaceTrim$1((block) => ` Invalid promptbook URL "${pipeline.pipelineUrl}" ${block(pipelineIdentification)} `));
    }
    if (pipeline.bookVersion !== undefined && !isValidPromptbookVersion(pipeline.bookVersion)) {
        // <- Note: [🚲]
        throw new PipelineLogicError(spaceTrim$1((block) => ` Invalid Promptbook Version "${pipeline.bookVersion}" ${block(pipelineIdentification)} `));
    }

    // TODO: [🧠] Maybe do here some proper JSON-schema / ZOD checking
    if (!Array.isArray(pipeline.parameters)) {
        // TODO: [🧠] what is the correct error tp throw - maybe PromptbookSchemaError
        throw new ParseError(spaceTrim$1((block) => ` Pipeline is valid JSON but with wrong structure \`PipelineJson.parameters\` expected to be an array, but got ${typeof pipeline.parameters} ${block(pipelineIdentification)} `));
    }
    // TODO: [🧠] Maybe do here some proper JSON-schema / ZOD checking
    if (!Array.isArray(pipeline.tasks)) {
        // TODO: [🧠] what is the correct error tp throw - maybe PromptbookSchemaError
        throw new ParseError(spaceTrim$1((block) => ` Pipeline is valid JSON but with wrong structure \`PipelineJson.tasks\` expected to be an array, but got ${typeof pipeline.tasks} ${block(pipelineIdentification)} `));
    }
    /*
    TODO: [🧠][🅾] Should be empty pipeline valid or not
    // Note: Check that pipeline has some tasks
    if (pipeline.tasks.length === 0) {
        throw new PipelineLogicError(
            spaceTrim(
                (block) => `
                  Pipeline must have at least one task

                  ${block(pipelineIdentification)}
              `,
            ),
        );
    }
    */

    // Note: Check each parameter individually
    for (const parameter of pipeline.parameters) {
        if (parameter.isInput && parameter.isOutput) {
            throw new PipelineLogicError(spaceTrim$1((block) => ` Parameter \`{${parameter.name}}\` can not be both input and output ${block(pipelineIdentification)} `));
        }
        // Note: Testing that parameter is either intermediate or output BUT not created and unused
        if (!parameter.isInput &&
            !parameter.isOutput &&
            !pipeline.tasks.some((task) => task.dependentParameterNames.includes(parameter.name))) {
            throw new PipelineLogicError(spaceTrim$1((block) => ` Parameter \`{${parameter.name}}\` is created but not used You can declare {${parameter.name}} as output parameter by adding in the header: - OUTPUT PARAMETER \`{${parameter.name}}\` ${parameter.description || ''} ${block(pipelineIdentification)} `));
        }
        // Note: Testing that parameter is either input or result of some task
        if (!parameter.isInput && !pipeline.tasks.some((task) => task.resultingParameterName === parameter.name)) {
            throw new PipelineLogicError(spaceTrim$1((block) => ` Parameter \`{${parameter.name}}\` is declared but not defined You can do one of these: 1) Remove declaration of \`{${parameter.name}}\` 2) Add task that results in \`-> {${parameter.name}}\` ${block(pipelineIdentification)} `));
        }
    }

    // Note: All input parameters are defined - so that they can be used as result of some task
    const inputParameterNames = pipeline.parameters.filter(({ isInput }) => isInput).map(({ name }) => name);
    const definedParameters = new Set(inputParameterNames);

    // Note: Checking each task individually
    for (const task of pipeline.tasks) {
        if (definedParameters.has(task.resultingParameterName)) {
            throw new PipelineLogicError(spaceTrim$1((block) => ` Parameter \`{${task.resultingParameterName}}\` is defined multiple times ${block(pipelineIdentification)} `));
        }
        if (RESERVED_PARAMETER_NAMES.includes(task.resultingParameterName)) {
            throw new PipelineLogicError(spaceTrim$1((block) => ` Parameter name {${task.resultingParameterName}} is reserved, please use different name ${block(pipelineIdentification)} `));
        }
        definedParameters.add(task.resultingParameterName);

        if (task.jokerParameterNames && task.jokerParameterNames.length > 0) {
            if (!task.format &&
                !task.expectations /* <- TODO: Require at least 1 -> min <- expectation to use jokers */) {
                throw new PipelineLogicError(spaceTrim$1((block) => ` Joker parameters are used for {${task.resultingParameterName}} but no expectations are defined ${block(pipelineIdentification)} `));
            }
            for (const joker of task.jokerParameterNames) {
                if (!task.dependentParameterNames.includes(joker)) {
                    throw new PipelineLogicError(spaceTrim$1((block) => ` Parameter \`{${joker}}\` is used for {${task.resultingParameterName}} as joker but not in \`dependentParameterNames\` ${block(pipelineIdentification)} `));
                }
            }
        }

        if (task.expectations) {
            for (const [unit, { min, max }] of Object.entries(task.expectations)) {
                if (min !== undefined && max !== undefined && min > max) {
                    throw new PipelineLogicError(spaceTrim$1((block) => ` Min expectation (=${min}) of ${unit} is higher than max expectation (=${max}) ${block(pipelineIdentification)} `));
                }
                if (min !== undefined && min < 0) {
                    throw new PipelineLogicError(spaceTrim$1((block) => ` Min expectation of ${unit} must be zero or positive ${block(pipelineIdentification)} `));
                }
                if (max !== undefined && max <= 0) {
                    throw new PipelineLogicError(spaceTrim$1((block) => ` Max expectation of ${unit} must be positive ${block(pipelineIdentification)} `));
                }
            }
        }
    }

    // Note: Detect circular dependencies
    // Note: All input parameters and all reserved parameters are resolved from the start
    // Note: The array keeps the order for the error report, the set gives constant-time membership tests
    const resolvedParameterNames = [...inputParameterNames, ...RESERVED_PARAMETER_NAMES];
    const resolvedParameterLookup = new Set(resolvedParameterNames);
    let unresolvedTasks = [...pipeline.tasks];
    let loopLimit = LOOP_LIMIT;

    while (unresolvedTasks.length > 0) {
        if (loopLimit-- < 0) {
            // Note: Really UnexpectedError not LimitReachedError - this should not happen and be caught below
            throw new UnexpectedError(spaceTrim$1((block) => ` Loop limit reached during detection of circular dependencies in \`validatePipeline\` ${block(pipelineIdentification)} `));
        }

        // Note: Tasks of this round are picked against the parameters resolved in the previous rounds only
        const currentlyResolvedTasks = unresolvedTasks.filter((task) => task.dependentParameterNames.every((name) => resolvedParameterLookup.has(name)));

        if (currentlyResolvedTasks.length === 0) {
            throw new PipelineLogicError(
            // TODO: [🐎] DRY
            spaceTrim$1((block) => ` Can not resolve some parameters: Either you are using a parameter that is not defined, or there are some circular dependencies. ${block(pipelineIdentification)} **Can not resolve:** ${block(unresolvedTasks
                .map(({ resultingParameterName, dependentParameterNames }) => `- Parameter \`{${resultingParameterName}}\` which depends on ${dependentParameterNames
                .map((dependentParameterName) => `\`{${dependentParameterName}}\``)
                .join(' and ')}`)
                .join('\n'))} **Resolved:** ${block(resolvedParameterNames
                .filter((name) => !RESERVED_PARAMETER_NAMES.includes(name))
                .map((name) => `- Parameter \`{${name}}\``)
                .join('\n'))} **Reserved (which are available):** ${block(resolvedParameterNames
                .filter((name) => RESERVED_PARAMETER_NAMES.includes(name))
                .map((name) => `- Parameter \`{${name}}\``)
                .join('\n'))} `));
        }

        for (const { resultingParameterName } of currentlyResolvedTasks) {
            resolvedParameterNames.push(resultingParameterName);
            resolvedParameterLookup.add(resultingParameterName);
        }
        const currentlyResolvedTaskLookup = new Set(currentlyResolvedTasks);
        unresolvedTasks = unresolvedTasks.filter((task) => !currentlyResolvedTaskLookup.has(task));
    }
    // Note: Check that formfactor is corresponding to the pipeline interface
    // TODO: !!6 Implement this
    // pipeline.formfactorName
}
* > ex port function validatePipeline(promptbook: really_unknown): asserts promptbook is PipelineJson { */ /** * TODO: [🧳][main] !!4 Validate that all examples match expectations * TODO: [🧳][🐝][main] !!4 Validate that knowledge is valid (non-void) * TODO: [🧳][main] !!4 Validate that persona can be used only with CHAT variant * TODO: [🧳][main] !!4 Validate that parameter with reserved name not used RESERVED_PARAMETER_NAMES * TODO: [🧳][main] !!4 Validate that reserved parameter is not used as joker * TODO: [🧠] Validation not only logic itself but imports around - files and websites and rerefenced pipelines exists * TODO: [🛠] Actions, instruments (and maybe knowledge) => Functions and tools */ /** * Loads the books from the archive file with `.bookc` extension * * @param filePath Path to the archive file with `.bookc` extension * @param fs Filesystem tools * @returns Pipelines loaded from the archive * * @private utility of Promptbook */ async function loadArchive(filePath, fs) { if (!filePath.endsWith('.bookc')) { throw new UnexpectedError(`Archive file must have '.bookc' extension`); } const data = await fs.readFile(filePath); const archive = await JSZip.loadAsync(data); const indexFile = archive.file('index.book.json'); if (!indexFile) { throw new UnexpectedError(`Archive does not contain 'index.book.json' file`); } const collectionJson = jsonParse(await indexFile.async('text')); for (const pipeline of collectionJson) { validatePipeline(pipeline); } return collectionJson; } /** * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment */ var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON 
object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> {knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by 
comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL 
`https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModels",description:"List of available model names together with their descriptions as JSON",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelsRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n```json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpful assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n```\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are the available 
models:\n\n```json\n{availableModels}\n```\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelsRequirements",format:"JSON",dependentParameterNames:["availableModels","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModels}` List of available model names together with their descriptions as JSON\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelsRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n\\`\\`\\`json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpful assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are 
the available models:\n\n\\`\\`\\`json\n{availableModels}\n\\`\\`\\`\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model t