UNPKG

@promptbook/documents

Version:

Promptbook: Run AI apps in plain human language across multiple models and platforms

webgptorg/promptbook

1,096 lines (1,054 loc) • 283 kB

JavaScript

import { mkdir, rm, readFile } from 'fs/promises'; import spaceTrim$1, { spaceTrim } from 'spacetrim'; import { spawn } from 'child_process'; import colors from 'colors'; import { forTime } from 'waitasecond'; import { SHA256 } from 'crypto-js'; import hexEncoder from 'crypto-js/enc-hex'; import { basename, join, dirname, isAbsolute } from 'path'; import { randomBytes } from 'crypto'; import { Subject } from 'rxjs'; import sha256 from 'crypto-js/sha256'; import { lookup, extension } from 'mime-types'; import { parse, unparse } from 'papaparse'; // ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten /** * The version of the Book language * * @generated * @see https://github.com/webgptorg/book */ const BOOK_LANGUAGE_VERSION = '1.0.0'; /** * The version of the Promptbook engine * * @generated * @see https://github.com/webgptorg/promptbook */ const PROMPTBOOK_ENGINE_VERSION = '0.101.0-5'; /** * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine * Note: [💞] Ignore a discrepancy between file name and entity name */ /** * Just says that the variable is not used but should be kept * No side effects. * * Note: It can be useful for: * * 1) Suppressing eager optimization of unused imports * 2) Suppressing eslint errors of unused variables in the tests * 3) Keeping the type of the variable for type testing * * @param value any values * @returns void * @private within the repository */ function keepUnused(...valuesToKeep) { } /** * Returns the same value that is passed as argument. * No side effects. 
* * Note: It can be useful for: * * 1) Leveling indentation * 2) Putting always-true or always-false conditions without getting eslint errors * * @param value any values * @returns the same values * @private within the repository */ function just(value) { if (value === undefined) { return undefined; } return value; } /** * Name for the Promptbook * * TODO: [🗽] Unite branding and make single place for it * * @public exported from `@promptbook/core` */ const NAME = `Promptbook`; /** * Email of the responsible person * * @public exported from `@promptbook/core` */ const ADMIN_EMAIL = 'pavol@ptbk.io'; /** * Name of the responsible person for the Promptbook on GitHub * * @public exported from `@promptbook/core` */ const ADMIN_GITHUB_NAME = 'hejny'; // <- TODO: [🐊] Pick the best claim /** * When the title is not provided, the default title is used * * @public exported from `@promptbook/core` */ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`; /** * Maximum file size limit * * @public exported from `@promptbook/core` */ const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB /** * Threshold value that determines when a dataset is considered "big" * and may require special handling or optimizations * * For example, when error occurs in one item of the big dataset, it will not fail the whole pipeline * * @public exported from `@promptbook/core` */ const BIG_DATASET_TRESHOLD = 50; /** * Placeholder text used to represent a placeholder value of failed operation * * @public exported from `@promptbook/core` */ const FAILED_VALUE_PLACEHOLDER = '!?'; // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`" /** * The maximum number of iterations for a loops * * @private within the repository - too low-level in comparison with other `MAX_...` */ const LOOP_LIMIT = 1000; /** * Strings to represent various values in the context of parameter values * * @public exported from `@promptbook/utils` */ const VALUE_STRINGS = { empty: '(nothing; empty 
string)', null: '(no value; null)', undefined: '(unknown value; undefined)', nan: '(not a number; NaN)', infinity: '(infinity; ∞)', negativeInfinity: '(negative infinity; -∞)', unserializable: '(unserializable value)', circular: '(circular JSON)', }; /** * Small number limit * * @public exported from `@promptbook/utils` */ const SMALL_NUMBER = 0.001; /** * Short time interval to prevent race conditions in milliseconds * * @private within the repository - too low-level in comparison with other `MAX_...` */ const IMMEDIATE_TIME = 10; /** * The maximum length of the (generated) filename * * @public exported from `@promptbook/core` */ const MAX_FILENAME_LENGTH = 30; /** * Strategy for caching the intermediate results for knowledge sources * * @public exported from `@promptbook/core` */ const DEFAULT_INTERMEDIATE_FILES_STRATEGY = 'HIDE_AND_KEEP'; // <- TODO: [😡] Change to 'VISIBLE' /** * The maximum number of (LLM) tasks running in parallel * * @public exported from `@promptbook/core` */ const DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️] /** * The maximum number of attempts to execute LLM task before giving up * * @public exported from `@promptbook/core` */ const DEFAULT_MAX_EXECUTION_ATTEMPTS = 7; // <- TODO: [🤹‍♂️] // <- TODO: [🕝] Make also `BOOKS_DIRNAME_ALTERNATIVES` // TODO: Just `.promptbook` in config, hardcode subfolders like `download-cache` or `execution-cache` /** * Where to store the temporary downloads * * Note: When the folder does not exist, it is created recursively * * @public exported from `@promptbook/core` */ const DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache'; /** * Where to store the scrape cache * * Note: When the folder does not exist, it is created recursively * * @public exported from `@promptbook/core` */ const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache'; // <- TODO: [🧜‍♂️] /** * Default settings for parsing and generating CSV files in Promptbook. 
/**
 * Frozen object, so the shared defaults cannot be mutated by consumers.
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_CSV_SETTINGS = Object.freeze({
    delimiter: ',',
    quoteChar: '"',
    newline: '\n',
    skipEmptyLines: true,
});
/**
 * Controls whether verbose logging is enabled by default throughout the application.
 *
 * Note: Declared with `let`, no reassignment is visible in this part of the file
 *
 * @public exported from `@promptbook/core`
 */
let DEFAULT_IS_VERBOSE = false;
/**
 * Controls whether auto-installation of dependencies is enabled by default.
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_IS_AUTO_INSTALLED = false;
/**
 * Default simulated duration for a task in milliseconds (used for progress reporting)
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_TASK_SIMULATED_DURATION_MS = 5 * 60 * 1000; // 5 minutes
/**
 * API request timeout in milliseconds
 * Can be overridden via API_REQUEST_TIMEOUT environment variable
 *
 * NOTE(review): The parsed value is discarded here - the statement has no binding, presumably
 *               the constant itself was removed by the bundler; also no radix is passed to `parseInt`
 *
 * @public exported from `@promptbook/core`
 */
parseInt(process.env.API_REQUEST_TIMEOUT || '90000');
/**
 * Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
* * @private within the repository */ const IS_PIPELINE_LOGIC_VALIDATED = just( /**/ // Note: In normal situations, we check the pipeline logic: true); /** * Note: [💞] Ignore a discrepancy between file name and entity name * TODO: [🧠][🧜‍♂️] Maybe join remoteServerUrl and path into single value */ /** * This error type indicates that you try to use a feature that is not available in the current environment * * @public exported from `@promptbook/core` */ class EnvironmentMismatchError extends Error { constructor(message) { super(message); this.name = 'EnvironmentMismatchError'; Object.setPrototypeOf(this, EnvironmentMismatchError.prototype); } } /** * This error indicates that the promptbook can not retrieve knowledge from external sources * * @public exported from `@promptbook/core` */ class KnowledgeScrapeError extends Error { constructor(message) { super(message); this.name = 'KnowledgeScrapeError'; Object.setPrototypeOf(this, KnowledgeScrapeError.prototype); } } /** * This error type indicates that some tools are missing for pipeline execution or preparation * * @public exported from `@promptbook/core` */ class MissingToolsError extends Error { constructor(message) { super(spaceTrim((block) => ` ${block(message)} Note: You have probably forgot to provide some tools for pipeline execution or preparation `)); this.name = 'MissingToolsError'; Object.setPrototypeOf(this, MissingToolsError.prototype); } } /** * Make error report URL for the given error * * @private private within the repository */ function getErrorReportUrl(error) { const report = { title: `🐜 Error report from ${NAME}`, body: spaceTrim$1((block) => ` \`${error.name || 'Error'}\` has occurred in the [${NAME}], please look into it @${ADMIN_GITHUB_NAME}. 
\`\`\` ${block(error.message || '(no error message)')} \`\`\` ## More info: - **Promptbook engine version:** ${PROMPTBOOK_ENGINE_VERSION} - **Book language version:** ${BOOK_LANGUAGE_VERSION} - **Time:** ${new Date().toISOString()} <details> <summary>Stack trace:</summary> ## Stack trace: \`\`\`stacktrace ${block(error.stack || '(empty)')} \`\`\` </details> `), }; const reportUrl = new URL(`https://github.com/webgptorg/promptbook/issues/new`); reportUrl.searchParams.set('labels', 'bug'); reportUrl.searchParams.set('assignees', ADMIN_GITHUB_NAME); reportUrl.searchParams.set('title', report.title); reportUrl.searchParams.set('body', report.body); return reportUrl; } /** * This error type indicates that the error should not happen and its last check before crashing with some other error * * @public exported from `@promptbook/core` */ class UnexpectedError extends Error { constructor(message) { super(spaceTrim((block) => ` ${block(message)} Note: This error should not happen. It's probably a bug in the pipeline collection Please report issue: ${block(getErrorReportUrl(new Error(message)).href)} Or contact us on ${ADMIN_EMAIL} `)); this.name = 'UnexpectedError'; Object.setPrototypeOf(this, UnexpectedError.prototype); } } /** * Detects if the code is running in a Node.js environment * * Note: `$` is used to indicate that this function is not a pure function - it looks at the global object to determine the environment * * @public exported from `@promptbook/utils` */ const $isRunningInNode = new Function(` try { return this === global; } catch (e) { return false; } `); /** * TODO: [🎺] */ /** * Normalize options for `execCommand` and `execCommands` * * Note: `$` is used to indicate that this function behaves differently according to `process.platform` * * @private internal utility of `execCommand` and `execCommands` */ function $execCommandNormalizeOptions(options) { var _a, _b, _c, _d; let command; let cwd; let crashOnError; let args = []; let timeout; let isVerbose; if 
(typeof options === 'string') { // TODO: [1] DRY default values command = options; cwd = process.cwd(); crashOnError = true; timeout = Infinity; // <- TODO: [⏳] isVerbose = DEFAULT_IS_VERBOSE; } else { /* TODO: if ((options as any).commands !== undefined) { commands = (options as any).commands; } else { commands = [(options as any).command]; } */ // TODO: [1] DRY default values command = options.command; cwd = (_a = options.cwd) !== null && _a !== void 0 ? _a : process.cwd(); crashOnError = (_b = options.crashOnError) !== null && _b !== void 0 ? _b : true; timeout = (_c = options.timeout) !== null && _c !== void 0 ? _c : Infinity; isVerbose = (_d = options.isVerbose) !== null && _d !== void 0 ? _d : DEFAULT_IS_VERBOSE; } // TODO: /(-[a-zA-Z0-9-]+\s+[^\s]*)|[^\s]*/g const _ = Array.from(command.matchAll(/(".*")|([^\s]*)/g)) .map(([match]) => match) .filter((arg) => arg !== ''); if (_.length > 1) { [command, ...args] = _; } if (options.args) { args = [...args, ...options.args]; } let humanReadableCommand = !['npx', 'npm'].includes(command) ? 
command : args[0]; if (['ts-node'].includes(humanReadableCommand)) { humanReadableCommand += ` ${args[1]}`; } if (/^win/.test(process.platform) && ['npm', 'npx'].includes(command)) { command = `${command}.cmd`; } return { command, humanReadableCommand, args, cwd, crashOnError, timeout, isVerbose }; } // TODO: This should show type error> execCommandNormalizeOptions({ command: '', commands: [''] }); /** * Run one command in a shell * * * Note: There are 2 similar functions in the codebase: * - `$execCommand` which runs a single command * - `$execCommands` which runs multiple commands * Note: `$` is used to indicate that this function is not a pure function - it runs a command in a shell * * @public exported from `@promptbook/node` */ function $execCommand(options) { if (!$isRunningInNode()) { throw new EnvironmentMismatchError('Function `$execCommand` can run only in Node environment.js'); } return new Promise((resolve, reject) => { // eslint-disable-next-line prefer-const const { command, humanReadableCommand, args, cwd, crashOnError, timeout, isVerbose = DEFAULT_IS_VERBOSE, } = $execCommandNormalizeOptions(options); if (timeout !== Infinity) { // TODO: In waitasecond forTime(Infinity) should be equivalent to forEver() forTime(timeout).then(() => { if (crashOnError) { reject(new Error(`Command "${humanReadableCommand}" exceeded time limit of ${timeout}ms`)); } else { console.warn(`Command "${humanReadableCommand}" exceeded time limit of ${timeout}ms but continues running`); // <- TODO: [🏮] Some standard way how to transform errors into warnings and how to handle non-critical fails during the tasks resolve('Command exceeded time limit'); } }); } if (isVerbose) { console.info(colors.yellow(cwd) + ' ' + colors.green(command) + ' ' + colors.blue(args.join(' '))); } try { const commandProcess = spawn(command, args, { cwd, shell: true }); if (isVerbose) { commandProcess.on('message', (message) => { console.info({ message }); }); } const output = []; 
commandProcess.stdout.on('data', (stdout) => { output.push(stdout.toString()); if (isVerbose) { console.info(stdout.toString()); } }); commandProcess.stderr.on('data', (stderr) => { output.push(stderr.toString()); if (isVerbose && stderr.toString().trim()) { console.warn(stderr.toString()); // <- TODO: [🏮] Some standard way how to transform errors into warnings and how to handle non-critical fails during the tasks } }); const finishWithCode = (code) => { if (code !== 0) { if (crashOnError) { reject(new Error(output.join('\n').trim() || `Command "${humanReadableCommand}" exited with code ${code}`)); } else { if (isVerbose) { console.warn(`Command "${humanReadableCommand}" exited with code ${code}`); // <- TODO: [🏮] Some standard way how to transform errors into warnings and how to handle non-critical fails during the tasks } resolve(spaceTrim(output.join('\n'))); } } else { resolve(spaceTrim(output.join('\n'))); } }; commandProcess.on('close', finishWithCode); commandProcess.on('exit', finishWithCode); commandProcess.on('disconnect', () => { // Note: Unexpected disconnection should always result in rejection reject(new Error(`Command "${humanReadableCommand}" disconnected`)); }); commandProcess.on('error', (error) => { if (crashOnError) { reject(new Error(`Command "${humanReadableCommand}" failed: \n${error.message}`)); } else { if (isVerbose) { console.warn(error); // <- TODO: [🏮] Some standard way how to transform errors into warnings and how to handle non-critical fails during the tasks } resolve(spaceTrim(output.join('\n'))); } }); } catch (error) { // Note: Unexpected error in sync code should always result in rejection reject(error); } }); } /** * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment */ /** * Get the file extension from a file name * * @private within the repository */ function getFileExtension(value) { const match = value.match(/\.([0-9a-z]+)(?:[?#]|$)/i); return match ? 
match[1].toLowerCase() : null; } /** * Checks if the file exists * * @private within the repository */ async function isFileExisting(filename, fs) { const isReadAccessAllowed = await fs .access(filename, fs.constants.R_OK) .then(() => true) .catch(() => false); if (!isReadAccessAllowed) { return false; } const isFile = await fs .stat(filename) .then((fileStat) => fileStat.isFile()) .catch(() => false); return isFile; } /** * Note: Not [~🟢~] because it is not directly dependent on `fs * TODO: [🐠] This can be a validator - with variants that return true/false and variants that throw errors with meaningless messages * TODO: [🖇] What about symlinks? */ /** * Converts a name to a properly formatted subfolder path for cache storage. * Handles normalization and path formatting to create consistent cache directory structures. * * @private for `FileCacheStorage` */ function nameToSubfolderPath(name) { return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()]; } const defaultDiacriticsRemovalMap = [ { base: 'A', letters: '\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F', }, { base: 'AA', letters: '\uA732' }, { base: 'AE', letters: '\u00C6\u01FC\u01E2' }, { base: 'AO', letters: '\uA734' }, { base: 'AU', letters: '\uA736' }, { base: 'AV', letters: '\uA738\uA73A' }, { base: 'AY', letters: '\uA73C' }, { base: 'B', letters: '\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181', }, { base: 'C', letters: '\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E', }, { base: 'D', letters: '\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779\u00D0', }, { base: 'DZ', letters: '\u01F1\u01C4' }, { base: 'Dz', letters: '\u01F2\u01C5' }, { base: 'E', letters: 
'\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E', }, { base: 'F', letters: '\u0046\u24BB\uFF26\u1E1E\u0191\uA77B' }, { base: 'G', letters: '\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E', }, { base: 'H', letters: '\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D', }, { base: 'I', letters: '\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197', }, { base: 'J', letters: '\u004A\u24BF\uFF2A\u0134\u0248' }, { base: 'K', letters: '\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2', }, { base: 'L', letters: '\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780', }, { base: 'LJ', letters: '\u01C7' }, { base: 'Lj', letters: '\u01C8' }, { base: 'M', letters: '\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C' }, { base: 'N', letters: '\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4', }, { base: 'NJ', letters: '\u01CA' }, { base: 'Nj', letters: '\u01CB' }, { base: 'O', letters: '\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C', }, { base: 'OI', letters: '\u01A2' }, { base: 'OO', letters: '\uA74E' }, { base: 'OU', letters: '\u0222' }, { base: 'OE', letters: '\u008C\u0152' }, { base: 'oe', letters: '\u009C\u0153' }, { base: 'P', letters: '\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754', }, { base: 'Q', letters: '\u0051\u24C6\uFF31\uA756\uA758\u024A' }, { base: 'R', letters: 
'\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782', }, { base: 'S', letters: '\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784', }, { base: 'T', letters: '\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786', }, { base: 'TZ', letters: '\uA728' }, { base: 'U', letters: '\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244', }, { base: 'V', letters: '\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245' }, { base: 'VY', letters: '\uA760' }, { base: 'W', letters: '\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72', }, { base: 'X', letters: '\u0058\u24CD\uFF38\u1E8A\u1E8C' }, { base: 'Y', letters: '\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE', }, { base: 'Z', letters: '\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762', }, { base: 'a', letters: '\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250', }, { base: 'aa', letters: '\uA733' }, { base: 'ae', letters: '\u00E6\u01FD\u01E3' }, { base: 'ao', letters: '\uA735' }, { base: 'au', letters: '\uA737' }, { base: 'av', letters: '\uA739\uA73B' }, { base: 'ay', letters: '\uA73D' }, { base: 'b', letters: '\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253', }, { base: 'c', letters: '\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184', }, { base: 'd', letters: '\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A', }, { base: 'dz', letters: '\u01F3\u01C6' }, { base: 'e', letters: 
'\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD', }, { base: 'f', letters: '\u0066\u24D5\uFF46\u1E1F\u0192\uA77C' }, { base: 'g', letters: '\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F', }, { base: 'h', letters: '\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265', }, { base: 'hv', letters: '\u0195' }, { base: 'i', letters: '\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131', }, { base: 'j', letters: '\u006A\u24D9\uFF4A\u0135\u01F0\u0249' }, { base: 'k', letters: '\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3', }, { base: 'l', letters: '\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747', }, { base: 'lj', letters: '\u01C9' }, { base: 'm', letters: '\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F' }, { base: 'n', letters: '\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5', }, { base: 'nj', letters: '\u01CC' }, { base: 'o', letters: '\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275', }, { base: 'oi', letters: '\u01A3' }, { base: 'ou', letters: '\u0223' }, { base: 'oo', letters: '\uA74F' }, { base: 'p', letters: '\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755', }, { base: 'q', letters: '\u0071\u24E0\uFF51\u024B\uA757\uA759' }, { base: 'r', letters: '\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783', }, { base: 's', letters: 
'\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B', }, { base: 't', letters: '\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787', }, { base: 'tz', letters: '\uA729' }, { base: 'u', letters: '\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289', }, { base: 'v', letters: '\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C' }, { base: 'vy', letters: '\uA761' }, { base: 'w', letters: '\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73', }, { base: 'x', letters: '\u0078\u24E7\uFF58\u1E8B\u1E8D' }, { base: 'y', letters: '\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF', }, { base: 'z', letters: '\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763', }, ]; /** * Map of letters from diacritic variant to diacritless variant * Contains lowercase and uppercase separatelly * * > "á" => "a" * > "ě" => "e" * > "Ă" => "A" * > ... * * @public exported from `@promptbook/utils` */ const DIACRITIC_VARIANTS_LETTERS = {}; // tslint:disable-next-line: prefer-for-of for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) { const letters = defaultDiacriticsRemovalMap[i].letters; // tslint:disable-next-line: prefer-for-of for (let j = 0; j < letters.length; j++) { DIACRITIC_VARIANTS_LETTERS[letters[j]] = defaultDiacriticsRemovalMap[i].base; } } // <- TODO: [🍓] Put to maker function to save execution time if not needed /* @see https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ /** * Removes diacritic marks (accents) from characters in a string. * * Note: [🔂] This function is idempotent. * * @param input The string containing diacritics to be normalized. * @returns The string with diacritics removed or normalized. * @public exported from `@promptbook/utils` */ function removeDiacritics(input) { /*eslint no-control-regex: "off"*/ return input.replace(/[^\u0000-\u007E]/g, (a) => { return DIACRITIC_VARIANTS_LETTERS[a] || a; }); } /** * TODO: [Ж] Variant for cyrillic (and in general non-latin) letters */ /** * Converts a given text to kebab-case format. * * @param text The text to be converted. * @returns The kebab-case formatted string. 
* @example 'hello-world' * @example 'i-love-promptbook' * @public exported from `@promptbook/utils` */ function normalizeToKebabCase(text) { text = removeDiacritics(text); let charType; let lastCharType = 'OTHER'; let normalizedName = ''; for (const char of text) { let normalizedChar; if (/^[a-z]$/.test(char)) { charType = 'LOWERCASE'; normalizedChar = char; } else if (/^[A-Z]$/.test(char)) { charType = 'UPPERCASE'; normalizedChar = char.toLowerCase(); } else if (/^[0-9]$/.test(char)) { charType = 'NUMBER'; normalizedChar = char; } else { charType = 'OTHER'; normalizedChar = '-'; } if (charType !== lastCharType && !(lastCharType === 'UPPERCASE' && charType === 'LOWERCASE') && !(lastCharType === 'NUMBER') && !(charType === 'NUMBER')) { normalizedName += '-'; } normalizedName += normalizedChar; lastCharType = charType; } normalizedName = normalizedName.split(/-+/g).join('-'); normalizedName = normalizedName.split(/-?\/-?/g).join('/'); normalizedName = normalizedName.replace(/^-/, ''); normalizedName = normalizedName.replace(/-$/, ''); return normalizedName; } /** * Note: [💞] Ignore a discrepancy between file name and entity name */ /** * Removes emojis from a string and fix whitespaces * * Note: [🔂] This function is idempotent. * * @param text with emojis * @returns text without emojis * @public exported from `@promptbook/utils` */ function removeEmojis(text) { // Replace emojis (and also ZWJ sequence) with hyphens text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1'); text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1'); text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1'); text = text.replace(/\p{Extended_Pictographic}/gu, ''); return text; } /** * Tests if given string is valid file path. 
* * Note: This does not check if the file exists only if the path is valid * @public exported from `@promptbook/utils` */ function isValidFilePath(filename) { if (typeof filename !== 'string') { return false; } if (filename.split('\n').length > 1) { return false; } // Normalize slashes early so heuristics can detect path-like inputs const filenameSlashes = filename.replace(/\\/g, '/'); // Reject strings that look like sentences (informational text) // Heuristic: contains multiple spaces and ends with a period, or contains typical sentence punctuation // But skip this heuristic if the string looks like a path (contains '/' or starts with a drive letter) if (filename.trim().length > 60 && // long enough to be a sentence /[.!?]/.test(filename) && // contains sentence punctuation filename.split(' ').length > 8 && // has many words !/\/|^[A-Z]:/i.test(filenameSlashes) // do NOT treat as sentence if looks like a path ) { return false; } // Absolute Unix path: /hello.txt if (/^(\/)/i.test(filenameSlashes)) { // console.log(filename, 'Absolute Unix path: /hello.txt'); return true; } // Absolute Windows path: C:/ or C:\ (allow spaces and multiple dots in filename) if (/^[A-Z]:\/.+$/i.test(filenameSlashes)) { // console.log(filename, 'Absolute Windows path: /hello.txt'); return true; } // Relative path: ./hello.txt if (/^(\.\.?\/)+/i.test(filenameSlashes)) { // console.log(filename, 'Relative path: ./hello.txt'); return true; } // Allow paths like foo/hello if (/^[^/]+\/[^/]+/i.test(filenameSlashes)) { // console.log(filename, 'Allow paths like foo/hello'); return true; } // Allow paths like hello.book if (/^[^/]+\.[^/]+$/i.test(filenameSlashes)) { // console.log(filename, 'Allow paths like hello.book'); return true; } return false; } /** * TODO: [🍏] Implement for MacOs */ /** * Tests if given string is valid URL. * * Note: [🔂] This function is idempotent. * Note: Dataurl are considered perfectly valid. 
* Note: There are two similar functions: * - `isValidUrl` which tests any URL * - `isValidPipelineUrl` *(this one)* which tests just promptbook URL * * @public exported from `@promptbook/utils` */ function isValidUrl(url) { if (typeof url !== 'string') { return false; } try { if (url.startsWith('blob:')) { url = url.replace(/^blob:/, ''); } const urlObject = new URL(url /* because fail is handled */); if (!['http:', 'https:', 'data:'].includes(urlObject.protocol)) { return false; } return true; } catch (error) { return false; } } /** * Converts a title string into a normalized name. * * @param value The title string to be converted to a name. * @returns A normalized name derived from the input title. * @example 'Hello World!' -> 'hello-world' * @public exported from `@promptbook/utils` */ function titleToName(value) { if (isValidUrl(value)) { value = value.replace(/^https?:\/\//, ''); value = value.replace(/\.html$/, ''); } else if (isValidFilePath(value)) { value = basename(value); // Note: Keeping extension in the name } value = value.split('/').join('-'); value = removeEmojis(value); value = normalizeToKebabCase(value); // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers return value; } /** * Retrieves an intermediate source for a scraper based on the knowledge source. * Manages the caching and retrieval of intermediate scraper results for optimized performance. 
* * @private as internal utility for scrapers */ async function getScraperIntermediateSource(source, options) { const { filename: sourceFilename, url } = source; const { rootDirname, cacheDirname, intermediateFilesStrategy, extension, isVerbose } = options; // TODO: [👬] DRY const hash = SHA256( // <- TODO: [🥬] Encapsulate sha256 to some private utility function hexEncoder.parse(sourceFilename || url || 'untitled')) .toString( /* hex */) .substring(0, 20); // <- TODO: [🥬] Make some system for hashes and ids of promptbook const semanticName = normalizeToKebabCase(titleToName((sourceFilename || url || '').split('intermediate').join(''))).substring(0, 20); // <- TODO: [🐱‍🐉] const pieces = ['intermediate', semanticName, hash].filter((piece) => piece !== ''); const name = pieces.join('-').split('--').join('-'); const cacheFilename = join(process.cwd(), cacheDirname, ...nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */), name) .split('\\') .join('/') + '.' + extension; // Note: Try to create cache directory, but don't fail if filesystem has issues try { await mkdir(dirname(cacheFilename), { recursive: true }); } catch (error) { // Note: If we can't create cache directory, continue without it // This handles read-only filesystems, permission issues, and missing parent directories if (error instanceof Error && (error.message.includes('EROFS') || error.message.includes('read-only') || error.message.includes('EACCES') || error.message.includes('EPERM') || error.message.includes('ENOENT'))) ; else { // Re-throw other unexpected errors throw error; } } let isDestroyed = true; const fileHandler = { filename: cacheFilename, get isDestroyed() { return isDestroyed; }, async destroy() { if (intermediateFilesStrategy === 'HIDE_AND_CLEAN') { if (isVerbose) { console.info('legacyDocumentScraper: Clening cache'); } await rm(cacheFilename); // TODO: [🐿][🧠] Maybe remove empty folders } isDestroyed = true; }, }; return fileHandler; } /** * Note: Not using 
`FileCacheStorage` for two reasons: * 1) Need to store more than serialized JSONs * 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: [😡] * TODO: [🐱‍🐉][🧠] Make some smart crop * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment */ var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide 
the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> {knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The 
title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModels",description:"List of available model names together with their descriptions as JSON",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelsRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n```json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpful assistant.\",\n \"temperature\": 0.7\n },\n {\n 
\"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n```\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are the available models:\n\n```json\n{availableModels}\n```\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelsRequirements",format:"JSON",dependentParameterNames:["availableModels","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModels}` List of available model names together with their descriptions as JSON\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelsRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n\\`\\`\\`json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpful assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are 
the available models:\n\n\\`\\`\\`json\n{availableModels}\n\\`\\`\\`\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelsRequirements}`\n"}],sourceFile:"./books/prepare-persona.book"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book",formfactorNa