UNPKG

@promptbook/markdown-utils

Version:

Promptbook: Run AI apps in plain human language across multiple models and platforms

1,075 lines (1,024 loc) • 267 kB
import spaceTrim, { spaceTrim as spaceTrim$1 } from 'spacetrim'; import { format } from 'prettier'; import parserHtml from 'prettier/parser-html'; import { randomBytes } from 'crypto'; import { Subject } from 'rxjs'; import { forTime } from 'waitasecond'; import hexEncoder from 'crypto-js/enc-hex'; import sha256 from 'crypto-js/sha256'; import { basename, join, dirname } from 'path'; import { SHA256 } from 'crypto-js'; import { lookup, extension } from 'mime-types'; import { parse, unparse } from 'papaparse'; // โš ๏ธ WARNING: This code has been generated so that any manual changes will be overwritten /** * The version of the Book language * * @generated * @see https://github.com/webgptorg/book */ const BOOK_LANGUAGE_VERSION = '1.0.0'; /** * The version of the Promptbook engine * * @generated * @see https://github.com/webgptorg/promptbook */ const PROMPTBOOK_ENGINE_VERSION = '0.100.0-14'; /** * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine * Note: [๐Ÿ’ž] Ignore a discrepancy between file name and entity name */ /** * This error indicates that the promptbook in a markdown format cannot be parsed into a valid promptbook object * * @public exported from `@promptbook/core` */ class ParseError extends Error { constructor(message) { super(message); this.name = 'ParseError'; Object.setPrototypeOf(this, ParseError.prototype); } } /** * TODO: Maybe split `ParseError` and `ApplyError` */ /** * Makes first letter of a string uppercase * * @public exported from `@promptbook/utils` */ function capitalize(word) { return word.substring(0, 1).toUpperCase() + word.substring(1); } /** * Extracts all code blocks from markdown. 
* * Note: There are multiple similar functions: * - `extractBlock` just extracts the content of the code block which is also used as built-in function for postprocessing * - `extractJsonBlock` extracts exactly one valid JSON code block * - `extractOneBlockFromMarkdown` extracts exactly one code block with language of the code block * - `extractAllBlocksFromMarkdown` extracts all code blocks with language of the code block * * @param markdown any valid markdown * @returns code blocks with language and content * @throws {ParseError} if block is not closed properly * @public exported from `@promptbook/markdown-utils` */ function extractAllBlocksFromMarkdown(markdown) { const codeBlocks = []; const lines = markdown.split('\n'); // Note: [0] Ensure that the last block notated by gt > will be closed lines.push(''); let currentCodeBlock = null; for (const line of lines) { if (line.startsWith('> ') || line === '>') { if (currentCodeBlock === null) { currentCodeBlock = { blockNotation: '>', language: null, content: '' }; } /* not else */ if (currentCodeBlock.blockNotation === '>') { if (currentCodeBlock.content !== '') { currentCodeBlock.content += '\n'; } currentCodeBlock.content += line.slice(2); } } else if (currentCodeBlock !== null && currentCodeBlock.blockNotation === '>' /* <- Note: [0] */) { codeBlocks.push(currentCodeBlock); currentCodeBlock = null; } /* not else */ if (line.startsWith('```')) { const language = line.slice(3).trim() || null; if (currentCodeBlock === null) { currentCodeBlock = { blockNotation: '```', language, content: '' }; } else { if (language !== null) { throw new ParseError(`${capitalize(currentCodeBlock.language || 'the')} code block was not closed and already opening new ${language} code block`); } codeBlocks.push(currentCodeBlock); currentCodeBlock = null; } } else if (currentCodeBlock !== null && currentCodeBlock.blockNotation === '```') { if (currentCodeBlock.content !== '') { currentCodeBlock.content += '\n'; } currentCodeBlock.content += 
line.split('\\`\\`\\`').join('```') /* <- TODO: Maybe make proper unescape */; } } if (currentCodeBlock !== null) { throw new ParseError(`${capitalize(currentCodeBlock.language || 'the')} code block was not closed at the end of the markdown`); } return codeBlocks; } /** * TODO: Maybe name for `blockNotation` instead of '```' and '>' */ /** * Extracts exactly ONE code block from markdown. * * - When there are multiple or no code blocks the function throws a `ParseError` * * Note: There are multiple similar functions: * - `extractBlock` just extracts the content of the code block which is also used as built-in function for postprocessing * - `extractJsonBlock` extracts exactly one valid JSON code block * - `extractOneBlockFromMarkdown` extracts exactly one code block with language of the code block * - `extractAllBlocksFromMarkdown` extracts all code blocks with language of the code block * * @param markdown any valid markdown * @returns code block with language and content * @public exported from `@promptbook/markdown-utils` * @throws {ParseError} if there is not exactly one code block in the markdown */ function extractOneBlockFromMarkdown(markdown) { const codeBlocks = extractAllBlocksFromMarkdown(markdown); if (codeBlocks.length !== 1) { throw new ParseError(spaceTrim((block) => ` There should be exactly 1 code block in task section, found ${codeBlocks.length} code blocks ${block(codeBlocks.map((block, i) => `Block ${i + 1}:\n${block.content}`).join('\n\n\n'))} `)); } return codeBlocks[0]; } /*** * TODO: [๐Ÿ“][๐ŸŒป] Decide of this is internal utility, external util OR validator/postprocessor */ /** * Extracts code block from markdown. 
* * - When there are multiple or no code blocks the function throws a `ParseError` * * Note: There are multiple similar function: * - `extractBlock` just extracts the content of the code block which is also used as build-in function for postprocessing * - `extractJsonBlock` extracts exactly one valid JSON code block * - `extractOneBlockFromMarkdown` extracts exactly one code block with language of the code block * - `extractAllBlocksFromMarkdown` extracts all code blocks with language of the code block * * @public exported from `@promptbook/markdown-utils` * @throws {ParseError} if there is not exactly one code block in the markdown */ function extractBlock(markdown) { const { content } = extractOneBlockFromMarkdown(markdown); return content; } /** * Returns the same value that is passed as argument. * No side effects. * * Note: It can be useful for: * * 1) Leveling indentation * 2) Putting always-true or always-false conditions without getting eslint errors * * @param value any values * @returns the same values * @private within the repository */ function just(value) { if (value === undefined) { return undefined; } return value; } /** * Warning message for the generated sections and files files * * @private within the repository */ const GENERATOR_WARNING = `โš ๏ธ WARNING: This code has been generated so that any manual changes will be overwritten`; /** * Name for the Promptbook * * TODO: [๐Ÿ—ฝ] Unite branding and make single place for it * * @public exported from `@promptbook/core` */ const NAME = `Promptbook`; /** * Email of the responsible person * * @public exported from `@promptbook/core` */ const ADMIN_EMAIL = 'pavol@ptbk.io'; /** * Name of the responsible person for the Promptbook on GitHub * * @public exported from `@promptbook/core` */ const ADMIN_GITHUB_NAME = 'hejny'; // <- TODO: [๐ŸŠ] Pick the best claim /** * When the title is not provided, the default title is used * * @public exported from `@promptbook/core` */ const DEFAULT_BOOK_TITLE = `โœจ 
Untitled Book`; /** * Maximum file size limit * * @public exported from `@promptbook/core` */ const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB /** * Threshold value that determines when a dataset is considered "big" * and may require special handling or optimizations * * For example, when error occurs in one item of the big dataset, it will not fail the whole pipeline * * @public exported from `@promptbook/core` */ const BIG_DATASET_TRESHOLD = 50; /** * Placeholder text used to represent a placeholder value of failed operation * * @public exported from `@promptbook/core` */ const FAILED_VALUE_PLACEHOLDER = '!?'; // <- TODO: [๐Ÿง ] Better system for generator warnings - not always "code" and "by `@promptbook/cli`" /** * The maximum number of iterations for a loops * * @private within the repository - too low-level in comparison with other `MAX_...` */ const LOOP_LIMIT = 1000; /** * Strings to represent various values in the context of parameter values * * @public exported from `@promptbook/utils` */ const VALUE_STRINGS = { empty: '(nothing; empty string)', null: '(no value; null)', undefined: '(unknown value; undefined)', nan: '(not a number; NaN)', infinity: '(infinity; โˆž)', negativeInfinity: '(negative infinity; -โˆž)', unserializable: '(unserializable value)', circular: '(circular JSON)', }; /** * Small number limit * * @public exported from `@promptbook/utils` */ const SMALL_NUMBER = 0.001; /** * Short time interval to prevent race conditions in milliseconds * * @private within the repository - too low-level in comparison with other `MAX_...` */ const IMMEDIATE_TIME = 10; /** * The maximum length of the (generated) filename * * @public exported from `@promptbook/core` */ const MAX_FILENAME_LENGTH = 30; /** * Strategy for caching the intermediate results for knowledge sources * * @public exported from `@promptbook/core` */ const DEFAULT_INTERMEDIATE_FILES_STRATEGY = 'HIDE_AND_KEEP'; // <- TODO: [๐Ÿ˜ก] Change to 'VISIBLE' /** * The maximum number of 
(LLM) tasks running in parallel
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [๐Ÿคนโ€โ™‚๏ธ]
/**
 * The maximum number of attempts to execute LLM task before giving up
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_MAX_EXECUTION_ATTEMPTS = 7; // <- TODO: [๐Ÿคนโ€โ™‚๏ธ]
// <- TODO: [๐Ÿ•] Make also `BOOKS_DIRNAME_ALTERNATIVES`
// TODO: Just `.promptbook` in config, hardcode subfolders like `download-cache` or `execution-cache`
/**
 * Where to store the temporary downloads (path relative to the working directory)
 *
 * Note: When the folder does not exist, it is created recursively
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
/**
 * Where to store the scrape cache (path relative to the working directory)
 *
 * Note: When the folder does not exist, it is created recursively
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_SCRAPE_CACHE_DIRNAME = './.promptbook/scrape-cache';
// <- TODO: [๐Ÿงœโ€โ™‚๏ธ]
/**
 * Default settings for parsing and generating CSV files in Promptbook.
 *
 * Note: The object is frozen, so these shared defaults cannot be modified in place
 * Note: The file imports `parse` and `unparse` from `papaparse`; presumably these settings are handed to them - confirm at the call sites
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_CSV_SETTINGS = Object.freeze({
    delimiter: ',',
    quoteChar: '"',
    newline: '\n',
    skipEmptyLines: true,
});
/**
 * Controls whether verbose logging is enabled by default throughout the application.
 *
 * Note: Declared with `let`; no reassignment is visible in this part of the file - presumably it happens elsewhere, verify before changing to `const`
 *
 * @public exported from `@promptbook/core`
 */
let DEFAULT_IS_VERBOSE = false;
/**
 * Controls whether auto-installation of dependencies is enabled by default.
 *
 * @public exported from `@promptbook/core`
 */
const DEFAULT_IS_AUTO_INSTALLED = false;
/**
 * Indicates whether pipeline logic validation is enabled. When true, the pipeline logic is checked for consistency.
* * @private within the repository */ const IS_PIPELINE_LOGIC_VALIDATED = just( /**/ // Note: In normal situations, we check the pipeline logic: true); /** * Note: [๐Ÿ’ž] Ignore a discrepancy between file name and entity name * TODO: [๐Ÿง ][๐Ÿงœโ€โ™‚๏ธ] Maybe join remoteServerUrl and path into single value */ /** * Make error report URL for the given error * * @private private within the repository */ function getErrorReportUrl(error) { const report = { title: `๐Ÿœ Error report from ${NAME}`, body: spaceTrim((block) => ` \`${error.name || 'Error'}\` has occurred in the [${NAME}], please look into it @${ADMIN_GITHUB_NAME}. \`\`\` ${block(error.message || '(no error message)')} \`\`\` ## More info: - **Promptbook engine version:** ${PROMPTBOOK_ENGINE_VERSION} - **Book language version:** ${BOOK_LANGUAGE_VERSION} - **Time:** ${new Date().toISOString()} <details> <summary>Stack trace:</summary> ## Stack trace: \`\`\`stacktrace ${block(error.stack || '(empty)')} \`\`\` </details> `), }; const reportUrl = new URL(`https://github.com/webgptorg/promptbook/issues/new`); reportUrl.searchParams.set('labels', 'bug'); reportUrl.searchParams.set('assignees', ADMIN_GITHUB_NAME); reportUrl.searchParams.set('title', report.title); reportUrl.searchParams.set('body', report.body); return reportUrl; } /** * This error type indicates that the error should not happen and its last check before crashing with some other error * * @public exported from `@promptbook/core` */ class UnexpectedError extends Error { constructor(message) { super(spaceTrim$1((block) => ` ${block(message)} Note: This error should not happen. 
It's probably a bug in the pipeline collection Please report issue: ${block(getErrorReportUrl(new Error(message)).href)} Or contact us on ${ADMIN_EMAIL} `)); this.name = 'UnexpectedError'; Object.setPrototypeOf(this, UnexpectedError.prototype); } } /** * This error type indicates that somewhere in the code non-Error object was thrown and it was wrapped into the `WrappedError` * * @public exported from `@promptbook/core` */ class WrappedError extends Error { constructor(whatWasThrown) { const tag = `[๐Ÿคฎ]`; console.error(tag, whatWasThrown); super(spaceTrim$1(` Non-Error object was thrown Note: Look for ${tag} in the console for more details Please report issue on ${ADMIN_EMAIL} `)); this.name = 'WrappedError'; Object.setPrototypeOf(this, WrappedError.prototype); } } /** * Helper used in catch blocks to assert that the error is an instance of `Error` * * @param whatWasThrown Any object that was thrown * @returns Nothing if the error is an instance of `Error` * @throws `WrappedError` or `UnexpectedError` if the error is not standard * * @private within the repository */ function assertsError(whatWasThrown) { // Case 1: Handle error which was rethrown as `WrappedError` if (whatWasThrown instanceof WrappedError) { const wrappedError = whatWasThrown; throw wrappedError; } // Case 2: Handle unexpected errors if (whatWasThrown instanceof UnexpectedError) { const unexpectedError = whatWasThrown; throw unexpectedError; } // Case 3: Handle standard errors - keep them up to consumer if (whatWasThrown instanceof Error) { return; } // Case 4: Handle non-standard errors - wrap them into `WrappedError` and throw throw new WrappedError(whatWasThrown); } /** * Function isValidJsonString will tell you if the string is valid JSON or not * * @param value The string to check * @returns `true` if the string is a valid JSON string, false otherwise * * @public exported from `@promptbook/utils` */ function isValidJsonString(value /* <- [๐Ÿ‘จโ€โš–๏ธ] */) { try { JSON.parse(value); return 
true; } catch (error) { assertsError(error); if (error.message.includes('Unexpected token')) { return false; } return false; } } /** * Extracts extracts exactly one valid JSON code block * * - When given string is a valid JSON as it is, it just returns it * - When there is no JSON code block the function throws a `ParseError` * - When there are multiple JSON code blocks the function throws a `ParseError` * * Note: It is not important if marked as ```json BUT if it is VALID JSON * Note: There are multiple similar function: * - `extractBlock` just extracts the content of the code block which is also used as build-in function for postprocessing * - `extractJsonBlock` extracts exactly one valid JSON code block * - `extractOneBlockFromMarkdown` extracts exactly one code block with language of the code block * - `extractAllBlocksFromMarkdown` extracts all code blocks with language of the code block * * @public exported from `@promptbook/markdown-utils` * @throws {ParseError} if there is no valid JSON block in the markdown */ function extractJsonBlock(markdown) { if (isValidJsonString(markdown)) { return markdown; } const codeBlocks = extractAllBlocksFromMarkdown(markdown); const jsonBlocks = codeBlocks.filter(({ content }) => isValidJsonString(content)); if (jsonBlocks.length === 0) { throw new Error('There is no valid JSON block in the markdown'); } if (jsonBlocks.length > 1) { throw new Error('There are multiple JSON code blocks in the markdown'); } return jsonBlocks[0].content; } /** * TODO: Add some auto-healing logic + extract YAML, JSON5, TOML, etc. * TODO: [๐Ÿข] Make this logic part of `JsonFormatParser` or `isValidJsonString` */ /** * Just says that the variable is not used but should be kept * No side effects. 
*
 * Note: It can be useful for:
 *
 * 1) Suppressing eager optimization of unused imports
 * 2) Suppressing eslint errors of unused variables in the tests
 * 3) Keeping the type of the variable for type testing
 *
 * @param valuesToKeep any number of values; they are accepted and ignored (the body is intentionally empty)
 * @returns void
 * @private within the repository
 */
function keepUnused(...valuesToKeep) { }
// Pipelines bundled with the package as plain data; each item carries `title`, `pipelineUrl`, `parameters`, `tasks` and the original book in `sources`
// Note: The literal continues on the following lines exactly as in the original
var PipelineCollection = [{title:"Prepare Knowledge from Markdown",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book",formfactorName:"GENERIC",parameters:[{name:"knowledgeContent",description:"Markdown document content",isInput:true,isOutput:false},{name:"knowledgePieces",description:"The knowledge JSON object",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}",resultingParameterName:"knowledgePieces",dependentParameterNames:["knowledgeContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge from Markdown\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-from-markdown.book`\n- INPUT PARAMETER `{knowledgeContent}` Markdown document content\n- OUTPUT PARAMETER `{knowledgePieces}` The knowledge JSON object\n\n## Knowledge\n\n<!-- TODO: [๐Ÿ†] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, extract the important knowledge from the document.\n\n# Rules\n\n- Make pieces of information concise, clear, and easy to understand\n- One piece of information 
should be approximately 1 paragraph\n- Divide the paragraphs by markdown horizontal lines ---\n- Omit irrelevant information\n- Group redundant information\n- Write just extracted information, nothing else\n\n# The document\n\nTake information from this document:\n\n> {knowledgeContent}\n```\n\n`-> {knowledgePieces}`\n"}],sourceFile:"./books/prepare-knowledge-from-markdown.book"},{title:"Prepare Keywords",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-keywords.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"keywords",description:"Keywords separated by comma",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}",resultingParameterName:"keywords",dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Keywords\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-keywords.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{keywords}` Keywords separated by comma\n\n## Knowledge\n\n<!-- TODO: [๐Ÿ†] -FORMAT JSON -->\n\n```markdown\nYou are experienced data researcher, detect the important keywords in the document.\n\n# Rules\n\n- Write just keywords separated by comma\n\n# The document\n\nTake information from this document:\n\n> {knowledgePieceContent}\n```\n\n`-> {keywords}`\n"}],sourceFile:"./books/prepare-knowledge-keywords.book"},{title:"Prepare Knowledge-piece 
Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-knowledge-title.book",formfactorName:"GENERIC",parameters:[{name:"knowledgePieceContent",description:"The content",isInput:true,isOutput:false},{name:"title",description:"The title of the document",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"knowledge",title:"Knowledge",content:"You are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}",resultingParameterName:"title",expectations:{words:{min:1,max:8}},dependentParameterNames:["knowledgePieceContent"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Knowledge-piece Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-knowledge-title.book`\n- INPUT PARAMETER `{knowledgePieceContent}` The content\n- OUTPUT PARAMETER `{title}` The title of the document\n\n## Knowledge\n\n- EXPECT MIN 1 WORD\n- EXPECT MAX 8 WORDS\n\n```markdown\nYou are experienced content creator, write best title for the document.\n\n# Rules\n\n- Write just title, nothing else\n- Write maximum 5 words for the title\n\n# The document\n\n> {knowledgePieceContent}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-knowledge-title.book"},{title:"Prepare Persona",pipelineUrl:"https://promptbook.studio/promptbook/prepare-persona.book",formfactorName:"GENERIC",parameters:[{name:"availableModels",description:"List of available model names together with their descriptions as JSON",isInput:true,isOutput:false},{name:"personaDescription",description:"Description of the persona",isInput:true,isOutput:false},{name:"modelsRequirements",description:"Specific requirements for the model",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-model-requirements",title:"Make modelRequirements",content:"You are an experienced AI 
engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n```json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpful assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n```\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are the available models:\n\n```json\n{availableModels}\n```\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}",resultingParameterName:"modelsRequirements",format:"JSON",dependentParameterNames:["availableModels","personaDescription"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Persona\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-persona.book`\n- INPUT PARAMETER `{availableModels}` List of available model names together with their descriptions as JSON\n- INPUT PARAMETER `{personaDescription}` Description of the persona\n- OUTPUT PARAMETER `{modelsRequirements}` Specific requirements for the model\n\n## Make modelRequirements\n\n- FORMAT JSON\n\n```markdown\nYou are an experienced AI engineer, you need to find the best models for virtual assistants:\n\n## Example\n\n\\`\\`\\`json\n[\n {\n \"modelName\": \"gpt-4o\",\n \"systemMessage\": \"You are experienced AI engineer and helpful assistant.\",\n \"temperature\": 0.7\n },\n {\n \"modelName\": \"claude-3-5-sonnet\",\n \"systemMessage\": \"You are a friendly and knowledgeable chatbot.\",\n \"temperature\": 0.5\n }\n]\n\\`\\`\\`\n\n## Instructions\n\n- Your output format is JSON array\n- Sort best-fitting models first\n- Omit any models that are not suitable\n- Write just the JSON, no other text should be present\n- Array contain items with following keys:\n - `modelName`: The name of the model to use\n - `systemMessage`: The system message to provide context to the model\n - `temperature`: The sampling temperature to use\n\n### Key `modelName`\n\nHere are 
the available models:\n\n\\`\\`\\`json\n{availableModels}\n\\`\\`\\`\n\n### Key `systemMessage`\n\nThe system message is used to communicate instructions or provide context to the model at the beginning of a conversation. It is displayed in a different format compared to user messages, helping the model understand its role in the conversation. The system message typically guides the model's behavior, sets the tone, or specifies desired output from the model. By utilizing the system message effectively, users can steer the model towards generating more accurate and relevant responses.\n\nFor example:\n\n> You are an experienced AI engineer and helpful assistant.\n\n> You are a friendly and knowledgeable chatbot.\n\n### Key `temperature`\n\nThe sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n\nYou can pick a value between 0 and 2. 
For example:\n\n- `0.1`: Low temperature, extremely conservative and deterministic\n- `0.5`: Medium temperature, balanced between conservative and creative\n- `1.0`: High temperature, creative and bit random\n- `1.5`: Very high temperature, extremely creative and often chaotic and unpredictable\n- `2.0`: Maximum temperature, completely random and unpredictable, for some extreme creative use cases\n\n# The assistant\n\nTake this description of the persona:\n\n> {personaDescription}\n```\n\n`-> {modelsRequirements}`\n"}],sourceFile:"./books/prepare-persona.book"},{title:"Prepare Title",pipelineUrl:"https://promptbook.studio/promptbook/prepare-title.book",formfactorName:"GENERIC",parameters:[{name:"book",description:"The book to prepare the title for",isInput:true,isOutput:false},{name:"title",description:"Best title for the book",isInput:false,isOutput:true}],tasks:[{taskType:"PROMPT_TASK",name:"make-title",title:"Make title",content:"Make best title for given text which describes the workflow:\n\n## Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"โœ Convert Knowledge-piece to title\" but \"โœ Title\"_\n\n## The workflow\n\n> {book}",resultingParameterName:"title",expectations:{words:{min:1,max:8},lines:{min:1,max:1}},dependentParameterNames:["book"]}],personas:[],preparations:[],knowledgeSources:[],knowledgePieces:[],sources:[{type:"BOOK",path:null,content:"# Prepare Title\n\n- PIPELINE URL `https://promptbook.studio/promptbook/prepare-title.book`\n- INPUT PARAMETER `{book}` The book to prepare the title for\n- OUTPUT PARAMETER `{title}` Best title for the book\n\n## Make title\n\n- EXPECT MIN 1 Word\n- EXPECT MAX 8 Words\n- EXPECT EXACTLY 1 Line\n\n```markdown\nMake best title for given text which describes the workflow:\n\n## 
Rules\n\n- Write just title, nothing else\n- Title should be concise and clear - Write maximum ideally 2 words, maximum 5 words\n- Title starts with emoticon\n- Title should not mention the input and output of the workflow but the main purpose of the workflow\n _For example, not \"โœ Convert Knowledge-piece to title\" but \"โœ Title\"_\n\n## The workflow\n\n> {book}\n```\n\n`-> {title}`\n"}],sourceFile:"./books/prepare-title.book"}]; /** * Checks if value is valid email * * @public exported from `@promptbook/utils` */ function isValidEmail(email) { if (typeof email !== 'string') { return false; } if (email.split('\n').length > 1) { return false; } return /^.+@.+\..+$/.test(email); } /** * Tests if given string is valid URL. * * Note: This does not check if the file exists only if the path is valid * @public exported from `@promptbook/utils` */ function isValidFilePath(filename) { if (typeof filename !== 'string') { return false; } if (filename.split('\n').length > 1) { return false; } if (filename.split(' ').length > 5 /* <- TODO: [๐Ÿง ][๐Ÿˆท] Make some better non-arbitrary way how to distinct filenames from informational texts */) { return false; } const filenameSlashes = filename.split('\\').join('/'); // Absolute Unix path: /hello.txt if (/^(\/)/i.test(filenameSlashes)) { // console.log(filename, 'Absolute Unix path: /hello.txt'); return true; } // Absolute Windows path: /hello.txt if (/^([A-Z]{1,2}:\/?)\//i.test(filenameSlashes)) { // console.log(filename, 'Absolute Windows path: /hello.txt'); return true; } // Relative path: ./hello.txt if (/^(\.\.?\/)+/i.test(filenameSlashes)) { // console.log(filename, 'Relative path: ./hello.txt'); return true; } // Allow paths like foo/hello if (/^[^/]+\/[^/]+/i.test(filenameSlashes)) { // console.log(filename, 'Allow paths like foo/hello'); return true; } // Allow paths like hello.book if (/^[^/]+\.[^/]+$/i.test(filenameSlashes)) { // console.log(filename, 'Allow paths like hello.book'); return true; } return false; } 
/** * TODO: [๐Ÿ] Implement for MacOs */ /** * Tests if given string is valid URL. * * Note: Dataurl are considered perfectly valid. * Note: There are two similar functions: * - `isValidUrl` which tests any URL * - `isValidPipelineUrl` *(this one)* which tests just promptbook URL * * @public exported from `@promptbook/utils` */ function isValidUrl(url) { if (typeof url !== 'string') { return false; } try { if (url.startsWith('blob:')) { url = url.replace(/^blob:/, ''); } const urlObject = new URL(url /* because fail is handled */); if (!['http:', 'https:', 'data:'].includes(urlObject.protocol)) { return false; } return true; } catch (error) { return false; } } /** * Function `validatePipelineString` will validate the if the string is a valid pipeline string * It does not check if the string is fully logically correct, but if it is a string that can be a pipeline string or the string looks completely different. * * @param {string} pipelineString the candidate for a pipeline string * @returns {PipelineString} the same string as input, but validated as valid * @throws {ParseError} if the string is not a valid pipeline string * @public exported from `@promptbook/core` */ function validatePipelineString(pipelineString) { if (isValidJsonString(pipelineString)) { throw new ParseError('Expected a book, but got a JSON string'); } else if (isValidUrl(pipelineString)) { throw new ParseError(`Expected a book, but got just the URL "${pipelineString}"`); } else if (isValidFilePath(pipelineString)) { throw new ParseError(`Expected a book, but got just the file path "${pipelineString}"`); } else if (isValidEmail(pipelineString)) { throw new ParseError(`Expected a book, but got just the email "${pipelineString}"`); } // <- TODO: Implement the validation + add tests when the pipeline logic considered as invalid return pipelineString; } /** * TODO: [๐Ÿง ][๐Ÿˆด] Where is the best location for this file */ /** * Prettify the html code * * @param content raw html code * @returns 
formatted html code * @private withing the package because of HUGE size of prettier dependency */ function prettifyMarkdown(content) { try { return format(content, { parser: 'markdown', plugins: [parserHtml], // TODO: DRY - make some import or auto-copy of .prettierrc endOfLine: 'lf', tabWidth: 4, singleQuote: true, trailingComma: 'all', arrowParens: 'always', printWidth: 120, htmlWhitespaceSensitivity: 'ignore', jsxBracketSameLine: false, bracketSpacing: true, }); } catch (error) { // TODO: [๐ŸŸฅ] Detect browser / node and make it colorful console.error('There was an error with prettifying the markdown, using the original as the fallback', { error, html: content, }); return content; } } /** * Converts promptbook in JSON format to string format * * @deprecated TODO: [๐Ÿฅ][๐Ÿง ] Backup original files in `PipelineJson` same as in Promptbook.studio * @param pipelineJson Promptbook in JSON format (.bookc) * @returns Promptbook in string format (.book.md) * @public exported from `@promptbook/core` */ function pipelineJsonToString(pipelineJson) { const { title, pipelineUrl, bookVersion, description, parameters, tasks } = pipelineJson; let pipelineString = `# ${title}`; if (description) { pipelineString += '\n\n'; pipelineString += description; } const commands = []; if (pipelineUrl) { commands.push(`PIPELINE URL ${pipelineUrl}`); } if (bookVersion !== `undefined`) { commands.push(`BOOK VERSION ${bookVersion}`); } // TODO: [main] !!5 This increases size of the bundle and is probably not necessary pipelineString = prettifyMarkdown(pipelineString); for (const parameter of parameters.filter(({ isInput }) => isInput)) { commands.push(`INPUT PARAMETER ${taskParameterJsonToString(parameter)}`); } for (const parameter of parameters.filter(({ isOutput }) => isOutput)) { commands.push(`OUTPUT PARAMETER ${taskParameterJsonToString(parameter)}`); } pipelineString += '\n\n'; pipelineString += commands.map((command) => `- ${command}`).join('\n'); for (const task of tasks) { const { 
/* Note: Not using:> name, */ title, description, /* Note: dependentParameterNames, */ jokerParameterNames: jokers, taskType, content, postprocessingFunctionNames: postprocessing, expectations, format, resultingParameterName, } = task; pipelineString += '\n\n'; pipelineString += `## ${title}`; if (description) { pipelineString += '\n\n'; pipelineString += description; } const commands = []; let contentLanguage = 'text'; if (taskType === 'PROMPT_TASK') { const { modelRequirements } = task; const { modelName, modelVariant } = modelRequirements || {}; // Note: Do nothing, it is default // commands.push(`PROMPT`); if (modelVariant) { commands.push(`MODEL VARIANT ${capitalize(modelVariant)}`); } if (modelName) { commands.push(`MODEL NAME \`${modelName}\``); } } else if (taskType === 'SIMPLE_TASK') { commands.push(`SIMPLE TEMPLATE`); // Note: Nothing special here } else if (taskType === 'SCRIPT_TASK') { commands.push(`SCRIPT`); if (task.contentLanguage) { contentLanguage = task.contentLanguage; } else { contentLanguage = ''; } } else if (taskType === 'DIALOG_TASK') { commands.push(`DIALOG`); // Note: Nothing special here } // <- }else if([๐Ÿ…ฑ] if (jokers) { for (const joker of jokers) { commands.push(`JOKER {${joker}}`); } } /* not else */ if (postprocessing) { for (const postprocessingFunctionName of postprocessing) { commands.push(`POSTPROCESSING \`${postprocessingFunctionName}\``); } } /* not else */ if (expectations) { for (const [unit, { min, max }] of Object.entries(expectations)) { if (min === max) { commands.push(`EXPECT EXACTLY ${min} ${capitalize(unit + (min > 1 ? 's' : ''))}`); } else { if (min !== undefined) { commands.push(`EXPECT MIN ${min} ${capitalize(unit + (min > 1 ? 's' : ''))}`); } /* not else */ if (max !== undefined) { commands.push(`EXPECT MAX ${max} ${capitalize(unit + (max > 1 ? 
's' : ''))}`); } } } } /* not else */ if (format) { if (format === 'JSON') { // TODO: @deprecated remove commands.push(`FORMAT JSON`); } } /* not else */ pipelineString += '\n\n'; pipelineString += commands.map((command) => `- ${command}`).join('\n'); pipelineString += '\n\n'; pipelineString += '```' + contentLanguage; pipelineString += '\n'; pipelineString += spaceTrim(content); // <- TODO: [main] !!3 Escape // <- TODO: [๐Ÿง ] Some clear strategy how to spaceTrim the blocks pipelineString += '\n'; pipelineString += '```'; pipelineString += '\n\n'; pipelineString += `\`-> {${resultingParameterName}}\``; // <- TODO: [main] !!3 If the parameter here has description, add it and use taskParameterJsonToString } return validatePipelineString(pipelineString); } /** * @private internal utility of `pipelineJsonToString` */ function taskParameterJsonToString(taskParameterJson) { const { name, description } = taskParameterJson; let parameterString = `{${name}}`; if (description) { parameterString = `${parameterString} ${description}`; } return parameterString; } /** * TODO: [๐Ÿ›‹] Implement new features and commands into `pipelineJsonToString` + `taskParameterJsonToString` , use `stringifyCommand` * TODO: [๐Ÿง ] Is there a way to auto-detect missing features in pipelineJsonToString * TODO: [๐Ÿ›] Maybe make some markdown builder * TODO: [๐Ÿ›] Escape all * TODO: [๐Ÿง ] Should be in generated .book.md file GENERATOR_WARNING */ /** * Orders JSON object by keys * * @returns The same type of object as the input re-ordered * @public exported from `@promptbook/utils` */ function orderJson(options) { const { value, order } = options; const orderedValue = { ...(order === undefined ? 
{} : Object.fromEntries(order.map((key) => [key, undefined]))), ...value, }; return orderedValue; } /** * Freezes the given object and all its nested objects recursively * * Note: `$` is used to indicate that this function is not a pure function - it mutates given object * Note: This function mutates the object and returns the original (but mutated-deep-freezed) object * * @returns The same object as the input, but deeply frozen * @public exported from `@promptbook/utils` */ function $deepFreeze(objectValue) { if (Array.isArray(objectValue)) { return Object.freeze(objectValue.map((item) => $deepFreeze(item))); } const propertyNames = Object.getOwnPropertyNames(objectValue); for (const propertyName of propertyNames) { const value = objectValue[propertyName]; if (value && typeof value === 'object') { $deepFreeze(value); } } Object.freeze(objectValue); return objectValue; } /** * TODO: [๐Ÿง ] Is there a way how to meaningfully test this utility */ /** * Checks if the value is [๐Ÿš‰] serializable as JSON * If not, throws an UnexpectedError with a rich error message and tracking * * - Almost all primitives are serializable BUT: * - `undefined` is not serializable * - `NaN` is not serializable * - Objects and arrays are serializable if all their properties are serializable * - Functions are not serializable * - Circular references are not serializable * - `Date` objects are not serializable * - `Map` and `Set` objects are not serializable * - `RegExp` objects are not serializable * - `Error` objects are not serializable * - `Symbol` objects are not serializable * - And much more... 
* * @throws UnexpectedError if the value is not serializable as JSON * @public exported from `@promptbook/utils` */ function checkSerializableAsJson(options) { const { value, name, message } = options; if (value === undefined) { throw new UnexpectedError(`${name} is undefined`); } else if (value === null) { return; } else if (typeof value === 'boolean') { return; } else if (typeof value === 'number' && !isNaN(value)) { return; } else if (typeof value === 'string') { return; } else if (typeof value === 'symbol') { throw new UnexpectedError(`${name} is symbol`); } else if (typeof value === 'function') { throw new UnexpectedError(`${name} is function`); } else if (typeof value === 'object' && Array.isArray(value)) { for (let i = 0; i < value.length; i++) { checkSerializableAsJson({ name: `${name}[${i}]`, value: value[i], message }); } } else if (typeof value === 'object') { if (value instanceof Date) { throw new UnexpectedError(spaceTrim((block) => ` \`${name}\` is Date Use \`string_date_iso8601\` instead Additional message for \`${name}\`: ${block(message || '(nothing)')} `)); } else if (value instanceof Map) { throw new UnexpectedError(`${name} is Map`); } else if (value instanceof Set) { throw new UnexpectedError(`${name} is Set`); } else if (value instanceof RegExp) { throw new UnexpectedError(`${name} is RegExp`); } else if (value instanceof Error) { throw new UnexpectedError(spaceTrim((block) => ` \`${name}\` is unserialized Error Use function \`serializeError\` Additional message for \`${name}\`: ${block(message || '(nothing)')} `)); } else { for (const [subName, subValue] of Object.entries(value)) { if (subValue === undefined) { // Note: undefined in object is serializable - it is just omitted continue; } checkSerializableAsJson({ name: `${name}.${subName}`, value: subValue, message }); } try { JSON.stringify(value); // <- TODO: [0] } catch (error) { assertsError(error); throw new UnexpectedError(spaceTrim((block) => ` \`${name}\` is not serializable 
${block(error.stack || error.message)} Additional message for \`${name}\`: ${block(message || '(nothing)')} `)); } /* TODO: [0] Is there some more elegant way to check circular references? const seen = new Set(); const stack = [{ value }]; while (stack.length > 0) { const { value } = stack.pop()!; if (typeof value === 'object' && value !== null) { if (seen.has(value)) { throw new UnexpectedError(`${name} has circular reference`); } seen.add(value); if (Array.isArray(value)) { stack.push(...value.map((value) => ({ value }))); } else { stack.push(...Object.values(value).map((value) => ({ value }))); } } } */ return; } } else { throw new UnexpectedError(spaceTrim((block) => ` \`${name}\` is unknown type Additional message for \`${name}\`: ${block(message || '(nothing)')} `)); } } /** * TODO: Can be return type more type-safe? like `asserts options.value is JsonValue` * TODO: [๐Ÿง ][main] !!3 In-memory cache of same values to prevent multiple checks * Note: [๐Ÿ ] This is how `checkSerializableAsJson` + `isSerializableAsJson` together can just retun true/false or rich error message */ /** * Creates a deep clone of the given object * * Note: This method only works for objects that are fully serializable to JSON and do not contain functions, Dates, or special types. * * @param objectValue The object to clone. * @returns A deep, writable clone of the input object. * @public exported from `@promptbook/utils` */ function deepClone(objectValue) { return JSON.parse(JSON.stringify(objectValue)); /* TODO: [๐Ÿง ] Is there a better implementation? 
> const propertyNames = Object.getOwnPropertyNames(objectValue); > for (const propertyName of propertyNames) { > const value = (objectValue as really_any)[propertyName]; > if (value && typeof value === 'object') { > deepClone(value); > } > } > return Object.assign({}, objectValue); */ } /** * TODO: [๐Ÿง ] Is there a way how to meaningfully test this utility */ /** * Utility to export a JSON object from a function * * 1) Checks if the value is serializable as JSON * 2) Makes a deep clone of the object * 2) Orders the object properties * 2) Deeply freezes the cloned object * * Note: This function does not mutates the given object * * @returns The same type of object as the input but read-only and re-ordered * @public exported from `@promptbook/utils` */ function exportJson(options) { const { name, value, order, message } = options; checkSerializableAsJson({ name,