UNPKG

dotprompt

Version:

Dotprompt: Executable GenAI Prompt Templates

445 lines (405 loc) 13.2 kB
/** * Copyright 2024 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * SPDX-License-Identifier: Apache-2.0 */ import { parse } from 'yaml'; import type { DataArgument, MediaPart, Message, ParsedPrompt, Part, PendingPart, PromptMetadata, Role, TextPart, } from './types'; /** * A message source is a message with a source string and optional content and * metadata. */ export type MessageSource = { role: Role; source?: string; content?: Message['content']; metadata?: Record<string, unknown>; }; /** * Prefixes for the role markers in the template. */ export const ROLE_MARKER_PREFIX = '<<<dotprompt:role:'; /** * Prefixes for the history markers in the template. */ export const HISTORY_MARKER_PREFIX = '<<<dotprompt:history'; /** * Prefixes for the media markers in the template. */ export const MEDIA_MARKER_PREFIX = '<<<dotprompt:media:'; /** * Prefixes for the section markers in the template. */ export const SECTION_MARKER_PREFIX = '<<<dotprompt:section'; /** * Regular expression to match YAML frontmatter delineated by `---` markers at * the start of a .prompt content block. */ export const FRONTMATTER_AND_BODY_REGEX = /^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/; /** * Regular expression to match <<<dotprompt:role:xxx>>> and * <<<dotprompt:history>>> markers in the template. * * Examples of matching patterns: * - <<<dotprompt:role:user>>> * - <<<dotprompt:role:assistant>>> * - <<<dotprompt:role:system>>> * - <<<dotprompt:history>>> * * Note: Only lowercase letters are allowed after 'role:'. */ export const ROLE_AND_HISTORY_MARKER_REGEX = /(<<<dotprompt:(?:role:[a-z]+|history))>>>/g; /** * Regular expression to match <<<dotprompt:media:url>>> and * <<<dotprompt:section>>> markers in the template. * * Examples of matching patterns: * - <<<dotprompt:media:url>>> * - <<<dotprompt:section>>> */ export const MEDIA_AND_SECTION_MARKER_REGEX = /(<<<dotprompt:(?:media:url|section).*?)>>>/g; /** * List of reserved keywords that are handled specially in the metadata. * These keys are processed differently from extension metadata. */ export const RESERVED_METADATA_KEYWORDS: (keyof PromptMetadata)[] = [ // NOTE: KEEP SORTED 'config', 'description', 'ext', 'input', 'model', 'name', 'output', 'raw', 'toolDefs', 'tools', 'variant', 'version', ]; /** * Default metadata structure with empty extension and configuration objects. */ const BASE_METADATA: PromptMetadata<any> = { ext: {}, metadata: {}, config: {}, }; /** * Splits a string by a regular expression while filtering out * empty/whitespace-only pieces. * * @param source The source string to split into parts. * @param regex The regular expression to use for splitting. * @return An array of non-empty string pieces. */ export function splitByRegex(source: string, regex: RegExp): string[] { return source.split(regex).filter((s) => s.trim() !== ''); } /** * Splits a rendered template string into pieces based on role and history * markers while filtering out empty/whitespace-only pieces. * * @param renderedString The template string to split. * @return Array of non-empty string pieces. */ export function splitByRoleAndHistoryMarkers(renderedString: string): string[] { return splitByRegex(renderedString, ROLE_AND_HISTORY_MARKER_REGEX); } /** * Split the source into pieces based on media and section markers while * filtering out empty/whitespace-only pieces. * * @param source The source string to split into parts * @return An array of string parts */ export function splitByMediaAndSectionMarkers(source: string): string[] { return splitByRegex(source, MEDIA_AND_SECTION_MARKER_REGEX); } /** * Processes a namespaced key-value pair into a nested object structure. * For example, 'foo.bar': 'value' becomes { foo: { bar: 'value' } } * * @param key The dotted namespace key (e.g., 'foo.bar') * @param value The value to assign * @param obj The object to add the namespaced value to * @returns The updated target object */ export function convertNamespacedEntryToNestedObject( key: string, value: unknown, obj: Record<string, Record<string, unknown>> = {} ): Record<string, Record<string, unknown>> { // NOTE: Goes only a single level deep. const result = obj || {}; const lastDotIndex = key.lastIndexOf('.'); const ns = key.substring(0, lastDotIndex); const field = key.substring(lastDotIndex + 1); // Ensure the namespace exists. result[ns] = result[ns] || {}; result[ns][field] = value; return result; } /** * Extracts the YAML frontmatter and body from a document. * * @param source The source document containing frontmatter and template * @returns An object containing the frontmatter and body If the pattern does * not match, both the values returned will be empty. */ export function extractFrontmatterAndBody(source: string) { const match = source.match(FRONTMATTER_AND_BODY_REGEX); if (match) { const [, frontmatter, body] = match; return { frontmatter, body }; } return { frontmatter: '', body: '' }; } /** * Parses a document containing YAML frontmatter and a template content section. * The frontmatter contains metadata and configuration for the prompt. * * @template ModelConfig Type for model-specific configuration * @param source The source document containing frontmatter and template * @return Parsed prompt with metadata and template content */ export function parseDocument<ModelConfig = Record<string, any>>( source: string ): ParsedPrompt<ModelConfig> { const { frontmatter, body } = extractFrontmatterAndBody(source); if (frontmatter) { try { const parsedMetadata = parse(frontmatter) as PromptMetadata<ModelConfig>; const raw = { ...parsedMetadata }; const pruned: PromptMetadata<ModelConfig> = { ...BASE_METADATA }; const ext: PromptMetadata['ext'] = {}; for (const k in raw) { const key = k as keyof PromptMetadata; if (RESERVED_METADATA_KEYWORDS.includes(key)) { pruned[key] = raw[key] as any; } else if (key.includes('.')) { convertNamespacedEntryToNestedObject(key, raw[key], ext); } } return { ...pruned, raw, ext, template: body.trim() }; } catch (error) { console.error('Dotprompt: Error parsing YAML frontmatter:', error); return { ...BASE_METADATA, template: source.trim() }; } } // No frontmatter, return a basic ParsedPrompt with just the template return { ...BASE_METADATA, template: source }; } /** * Processes an array of message sources into an array of messages. * * @param messageSources Array of message sources * @returns Array of structured messages */ export function messageSourcesToMessages( messageSources: MessageSource[] ): Message[] { return messageSources .filter((ms) => ms.content || ms.source) .map((m) => { const out: Message = { role: m.role as Role, content: m.content || toParts(m.source || ''), }; if (m.metadata) { out.metadata = m.metadata; } return out; }); } /** * Transforms an array of messages by adding history metadata to each message. * * @param messages Array of messages to transform * @returns Array of messages with history metadata added */ export function transformMessagesToHistory( messages: Array<Message> ): Array<Message> { return messages.map((m) => ({ ...m, metadata: { ...m.metadata, purpose: 'history' }, })); } /** * Converts a rendered template string into an array of messages. Processes * role markers and history placeholders to structure the conversation. * * @template ModelConfig Type for model-specific configuration * @param renderedString The rendered template string to convert * @param data Optional data containing message history * @return Array of structured messages */ export function toMessages<ModelConfig = Record<string, unknown>>( renderedString: string, data?: DataArgument ): Message[] { let currentMessage: MessageSource = { role: 'user', source: '' }; const messageSources: MessageSource[] = [currentMessage]; for (const piece of splitByRoleAndHistoryMarkers(renderedString)) { if (piece.startsWith(ROLE_MARKER_PREFIX)) { const role = piece.substring(ROLE_MARKER_PREFIX.length) as Role; if (currentMessage.source?.trim()) { // If the current message has a source, reset it. currentMessage = { role, source: '' }; messageSources.push(currentMessage); } else { // Otherwise, update the role of the current message. currentMessage.role = role; } } else if (piece.startsWith(HISTORY_MARKER_PREFIX)) { // Add the history messages to the message sources. const historyMessages = transformMessagesToHistory(data?.messages ?? []); if (historyMessages) { messageSources.push(...historyMessages); } // Add a new message source for the model. currentMessage = { role: 'model', source: '' }; messageSources.push(currentMessage); } else { // Otherwise, add the piece to the current message source. currentMessage.source += piece; } } const messages: Message[] = messageSourcesToMessages(messageSources); return insertHistory(messages, data?.messages); } /** * Checks if the messages have history metadata. * * @param messages The messages to check * @return True if the messages have history metadata, false otherwise */ export function messagesHaveHistory(messages: Message[]): boolean { return messages.some((m) => m.metadata?.purpose === 'history'); } /** * Inserts historical messages into the conversation at appropriate positions. * * The history is inserted at: * - Before the last user message if there is a user message. * - The end of the conversation if there is no history or no user message. * * The history is not inserted: * - If it already exists in the messages. * - If there is no user message. * * @param messages Current array of messages * @param history Historical messages to insert * @return Messages with history inserted */ export function insertHistory( messages: Message[], history: Message[] = [] ): Message[] { // If we have no history or find an existing instance of history, return the // original messages unmodified. if (!history || messagesHaveHistory(messages)) { return messages; } // If there are no messages, return the history. if (messages.length === 0) { return history; } const lastMessage = messages.at(-1); if (lastMessage?.role === 'user') { // If the last message is a user message, insert the history before it. const messagesWithoutLast = messages.slice(0, -1); return [...messagesWithoutLast, ...history, lastMessage]; } // Otherwise, append the history to the end of the messages. return [...messages, ...history]; } /** * Converts a source string into an array of parts, processing media and section * markers. * * @param source The source string to convert into parts * @return Array of structured parts (text, media, or metadata) */ export function toParts(source: string): Part[] { return splitByMediaAndSectionMarkers(source).map(parsePart); } /** * Parses a part from a string. * * @param piece The piece to parse * @return Parsed part */ export function parsePart(piece: string): Part { if (piece.startsWith(MEDIA_MARKER_PREFIX)) { return parseMediaPart(piece); } else if (piece.startsWith(SECTION_MARKER_PREFIX)) { return parseSectionPart(piece); } return parseTextPart(piece); } /** * Parses a media part from a string. * * @param piece The piece to parse * @return Parsed media part */ export function parseMediaPart(piece: string): MediaPart { if (!piece.startsWith(MEDIA_MARKER_PREFIX)) { throw new Error('Invalid media piece'); } const [_, url, contentType] = piece.split(' '); const part: MediaPart = { media: { url } }; if (contentType) { part.media.contentType = contentType; } return part; } /** * Parses a section part from a string. * * @param piece The piece to parse * @return Parsed section part */ export function parseSectionPart(piece: string): PendingPart { if (!piece.startsWith(SECTION_MARKER_PREFIX)) { throw new Error('Invalid section piece'); } const [_, sectionType] = piece.split(' '); return { metadata: { purpose: sectionType, pending: true } }; } /** * Parses a text part from a string. * * @param piece The piece to parse * @return Parsed text part */ export function parseTextPart(piece: string): TextPart { return { text: piece }; }