UNPKG

ai-functions

Version:

Core AI primitives for building intelligent applications

132 lines (131 loc) 4.68 kB
/** * Structured Output Eval * * Tests model ability to generate valid structured JSON output * matching specified schemas across all providers. */ import { evalite } from 'evalite'; import { generateObject } from '../src/generate.js'; import { schema } from '../src/schema.js'; import { createModelVariants, getModelPricing } from '../src/eval/models.js'; // Test cases for structured output const TEST_CASES = [ { name: 'Simple object', prompt: 'Generate a greeting in French', schema: { greeting: 'A friendly greeting', language: 'The language of the greeting', }, expectedTypes: { greeting: 'string', language: 'string' }, }, { name: 'With numbers', prompt: 'Generate info about Tokyo', schema: { name: 'City name', population: 'Population in millions (number)', area: 'Area in square kilometers (number)', }, expectedTypes: { name: 'string', population: 'number', area: 'number' }, }, { name: 'With arrays', prompt: 'Generate a simple pasta recipe', schema: { title: 'Recipe title', ingredients: ['List of ingredients'], steps: ['Cooking steps'], }, expectedTypes: { title: 'string', ingredients: 'array', steps: 'array' }, }, { name: 'With enum', prompt: 'Analyze sentiment: "I love this product!"', schema: { sentiment: 'positive | negative | neutral', confidence: 'Confidence score 0-1 (number)', }, expectedTypes: { sentiment: 'string', confidence: 'number' }, }, { name: 'Nested object', prompt: 'Generate a fictional person living in Japan', schema: { person: { name: 'Full name', age: 'Age (number)' }, address: { city: 'City name', country: 'Country name' }, }, expectedTypes: { person: 'object', address: 'object' }, }, ]; // Test across models - start with fast tier for quick iteration const modelVariants = createModelVariants({ tiers: ['fast'] }); evalite.each(modelVariants)('Structured Output', { data: TEST_CASES.map(tc => ({ input: tc })), task: async (input, variant) => { const model = variant; const startTime = Date.now(); const { object, usage } = await generateObject({ model: model.id, schema: schema(input.schema), prompt: input.prompt, }); const latencyMs = Date.now() - startTime; // Calculate cost from language-models pricing const pricing = getModelPricing(model.id); const cost = pricing ? ((usage?.promptTokens ?? 0) * pricing.prompt + (usage?.completionTokens ?? 0) * pricing.completion) / 1_000_000 : 0; return { object, expectedTypes: input.expectedTypes, testName: input.name, modelId: model.id, modelName: model.name, provider: model.provider, latencyMs, cost, usage, }; }, scorers: [ // Type accuracy { name: 'Type Accuracy', description: 'Whether output fields have correct types', scorer: ({ output }) => { const obj = output.object; const expected = output.expectedTypes; const fields = Object.keys(expected); let correct = 0; for (const field of fields) { const val = obj[field]; const expectedType = expected[field]; const actualType = Array.isArray(val) ? 'array' : typeof val; if (actualType === expectedType) correct++; } return { score: correct / fields.length }; }, }, // Latency { name: 'Latency', description: 'Response time (target < 3s)', scorer: ({ output }) => { const ms = output.latencyMs; if (ms < 2000) return { score: 1 }; if (ms > 10000) return { score: 0 }; return { score: 1 - (ms - 2000) / 8000 }; }, }, ], columns: ({ output, scores }) => [ { label: 'Model', value: output.modelName }, { label: 'Test', value: output.testName }, { label: 'Latency', value: `${output.latencyMs}ms` }, { label: 'Cost', value: `$${output.cost.toFixed(6)}` }, ], });