// agent-animate
// Version:
// AI-powered cinematic animations from workflow transcripts - Jony Ive precision meets Hans Zimmer timing
// 920 lines (782 loc) • 38.1 kB
// JavaScript
/**
 * ResponsesAPIAgent - AI scene creation using OpenAI's Responses API.
 *
 * Runs a three-step pipeline (analysis -> scene creation -> validation) in
 * which each step is chained to the previous one through
 * `previous_response_id`. Custom function tools requested by the model are
 * executed locally and their results are fed back until the model stops
 * asking for them.
 */
class ResponsesAPIAgent {
  /**
   * @param {string} apiKey - API key handed to the `OpenAI` client constructor.
   */
  constructor(apiKey) {
    this.client = new OpenAI({ apiKey });
    this.conversationId = null;
    this.responseHistory = [];
    this.toolsConfig = this.initializeTools();
  }

  /**
   * Build the tool list sent with every request: three built-in tools plus
   * three custom function tools declared in strict mode.
   *
   * Strict mode requires every object schema to list all of its properties in
   * `required` and to set `additionalProperties: false`; optional values are
   * expressed as nullable types instead.
   *
   * @returns {Array<object>} Tool definitions for `responses.create`.
   */
  initializeTools() {
    return [
      { type: 'code_interpreter' }, // For complex layout calculations and optimization
      { type: 'web_search' }, // For researching unknown components and best practices
      { type: 'image_generation' }, // For generating component icons or visual references
      // Custom functions for scene creation with strict mode
      {
        type: 'function',
        name: 'analyze_component_relationships',
        description: 'Analyze semantic relationships between workflow components to optimize grouping and connections',
        strict: true,
        parameters: {
          type: 'object',
          properties: {
            components: {
              type: 'array',
              items: { type: 'string' },
              description: 'List of component names to analyze'
            },
            workflow_type: {
              type: 'string',
              description: 'Type of workflow (e.g., automation, integration, data_flow)'
            }
          },
          required: ['components', 'workflow_type'],
          additionalProperties: false
        }
      },
      {
        type: 'function',
        name: 'calculate_optimal_layout',
        description: 'Calculate optimal positioning for components to prevent overlaps and ensure visual clarity',
        strict: true,
        parameters: {
          type: 'object',
          properties: {
            components: {
              type: 'array',
              items: {
                type: 'object',
                properties: {
                  id: { type: 'string' },
                  type: { type: 'string' },
                  importance: { type: 'number', minimum: 0, maximum: 1 },
                  width: { type: 'number', minimum: 50, maximum: 300 },
                  height: { type: 'number', minimum: 30, maximum: 150 }
                },
                required: ['id', 'type', 'importance', 'width', 'height'],
                additionalProperties: false
              }
            },
            canvas_size: {
              type: 'object',
              properties: {
                width: { type: 'number', minimum: 800, maximum: 4000 },
                height: { type: 'number', minimum: 600, maximum: 3000 }
              },
              required: ['width', 'height'],
              additionalProperties: false
            },
            layout_strategy: {
              type: ['string', 'null'],
              enum: ['circular', 'grid', 'hierarchical', 'flow_based', null],
              description: 'Preferred layout strategy, or null for automatic selection'
            }
          },
          required: ['components', 'canvas_size', 'layout_strategy'],
          additionalProperties: false
        }
      },
      {
        type: 'function',
        name: 'validate_scene_quality',
        description: 'Validate scene quality across multiple dimensions and suggest specific improvements',
        strict: true,
        parameters: {
          type: 'object',
          properties: {
            scenes: {
              type: 'array',
              items: {
                type: 'object',
                properties: {
                  id: { type: 'string' },
                  name: { type: 'string' },
                  duration: { type: 'number' },
                  component_count: { type: 'number' },
                  has_overlaps: { type: 'boolean' }
                },
                // Strict mode: every declared property must be required.
                required: ['id', 'name', 'duration', 'component_count', 'has_overlaps'],
                additionalProperties: false
              }
            },
            quality_criteria: {
              type: ['object', 'null'],
              properties: {
                clarity_threshold: { type: 'number', minimum: 0, maximum: 1 },
                flow_threshold: { type: 'number', minimum: 0, maximum: 1 },
                timing_threshold: { type: 'number', minimum: 0, maximum: 1 }
              },
              required: ['clarity_threshold', 'flow_threshold', 'timing_threshold'],
              additionalProperties: false
            }
          },
          required: ['scenes', 'quality_criteria'],
          additionalProperties: false
        }
      }
    ];
  }

  /**
   * Main scene creation using Responses API with multi-turn reasoning.
   *
   * @param {string} transcript - Workflow transcript to turn into scenes.
   * @param {object} [options] - Currently unused; kept for interface stability.
   * @returns {Promise<object>} Result object built by `formatFinalResult`.
   * @throws {SceneCreationError} When any step fails; the original error is
   *   attached as `cause`.
   */
  async createScenesFromTranscript(transcript, options = {}) {
    console.log("🤖 Starting Responses API scene creation");
    // Start a fresh history so the reported reasoning chain and request
    // count describe this run only, even when the agent instance is reused.
    this.responseHistory = [];
    try {
      // Step 1: Initial analysis and scene structure
      const analysisResponse = await this.analyzeTranscriptStructure(transcript);
      // Step 2: Create scenes with reasoning context
      const creationResponse = await this.createScenesWithContext(
        transcript,
        analysisResponse
      );
      // Step 3: Validate and optimize using previous context
      const validationResponse = await this.validateAndOptimize(
        creationResponse
      );
      return this.formatFinalResult(validationResponse);
    } catch (error) {
      console.error("Scene creation failed:", error);
      const wrapped = new SceneCreationError(error.message);
      // Preserve the root failure (and its stack) for callers that inspect it.
      wrapped.cause = error;
      throw wrapped;
    }
  }

  /**
   * Step 1: ask the model for a structured analysis of the transcript.
   *
   * @param {string} transcript - Raw workflow transcript.
   * @returns {Promise<object>} The final response of this step.
   */
  async analyzeTranscriptStructure(transcript) {
    console.log("📖 Analyzing transcript structure with Responses API");
    const request = {
      model: "gpt-5",
      store: true, // Enable stateful context
      instructions: `You are an expert at analyzing technical workflow transcripts and creating optimal scene structures for cinematic animations.
Your task is to analyze the transcript and determine:
1. Key workflow phases and logical groupings
2. Component relationships and dependencies
3. Optimal scene progression for storytelling
4. Technical complexity and visualization requirements
IMPORTANT: Use the available tools strategically:
- Use web_search to research unfamiliar components, services, or workflow patterns
- Use code_interpreter for complex calculations or data analysis if needed
- Consider image_generation for visual references of complex components
Research any components you're unfamiliar with to ensure accurate representation.`,
      tools: this.toolsConfig,
      text: {
        format: {
          type: "json_schema",
          name: "transcript_analysis",
          schema: {
            type: "object",
            properties: {
              workflow_type: { type: "string" },
              complexity_score: { type: "number", minimum: 0, maximum: 1 },
              components: {
                type: "array",
                items: {
                  type: "object",
                  properties: {
                    name: { type: "string" },
                    type: { type: "string" },
                    importance: { type: "number" },
                    relationships: { type: "array", items: { type: "string" } }
                  }
                }
              },
              recommended_scenes: {
                type: "array",
                items: {
                  type: "object",
                  properties: {
                    name: { type: "string" },
                    purpose: { type: "string" },
                    duration_percent: { type: "number" },
                    components: { type: "array", items: { type: "string" } },
                    reasoning: { type: "string" }
                  }
                }
              },
              visualization_considerations: {
                type: "object",
                properties: {
                  layout_strategy: { type: "string" },
                  timing_requirements: { type: "string" },
                  potential_issues: { type: "array", items: { type: "string" } }
                }
              }
            },
            required: ["workflow_type", "complexity_score", "components", "recommended_scenes"]
          }
        }
      }
    };
    const initialResponse = await this.client.responses.create({
      ...request,
      input: `Analyze this workflow transcript for scene creation:
${transcript}
Provide a structured analysis including:
- Workflow type and complexity assessment
- Key components and their relationships
- Recommended scene structure with timing
- Technical considerations for visualization`
    });
    // The function tools are offered on this request too, so answer any calls.
    const outcome = await this.resolveFunctionCalls(initialResponse, request);
    this.recordStep('analysis', outcome);
    return outcome.response;
  }

  /**
   * Step 2: create scene specifications on top of the analysis context.
   *
   * @param {string} transcript - Unused here; the transcript is already part
   *   of the stored analysis context.
   * @param {object} analysisResponse - Response returned by step 1; only its
   *   `id` is read.
   * @returns {Promise<object>} The final response of this step.
   */
  async createScenesWithContext(transcript, analysisResponse) {
    console.log("🎬 Creating scenes with contextual reasoning");
    const baseRequest = {
      model: "gpt-5",
      store: true,
      instructions: `Based on your previous analysis, create detailed scene specifications for the workflow animation.
Focus on:
1. Precise component positioning to avoid overlaps
2. Optimal timing for each scene transition
3. Visual hierarchy and emphasis
4. Smooth narrative flow
You MUST use the calculate_optimal_layout tool to ensure proper positioning.`,
      tools: this.toolsConfig
    };
    const inputList = [
      {
        role: "user",
        content: `Create detailed scene specifications based on your analysis. For each scene:
1. Specify exact components to show
2. Calculate optimal layout positions using the calculate_optimal_layout tool
3. Determine timing and transitions
4. Identify potential visual issues
5. Provide implementation details
Ensure no component overlaps and maintain visual clarity throughout.
Use the tools available to you for layout calculations and quality validation.`
      }
    ];
    // Make initial request
    const initialResponse = await this.client.responses.create({
      ...baseRequest,
      previous_response_id: analysisResponse.id, // Use previous context
      input: inputList
    });
    // Follow-up requests (sent only after function calls) demand structured output.
    const followUpRequest = {
      ...baseRequest,
      text: {
        format: {
          type: "json_schema",
          name: "scene_specifications",
          strict: true,
          schema: {
            type: "object",
            properties: {
              scenes: {
                type: "array",
                items: {
                  type: "object",
                  properties: {
                    id: { type: "string" },
                    name: { type: "string" },
                    title: { type: "string" },
                    subtitle: { type: "string" },
                    duration: { type: "number" },
                    start_time: { type: "number" },
                    components: {
                      type: "array",
                      items: {
                        type: "object",
                        properties: {
                          id: { type: "string" },
                          x: { type: "number" },
                          y: { type: "number" },
                          width: { type: "number" },
                          height: { type: "number" },
                          type: { type: "string" },
                          importance: { type: "number" }
                        },
                        required: ["id", "x", "y", "width", "height", "type", "importance"],
                        additionalProperties: false
                      }
                    },
                    // NOTE(review): strict mode cannot accept an open-ended
                    // object, so a concrete connection shape is declared here.
                    // The from/to/label fields are an assumption - confirm
                    // against whatever renders connections.
                    connections: {
                      type: "array",
                      items: {
                        type: "object",
                        properties: {
                          from: { type: "string" },
                          to: { type: "string" },
                          label: { type: ["string", "null"] }
                        },
                        required: ["from", "to", "label"],
                        additionalProperties: false
                      }
                    },
                    highlight_components: { type: "array", items: { type: "string" } },
                    data_flow_active: { type: "boolean" },
                    layout_reasoning: { type: "string" },
                    timing_reasoning: { type: "string" }
                  },
                  // Strict mode: every declared property must be required.
                  required: [
                    "id", "name", "title", "subtitle", "duration", "start_time",
                    "components", "connections", "highlight_components",
                    "data_flow_active", "layout_reasoning", "timing_reasoning"
                  ],
                  additionalProperties: false
                }
              },
              total_duration: { type: "number" },
              quality_checks: {
                type: "object",
                properties: {
                  no_overlaps: { type: "boolean" },
                  balanced_timing: { type: "boolean" },
                  clear_progression: { type: "boolean" }
                },
                required: ["no_overlaps", "balanced_timing", "clear_progression"],
                additionalProperties: false
              }
            },
            required: ["scenes", "total_duration", "quality_checks"],
            additionalProperties: false
          }
        }
      }
    };
    const outcome = await this.resolveFunctionCalls(initialResponse, followUpRequest);
    this.recordStep('creation', outcome, {
      function_calls_made: outcome.functionCallsMade
    });
    return outcome.response;
  }

  /**
   * Step 3: validate the created scenes and request optimized versions.
   *
   * @param {object} creationResponse - Response returned by step 2; only its
   *   `id` is read.
   * @returns {Promise<object>} The final response of this step.
   */
  async validateAndOptimize(creationResponse) {
    console.log("✅ Validating and optimizing scenes");
    const request = {
      model: "gpt-5",
      store: true,
      instructions: `Now validate and optimize the created scenes. Act as a quality assurance expert focusing on:
1. Visual clarity and component spacing
2. Narrative flow and scene transitions
3. Timing balance and pacing
4. Technical accuracy and feasibility
5. User experience considerations
Use validation tools to check for issues and suggest concrete improvements.`,
      tools: this.toolsConfig,
      text: {
        format: {
          type: "json_schema",
          name: "validation_and_optimization",
          schema: {
            type: "object",
            properties: {
              validation_results: {
                type: "object",
                properties: {
                  overall_score: { type: "number", minimum: 0, maximum: 1 },
                  visual_clarity: { type: "number", minimum: 0, maximum: 1 },
                  narrative_flow: { type: "number", minimum: 0, maximum: 1 },
                  timing_balance: { type: "number", minimum: 0, maximum: 1 },
                  issues_found: { type: "array", items: { type: "string" } },
                  strengths: { type: "array", items: { type: "string" } }
                }
              },
              optimizations: {
                type: "array",
                items: {
                  type: "object",
                  properties: {
                    type: { type: "string" },
                    description: { type: "string" },
                    impact: { type: "string" },
                    implementation: { type: "string" }
                  }
                }
              },
              optimized_scenes: {
                type: "array",
                items: { type: "object" }
              },
              confidence_score: { type: "number", minimum: 0, maximum: 1 },
              reasoning_summary: { type: "string" }
            },
            // formatFinalResult reads these fields, so ask for them explicitly.
            required: ["validation_results", "optimized_scenes", "confidence_score"]
          }
        }
      }
    };
    const initialResponse = await this.client.responses.create({
      ...request,
      previous_response_id: creationResponse.id,
      input: `Perform a comprehensive quality validation of the created scenes:
1. Check for any remaining visual issues (overlaps, poor spacing)
2. Validate narrative flow and scene progression
3. Assess timing and pacing optimization
4. Identify any technical implementation concerns
5. Suggest specific improvements with reasoning
Provide both validation results and optimized scene specifications.`
    });
    // The prompt tells the model to use validation tools, so its first reply
    // may be a function call rather than the JSON result.
    const outcome = await this.resolveFunctionCalls(initialResponse, request);
    this.recordStep('validation', outcome);
    return outcome.response;
  }

  /**
   * Execute function calls requested by the model and keep re-querying until
   * the model returns a response without any function call.
   *
   * @param {object} initialResponse - First response of the step.
   * @param {object} followUpRequest - Request fields reused for every
   *   follow-up; `previous_response_id` and `input` are filled in here.
   * @param {number} [maxRounds=5] - Upper bound on follow-up requests.
   * @returns {Promise<{response: object, functionCallsMade: number,
   *   toolsUsed: string[], requestCount: number}>}
   * @throws {Error} If the model still requests functions after `maxRounds`.
   */
  async resolveFunctionCalls(initialResponse, followUpRequest, maxRounds = 5) {
    let response = initialResponse;
    let functionCallsMade = 0;
    let requestCount = 1;
    const toolsUsed = new Set(this.extractToolNames(response));
    let pending = (response.output || []).filter(item => item.type === "function_call");
    for (let round = 0; pending.length > 0; round++) {
      if (round >= maxRounds) {
        throw new Error(`Model still requested function calls after ${maxRounds} rounds`);
      }
      const outputs = [];
      for (const call of pending) {
        outputs.push(await this.executeFunctionCall(call));
      }
      functionCallsMade += pending.length;
      // Chain onto the response that issued the calls so that only the tool
      // outputs have to be sent as input.
      response = await this.client.responses.create({
        ...followUpRequest,
        previous_response_id: response.id,
        input: outputs
      });
      requestCount++;
      for (const name of this.extractToolNames(response)) {
        toolsUsed.add(name);
      }
      pending = (response.output || []).filter(item => item.type === "function_call");
    }
    return { response, functionCallsMade, toolsUsed: [...toolsUsed], requestCount };
  }

  /**
   * Run one model-requested function and wrap the result as a
   * `function_call_output` input item. Failures (bad JSON arguments, unknown
   * name, exception in the implementation) are logged and reported to the
   * model as `{ error }` so that every call gets a valid output.
   *
   * @param {object} call - Output item with `name`, `arguments`, `call_id`.
   * @returns {Promise<object>} Input item answering `call`.
   */
  async executeFunctionCall(call) {
    console.log(`🔧 Executing function: ${call.name}`);
    let result;
    try {
      const args = JSON.parse(call.arguments);
      switch (call.name) {
        case "calculate_optimal_layout":
          result = await this.calculateOptimalLayout(args.components, args.canvas_size, args.layout_strategy);
          break;
        case "analyze_component_relationships":
          result = await this.analyzeComponentRelationships(args.components, args.workflow_type);
          break;
        case "validate_scene_quality":
          result = await this.validateSceneQuality(args.scenes, args.quality_criteria);
          break;
        default:
          result = { error: `Unknown function: ${call.name}` };
      }
    } catch (error) {
      console.error(`Function ${call.name} failed:`, error);
      result = { error: error.message };
    }
    return {
      type: "function_call_output",
      call_id: call.call_id,
      output: JSON.stringify(result)
    };
  }

  /**
   * List the tools a response invoked: function calls contribute their
   * function name, other `*_call` output items contribute the type with the
   * `_call` suffix removed.
   *
   * @param {object} response - Response whose `output` array is inspected.
   * @returns {string[]} Unique tool names in order of first appearance.
   */
  extractToolNames(response) {
    const names = new Set();
    for (const item of response.output || []) {
      if (item.type === "function_call") {
        names.add(item.name);
      } else if (typeof item.type === "string" && item.type.endsWith("_call")) {
        names.add(item.type.slice(0, -"_call".length));
      }
    }
    return [...names];
  }

  /**
   * Append one pipeline step to `responseHistory`.
   *
   * @param {string} step - Step label ('analysis', 'creation', 'validation').
   * @param {object} outcome - Value returned by `resolveFunctionCalls`.
   * @param {object} [extra] - Additional fields merged into the entry.
   */
  recordStep(step, outcome, extra = {}) {
    this.responseHistory.push({
      step,
      response_id: outcome.response.id,
      timestamp: Date.now(),
      ...extra,
      tools_used: outcome.toolsUsed,
      request_count: outcome.requestCount
    });
  }

  /**
   * Parse the validation step's JSON output into the public result object.
   *
   * @param {object} validationResponse - Response whose `output_text` holds
   *   the JSON document.
   * @returns {object} Scenes, scores and processing metadata.
   * @throws {Error} If the response has no text, or {SyntaxError} if the text
   *   is not valid JSON.
   */
  formatFinalResult(validationResponse) {
    const text = validationResponse.output_text;
    if (typeof text !== 'string' || text.trim() === '') {
      throw new Error('Validation response contained no text output to parse');
    }
    const output = JSON.parse(text);
    // The validation schema is not strict, so tolerate a missing section.
    const validation = output.validation_results || {};
    const totalRequests = this.responseHistory.reduce(
      (sum, entry) => sum + (entry.request_count || 1),
      0
    );
    return {
      scenes: output.optimized_scenes,
      quality_score: validation.overall_score,
      validation_details: output.validation_results,
      optimizations_applied: output.optimizations,
      confidence: output.confidence_score,
      reasoning_chain: this.responseHistory,
      api_approach: 'responses_api',
      processing_metadata: {
        total_requests: totalRequests,
        context_preserved: true,
        tools_used: this.getToolsUsed(),
        reasoning_depth: 'multi_turn_contextual'
      }
    };
  }

  /**
   * Collect the tools recorded in the response history.
   *
   * @returns {string[]} Unique tool names across all recorded steps.
   */
  getToolsUsed() {
    const used = new Set();
    for (const entry of this.responseHistory) {
      for (const tool of entry.tools_used || []) {
        used.add(tool);
      }
    }
    return [...used];
  }

  // Tool implementations for function calling

  /**
   * Map every component name to the other names it is related to.
   *
   * @param {string[]} components - Component names.
   * @param {string} workflowType - Workflow type forwarded to the relation check.
   * @returns {Promise<Object<string, string[]>>}
   */
  async analyzeComponentRelationships(components, workflowType) {
    console.log("🔗 Analyzing component relationships");
    const relationships = {};
    for (const component of components) {
      relationships[component] = this.findRelatedComponents(component, components, workflowType);
    }
    return relationships;
  }

  /**
   * Position components on the canvas with the requested (or an automatically
   * chosen) strategy, then push apart overlapping pairs and score the result.
   *
   * @param {Array<{id: string, type: string, importance: number, width: number, height: number}>} components
   * @param {{width: number, height: number}} canvasSize
   * @param {?string} [layoutStrategy=null] - 'circular', 'grid',
   *   'hierarchical' or 'flow_based'; anything else falls back to the
   *   center-focus layout. Falsy selects by component count.
   * @returns {Promise<object>} Layout with `strategy`, `positions`, `spacing`,
   *   `groupings`, `canvas_utilization` and `quality_score`.
   */
  async calculateOptimalLayout(components, canvasSize, layoutStrategy = null) {
    console.log(`📐 Calculating optimal layout for ${components.length} components`);
    // Determine best layout strategy if not specified
    if (!layoutStrategy) {
      if (components.length <= 4) {
        layoutStrategy = 'center_focus';
      } else if (components.length <= 8) {
        layoutStrategy = 'circular';
      } else {
        layoutStrategy = 'grid';
      }
    }
    const layout = {
      strategy: layoutStrategy,
      positions: {},
      spacing: this.calculateOptimalSpacing(components.length, canvasSize),
      groupings: this.analyzeComponentGroupings(components),
      canvas_utilization: 0.7, // Use 70% of canvas to ensure good spacing
      quality_score: 0
    };
    // Apply the chosen layout strategy
    switch (layoutStrategy) {
      case 'circular':
        layout.positions = this.calculateCircularLayout(components, canvasSize, layout.spacing);
        break;
      case 'grid':
        layout.positions = this.calculateGridLayout(components, canvasSize, layout.spacing);
        break;
      case 'hierarchical':
        layout.positions = this.calculateHierarchicalLayout(components, canvasSize, layout.spacing);
        break;
      case 'flow_based':
        layout.positions = this.calculateFlowBasedLayout(components, canvasSize, layout.spacing);
        break;
      default: // center_focus
        layout.positions = this.calculateCenterFocusLayout(components, canvasSize, layout.spacing);
    }
    // Validate and fix any overlaps
    layout.positions = this.resolveOverlaps(layout.positions, components, canvasSize);
    layout.quality_score = this.calculateLayoutQuality(layout.positions, components, canvasSize);
    console.log(`Layout complete: ${layoutStrategy} strategy, quality: ${layout.quality_score}`);
    return layout;
  }

  /**
   * Score every scene and aggregate issues, recommendations and an overall score.
   *
   * @param {Array<object>} scenes - Scenes to validate; each needs an `id`.
   * @param {?object} [qualityCriteria={}] - Thresholds; the tool schema allows
   *   `null`, which is treated like an empty object.
   * @returns {Promise<object>} `{ overall_score, detailed_scores, issues, recommendations }`.
   */
  async validateSceneQuality(scenes, qualityCriteria = {}) {
    console.log("✅ Validating scene quality");
    // A default parameter does not cover an explicit null from the model.
    const criteria = qualityCriteria || {};
    const validation = {
      overall_score: 0,
      detailed_scores: {},
      issues: [],
      recommendations: []
    };
    for (const scene of scenes) {
      const sceneValidation = this.validateIndividualScene(scene, criteria);
      validation.detailed_scores[scene.id] = sceneValidation;
      validation.issues.push(...sceneValidation.issues);
      validation.recommendations.push(...sceneValidation.recommendations);
    }
    validation.overall_score = this.calculateOverallQualityScore(validation.detailed_scores);
    return validation;
  }

  // Helper methods

  /**
   * @param {string} component - Name to find relations for.
   * @param {string[]} allComponents - Candidate names.
   * @param {string} workflowType - Forwarded to `hasSemanticRelationship`.
   * @returns {string[]} Other components the relation check accepts.
   */
  findRelatedComponents(component, allComponents, workflowType) {
    // NOTE(review): hasSemanticRelationship is not defined in the visible
    // class body - confirm it is provided elsewhere.
    return allComponents.filter(c =>
      c !== component &&
      this.hasSemanticRelationship(component, c, workflowType)
    );
  }

  /**
   * Spacing shrinks with the square root of the component count (relative to
   * 6 components) and scales with the smaller canvas side relative to 1920.
   *
   * @param {number} componentCount
   * @param {{width: number, height: number}} canvasSize
   * @returns {number} Whole-number spacing.
   */
  calculateOptimalSpacing(componentCount, canvasSize) {
    const baseSpacing = 150;
    // Treat an empty scene like a single component to avoid dividing by zero.
    const densityFactor = Math.sqrt(Math.max(componentCount, 1) / 6);
    const canvasScale = Math.min(canvasSize.width, canvasSize.height) / 1920;
    return Math.floor(baseSpacing * canvasScale / densityFactor);
  }

  /**
   * Bucket components by their `type`; components without one go to 'other'.
   *
   * @param {Array<object>} components
   * @returns {Object<string, Array<object>>}
   */
  analyzeComponentGroupings(components) {
    const groups = {};
    for (const component of components) {
      const groupKey = component.type || 'other';
      if (!groups[groupKey]) groups[groupKey] = [];
      groups[groupKey].push(component);
    }
    return groups;
  }

  /**
   * Position a single component relative to the other members of its type group.
   *
   * @returns {*} Whatever the delegated position helper returns.
   */
  calculateComponentPosition(component, index, allComponents, canvasSize, spacing) {
    const grouping = this.analyzeComponentGroupings(allComponents);
    // Use the same key rule as analyzeComponentGroupings so untyped
    // components find their 'other' group.
    const componentGroup = grouping[component.type || 'other'] || [component];
    const groupIndex = componentGroup.indexOf(component);
    // NOTE(review): calculateCircularPosition / calculateGridPosition are not
    // defined in the visible class body - confirm they exist elsewhere.
    if (componentGroup.length <= 6) {
      return this.calculateCircularPosition(component, groupIndex, componentGroup.length, canvasSize);
    } else {
      return this.calculateGridPosition(component, groupIndex, componentGroup.length, canvasSize, spacing);
    }
  }

  /**
   * Place components evenly on a circle around the canvas center, starting at
   * the top (angle -PI/2).
   *
   * @returns {Object<string, {x: number, y: number}>} Top-left positions by id.
   */
  calculateCircularLayout(components, canvasSize, spacing) {
    const positions = {};
    const centerX = canvasSize.width * 0.5;
    const centerY = canvasSize.height * 0.5;
    // Radius is at least a quarter of the smaller side, and grows when the
    // circumference would not fit `spacing` per component.
    const baseRadius = Math.min(canvasSize.width, canvasSize.height) * 0.25;
    const adjustedRadius = Math.max(baseRadius, (components.length * spacing) / (2 * Math.PI));
    components.forEach((component, index) => {
      const angle = (index / components.length) * 2 * Math.PI - Math.PI / 2;
      positions[component.id] = {
        x: centerX + Math.cos(angle) * adjustedRadius - component.width / 2,
        y: centerY + Math.sin(angle) * adjustedRadius - component.height / 2
      };
    });
    return positions;
  }

  /**
   * Place components in a near-square grid centered on the canvas, each
   * component centered in its cell.
   *
   * @returns {Object<string, {x: number, y: number}>} Top-left positions by id.
   */
  calculateGridLayout(components, canvasSize, spacing) {
    const positions = {};
    const cols = Math.ceil(Math.sqrt(components.length));
    const rows = Math.ceil(components.length / cols);
    // Cells never exceed an equal share of the canvas.
    const cellWidth = Math.min(spacing * 1.5, canvasSize.width / cols);
    const cellHeight = Math.min(spacing * 1.2, canvasSize.height / rows);
    const startX = (canvasSize.width - (cols * cellWidth)) / 2;
    const startY = (canvasSize.height - (rows * cellHeight)) / 2;
    components.forEach((component, index) => {
      const row = Math.floor(index / cols);
      const col = index % cols;
      positions[component.id] = {
        x: startX + col * cellWidth + (cellWidth - component.width) / 2,
        y: startY + row * cellHeight + (cellHeight - component.height) / 2
      };
    });
    return positions;
  }

  /**
   * Place each type group on its own horizontally centered row.
   *
   * @returns {Object<string, {x: number, y: number}>} Positions by id.
   */
  calculateHierarchicalLayout(components, canvasSize, spacing) {
    const positions = {};
    const groups = this.analyzeComponentGroupings(components);
    let yOffset = spacing;
    for (const groupComponents of Object.values(groups)) {
      const groupWidth = groupComponents.length * spacing;
      const startX = (canvasSize.width - groupWidth) / 2;
      groupComponents.forEach((component, index) => {
        positions[component.id] = {
          x: startX + index * spacing,
          y: yOffset
        };
      });
      yOffset += spacing * 1.5; // Space between groups
    }
    return positions;
  }

  /**
   * Lay components out left-to-right, top-to-bottom in descending importance,
   * using at most four columns that span the full canvas.
   *
   * @returns {Object<string, {x: number, y: number}>} Top-left positions by id.
   */
  calculateFlowBasedLayout(components, canvasSize, spacing) {
    const positions = {};
    const sortedComponents = [...components].sort((a, b) => b.importance - a.importance);
    const cols = Math.min(4, Math.ceil(Math.sqrt(components.length)));
    const cellWidth = canvasSize.width / cols;
    const cellHeight = canvasSize.height / Math.ceil(components.length / cols);
    sortedComponents.forEach((component, index) => {
      const row = Math.floor(index / cols);
      const col = index % cols;
      positions[component.id] = {
        x: col * cellWidth + (cellWidth - component.width) / 2,
        y: row * cellHeight + (cellHeight - component.height) / 2
      };
    });
    return positions;
  }

  /**
   * Center the most important component and arrange the others on a circle of
   * radius `2 * spacing` around the canvas center.
   *
   * @returns {Object<string, {x: number, y: number}>} Top-left positions by
   *   id; empty for an empty component list.
   */
  calculateCenterFocusLayout(components, canvasSize, spacing) {
    const positions = {};
    if (components.length === 0) {
      return positions;
    }
    const sortedComponents = [...components].sort((a, b) => b.importance - a.importance);
    const centerComponent = sortedComponents[0];
    const otherComponents = sortedComponents.slice(1);
    positions[centerComponent.id] = {
      x: (canvasSize.width - centerComponent.width) / 2,
      y: (canvasSize.height - centerComponent.height) / 2
    };
    const radius = spacing * 2;
    otherComponents.forEach((component, index) => {
      const angle = (index / otherComponents.length) * 2 * Math.PI;
      positions[component.id] = {
        x: canvasSize.width / 2 + Math.cos(angle) * radius - component.width / 2,
        y: canvasSize.height / 2 + Math.sin(angle) * radius - component.height / 2
      };
    });
    return positions;
  }

  /**
   * Push overlapping pairs apart along the line between them, clamped to the
   * canvas, for at most 10 passes.
   *
   * @param {Object<string, {x: number, y: number}>} positions - Not mutated.
   * @param {Array<object>} components
   * @param {{width: number, height: number}} canvasSize
   * @returns {Object<string, {x: number, y: number}>} Adjusted copy.
   */
  resolveOverlaps(positions, components, canvasSize) {
    const resolvedPositions = { ...positions };
    const maxIterations = 10;
    let iteration = 0;
    while (iteration < maxIterations) {
      let hasOverlaps = false;
      for (let i = 0; i < components.length; i++) {
        for (let j = i + 1; j < components.length; j++) {
          const comp1 = components[i];
          const comp2 = components[j];
          const pos1 = resolvedPositions[comp1.id];
          const pos2 = resolvedPositions[comp2.id];
          // A component the layout did not place cannot be compared.
          if (!pos1 || !pos2) continue;
          if (this.hasOverlap(pos1, comp1, pos2, comp2)) {
            hasOverlaps = true;
            const centerX = (pos1.x + pos2.x) / 2;
            const centerY = (pos1.y + pos2.y) / 2;
            const separation = Math.max(comp1.width, comp1.height, comp2.width, comp2.height) + 20;
            const dx = pos1.x - centerX;
            const dy = pos1.y - centerY;
            const distance = Math.sqrt(dx * dx + dy * dy);
            // Coincident positions have no direction of their own; separate
            // them horizontally instead of leaving both on the midpoint.
            const unitX = distance > 0 ? dx / distance : 1;
            const unitY = distance > 0 ? dy / distance : 0;
            resolvedPositions[comp1.id] = {
              x: Math.max(0, Math.min(centerX + unitX * separation, canvasSize.width - comp1.width)),
              y: Math.max(0, Math.min(centerY + unitY * separation, canvasSize.height - comp1.height))
            };
            resolvedPositions[comp2.id] = {
              x: Math.max(0, Math.min(centerX - unitX * separation, canvasSize.width - comp2.width)),
              y: Math.max(0, Math.min(centerY - unitY * separation, canvasSize.height - comp2.height))
            };
          }
        }
      }
      if (!hasOverlaps) break;
      iteration++;
    }
    console.log(`Overlap resolution completed in ${iteration} iterations`);
    return resolvedPositions;
  }

  /**
   * Axis-aligned rectangle intersection test. Rectangles that merely touch
   * along an edge count as overlapping because the comparisons are strict.
   *
   * @param {{x: number, y: number}} pos1
   * @param {{width: number, height: number}} comp1
   * @param {{x: number, y: number}} pos2
   * @param {{width: number, height: number}} comp2
   * @returns {boolean}
   */
  hasOverlap(pos1, comp1, pos2, comp2) {
    return !(pos1.x + comp1.width < pos2.x ||
      pos2.x + comp2.width < pos1.x ||
      pos1.y + comp1.height < pos2.y ||
      pos2.y + comp2.height < pos1.y);
  }

  /**
   * Score a layout from 1.0 downwards: -0.5 per overlapping pair, -0.2 when
   * components cover more than 80% of the canvas, -0.1 when under 10%.
   *
   * @returns {number} Score, never below 0.
   */
  calculateLayoutQuality(positions, components, canvasSize) {
    let score = 1.0;
    for (let i = 0; i < components.length; i++) {
      for (let j = i + 1; j < components.length; j++) {
        const comp1 = components[i];
        const comp2 = components[j];
        const pos1 = positions[comp1.id];
        const pos2 = positions[comp2.id];
        if (pos1 && pos2 && this.hasOverlap(pos1, comp1, pos2, comp2)) {
          score -= 0.5; // Major penalty for overlaps
        }
      }
    }
    const totalArea = components.reduce((sum, comp) => sum + comp.width * comp.height, 0);
    const canvasArea = canvasSize.width * canvasSize.height;
    const utilization = totalArea / canvasArea;
    if (utilization > 0.8) score -= 0.2; // Penalize overcrowding
    if (utilization < 0.1) score -= 0.1; // Penalize underutilization
    return Math.max(0, score);
  }

  /**
   * Score one scene.
   * NOTE(review): assessNarrativeFlow, assessTiming, identifySceneIssues and
   * generateSceneRecommendations are not defined in the visible class body -
   * confirm they exist elsewhere. `criteria` is accepted but not used here.
   *
   * @returns {object} `{ clarity_score, flow_score, timing_score, issues, recommendations }`.
   */
  validateIndividualScene(scene, criteria) {
    return {
      clarity_score: this.assessVisualClarity(scene),
      flow_score: this.assessNarrativeFlow(scene),
      timing_score: this.assessTiming(scene),
      issues: this.identifySceneIssues(scene),
      recommendations: this.generateSceneRecommendations(scene)
    };
  }

  /**
   * Clarity starts at 1.0 and loses 0.4 for overlaps, 0.3 for poor spacing
   * and 0.3 for missing visual hierarchy.
   * NOTE(review): hasGoodSpacing / hasVisualHierarchy are not defined in the
   * visible class body - confirm they exist elsewhere.
   *
   * @returns {number} Score, never below 0.
   */
  assessVisualClarity(scene) {
    let score = 1.0;
    if (this.hasComponentOverlaps(scene)) score -= 0.4;
    if (!this.hasGoodSpacing(scene)) score -= 0.3;
    if (!this.hasVisualHierarchy(scene)) score -= 0.3;
    return Math.max(0, score);
  }

  /**
   * @param {{components?: Array<object>}} scene
   * @returns {boolean} True when any two components of the scene overlap.
   */
  hasComponentOverlaps(scene) {
    const components = scene.components || [];
    for (let i = 0; i < components.length; i++) {
      for (let j = i + 1; j < components.length; j++) {
        if (this.componentsOverlap(components[i], components[j])) {
          return true;
        }
      }
    }
    return false;
  }

  /**
   * Overlap test for components that carry their own x/y/width/height.
   *
   * @returns {boolean}
   */
  componentsOverlap(comp1, comp2) {
    // Each component serves as both its position and its size.
    return this.hasOverlap(comp1, comp1, comp2, comp2);
  }

  /**
   * Weighted mean of the per-scene scores: 40% clarity, 30% flow, 30% timing.
   *
   * @param {Object<string, {clarity_score: number, flow_score: number, timing_score: number}>} detailedScores
   * @returns {number} Overall score; 0 when there are no scenes.
   */
  calculateOverallQualityScore(detailedScores) {
    const scores = Object.values(detailedScores);
    if (scores.length === 0) {
      return 0; // Avoid 0/0 = NaN for an empty scene list.
    }
    const average = (key) => scores.reduce((sum, s) => sum + s[key], 0) / scores.length;
    return (average('clarity_score') * 0.4 + average('flow_score') * 0.3 + average('timing_score') * 0.3);
  }
  // Additional helper methods would be implemented here...
}
/**
 * Error class for scene creation failures.
 *
 * @param {string} message - Human-readable failure description.
 * @param {{cause?: *}} [options] - Optional; when `cause` is given it is
 *   stored on the error so the underlying failure is not lost.
 */
class SceneCreationError extends Error {
  constructor(message, options) {
    super(message);
    this.name = 'SceneCreationError';
    // Only define `cause` when one was supplied, so errors created with the
    // original single-argument form look exactly as before.
    if (options && options.cause !== undefined) {
      this.cause = options.cause;
    }
  }
}
// Publish both classes through CommonJS when a `module` object with an
// `exports` slot is available; in any other environment this is a no-op.
if (typeof module !== 'undefined') {
  if (module.exports) {
    module.exports = { ResponsesAPIAgent, SceneCreationError };
  }
}