// @aj-archipelago/cortex
// Cortex is a GraphQL API for AI. It provides a simple, extensible interface
// for using AI services from OpenAI, Azure and others.
import { execSync } from "child_process";
import { existsSync } from "fs";
import fs from "fs/promises";
import os from "os";
import { dirname, join } from "path";
import { performance } from "perf_hooks";
import { fileURLToPath } from "url";
import test from "ava";
import nock from "nock";
import { splitMediaFile, downloadFile } from "../src/fileChunker.js";
import { createTestMediaFile } from "./testUtils.helper.js";
const __dirname = dirname(fileURLToPath(import.meta.url));
// Setup: Create test files and mock external services
test.before(async (t) => {
// Check if ffmpeg is available
try {
execSync("ffmpeg -version", { stdio: "ignore" });
} catch (error) {
console.error(
"ffmpeg is not installed. Please install it to run these tests.",
);
process.exit(1);
}
const testDir = join(__dirname, "test-files");
await fs.mkdir(testDir, { recursive: true });
try {
// Create test files of different durations
const testFile1s = join(testDir, "test-1s.mp3");
const testFile10s = join(testDir, "test-10s.mp3");
const testFile600s = join(testDir, "test-600s.mp3");
await createTestMediaFile(testFile1s, 1);
await createTestMediaFile(testFile10s, 10);
await createTestMediaFile(testFile600s, 600);
// Create large test files
const testFile1h = join(testDir, "test-1h.mp3");
const testFile4h = join(testDir, "test-4h.mp3");
console.log("\nCreating large test files (this may take a while)...");
await createTestMediaFile(testFile1h, 3600);
await createTestMediaFile(testFile4h, 14400);
t.context = {
testDir,
testFile1s,
testFile10s,
testFile600s,
testFile1h,
testFile4h,
};
// Setup nock for URL tests with proper headers
nock("https://example.com")
.get("/media/test.mp3")
.replyWithFile(200, testFile10s, {
"Content-Type": "audio/mpeg",
"Content-Length": (await fs.stat(testFile10s)).size.toString(),
})
.persist();
} catch (error) {
console.error("Error during test setup:", error);
// Clean up any partially created files
try {
await fs.rm(testDir, { recursive: true, force: true });
} catch (cleanupError) {
console.error("Error during cleanup:", cleanupError);
}
throw error;
}
});
// Cleanup: Remove test files
test.after.always(async (t) => {
// Clean up test files
if (t.context.testDir) {
try {
await fs.rm(t.context.testDir, { recursive: true, force: true });
console.log("Test files cleaned up successfully");
} catch (error) {
console.error("Error cleaning up test files:", error);
}
}
// Clean up nock
nock.cleanAll();
});
// Test successful chunking of a short file
test("successfully chunks short media file", async (t) => {
const { chunkPromises, chunkOffsets, uniqueOutputPath } =
await splitMediaFile(t.context.testFile1s);
t.true(Array.isArray(chunkPromises), "Should return array of promises");
t.true(Array.isArray(chunkOffsets), "Should return array of offsets");
t.true(typeof uniqueOutputPath === "string", "Should return output path");
// Should only create one chunk for 1s file
t.is(chunkPromises.length, 1, "Should create single chunk for short file");
// Wait for chunks to process
const chunkPaths = await Promise.all(chunkPromises);
// Verify chunk exists
t.true(existsSync(chunkPaths[0]), "Chunk file should exist");
// Cleanup
await fs.rm(uniqueOutputPath, { recursive: true, force: true });
});
// Test chunking of a longer file
test("correctly chunks longer media file", async (t) => {
const { chunkPromises, chunkOffsets, uniqueOutputPath } =
await splitMediaFile(t.context.testFile600s);
// For 600s file with 500s chunks, should create 2 chunks
t.is(chunkPromises.length, 2, "Should create correct number of chunks");
t.is(chunkOffsets.length, 2, "Should create correct number of offsets");
// Verify offsets
t.is(chunkOffsets[0], 0, "First chunk should start at 0");
t.is(chunkOffsets[1], 500, "Second chunk should start at 500s");
// Wait for chunks to process
const chunkPaths = await Promise.all(chunkPromises);
// Verify all chunks exist
for (const chunkPath of chunkPaths) {
t.true(existsSync(chunkPath), "Each chunk file should exist");
}
// Cleanup
await fs.rm(uniqueOutputPath, { recursive: true, force: true });
});
// Test custom chunk duration
test("respects custom chunk duration", async (t) => {
const customDuration = 5; // 5 seconds
const { chunkPromises, chunkOffsets } = await splitMediaFile(
t.context.testFile10s,
customDuration,
);
// For 10s file with 5s chunks, should create 2 chunks
t.is(
chunkPromises.length,
2,
"Should create correct number of chunks for custom duration",
);
t.deepEqual(chunkOffsets, [0, 5], "Should have correct offset points");
});
// Test URL-based file processing
test("processes media file from URL", async (t) => {
const url = "https://example.com/media/test.mp3";
const { chunkPromises, uniqueOutputPath } = await splitMediaFile(url);
// Wait for chunks to process
const chunkPaths = await Promise.all(chunkPromises);
// Verify chunks were created
for (const chunkPath of chunkPaths) {
t.true(
existsSync(chunkPath),
"Chunk files should exist for URL-based media",
);
}
// Cleanup
await fs.rm(uniqueOutputPath, { recursive: true, force: true });
});
// Test error handling for invalid files
test("handles invalid media files gracefully", async (t) => {
const invalidFile = join(t.context.testDir, "invalid.mp3");
await fs.writeFile(invalidFile, "not a valid mp3 file");
await t.throwsAsync(async () => splitMediaFile(invalidFile), {
message: /Error processing media file/,
});
});
// Test error handling for non-existent files
test("handles non-existent files gracefully", async (t) => {
const nonExistentFile = join(t.context.testDir, "non-existent.mp3");
await t.throwsAsync(async () => splitMediaFile(nonExistentFile), {
message: /Error processing media file/,
});
});
// Test file download functionality
test("successfully downloads file from URL", async (t) => {
const url = "https://example.com/media/test.mp3";
const outputPath = join(os.tmpdir(), "downloaded-test.mp3");
await downloadFile(url, outputPath);
t.true(existsSync(outputPath), "Downloaded file should exist");
// Cleanup
await fs.unlink(outputPath);
});
// Test error handling for invalid URLs in download
test("handles invalid URLs in download gracefully", async (t) => {
// Use a localhost URL with a port that's definitely not in use
const invalidUrl = "http://localhost:54321/nonexistent.mp3";
const outputPath = join(os.tmpdir(), "should-not-exist.mp3");
try {
await downloadFile(invalidUrl, outputPath);
t.fail("Expected downloadFile to throw an error for invalid URL");
} catch (error) {
t.truthy(error, "Should throw an error for invalid URL");
// Accept various network error types
const isValidError =
error.code === 'ENOTFOUND' ||
error.code === 'ECONNREFUSED' ||
error.code === 'ENETUNREACH' ||
error.code === 'ETIMEDOUT' ||
error.message.includes('ENOTFOUND') ||
error.message.includes('ECONNREFUSED') ||
error.message.includes('ENETUNREACH') ||
error.message.includes('ETIMEDOUT') ||
error.message.includes('getaddrinfo') ||
error.message.includes('Network Error') ||
error.message.includes('Request failed');
t.true(isValidError, `Expected network error but got: ${error.code} - ${error.message}`);
}
});
// Helper to format duration nicely
function formatDuration(ms) {
if (ms < 1000) return `${ms}ms`;
const seconds = ms / 1000;
if (seconds < 60) return `${seconds.toFixed(2)}s`;
const minutes = seconds / 60;
if (minutes < 60) return `${minutes.toFixed(2)}m`;
const hours = minutes / 60;
return `${hours.toFixed(2)}h`;
}
// Test performance with 1-hour file
test("performance test - 1 hour file", async (t) => {
const start = performance.now();
const { chunkPromises, uniqueOutputPath } = await splitMediaFile(
t.context.testFile1h,
);
// Wait for all chunks to complete
const chunkPaths = await Promise.all(chunkPromises);
const end = performance.now();
const duration = end - start;
console.log(`\n1 hour file processing stats:
- Total time: ${formatDuration(duration)}
- Chunks created: ${chunkPaths.length}
- Average time per chunk: ${formatDuration(duration / chunkPaths.length)}
- Processing speed: ${(3600 / (duration / 1000)).toFixed(2)}x realtime`);
t.true(chunkPaths.length > 0, "Should create chunks");
t.true(duration > 0, "Should measure time");
// Cleanup
await fs.rm(uniqueOutputPath, { recursive: true, force: true });
});
// Test performance with 4-hour file
test("performance test - 4 hour file", async (t) => {
const start = performance.now();
const { chunkPromises, uniqueOutputPath } = await splitMediaFile(
t.context.testFile4h,
);
// Wait for all chunks to complete
const chunkPaths = await Promise.all(chunkPromises);
const end = performance.now();
const duration = end - start;
console.log(`\n4 hour file processing stats:
- Total time: ${formatDuration(duration)}
- Chunks created: ${chunkPaths.length}
- Average time per chunk: ${formatDuration(duration / chunkPaths.length)}
- Processing speed: ${(14400 / (duration / 1000)).toFixed(2)}x realtime`);
t.true(chunkPaths.length > 0, "Should create chunks");
t.true(duration > 0, "Should measure time");
// Cleanup
await fs.rm(uniqueOutputPath, { recursive: true, force: true });
});
// Test memory usage during large file processing
test("memory usage during large file processing", async (t) => {
const initialMemory = process.memoryUsage().heapUsed;
let peakMemory = initialMemory;
const interval = setInterval(() => {
const used = process.memoryUsage().heapUsed;
peakMemory = Math.max(peakMemory, used);
}, 100);
const { chunkPromises, uniqueOutputPath } = await splitMediaFile(
t.context.testFile4h,
);
await Promise.all(chunkPromises);
clearInterval(interval);
const memoryIncrease = (peakMemory - initialMemory) / 1024 / 1024; // Convert to MB
console.log(`\nMemory usage stats:
- Initial memory: ${(initialMemory / 1024 / 1024).toFixed(2)}MB
- Peak memory: ${(peakMemory / 1024 / 1024).toFixed(2)}MB
- Memory increase: ${memoryIncrease.toFixed(2)}MB`);
t.true(memoryIncrease >= 0, "Should track memory usage");
// Cleanup
await fs.rm(uniqueOutputPath, { recursive: true, force: true });
});
test("should chunk video files with .mp3 extension for transcription", async (t) => {
// Create a test video file (we'll use an MP3 file but rename it to simulate a video)
const testVideoFile = join(t.context.testDir, "test-video.mp4");
await fs.copyFile(t.context.testFile10s, testVideoFile);
const { chunkPromises, chunkOffsets, uniqueOutputPath, chunkBaseName } =
await splitMediaFile(testVideoFile, 5); // Use 5 second chunks for faster test
t.true(Array.isArray(chunkPromises), "Should return array of promises");
t.is(chunkPromises.length, 2, "Should create 2 chunks for 10s file with 5s chunks");
t.true(Array.isArray(chunkOffsets), "Should return array of offsets");
t.is(chunkOffsets.length, 2, "Should have 2 offsets");
t.truthy(uniqueOutputPath, "Should return unique output path");
// Check that the chunk base name has .mp3 extension (not .mp4)
t.true(chunkBaseName.endsWith('.mp3'), "Chunk base name should end with .mp3 extension");
t.false(chunkBaseName.endsWith('.mp4'), "Chunk base name should not end with .mp4 extension");
// Process the chunks
const chunks = [];
for (const chunkPromise of chunkPromises) {
const chunkPath = await chunkPromise;
chunks.push(chunkPath);
}
// Verify all chunks have .mp3 extension
for (const chunkPath of chunks) {
t.true(chunkPath.endsWith('.mp3'), `Chunk path should end with .mp3: ${chunkPath}`);
t.false(chunkPath.endsWith('.mp4'), `Chunk path should not end with .mp4: ${chunkPath}`);
}
// Clean up
try {
if (uniqueOutputPath && existsSync(uniqueOutputPath)) {
await fs.rm(uniqueOutputPath, { recursive: true, force: true });
}
} catch (err) {
console.log("Error cleaning up test directory:", err);
}
});