UNPKG

@aj-archipelago/cortex

Version:

Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure and others.

246 lines (207 loc) 8.05 kB
/**
 * AVA test suite for FileConversionService.
 *
 * Exercises Excel -> CSV conversion, document conversion through the
 * MarkItDown endpoint and through the external PDF service, error handling,
 * extension detection, and the `container` parameter accepted by
 * `_saveConvertedFile` / `ensureConvertedVersion`.
 *
 * Tests that mutate process-wide state (`process.env`, `axios.get`, nock
 * interceptors) are declared with `test.serial` and restore that state in a
 * `finally` block so that a failing assertion cannot leak into other tests.
 */
import fs from "fs/promises";
import { dirname, join } from "path";
import { fileURLToPath } from "url";
import test from "ava";
import axios from "axios";
import nock from "nock";
import XLSX from "xlsx";
import { FileConversionService } from "../src/services/FileConversionService.js";

const __dirname = dirname(fileURLToPath(import.meta.url));

// Minimal context object handed to the service; only a logger is supplied.
const mockContext = {
  log: console.log,
};

/**
 * Capture the current values of the given environment variables.
 *
 * @param {...string} names - Environment variable names to snapshot.
 * @returns {() => void} Function that puts every captured variable back to
 *   the value it had when the snapshot was taken; variables that were unset
 *   are deleted again.
 */
function snapshotEnv(...names) {
  const saved = names.map((name) => [name, process.env[name]]);
  return () => {
    for (const [name, value] of saved) {
      // Compare against undefined (not truthiness) so an originally empty
      // string is restored rather than dropped. `delete` is required because
      // assigning undefined to process.env stores the string "undefined".
      if (value === undefined) {
        delete process.env[name];
      } else {
        process.env[name] = value;
      }
    }
  };
}

// Setup: create the fixture documents used by the tests below.
test.before(async (t) => {
  const testDir = join(__dirname, "test-docs");
  await fs.mkdir(testDir, { recursive: true });

  const textFile = join(testDir, "test.txt");
  const largeTextFile = join(testDir, "large.txt");
  const unicodeFile = join(testDir, "unicode.txt");
  const jsonFile = join(testDir, "test.json");
  const emptyFile = join(testDir, "empty.txt");
  const excelFile = join(testDir, "test.xlsx");

  // Regular text content
  await fs.writeFile(
    textFile,
    "This is a test document content.\nIt has multiple lines.\nThird line here.",
  );

  // Large text content (>100KB)
  const largeContent = "Lorem ipsum ".repeat(10000);
  await fs.writeFile(largeTextFile, largeContent);

  // Unicode content
  const unicodeContent =
    "这是中文内容\nこれは日本語です\nЭто русский текст\n🌟 emoji test";
  await fs.writeFile(unicodeFile, unicodeContent);

  // JSON content
  await fs.writeFile(jsonFile, JSON.stringify({ test: "content" }));

  // Empty file
  await fs.writeFile(emptyFile, "");

  // Single-sheet Excel workbook with a header row and two data rows
  const workbook = XLSX.utils.book_new();
  const ws1 = XLSX.utils.aoa_to_sheet([
    ["Header 1", "Header 2"],
    ["Data 1", "Data 2"],
    ["Data 3", "Data 4"],
  ]);
  XLSX.utils.book_append_sheet(workbook, ws1, "Sheet1");
  XLSX.writeFile(workbook, excelFile);

  t.context = {
    testDir,
    textFile,
    largeTextFile,
    unicodeFile,
    jsonFile,
    emptyFile,
    excelFile,
  };
});

// Cleanup: remove the fixture directory even when tests fail.
test.after.always(async (t) => {
  await fs.rm(t.context.testDir, { recursive: true, force: true });
});

// Test Excel to CSV conversion
test("converts Excel to CSV successfully", async (t) => {
  const service = new FileConversionService(mockContext);
  const result = await service.convertFile(t.context.excelFile);

  t.true(result.converted);
  t.true(result.convertedPath.endsWith(".csv"));

  // Read the converted file and verify content
  const content = await fs.readFile(result.convertedPath, "utf-8");
  t.true(content.includes("Header 1,Header 2"));
  t.true(content.includes("Data 1,Data 2"));
  t.true(content.includes("Data 3,Data 4"));
});

// Test document conversion with MarkItDown API.
// Serial: this test rewrites process.env and axios.get, which are shared with
// every other test in the process.
test.serial("converts document to markdown via MarkItDown API", async (t) => {
  const restoreEnv = snapshotEnv(
    "MARKITDOWN_CONVERT_URL",
    "DOC_TO_PDF_SERVICE_URL",
  );
  const originalAxiosGet = axios.get;

  try {
    // Ensure PDF path is NOT used in this test
    delete process.env.DOC_TO_PDF_SERVICE_URL;
    process.env.MARKITDOWN_CONVERT_URL = "http://localhost:8080/convert?url=";

    // Stub axios.get so the MarkItDown request is answered locally
    axios.get = async (url) => {
      if (url.includes("test.docx")) {
        return {
          data: {
            markdown:
              "# Test Document\n\nThis is a test document converted to markdown.",
          },
        };
      }
      throw new Error("Invalid URL");
    };

    const service = new FileConversionService(mockContext);
    const result = await service.convertFile(
      "test.docx",
      "https://example.com/test.docx",
    );

    t.true(result.converted);
    t.true(result.convertedPath.endsWith(".md"));

    // Read the converted file and verify content
    const content = await fs.readFile(result.convertedPath, "utf-8");
    t.true(content.includes("# Test Document"));
    t.true(content.includes("This is a test document converted to markdown"));
  } finally {
    // Always undo the global mutations, even if an assertion above failed
    axios.get = originalAxiosGet;
    restoreEnv();
  }
});

// Test document conversion with external PDF service.
// Serial for the same reason as the MarkItDown test: it flips the same two
// environment variables in the opposite direction.
test.serial("converts document to PDF via external service", async (t) => {
  const restoreEnv = snapshotEnv(
    "DOC_TO_PDF_SERVICE_URL",
    "MARKITDOWN_CONVERT_URL",
  );

  try {
    // Prefer PDF path in this test
    delete process.env.MARKITDOWN_CONVERT_URL;
    process.env.DOC_TO_PDF_SERVICE_URL = "http://pdf.test/convert";

    // Mock the external PDF service with a minimal PDF-looking payload
    const pdfBody = Buffer.from(
      "%PDF-1.4\n%\u00E2\u00E3\u00CF\u00D3\n1 0 obj<<>>endobj\ntrailer<<>>\n%%EOF\n",
      "utf-8",
    );
    const scope = nock("http://pdf.test")
      .post("/convert")
      .reply(200, pdfBody, {
        "Content-Type": "application/pdf",
        "Content-Length": String(pdfBody.length),
      });

    const service = new FileConversionService(mockContext);

    // Create a real local test file to stream to the PDF service
    const docPath = join(t.context.testDir, "test.docx");
    await fs.writeFile(docPath, "Dummy DOCX content for PDF test");

    const result = await service.convertFile(
      docPath,
      "https://example.com/test.docx",
    );

    t.true(result.converted);
    t.true(result.convertedPath.endsWith(".pdf"));

    const content = await fs.readFile(result.convertedPath);
    t.is(content.slice(0, 4).toString(), "%PDF");
    // The interceptor must have been consumed, i.e. the service was called
    t.true(scope.isDone());
  } finally {
    // Always drop interceptors and restore env, even on failure
    nock.cleanAll();
    restoreEnv();
  }
});

// Test error handling for missing original URL
test("handles missing original URL for document conversion", async (t) => {
  const service = new FileConversionService(mockContext);

  await t.throwsAsync(async () => service.convertFile("test.docx"), {
    message: "Original URL is required for document conversion",
  });
});

// Test error handling for unsupported file types
test("handles unsupported file types", async (t) => {
  const service = new FileConversionService(mockContext);
  const result = await service.convertFile(t.context.jsonFile);

  t.false(result.converted);
});

// Test file extension detection
test("correctly detects file extensions", (t) => {
  const service = new FileConversionService(mockContext);

  t.true(service.needsConversion("test.docx"));
  t.true(service.needsConversion("test.xlsx"));
  t.false(service.needsConversion("test.txt"));
  t.false(service.needsConversion("test.json"));
});

// Test _saveConvertedFile method signature and container parameter handling
test("_saveConvertedFile accepts container parameter", async (t) => {
  const service = new FileConversionService(mockContext, false); // Use local storage for testing

  // Create a test file
  const testFile = join(t.context.testDir, "container-param-test.txt");
  await fs.writeFile(testFile, "Test content for container parameter");

  // Test that the method accepts all parameters without throwing
  const result = await service._saveConvertedFile(
    testFile,
    "test-request-id",
    "test-filename.txt",
    "test-container",
  );

  t.truthy(result);
  t.truthy(result.url);
  t.is(typeof result.url, "string");
});

// Test ensureConvertedVersion method signature with container parameter
test("ensureConvertedVersion accepts container parameter", async (t) => {
  const service = new FileConversionService(mockContext, false);

  // Mock file info object
  const fileInfo = {
    url: "http://example.com/test.txt", // Non-convertible file
    gcs: "gs://bucket/test.txt",
  };

  // Test that the method accepts container parameter without throwing
  const result = await service.ensureConvertedVersion(
    fileInfo,
    "test-request-id",
    "test-container",
  );

  t.truthy(result);
  t.is(result.url, fileInfo.url); // Should return original for non-convertible file
});