UNPKG

apx-toolkit

Version:

Automatically discover APIs and generate complete integration packages: code in 12 languages, TypeScript types, test suites, SDK packages, API documentation, mock servers, performance reports, and contract tests. Saves 2-4 weeks of work in seconds.

285 lines 12.8 kB
/**
 * Core Runner - Decoupled execution logic for APX
 * This module contains the core APX functionality without Apify Actor dependencies
 * Can be used by CLI, test scripts, or other integrations
 *
 * Key Decoupling Features:
 * - Accepts native TypeScript objects (ActorInput) instead of reading from Apify KeyValueStore
 * - Uses Crawlee's local storage (works without Apify platform)
 * - Returns structured results instead of pushing to global Dataset
 * - Fully executable outside Apify environment
 */
import { PlaywrightCrawler, HttpCrawler, Router, RequestQueue, Dataset } from 'crawlee';
import { REQUEST_LABELS } from './types.js';
import { handleDiscovery } from './handlers/discovery-handler.js';
import { handleAPIProcessing } from './handlers/api-handler.js';
import { StatisticsCollector, setStatistics } from './utils/statistics.js';
import { ProgressTracker } from './utils/progress-tracker.js';

/**
 * Validates input configuration.
 *
 * Checks that `startUrls` is a non-empty array of objects carrying a parseable
 * `url` string, that the optional numeric limits are within range, and that
 * `paginationType` / `exportFormats` only contain supported values.
 *
 * @param input - Configuration object to validate
 * @throws {Error} With a descriptive message for the first violation found
 */
function validateInput(input) {
    // Guard against a missing input object or a non-array startUrls so the
    // caller gets the descriptive error instead of an opaque TypeError.
    if (!input || !Array.isArray(input.startUrls) || input.startUrls.length === 0) {
        throw new Error('startUrls is required and must contain at least one URL');
    }
    for (const urlObj of input.startUrls) {
        // `urlObj?.url` also rejects null/undefined entries.
        if (!urlObj?.url || typeof urlObj.url !== 'string') {
            throw new Error('Each startUrl must have a valid url string');
        }
        try {
            // The URL constructor throws on anything it cannot parse.
            new URL(urlObj.url);
        }
        catch (error) {
            throw new Error(`Invalid URL format: ${urlObj.url}`);
        }
    }
    if (input.minResponseSize !== undefined && input.minResponseSize < 0) {
        throw new Error('minResponseSize must be >= 0');
    }
    if (input.discoveryTimeout !== undefined && input.discoveryTimeout < 1000) {
        throw new Error('discoveryTimeout must be >= 1000ms');
    }
    if (input.maxPages !== undefined && input.maxPages < 1) {
        throw new Error('maxPages must be >= 1');
    }
    if (input.maxConcurrency !== undefined && input.maxConcurrency < 1) {
        throw new Error('maxConcurrency must be >= 1');
    }
    if (input.paginationType && !['auto', 'offset', 'page', 'cursor'].includes(input.paginationType)) {
        throw new Error('paginationType must be one of: auto, offset, page, cursor');
    }
    if (input.exportFormats) {
        const validFormats = ['openapi', 'postman', 'curl', 'insomnia'];
        for (const format of input.exportFormats) {
            if (!validFormats.includes(format)) {
                throw new Error(`Invalid export format: ${format}. Must be one of: ${validFormats.join(', ')}`);
            }
        }
    }
}

/**
 * Groups dataset items into the artifact buckets returned by runAPXCore.
 *
 * Items are dispatched on their `_type` field; items with any other `_type`
 * are ignored. When several `typescript_types` items exist, the last one wins.
 *
 * @param items - Items read from the dataset
 * @returns Object with codeSnippets, typescriptTypes, testSuites, sdkPackages,
 *          documentation and examples
 */
function collectArtifacts(items) {
    const artifacts = {
        codeSnippets: {},
        typescriptTypes: '',
        testSuites: [],
        sdkPackages: [],
        documentation: [],
        examples: [],
    };
    for (const item of items) {
        if (item._type === 'code_snippets') {
            // Code snippets are stored as a single object with snippets key
            if (item.snippets) {
                Object.assign(artifacts.codeSnippets, item.snippets);
            }
        }
        else if (item._type === 'typescript_types') {
            artifacts.typescriptTypes = item.content || '';
        }
        else if (item._type === 'test_suites') {
            // Test suites are stored with suites array; otherwise keep the raw item
            if (item.suites && Array.isArray(item.suites)) {
                artifacts.testSuites.push(...item.suites);
            }
            else {
                artifacts.testSuites.push(item);
            }
        }
        else if (item._type === 'sdk_package') {
            artifacts.sdkPackages.push({
                language: item.language,
                packageName: item.packageName,
                files: item.files,
                description: item.description,
            });
        }
        else if (item._type === 'api_documentation') {
            artifacts.documentation.push({
                format: item.format,
                filename: item.filename,
                content: item.content,
                mimeType: item.mimeType,
            });
        }
        else if (item._type === 'api_examples') {
            if (item.examples && Array.isArray(item.examples)) {
                artifacts.examples.push(...item.examples);
            }
            else {
                artifacts.examples.push(item);
            }
        }
    }
    return artifacts;
}

/**
 * Core APX execution function
 * Runs the complete APX workflow without Apify Actor dependencies
 *
 * Decoupling Strategy:
 * 1. Input: Accepts native TypeScript object (ActorInput) instead of reading from Apify KeyValueStore
 * 2. Crawlee Setup: Uses Crawlee's local storage automatically (works without Apify platform)
 *    - RequestQueue, Dataset, and other storage clients work locally via file system
 *    - No special configuration needed - Crawlee detects environment automatically
 * 3. Output: Collects generated data into structured object (APXResult) for return
 *    - Data is collected from Dataset after processing
 *    - All artifacts are structured and returned to caller
 *
 * This allows APX to run in multiple environments:
 * - Apify Actor (via main.ts)
 * - CLI tool (via cli.ts)
 * - Test scripts (via test-main.ts)
 * - Any Node.js environment
 *
 * @param input - Configuration input (native TypeScript object)
 * @param options - Optional execution options (`onProgress`, `onError`, `progressTracker`)
 * @returns Structured result with all generated artifacts and data
 * @throws {Error} When input validation fails (thrown before any crawling and
 *         without calling `onError`), or when a crawl/collection step fails
 *         (reported to `onError` first, then rethrown)
 */
export async function runAPXCore(input, options) {
    const startTime = Date.now();
    const log = options?.onProgress || ((msg) => console.log(msg));
    const onError = options?.onError || ((err) => console.error(err.message));
    // Initialize progress tracker and forward its events to the log callback
    const progressTracker = options?.progressTracker || new ProgressTracker();
    progressTracker.onProgress((event) => {
        const progressMsg = event.progress !== undefined
            ? `[${event.progress}%] ${event.message}`
            : event.message;
        log(progressMsg);
    });
    // Validate input
    validateInput(input);
    // Initialize statistics collector
    const statistics = new StatisticsCollector();
    setStatistics(statistics);
    // Create a shared request queue
    // Crawlee automatically uses local storage when not on Apify platform
    // Storage location: ./storage/request_queues/default (local) or Apify cloud (on platform)
    // No special configuration needed - it works out of the box
    const requestQueue = await RequestQueue.open();
    // Create router for request handling
    const router = Router.create();
    // Register START_DISCOVERY handler (Playwright-based)
    router.addHandler(REQUEST_LABELS.START_DISCOVERY, async (context) => {
        await handleDiscovery(context, input);
    });
    // Register API_PROCESS handler (HTTP-based)
    router.addHandler(REQUEST_LABELS.API_PROCESS, async (context) => {
        await handleAPIProcessing(context, input);
    });
    // Configure PlaywrightCrawler for discovery phase
    const playwrightCrawler = new PlaywrightCrawler({
        requestHandler: router,
        requestQueue,
        maxRequestsPerCrawl: input.startUrls.length,
        launchContext: {
            launchOptions: {
                headless: true,
            },
        },
        requestHandlerTimeoutSecs: 60,
    });
    // Configure HttpCrawler for API processing phase
    const httpCrawler = new HttpCrawler({
        requestHandler: router,
        requestQueue,
        maxRequestsPerCrawl: (input.maxPages || 100) * (input.startUrls.length || 1),
        maxConcurrency: input.maxConcurrency || 5,
        requestHandlerTimeoutSecs: 30,
    });
    // Prepare initial requests with START_DISCOVERY label
    const initialRequests = input.startUrls.map((urlObj) => ({
        url: urlObj.url,
        label: REQUEST_LABELS.START_DISCOVERY,
    }));
    log('🚀 Starting APX - The API Toolkit');
    log('='.repeat(60));
    log(`📋 Configuration:`);
    log(` Start URLs: ${input.startUrls.length}`);
    log(` Max Pages: ${input.maxPages || 100}`);
    log(` Max Concurrency: ${input.maxConcurrency || 5}`);
    log(` Generate Documentation: ${input.generateDocumentation !== false ? 'Yes' : 'No'}`);
    log(` Export Formats: ${input.exportFormats?.join(', ') || 'openapi, postman, curl'}`);
    log('='.repeat(60));
    log('');
    try {
        const discoveryStartTime = Date.now();
        progressTracker.discovery('Starting API discovery phase...');
        // Run PlaywrightCrawler for discovery
        await playwrightCrawler.run(initialRequests);
        const discoveryDuration = (Date.now() - discoveryStartTime) / 1000;
        progressTracker.discovery(`Discovery phase complete (${discoveryDuration.toFixed(1)}s)`, undefined, undefined, 100);
        log(`✅ Discovery phase complete (${discoveryDuration.toFixed(1)}s)`);
        log('');
        // Get discovery stats
        const queueInfo = await requestQueue.getInfo();
        const totalQueued = queueInfo?.totalRequestCount || 0;
        const handledSoFar = queueInfo?.handledRequestCount || 0;
        // Clamp at zero: the queue de-duplicates requests, so duplicate start URLs
        // can make the total smaller than startUrls.length and would otherwise
        // report a negative number of discovered APIs.
        const apisDiscovered = Math.max(0, totalQueued - input.startUrls.length);
        if (apisDiscovered > 0) {
            statistics.recordDiscovery(apisDiscovered, discoveryDuration);
            progressTracker.discovery(`Discovered ${apisDiscovered} API endpoint(s)`, undefined, apisDiscovered, apisDiscovered);
            log(`🔍 Discovered ${apisDiscovered} API endpoint(s)`);
            const pendingRequests = totalQueued - handledSoFar;
            log(`📋 Queue status: ${totalQueued} total, ${handledSoFar} handled, ${pendingRequests} pending`);
            progressTracker.processing('Starting API processing phase...', undefined, pendingRequests, 0);
            log('⚡ Starting API processing phase...');
            log('');
        }
        else {
            progressTracker.discovery('No APIs discovered', undefined, 0, 0);
            log('⚠️ No APIs discovered. The site may not use API calls or they may require user interaction.');
            log('');
        }
        // Run HttpCrawler to process all API_PROCESS requests
        await httpCrawler.run();
        // Get final statistics (one snapshot serves both the progress event and the summary)
        const finalQueueInfo = await requestQueue.getInfo();
        const requestsProcessed = finalQueueInfo?.handledRequestCount || 0;
        const total = finalQueueInfo?.totalRequestCount || 0;
        progressTracker.processing('API processing phase complete', undefined, total, requestsProcessed);
        log('✅ API processing phase complete.');
        log('');
        // Open dataset - Crawlee automatically uses local storage when not on Apify platform
        const dataset = await Dataset.open();
        const datasetInfo = await dataset.getInfo();
        const itemsExtracted = datasetInfo?.itemCount || 0;
        // Collect all data items. The fetched array is used directly rather than
        // spread into push(), which can exceed the engine's argument limit on
        // large datasets.
        const data = itemsExtracted > 0
            ? (await dataset.getData({ limit: itemsExtracted })).items
            : [];
        // Collect generated artifacts from dataset
        const artifacts = collectArtifacts(data);
        const totalDuration = (Date.now() - startTime) / 1000;
        log('📊 Execution Summary');
        log('='.repeat(60));
        log(` APIs Discovered: ${apisDiscovered}`);
        log(` Requests Processed: ${requestsProcessed}`);
        log(` Items Extracted: ${itemsExtracted}`);
        log(` Total Duration: ${totalDuration.toFixed(1)}s`);
        log('');
        const stats = statistics.getStats();
        const summary = await statistics.saveSummary();
        return {
            summary: {
                apisDiscovered,
                requestsProcessed,
                itemsExtracted,
                discoveryDuration,
                totalDuration,
            },
            artifacts,
            data,
            statistics: {
                stats,
                summary: summary.summary,
            },
        };
    }
    catch (error) {
        // Report through the caller-supplied handler, then rethrow the original
        // error so the caller still sees the failure.
        onError(error instanceof Error ? error : new Error(String(error)));
        throw error;
    }
}
//# sourceMappingURL=core-runner.js.map