apx-toolkit
Version:
Automatically discover APIs and generate complete integration packages: code in 12 languages, TypeScript types, test suites, SDK packages, API documentation, mock servers, performance reports, and contract tests. Saves 2-4 weeks of work in seconds.
285 lines • 12.8 kB
JavaScript
/**
* Core Runner - Decoupled execution logic for APX
* This module contains the core APX functionality without Apify Actor dependencies
* Can be used by CLI, test scripts, or other integrations
*
* Key Decoupling Features:
* - Accepts native TypeScript objects (ActorInput) instead of reading from Apify KeyValueStore
* - Uses Crawlee's local storage (works without Apify platform)
* - Returns structured results instead of pushing to global Dataset
* - Fully executable outside Apify environment
*/
import { PlaywrightCrawler, HttpCrawler, Router, RequestQueue, Dataset } from 'crawlee';
import { REQUEST_LABELS } from './types.js';
import { handleDiscovery } from './handlers/discovery-handler.js';
import { handleAPIProcessing } from './handlers/api-handler.js';
import { StatisticsCollector } from './utils/statistics.js';
import { setStatistics } from './utils/statistics.js';
import { ProgressTracker } from './utils/progress-tracker.js';
/**
 * Validates the APX input configuration and throws on the first problem found.
 *
 * Checks, in order:
 * - `startUrls` is a non-empty array whose entries each carry a parseable `url` string
 * - `minResponseSize` >= 0, `discoveryTimeout` >= 1000, `maxPages` >= 1,
 *   `maxConcurrency` >= 1 (each only when not `undefined`)
 * - `paginationType`, when truthy, is one of: auto, offset, page, cursor
 * - `exportFormats`, when truthy, is an array containing only: openapi, postman, curl, insomnia
 *
 * @param {object} input - Run configuration to validate.
 * @returns {void}
 * @throws {Error} Describing the first failed check.
 */
function validateInput(input) {
    const startUrls = input?.startUrls;
    if (!Array.isArray(startUrls) || startUrls.length === 0) {
        throw new Error('startUrls is required and must contain at least one URL');
    }
    for (const urlObj of startUrls) {
        // Optional chaining: a null/undefined entry must yield the descriptive error, not a TypeError.
        const url = urlObj?.url;
        if (!url || typeof url !== 'string') {
            throw new Error('Each startUrl must have a valid url string');
        }
        try {
            new URL(url);
        }
        catch {
            throw new Error(`Invalid URL format: ${url}`);
        }
    }
    // [field, inclusive lower bound, error message] - evaluated in this order.
    const lowerBounds = [
        ['minResponseSize', 0, 'minResponseSize must be >= 0'],
        ['discoveryTimeout', 1000, 'discoveryTimeout must be >= 1000ms'],
        ['maxPages', 1, 'maxPages must be >= 1'],
        ['maxConcurrency', 1, 'maxConcurrency must be >= 1'],
    ];
    for (const [field, min, message] of lowerBounds) {
        const value = input[field];
        if (value === undefined) {
            continue;
        }
        // `NaN < min` is always false, so values that coerce to NaN must be rejected explicitly.
        if (Number.isNaN(Number(value)) || value < min) {
            throw new Error(message);
        }
    }
    if (input.paginationType && !['auto', 'offset', 'page', 'cursor'].includes(input.paginationType)) {
        throw new Error('paginationType must be one of: auto, offset, page, cursor');
    }
    if (input.exportFormats) {
        if (!Array.isArray(input.exportFormats)) {
            throw new Error('exportFormats must be an array');
        }
        const validFormats = ['openapi', 'postman', 'curl', 'insomnia'];
        for (const format of input.exportFormats) {
            if (!validFormats.includes(format)) {
                throw new Error(`Invalid export format: ${format}. Must be one of: ${validFormats.join(', ')}`);
            }
        }
    }
}
/**
 * Groups dataset items into the artifact buckets returned by `runAPXCore`.
 *
 * Items are dispatched on their `_type` field; items with any other `_type`
 * are ignored. When several `typescript_types` items exist, the last one wins.
 *
 * @param {Array<object>} items - Items read from the dataset.
 * @returns {{codeSnippets: object, typescriptTypes: string, testSuites: Array, sdkPackages: Array, documentation: Array, examples: Array}}
 */
function collectArtifacts(items) {
    const artifacts = {
        codeSnippets: {},
        typescriptTypes: '',
        testSuites: [],
        sdkPackages: [],
        documentation: [],
        examples: [],
    };
    for (const item of items) {
        switch (item._type) {
            case 'code_snippets':
                // Snippets arrive as a single object under the `snippets` key; merge them.
                if (item.snippets) {
                    Object.assign(artifacts.codeSnippets, item.snippets);
                }
                break;
            case 'typescript_types':
                artifacts.typescriptTypes = item.content || '';
                break;
            case 'test_suites':
                // Either a wrapper with a `suites` array, or the item itself is the suite.
                if (Array.isArray(item.suites)) {
                    artifacts.testSuites.push(...item.suites);
                }
                else {
                    artifacts.testSuites.push(item);
                }
                break;
            case 'sdk_package':
                artifacts.sdkPackages.push({
                    language: item.language,
                    packageName: item.packageName,
                    files: item.files,
                    description: item.description,
                });
                break;
            case 'api_documentation':
                artifacts.documentation.push({
                    format: item.format,
                    filename: item.filename,
                    content: item.content,
                    mimeType: item.mimeType,
                });
                break;
            case 'api_examples':
                // Either a wrapper with an `examples` array, or the item itself is the example.
                if (Array.isArray(item.examples)) {
                    artifacts.examples.push(...item.examples);
                }
                else {
                    artifacts.examples.push(item);
                }
                break;
            default:
                break;
        }
    }
    return artifacts;
}
/**
 * Core APX execution function.
 *
 * Workflow:
 * 1. Validates `input` (validation errors are thrown before the try block,
 *    so they are NOT reported through `onError`).
 * 2. Opens a shared RequestQueue and a Router with one handler per request label.
 * 3. Runs a PlaywrightCrawler over the start URLs (discovery phase).
 * 4. Runs an HttpCrawler over the same queue (API processing phase).
 * 5. Reads every item from the default Dataset, groups them into artifacts,
 *    and returns everything as one structured object.
 *
 * Storage note (from the original author's comments): Crawlee is expected to use
 * local storage when not running on the Apify platform.
 *
 * @param {object} input - Run configuration; see `validateInput` for the accepted fields.
 * @param {object} [options] - Optional execution hooks.
 * @param {(msg: string) => void} [options.onProgress] - Receives log lines; defaults to `console.log`.
 * @param {(err: Error) => void} [options.onError] - Called with an `Error` before a crawl failure is rethrown;
 *   defaults to printing `err.message` via `console.error`.
 * @param {object} [options.progressTracker] - Tracker to reuse; a new `ProgressTracker` is created otherwise.
 * @returns {Promise<{summary: object, artifacts: object, data: Array, statistics: object}>}
 *   `summary` holds apisDiscovered, requestsProcessed, itemsExtracted, discoveryDuration and
 *   totalDuration (durations in seconds); `statistics` holds `stats` and `summary`.
 * @throws Rethrows the original error of any failure inside the crawl workflow after calling `onError`.
 */
export async function runAPXCore(input, options) {
    const startTime = Date.now();
    const log = options?.onProgress || ((msg) => console.log(msg));
    const onError = options?.onError || ((err) => console.error(err.message));
    // Forward every progress event to the log sink, prefixed with a percentage when one is present.
    const progressTracker = options?.progressTracker || new ProgressTracker();
    progressTracker.onProgress((event) => {
        const progressMsg = event.progress !== undefined
            ? `[${event.progress}%] ${event.message}`
            : event.message;
        log(progressMsg);
    });
    validateInput(input);
    const statistics = new StatisticsCollector();
    setStatistics(statistics);
    // Resolve defaults once so crawler configuration and the logged configuration cannot drift apart.
    const startUrlCount = input.startUrls.length;
    const maxPages = input.maxPages || 100;
    const maxConcurrency = input.maxConcurrency || 5;
    // One queue shared by both crawlers: discovery enqueues requests that the HTTP phase consumes.
    const requestQueue = await RequestQueue.open();
    const router = Router.create();
    // START_DISCOVERY requests are handled by the discovery handler (Playwright phase).
    router.addHandler(REQUEST_LABELS.START_DISCOVERY, async (context) => {
        await handleDiscovery(context, input);
    });
    // API_PROCESS requests are handled by the API handler (HTTP phase).
    router.addHandler(REQUEST_LABELS.API_PROCESS, async (context) => {
        await handleAPIProcessing(context, input);
    });
    // Discovery crawler: capped at one request per start URL.
    const playwrightCrawler = new PlaywrightCrawler({
        requestHandler: router,
        requestQueue,
        maxRequestsPerCrawl: startUrlCount,
        launchContext: {
            launchOptions: {
                headless: true,
            },
        },
        requestHandlerTimeoutSecs: 60,
    });
    // API processing crawler: up to `maxPages` requests per start URL.
    const httpCrawler = new HttpCrawler({
        requestHandler: router,
        requestQueue,
        maxRequestsPerCrawl: maxPages * (startUrlCount || 1),
        maxConcurrency,
        requestHandlerTimeoutSecs: 30,
    });
    const initialRequests = input.startUrls.map((urlObj) => ({
        url: urlObj.url,
        label: REQUEST_LABELS.START_DISCOVERY,
    }));
    log('🚀 Starting APX - The API Toolkit');
    log('='.repeat(60));
    log(`📋 Configuration:`);
    log(` Start URLs: ${startUrlCount}`);
    log(` Max Pages: ${maxPages}`);
    log(` Max Concurrency: ${maxConcurrency}`);
    log(` Generate Documentation: ${input.generateDocumentation !== false ? 'Yes' : 'No'}`);
    log(` Export Formats: ${input.exportFormats?.join(', ') || 'openapi, postman, curl'}`);
    log('='.repeat(60));
    log('');
    try {
        // ---- Discovery phase ----
        const discoveryStartTime = Date.now();
        progressTracker.discovery('Starting API discovery phase...');
        await playwrightCrawler.run(initialRequests);
        const discoveryDuration = (Date.now() - discoveryStartTime) / 1000;
        progressTracker.discovery(`Discovery phase complete (${discoveryDuration.toFixed(1)}s)`, undefined, undefined, 100);
        log(`✅ Discovery phase complete (${discoveryDuration.toFixed(1)}s)`);
        log('');
        const queueAfterDiscovery = await requestQueue.getInfo();
        const totalQueued = queueAfterDiscovery?.totalRequestCount || 0;
        // Everything in the queue beyond the start URLs counts as a discovered API request.
        // Clamp at 0: missing queue info or a total below the start URL count must not
        // produce a negative number in the summary.
        const apisDiscovered = Math.max(0, totalQueued - startUrlCount);
        if (apisDiscovered > 0) {
            statistics.recordDiscovery(apisDiscovered, discoveryDuration);
            progressTracker.discovery(`Discovered ${apisDiscovered} API endpoint(s)`, undefined, apisDiscovered, apisDiscovered);
            log(`🔍 Discovered ${apisDiscovered} API endpoint(s)`);
            // Reuses the snapshot fetched above; nothing touches the queue in between.
            const handledSoFar = queueAfterDiscovery?.handledRequestCount || 0;
            const pendingRequests = totalQueued - handledSoFar;
            log(`📋 Queue status: ${totalQueued} total, ${handledSoFar} handled, ${pendingRequests} pending`);
            progressTracker.processing('Starting API processing phase...', undefined, pendingRequests, 0);
            log('⚡ Starting API processing phase...');
            log('');
        }
        else {
            progressTracker.discovery('No APIs discovered', undefined, 0, 0);
            log('⚠️ No APIs discovered. The site may not use API calls or they may require user interaction.');
            log('');
        }
        // ---- API processing phase ----
        // Runs even when nothing was discovered, as in the original flow.
        await httpCrawler.run();
        const queueAfterProcessing = await requestQueue.getInfo();
        const requestsProcessed = queueAfterProcessing?.handledRequestCount || 0;
        const totalRequests = queueAfterProcessing?.totalRequestCount || 0;
        progressTracker.processing('API processing phase complete', undefined, totalRequests, requestsProcessed);
        log('✅ API processing phase complete.');
        log('');
        // ---- Result collection ----
        const dataset = await Dataset.open();
        const datasetInfo = await dataset.getInfo();
        const itemsExtracted = datasetInfo?.itemCount || 0;
        const data = [];
        if (itemsExtracted > 0) {
            const { items } = await dataset.getData({ limit: itemsExtracted });
            // Copy with a loop rather than push(...items): spreading a very large array
            // into call arguments can exceed the engine's argument limit.
            for (const item of items) {
                data.push(item);
            }
        }
        const artifacts = collectArtifacts(data);
        const totalDuration = (Date.now() - startTime) / 1000;
        log('📊 Execution Summary');
        log('='.repeat(60));
        log(` APIs Discovered: ${apisDiscovered}`);
        log(` Requests Processed: ${requestsProcessed}`);
        log(` Items Extracted: ${itemsExtracted}`);
        log(` Total Duration: ${totalDuration.toFixed(1)}s`);
        log('');
        const stats = statistics.getStats();
        const summary = await statistics.saveSummary();
        return {
            summary: {
                apisDiscovered,
                requestsProcessed,
                itemsExtracted,
                discoveryDuration,
                totalDuration,
            },
            artifacts,
            data,
            statistics: {
                stats,
                summary: summary.summary,
            },
        };
    }
    catch (error) {
        // Report a normalized Error to the handler, but rethrow the original value untouched.
        onError(error instanceof Error ? error : new Error(String(error)));
        throw error;
    }
}
//# sourceMappingURL=core-runner.js.map