apx-toolkit
Version:
Automatically discover APIs and generate complete integration packages: code in 12 languages, TypeScript types, test suites, SDK packages, API documentation, mock servers, performance reports, and contract tests. Saves 2-4 weeks of work in seconds.
600 lines • 27.9 kB
JavaScript
import { Dataset } from 'crawlee';
import { REQUEST_LABELS } from '../types.js';
import { isAPIResponse, extractAPIMetadata } from '../utils/api-detector.js';
import { generateExports } from '../utils/api-exporter.js';
import { generateAllCodeSnippets } from '../utils/code-generator.js';
import { generateTypeScriptDeclarationFile } from '../utils/typescript-generator.js';
import { generateAllTestSuites } from '../utils/test-generator.js';
import { generateSDKPackages } from '../utils/sdk-generator.js';
import { getStatistics } from '../utils/statistics.js';
import { deepInteractionFuzzing } from '../utils/interaction-fuzzer.js';
/**
* Simulates user interactions to trigger API calls on landing pages
*/
async function simulateInteractions(page, log, lastAPIActivityTime, discoveredAPIs, discoveredBaseUrls, responseBodyCache, responseExamples, input) {
const interactionWaitTime = input.interactionWaitTime || 2000;
try {
log.info('Starting interaction simulation: scrolling and clicking...');
// Set up response listener for interactions
const responseListener = async (response) => {
try {
const url = response.url();
const isAPI = await isAPIResponse(response, {
apiPatterns: input.apiPatterns,
minResponseSize: input.minResponseSize || 1000,
});
if (isAPI) {
lastAPIActivityTime.value = Date.now();
log.info(`API discovered during interaction: ${url}`);
try {
const body = await response.body();
responseBodyCache.set(url, body);
}
catch (error) {
log.debug(`Could not cache response body for ${url}`);
}
const cachedBody = responseBodyCache.get(url);
const apiMetadata = await extractAPIMetadata(response, input, cachedBody);
if (apiMetadata && !discoveredBaseUrls.has(apiMetadata.baseUrl)) {
discoveredBaseUrls.add(apiMetadata.baseUrl);
discoveredAPIs.push(apiMetadata);
try {
// Use cached body to avoid double-read (Playwright limitation)
if (cachedBody) {
const json = JSON.parse(cachedBody.toString());
responseExamples.set(apiMetadata.url, json);
}
else {
log.debug(`Response body not cached for ${url}, skipping example capture`);
}
}
catch (error) {
log.debug(`Could not capture response example for ${url}`);
}
log.info(`Discovered API: ${apiMetadata.baseUrl}`);
}
}
}
catch (error) {
log.debug(`Error processing response during interaction: ${error}`);
}
};
page.on('response', responseListener);
// Scroll down to trigger lazy-loaded content
log.info('Scrolling page to trigger lazy-loaded APIs...');
await page.evaluate(async () => {
await new Promise((resolve) => {
let totalHeight = 0;
const distance = 300;
const timer = setInterval(() => {
const scrollHeight = document.body.scrollHeight;
window.scrollBy(0, distance);
totalHeight += distance;
if (totalHeight >= scrollHeight) {
clearInterval(timer);
resolve();
}
}, 100);
});
});
await page.waitForTimeout(interactionWaitTime);
// Try clicking common interactive elements
log.info('Clicking interactive elements...');
const clickableSelectors = [
'button',
'a[href]',
'[role="button"]',
'.btn',
'.button',
'[data-testid*="button"]',
'[data-testid*="link"]',
];
for (const selector of clickableSelectors) {
try {
const elements = await page.$$(selector);
if (elements.length > 0 && elements.length <= 10) {
// Only click if there are a reasonable number of elements
for (let i = 0; i < Math.min(3, elements.length); i++) {
try {
const element = elements[i];
const isVisible = await element.isVisible();
if (isVisible) {
await element.click({ timeout: 1000 });
await page.waitForTimeout(1000);
// If we found APIs, we can stop
if (discoveredAPIs.length > 0) {
break;
}
}
}
catch (error) {
// Element might not be clickable, continue
}
}
}
if (discoveredAPIs.length > 0) {
break;
}
}
catch (error) {
// Selector might not exist, continue
}
}
// Wait a bit more for any delayed API calls
await page.waitForTimeout(interactionWaitTime);
// Remove the response listener
page.off('response', responseListener);
if (discoveredAPIs.length > 0) {
log.info(`Interaction simulation successful! Discovered ${discoveredAPIs.length} API(s)`);
}
else {
log.warning('Interaction simulation did not discover any APIs');
}
}
catch (error) {
log.warning(`Error during interaction simulation: ${error}`);
}
}
/**
* Handles OAuth 2.0 login flow and captures authentication tokens
*/
async function handleOAuthFlow(page, loginUrl, log) {
try {
log.info(`Starting OAuth flow: navigating to login URL: ${loginUrl}`);
const capturedTokens = [];
const capturedCookies = [];
const capturedHeaders = {};
// Set up response listener to capture tokens
const responseListener = async (response) => {
try {
const url = response.url();
const headers = response.headers();
// Look for Authorization headers in responses
if (headers['authorization']) {
capturedTokens.push(headers['authorization']);
log.info(`Captured authorization token from: ${url}`);
}
// Look for tokens in response body
try {
const body = await response.body();
const text = body.toString();
// Common token patterns
const tokenPatterns = [
/"access_token"\s*:\s*"([^"]+)"/i,
/"token"\s*:\s*"([^"]+)"/i,
/"bearer"\s*:\s*"([^"]+)"/i,
/access_token=([^&\s]+)/i,
];
for (const pattern of tokenPatterns) {
const match = text.match(pattern);
if (match && match[1]) {
capturedTokens.push(match[1]);
log.info(`Captured token from response body: ${url}`);
}
}
}
catch (error) {
// Ignore body read errors
}
}
catch (error) {
// Ignore individual response errors
}
};
page.on('response', responseListener);
// Navigate to login page
await page.goto(loginUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
// Wait a bit for any automatic redirects
await page.waitForTimeout(2000);
// Try to find and fill login form
try {
const emailInput = await page.$('input[type="email"], input[name*="email"], input[id*="email"]');
const passwordInput = await page.$('input[type="password"], input[name*="password"], input[id*="password"]');
const submitButton = await page.$('button[type="submit"], input[type="submit"], button:has-text("Login"), button:has-text("Sign in")');
if (emailInput && passwordInput) {
log.info('Found login form, attempting to fill (will need user credentials)');
// Note: In production, user would need to provide credentials
// For now, we'll just wait and capture tokens from the flow
}
}
catch (error) {
// Form not found or not fillable
}
// Wait for redirects and token capture
await page.waitForTimeout(5000);
// Get cookies
const cookies = await page.context().cookies();
const cookieString = cookies.map(c => `${c.name}=${c.value}`).join('; ');
// Remove listener
page.off('response', responseListener);
const result = {};
if (capturedTokens.length > 0) {
result.token = capturedTokens[0];
result.headers = { Authorization: capturedTokens[0] };
log.info('✅ OAuth flow completed: Token captured');
}
if (cookieString) {
result.cookies = cookieString;
if (!result.headers)
result.headers = {};
result.headers['Cookie'] = cookieString;
}
return result;
}
catch (error) {
log.warning(`OAuth flow error: ${error}`);
return {};
}
}
/**
* Handler for START_DISCOVERY requests
* Uses Playwright to intercept network traffic and discover API endpoints
*/
export async function handleDiscovery(context, input) {
const { request, page, log, crawler } = context;
// Handle OAuth flow if login URL provided
let oauthAuthHeaders = {};
if (input.loginUrl && input.oauthFlow) {
const oauthResult = await handleOAuthFlow(page, input.loginUrl, log);
if (oauthResult.headers) {
oauthAuthHeaders = oauthResult.headers;
log.info('OAuth authentication completed, using captured tokens for API requests');
}
}
const discoveredAPIs = [];
// Use Set for O(1) deduplication lookups
const discoveredBaseUrls = new Set();
// Cache response bodies to avoid multiple reads
const responseBodyCache = new Map();
// Store response examples for TypeScript types and documentation
const responseExamples = new Map();
log.info(`Starting API discovery for ${request.url}`);
// Set up WebSocket detection BEFORE page navigation
const webSocketConnections = [];
page.on('websocket', (ws) => {
const url = ws.url();
log.info(`WebSocket connection detected: ${url}`);
webSocketConnections.push({
url,
protocols: undefined, // Playwright doesn't expose protocol directly
});
});
// Track API activity to enable early exit
const lastAPIActivityTime = { value: Date.now() };
const API_ACTIVITY_TIMEOUT = 3000; // 3 seconds of no activity = likely done
let apiActivityCheckInterval = null;
// Set up response interceptor
page.on('response', async (response) => {
try {
const url = response.url();
// Check if this response matches our API criteria
const isAPI = await isAPIResponse(response, {
apiPatterns: input.apiPatterns,
minResponseSize: input.minResponseSize || 1000,
});
if (isAPI) {
lastAPIActivityTime.value = Date.now();
log.info(`Potential API endpoint found: ${url}`);
// Cache response body for later use
try {
const body = await response.body();
responseBodyCache.set(url, body);
}
catch (error) {
// If we can't cache, continue anyway
log.debug(`Could not cache response body for ${url}`);
}
// Extract API metadata (will use cached body if available)
const cachedBody = responseBodyCache.get(url);
const apiMetadata = await extractAPIMetadata(response, input, cachedBody);
if (apiMetadata) {
// Check if we already discovered this API (O(1) lookup)
if (!discoveredBaseUrls.has(apiMetadata.baseUrl)) {
discoveredBaseUrls.add(apiMetadata.baseUrl);
discoveredAPIs.push(apiMetadata);
// Capture response example for TypeScript types
// Use cached body to avoid double-read (Playwright limitation)
try {
if (cachedBody) {
const json = JSON.parse(cachedBody.toString());
responseExamples.set(apiMetadata.url, json);
}
else {
// If not cached, we already read it above, so skip
log.debug(`Response body not cached for ${url}, skipping example capture`);
}
}
catch (error) {
// If we can't parse, continue without example
log.debug(`Could not capture response example for ${url}`);
}
log.info(`Discovered API: ${apiMetadata.baseUrl}`, {
method: apiMetadata.method,
hasPagination: !!apiMetadata.paginationInfo,
dataPath: apiMetadata.dataPath,
});
}
}
}
}
catch (error) {
// Log but don't fail on individual response errors
log.debug(`Error processing response: ${error}`);
}
});
// Navigate to the page with optimized wait strategy
try {
// Use 'domcontentloaded' for faster initial load, then wait for network activity
await page.goto(request.url, {
waitUntil: 'domcontentloaded',
timeout: input.discoveryTimeout || 10000,
});
// Wait for network activity with early exit
const startTime = Date.now();
const maxWaitTime = Math.min((input.discoveryTimeout || 10000) - 2000, 8000); // Reserve 2s buffer
// Monitor for API activity
apiActivityCheckInterval = setInterval(() => {
const timeSinceLastActivity = Date.now() - lastAPIActivityTime.value;
if (timeSinceLastActivity > API_ACTIVITY_TIMEOUT && discoveredAPIs.length > 0) {
// We have APIs and no recent activity - likely done
log.debug('No recent API activity, proceeding with discovered APIs');
}
}, 500);
// Wait for network idle or timeout, but with shorter timeout
try {
await page.waitForLoadState('networkidle', {
timeout: maxWaitTime,
});
}
catch (error) {
// Network idle timeout is OK - we might have captured APIs already
log.debug('Network idle timeout, proceeding with discovered APIs');
}
// Short wait for any final lazy-loaded calls (reduced from 2s to 1s)
await page.waitForTimeout(1000);
// If no APIs discovered yet and interaction simulation is enabled, try deep fuzzing
if (discoveredAPIs.length === 0 && input.enableInteractionSimulation !== false) {
log.info('No APIs discovered on initial load. Attempting deep interaction fuzzing...');
// Use deep interaction fuzzer for complex SPAs
await deepInteractionFuzzing(page, log, {
interactionDelayMs: input.interactionWaitTime || 500,
});
// Wait a bit more for any APIs triggered by fuzzing
await page.waitForTimeout(2000);
// If still no APIs, try basic interaction simulation as fallback
if (discoveredAPIs.length === 0) {
log.info('Deep fuzzing did not discover APIs. Attempting basic interaction simulation...');
await simulateInteractions(page, log, lastAPIActivityTime, discoveredAPIs, discoveredBaseUrls, responseBodyCache, responseExamples, input);
}
}
else if (input.enableInteractionSimulation !== false && discoveredAPIs.length > 0) {
// Even if we found some APIs, run deep fuzzing to find more hidden ones
log.info('APIs discovered, but running deep fuzzing to find additional hidden APIs...');
await deepInteractionFuzzing(page, log, {
interactionDelayMs: input.interactionWaitTime || 500,
});
await page.waitForTimeout(2000);
}
}
catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
log.warning(`Error navigating to page: ${errorMessage}`);
// Provide helpful error messages based on error type
if (errorMessage.includes('net::ERR_NAME_NOT_RESOLVED') ||
errorMessage.includes('net::ERR_NAME_RESOLUTION_FAILED') ||
errorMessage.includes('getaddrinfo ENOTFOUND')) {
log.error(`❌ Domain not found: ${request.url}`);
log.warning('💡 This domain does not exist or is unreachable. Please check the URL.');
}
else if (errorMessage.includes('timeout') || errorMessage.includes('Navigation timeout')) {
log.warning(`⏱️ Navigation timeout for: ${request.url}`);
log.warning('💡 The page took too long to load. Consider increasing discoveryTimeout.');
}
else if (errorMessage.includes('net::ERR_CONNECTION_REFUSED')) {
log.error(`❌ Connection refused: ${request.url}`);
log.warning('💡 The server refused the connection. The service may be down.');
}
else if (errorMessage.includes('net::ERR_SSL')) {
log.warning(`🔒 SSL error for: ${request.url}`);
log.warning('💡 There may be an SSL certificate issue with this URL.');
}
// Continue anyway - we might have captured some responses before the error
// Return early to avoid processing if navigation completely failed
if (discoveredAPIs.length === 0) {
log.warning('⚠️ No APIs discovered due to navigation error. Skipping this URL.');
return;
}
}
finally {
// Clean up interval
if (apiActivityCheckInterval) {
clearInterval(apiActivityCheckInterval);
}
}
// If no APIs were discovered, log a warning with helpful suggestions
if (discoveredAPIs.length === 0) {
log.warning('No API endpoints discovered. The page might not use API calls, or they might be triggered by user interaction.');
log.warning('💡 Tip: Try enabling interaction simulation or provide a direct API endpoint URL instead of a landing page.');
return;
}
// Convert WebSocket connections to DiscoveredAPI format
for (const ws of webSocketConnections) {
const wsAPI = {
url: ws.url,
baseUrl: ws.url,
method: 'GET', // WebSocket uses GET for initial handshake
headers: {},
isWebSocket: true,
webSocketUrl: ws.url,
webSocketProtocols: ws.protocols,
};
// Check if we already discovered this WebSocket
if (!discoveredBaseUrls.has(wsAPI.baseUrl)) {
discoveredBaseUrls.add(wsAPI.baseUrl);
discoveredAPIs.push(wsAPI);
log.info(`Discovered WebSocket API: ${ws.url}`);
}
}
// Enqueue discovered APIs for processing
// Access the shared requestQueue from the crawler to ensure HttpCrawler can pick up requests
const requestQueue = crawler.requestQueue;
for (const api of discoveredAPIs) {
const initialUserData = {
page: api.paginationInfo?.currentPage || 1,
offset: api.paginationInfo?.currentOffset || 0,
cursor: api.paginationInfo?.nextCursor,
totalRecords: api.paginationInfo?.totalRecords,
totalPages: api.paginationInfo?.totalPages,
discoveredAPI: api,
};
// Merge OAuth headers with API headers if OAuth flow was used
const mergedHeaders = { ...api.headers, ...oauthAuthHeaders };
// Add the first API request directly to the shared requestQueue
// This ensures HttpCrawler can pick it up
if (requestQueue) {
await requestQueue.addRequest({
url: api.baseUrl,
label: REQUEST_LABELS.API_PROCESS,
userData: initialUserData,
headers: mergedHeaders,
});
}
else {
// Fallback to crawler.addRequests if requestQueue not accessible
await crawler.addRequests([
{
url: api.baseUrl,
label: REQUEST_LABELS.API_PROCESS,
userData: initialUserData,
headers: mergedHeaders,
},
]);
}
log.info(`Enqueued API request: ${api.baseUrl}`, {
label: REQUEST_LABELS.API_PROCESS,
initialPage: initialUserData.page,
});
}
log.info(`Discovery complete. Found ${discoveredAPIs.length} API endpoint(s).`);
// Record statistics
const statistics = getStatistics();
if (statistics && discoveredAPIs.length > 0) {
statistics.recordDocumentation(discoveredAPIs.length);
}
// Generate API documentation exports if requested
const shouldGenerateDocs = input.generateDocumentation !== false &&
(input.exportFormats && input.exportFormats.length > 0);
if (shouldGenerateDocs && discoveredAPIs.length > 0) {
log.info('Generating API documentation exports...');
const baseUrl = discoveredAPIs[0]?.baseUrl
? new URL(discoveredAPIs[0].baseUrl).origin
: undefined;
const exports = generateExports(discoveredAPIs, input.exportFormats || ['openapi', 'postman', 'curl'], baseUrl, responseExamples);
// Save exports to dataset
for (const exportData of exports) {
await Dataset.pushData({
_type: 'api_documentation',
format: exportData.format,
filename: exportData.filename,
content: exportData.content,
mimeType: exportData.mimeType,
apiCount: discoveredAPIs.length,
generatedAt: new Date().toISOString(),
sourceUrl: request.url,
});
log.info(`Generated ${exportData.format.toUpperCase()} export: ${exportData.filename}`);
}
// Generate code snippets for all APIs (DEVELOPER DREAM FEATURE!)
log.info('Generating code snippets in multiple languages...');
const codeSnippets = generateAllCodeSnippets(discoveredAPIs);
await Dataset.pushData({
_type: 'code_snippets',
snippets: codeSnippets,
languages: Object.values(codeSnippets)[0]?.map(s => s.language) || [],
totalApis: discoveredAPIs.length,
generatedAt: new Date().toISOString(),
sourceUrl: request.url,
});
log.info(`Generated code snippets for ${discoveredAPIs.length} API(s) in 10 languages`);
// Generate TypeScript type definitions (DEVELOPER DREAM FEATURE!)
log.info('Generating TypeScript type definitions...');
const tsTypes = generateTypeScriptDeclarationFile(discoveredAPIs, responseExamples);
await Dataset.pushData({
_type: 'typescript_types',
content: tsTypes,
filename: 'api-types.d.ts',
mimeType: 'text/typescript',
totalApis: discoveredAPIs.length,
generatedAt: new Date().toISOString(),
sourceUrl: request.url,
});
log.info(`Generated TypeScript types for ${discoveredAPIs.length} API(s)`);
// Save response examples for documentation
if (responseExamples.size > 0) {
log.info('Saving request/response examples...');
const examples = [];
for (const api of discoveredAPIs) {
const example = responseExamples.get(api.url);
if (example) {
examples.push({
apiUrl: api.baseUrl,
method: api.method,
request: api.body,
response: example,
});
}
}
await Dataset.pushData({
_type: 'api_examples',
examples,
totalApis: examples.length,
generatedAt: new Date().toISOString(),
sourceUrl: request.url,
});
log.info(`Saved ${examples.length} request/response example(s)`);
statistics?.recordExamples(examples.length);
}
// Generate test suites (DEVELOPER DREAM FEATURE!)
log.info('Generating test suites...');
const testSuites = generateAllTestSuites(discoveredAPIs, baseUrl);
await Dataset.pushData({
_type: 'test_suites',
suites: testSuites,
frameworks: ['jest', 'pytest', 'mocha', 'vitest', 'playwright'],
totalApis: discoveredAPIs.length,
generatedAt: new Date().toISOString(),
sourceUrl: request.url,
});
log.info(`Generated test suites for ${discoveredAPIs.length} API(s) in 5 frameworks`);
// Generate SDK packages (DEVELOPER DREAM FEATURE!)
log.info('Generating SDK packages...');
const sdkPackages = generateSDKPackages(discoveredAPIs, undefined, baseUrl);
for (const sdk of sdkPackages) {
await Dataset.pushData({
_type: 'sdk_package',
language: sdk.language,
packageName: sdk.packageName,
files: sdk.files,
description: sdk.description,
totalApis: discoveredAPIs.length,
generatedAt: new Date().toISOString(),
sourceUrl: request.url,
});
}
log.info(`Generated ${sdkPackages.length} SDK package(s) (TypeScript, Python, Go)`);
// Also save a summary of discovered APIs
await Dataset.pushData({
_type: 'api_summary',
totalApis: discoveredAPIs.length,
apis: discoveredAPIs.map((api) => ({
url: api.baseUrl,
method: api.method,
hasPagination: !!api.paginationInfo,
dataPath: api.dataPath,
})),
generatedAt: new Date().toISOString(),
sourceUrl: request.url,
});
}
}
//# sourceMappingURL=discovery-handler.js.map