/*
 * @devmehq/open-graph-extractor
 * Version:
 * Fast, lightweight Open Graph, Twitter Card, and structured data extractor for Node.js with caching and validation
 * 121 lines (120 loc) • 3.75 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.extractOpenGraphBulk = extractOpenGraphBulk;
const index_1 = require("./index");
/**
 * Create a new concurrency limiter state.
 *
 * The returned object is the shared state consumed by runWithConcurrency:
 * `running` counts the tasks currently executing, `queue` holds the wake-up
 * callbacks of tasks waiting for a free slot, and `concurrency` is the limit.
 *
 * @param {number} concurrency - Maximum number of tasks allowed to run at once.
 * @returns {{running: number, queue: Function[], concurrency: number}} Fresh limiter state.
 * @throws {RangeError} If `concurrency` compares as `<= 0`; such a limit can
 *   never admit a task, so every caller would wait forever.
 */
function createConcurrencyLimiter(concurrency) {
    // runWithConcurrency waits while `running >= concurrency`. With nothing
    // running that is `0 >= concurrency`, i.e. exactly the values rejected
    // here. Anything else (including Infinity) is passed through untouched.
    if (concurrency <= 0) {
        throw new RangeError(`concurrency must be greater than 0 (received ${String(concurrency)})`);
    }
    return {
        running: 0,
        queue: [],
        concurrency,
    };
}
/**
 * Run a function with concurrency control.
 *
 * Waits until the limiter has a free slot, runs `fn` while holding that slot,
 * and always releases it afterwards, whether `fn` fulfilled or rejected.
 *
 * @param {{running: number, queue: Function[], concurrency: number}} state -
 *   Limiter state; mutated in place.
 * @param {Function} fn - Task to run; called with no arguments and awaited.
 * @returns {Promise<*>} Whatever `fn` resolves to. A rejection from `fn`
 *   propagates unchanged.
 */
async function runWithConcurrency(state, fn) {
    const isSaturated = () => state.running >= state.concurrency;
    // Park until a finishing task wakes us. The condition is evaluated again
    // after every wake-up rather than assuming the slot is still free.
    while (isSaturated()) {
        await new Promise((wake) => {
            state.queue.push(wake);
        });
    }
    ++state.running;
    try {
        const outcome = await fn();
        return outcome;
    }
    finally {
        // Give the slot back and wake the oldest waiter, if there is one.
        --state.running;
        const wakeNext = state.queue.shift();
        if (wakeNext) {
            wakeNext();
        }
    }
}
/**
 * Extract Open Graph data from multiple URLs with concurrency control.
 *
 * @param {object} options - Bulk options.
 * @param {string[]} options.urls - URLs to process.
 * @param {number} [options.concurrency=5] - Maximum URLs processed at once.
 * @param {{requests: number, window: number}} [options.rateLimit] - When set,
 *   at most `requests` extractions are started per `window` milliseconds.
 * @param {Function} [options.onProgress] - Called as
 *   `(completed, total, url)` after each URL finishes.
 * @param {Function} [options.onError] - Called as `(url, error)` when a URL fails.
 * @param {boolean} [options.continueOnError=true] - When false, no further
 *   URLs are started after the first failure; URLs already being extracted
 *   are still awaited so the returned data is final.
 * @param {*} [extractOptions] - Passed through unchanged to extractOpenGraphAsync.
 * @returns {Promise<{results: object[], summary: object}>} Per-URL results in
 *   completion order plus `total`, `successful`, `failed`, `totalDuration`
 *   and `averageDuration`. Each result carries `url`, `success`, `timestamp`,
 *   `duration` and either `data` or `error`.
 */
async function extractOpenGraphBulk(options, extractOptions) {
    const { urls, concurrency = 5, rateLimit, onProgress, onError, continueOnError = true } = options;
    // Create concurrency limiter
    const limiter = createConcurrencyLimiter(concurrency);
    // Set on the first failure when continueOnError is false. Work that has
    // not started yet checks this flag and is skipped instead of running in
    // the background after this function has already returned.
    let aborted = false;
    // Rate limiting setup (fixed window counter)
    let requestCount = 0;
    let windowStart = Date.now();
    // Tail of a promise chain that serialises rate-limit checks. Without it,
    // concurrent tasks would each sleep on their own, wake together and burst
    // past the configured limit.
    let rateGate = Promise.resolve();
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    const takeRateSlot = async () => {
        if (aborted) {
            return;
        }
        if (Date.now() - windowStart >= rateLimit.window) {
            // The previous window is over: start counting afresh.
            windowStart = Date.now();
            requestCount = 0;
        }
        if (requestCount >= rateLimit.requests) {
            // Window is full: wait for the remaining time, then open a new one.
            await sleep(rateLimit.window - (Date.now() - windowStart));
            windowStart = Date.now();
            requestCount = 0;
        }
        requestCount++;
    };
    const checkRateLimit = () => {
        if (!rateLimit) {
            return Promise.resolve();
        }
        rateGate = rateGate.then(takeRateSlot);
        return rateGate;
    };
    const results = [];
    let completed = 0;
    // Process a single URL and record its outcome in `results`.
    const processUrl = async (url) => {
        if (aborted) {
            return undefined;
        }
        const startTime = Date.now();
        const result = {
            url,
            success: false,
            timestamp: new Date(),
            duration: 0,
        };
        // Check rate limit before making request; the wait counts towards
        // this URL's duration because startTime was taken above.
        await checkRateLimit();
        if (aborted) {
            // A failure occurred while this URL was waiting for its turn.
            return undefined;
        }
        try {
            // Extract Open Graph data
            result.data = await (0, index_1.extractOpenGraphAsync)(url, extractOptions);
            result.success = true;
        }
        catch (error) {
            result.success = false;
            result.error = error instanceof Error ? error : new Error(String(error));
            if (!continueOnError) {
                // Raise the flag before calling onError so it is set even if
                // the callback itself throws.
                aborted = true;
            }
            if (onError) {
                onError(url, result.error);
            }
        }
        finally {
            result.duration = Date.now() - startTime;
            results.push(result);
            completed++;
            if (onProgress) {
                onProgress(completed, urls.length, url);
            }
        }
        return result;
    };
    // Extraction failures are recorded in `results`, so a task only rejects
    // when a caller-supplied callback throws. Such rejections were ignored
    // before and still are, but every task is now awaited until it settles,
    // so `results` can no longer change after this function returns.
    const tasks = urls.map((url) => runWithConcurrency(limiter, () => processUrl(url)).catch(() => undefined));
    await Promise.all(tasks);
    // Calculate summary statistics
    const successful = results.filter((r) => r.success).length;
    const failed = results.length - successful;
    const totalDuration = results.reduce((sum, r) => sum + r.duration, 0);
    const averageDuration = results.length > 0 ? totalDuration / results.length : 0;
    return {
        results,
        summary: {
            total: results.length,
            successful,
            failed,
            totalDuration,
            averageDuration,
        },
    };
}