@crawlee/utils
Version:
A set of shared utilities that can be used by crawlers
221 lines • 8.07 kB
JavaScript
;
// CommonJS export table. Flags the module as a transpiled ES module and
// publishes the CPU helpers declared further down; the assignments work
// before the definitions because function declarations are hoisted.
// `getClockTicks` is intentionally absent: it stays private to this module.
Object.defineProperty(exports, "__esModule", { value: true });
exports.getCurrentCpuTicks = getCurrentCpuTicks;
exports.getCpuQuota = getCpuQuota;
exports.getCpuPeriod = getCpuPeriod;
exports.getContainerCpuUsage = getContainerCpuUsage;
exports.getSystemCpuUsage = getSystemCpuUsage;
exports.sampleCpuUsage = sampleCpuUsage;
exports.getCurrentCpuTicksV2 = getCurrentCpuTicksV2;
const tslib_1 = require("tslib");
const node_child_process_1 = require("node:child_process");
const promises_1 = require("node:fs/promises");
const node_os_1 = tslib_1.__importDefault(require("node:os"));
const log_1 = tslib_1.__importDefault(require("@apify/log"));
const general_1 = require("../general");
// Files read by the cgroup helpers below, keyed first by metric and then by
// cgroup version ('V1' / 'V2').
const CPU_FILE_PATHS = {
// Cumulative CPU usage of the container (read by getContainerCpuUsage).
STAT: {
V1: '/sys/fs/cgroup/cpuacct/cpuacct.usage',
V2: '/sys/fs/cgroup/cpu.stat',
},
// CPU quota (read by getCpuQuota).
QUOTA: {
V1: '/sys/fs/cgroup/cpu/cpu.cfs_quota_us',
V2: '/sys/fs/cgroup/cpu.max',
},
// CPU period (read by getCpuPeriod). In V2 this is the same file as the
// quota: getCpuQuota takes its first field, getCpuPeriod its second.
PERIOD: {
V1: '/sys/fs/cgroup/cpu/cpu.cfs_period_us',
V2: '/sys/fs/cgroup/cpu.max',
},
};
// Tick rate used by getSystemCpuUsage to convert clock ticks to nanoseconds.
// Starts at 100 and is overwritten with getClockTicks() by getCurrentCpuTicksV2.
let CLOCK_TICKS_PER_SECOND = 100;
// Set to true by getCurrentCpuTicksV2 once the tick rate has been looked up,
// so that the lookup runs at most once.
let CLOCK_TICKS_CHECKED = false;
const NANOSECONDS_PER_SECOND = 1e9;
// Baseline idle/total tick counters that getCurrentCpuTicks subtracts from the
// current totals reported by os.cpus().
const previousTicks = { idle: 0, total: 0 };
/**
 * Gets the "bare metal" cpu load, i.e. the non-idle share of the time counters
 * that `os.cpus()` reports, summed over all cores.
 * Used in
 * - AWS Lambda
 * - Containers without a cGroup quota
 * - Uncontainerized environments
 *
 * The load is measured over the interval since the previous call. The first
 * call is measured against a zero baseline, i.e. against the raw totals.
 * @returns a number between 0 and 1 for the cpu load; 0 when no ticks elapsed since the previous call
 * @internal
 */
function getCurrentCpuTicks() {
    let idle = 0;
    let total = 0;
    for (const { times } of node_os_1.default.cpus()) {
        idle += times.idle;
        total += Object.values(times).reduce((sum, value) => sum + value, 0);
    }
    const idleTicksDelta = idle - previousTicks.idle;
    const totalTicksDelta = total - previousTicks.total;
    // Store this reading as the baseline for the next call. Without this the
    // deltas would always be taken against zero and the function would report
    // the average load over the whole counter lifetime instead of the current load.
    previousTicks.idle = idle;
    previousTicks.total = total;
    return totalTicksDelta ? 1 - idleTicksDelta / totalTicksDelta : 0;
}
/**
 * Reads the linux tick rate by running `getconf CLK_TCK`.
 *
 * Falls back to 100 (with a one-time warning) when the command fails or when
 * its output is not a positive integer. The value is later used as a divisor,
 * so NaN or 0 must never be returned.
 * @returns the number of ticks per second
 */
function getClockTicks() {
    try {
        const output = (0, node_child_process_1.execSync)('getconf CLK_TCK').toString().trim();
        const ticks = Number.parseInt(output, 10);
        if (Number.isInteger(ticks) && ticks > 0) {
            return ticks;
        }
    } catch (err) {
        // Command unavailable or failed: handled by the shared fallback below.
    }
    log_1.default.warningOnce('Failed to get clock ticks; defaulting to 100');
    return 100;
}
/**
 * Reads the cpu quota configured for the cgroup.
 * An unlimited quota is reported as `null`: in V1 that is the value -1,
 * in V2 it is the literal "max" in the first field of the file.
 * @param cgroupsVersion the cGroup version ('V1', anything else is treated as V2)
 * @returns the cpu quota, or null when no limit is set
 * @internal
 */
async function getCpuQuota(cgroupsVersion) {
    const isV1 = cgroupsVersion === 'V1';
    const quotaFile = isV1 ? CPU_FILE_PATHS.QUOTA.V1 : CPU_FILE_PATHS.QUOTA.V2;
    const raw = (await (0, promises_1.readFile)(quotaFile, 'utf8')).trim();
    if (isV1) {
        const limit = parseInt(raw, 10);
        return limit === -1 ? null : limit;
    }
    // cgroup v2: only the first whitespace-separated field is the quota.
    const [limitField] = raw.split(/\s+/);
    return limitField === 'max' ? null : parseInt(limitField, 10);
}
/**
 * Reads the cpu period configured for the cgroup.
 * In V1 the period has a file of its own; in V2 it is the second
 * whitespace-separated field of the file that also carries the quota.
 * @param cgroupsVersion the cGroup version ('V1', anything else is treated as V2)
 * @returns the cpu quota period (NaN when the value cannot be parsed)
 * @internal
 */
async function getCpuPeriod(cgroupsVersion) {
    if (cgroupsVersion !== 'V1') {
        // cgroup v2
        const cpuMax = await (0, promises_1.readFile)(CPU_FILE_PATHS.PERIOD.V2, 'utf8');
        const [, periodField] = cpuMax.trim().split(/\s+/);
        return parseInt(periodField, 10);
    }
    const periodText = await (0, promises_1.readFile)(CPU_FILE_PATHS.PERIOD.V1, 'utf8');
    return parseInt(periodText.trim(), 10);
}
/**
 * Reads the cumulative cpu usage of the container from its cgroup files.
 *
 * In V1 the whole file content is returned as a number. In V2 the value of
 * the first `usage_usec` entry is taken (0 when there is no such entry) and
 * converted from microseconds to nanoseconds.
 *
 * @param cgroupsVersion the cGroup version ('V1', anything else is treated as V2)
 * @returns the cpu usage
 * @internal
 */
async function getContainerCpuUsage(cgroupsVersion) {
    if (cgroupsVersion === 'V1') {
        const usageText = await (0, promises_1.readFile)(CPU_FILE_PATHS.STAT.V1, 'utf8');
        return Number(usageText.trim());
    }
    // cgroup v2: the file is a list of "<key> <value>" rows.
    const cpuStat = await (0, promises_1.readFile)(CPU_FILE_PATHS.STAT.V2, 'utf8');
    const usageEntry = cpuStat
        .split('\n')
        .map((row) => row.trim().split(/\s+/))
        .find(([key]) => key === 'usage_usec');
    const usageMicros = usageEntry === undefined ? 0 : Number(usageEntry[1]);
    // Microseconds -> nanoseconds.
    return usageMicros * 1000;
}
/**
 * Reads the cumulative cpu time of the whole system from `/proc/stat`.
 *
 * Takes the aggregate `cpu` line, sums its first seven counters
 * (user, nice, system, idle, iowait, irq, softirq) and converts the
 * resulting clock ticks to nanoseconds using CLOCK_TICKS_PER_SECOND.
 *
 * @returns the cpu usage
 * @throws Error when the file has no aggregate `cpu` line
 * @internal
 */
async function getSystemCpuUsage() {
    const procStat = await (0, promises_1.readFile)('/proc/stat', 'utf8');
    const aggregateLine = procStat.split('\n').find((row) => row.startsWith('cpu '));
    if (aggregateLine === undefined) {
        throw new Error('no cpu line'); // shouldnt ever happen
    }
    // Field 0 is the "cpu" label; fields 1..7 are the counters we need.
    const totalTicks = aggregateLine
        .split(/\s+/)
        .slice(1, 8)
        .reduce((sum, field) => sum + Number(field), 0);
    // Clock ticks -> nanoseconds.
    return (totalTicks * NANOSECONDS_PER_SECOND) / CLOCK_TICKS_PER_SECOND;
}
/**
 * Captures one cpu usage reading for the container and one for the system.
 * Both reads are started before either is awaited, so they run concurrently.
 *
 * @param cGroupsVersion the cGroup version, forwarded to getContainerCpuUsage
 * @returns An object containing the container and system CPU usage.
 * @internal
 */
async function sampleCpuUsage(cGroupsVersion) {
    const pendingContainer = getContainerCpuUsage(cGroupsVersion);
    const pendingSystem = getSystemCpuUsage();
    const readings = await Promise.all([pendingContainer, pendingSystem]);
    return { containerUsage: readings[0], systemUsage: readings[1] };
}
// Last valid container/system reading; the next call measures its deltas against it.
let previousSample = { containerUsage: 0, systemUsage: 0 };
/**
 * Gets the cpu usage of the system.
 * If the crawler is running in a containerized environment, crawlee will check for a cgroup enforced cpu limit.
 * If a cgroup limit is found, it will be taken as the maximum load against which the current load will be gauged.
 *
 * Falls back to the bare metal load (`getCurrentCpuTicks`) when the process is
 * not containerized, cgroups are not detected, no usable quota is configured,
 * the usage counters cannot be read, or anything throws.
 *
 * @param containerized whether the process runs inside a container
 * @returns the cpu load as a ratio where 1 means the whole allowance is in use.
 *  The cgroup based value is not clamped; it is 0 when no system time elapsed
 *  since the previous sample.
 * @internal
 */
async function getCurrentCpuTicksV2(containerized = false) {
    try {
        if (!containerized) {
            // bare metal cpu limit
            return getCurrentCpuTicks();
        }
        // Look the tick rate up once; getSystemCpuUsage depends on it.
        if (!CLOCK_TICKS_CHECKED) {
            CLOCK_TICKS_PER_SECOND = getClockTicks();
            CLOCK_TICKS_CHECKED = true;
        }
        const cgroupsVersion = await (0, general_1.getCgroupsVersion)();
        // if cgroup is not detected, return bare metal cpu limit
        if (cgroupsVersion === null) {
            log_1.default.deprecated('Your environment is containerized, but your system does not support cgroups.\n' +
                "If you're running containers with limited cpu, cpu auto-scaling will not work properly.");
            return getCurrentCpuTicks();
        }
        // cgroup aware cpu limit. If no limits are set, default to returning getCurrentCpuTicks.
        const quota = await getCpuQuota(cgroupsVersion);
        if (quota === null) {
            // no cgroup limit, return host cpu load
            return getCurrentCpuTicks();
        }
        const period = await getCpuPeriod(cgroupsVersion);
        // eg. having a 200000us quota per 100000us means the cGroup can fully use 2 cores
        const cpuAllowance = quota / period;
        if (!Number.isFinite(cpuAllowance) || cpuAllowance <= 0) {
            // Unparseable or zero quota/period: there is no usable limit to
            // gauge against, so treat it like "no limit" instead of dividing by it.
            return getCurrentCpuTicks();
        }
        const sample = await sampleCpuUsage(cgroupsVersion);
        if (!Number.isFinite(sample.containerUsage) || !Number.isFinite(sample.systemUsage)) {
            // Do not store a broken reading: it would poison every later delta.
            return getCurrentCpuTicks();
        }
        const containerDelta = sample.containerUsage - previousSample.containerUsage;
        const systemDelta = sample.systemUsage - previousSample.systemUsage;
        previousSample = sample;
        if (systemDelta <= 0) {
            // No system time elapsed between the samples; report no load
            // rather than NaN/Infinity from a division by zero.
            return 0;
        }
        const numCpus = node_os_1.default.cpus().length;
        // Share of the host used by the container, scaled to cores, relative to the allowed cores.
        return ((containerDelta / systemDelta) * numCpus) / cpuAllowance;
    } catch (err) {
        // if anything fails, default to bare metal metrics
        log_1.default.exception(err, 'Cpu snapshot failed.');
        return getCurrentCpuTicks();
    }
}
//# sourceMappingURL=cpu-info.js.map