// website-scrap-engine (version not listed)
// Configurable website scraper in TypeScript
// 92 lines • 3.2 kB • JavaScript
import { createHash } from 'node:crypto';
// Matches every occurrence (global, case-insensitive) of the characters
// : * ? " < > | & as well as their percent-encoded forms
// (%3A %2A %3F %22 %3C %3E %7C %26). Used by escapePath to replace them.
// NOTE(review): presumably these are characters that are unsafe in file
// names on some platforms — confirm against the callers of escapePath.
const forbiddenChar = /[:*?"<>|&]|%3A|%2A|%3F|%22|%3C|%3E|%7C|%26/ig;
/**
 * Resolve after roughly the given number of milliseconds.
 *
 * @param {number} ms delay in milliseconds; coerced to a 32-bit integer
 *   with `| 0`, so fractions are truncated and non-numeric values become 0
 * @returns {Promise<void>} promise resolved by the timer callback
 */
export const sleep = (ms) => new Promise((resolve) => {
  // Coerce inside the executor so a coercion failure rejects the promise.
  const delay = ms | 0;
  setTimeout(resolve, delay);
});
/**
 * Replace every match of `forbiddenChar` in the given string with '_'.
 *
 * @param {string} str text to escape
 * @returns {string} the escaped text; a falsy input ('' / null / undefined)
 *   is returned unchanged
 */
export const escapePath = (str) => {
  if (!str) {
    return str;
  }
  return str.replace(forbiddenChar, '_');
};
/**
 * Check whether a url contains '/sitemaps/' and ends with either
 * 'sitemap.xml' or 'sitemap_other.xml'.
 *
 * @param {string} url url to test
 * @returns {boolean|string|null|undefined} true/false for a non-empty url;
 *   a falsy url is returned as-is
 */
export const isSiteMap = (url) => {
  if (!url) {
    return url;
  }
  if (!url.includes('/sitemaps/')) {
    return false;
  }
  return url.endsWith('sitemap.xml') || url.endsWith('sitemap_other.xml');
};
/**
 * Build a lookup object whose keys are the items of `array`, each mapped to 1.
 *
 * @param {Iterable<string|number>} array items to use as keys
 * @param {boolean} [freeze] when truthy, the result is frozen before returning
 * @returns {Object<string, number>} plain object with one key per distinct item
 */
export const arrayToMap = (array, freeze) => {
  const lookup = {};
  for (const key of array) {
    lookup[key] = 1;
  }
  if (freeze) {
    return Object.freeze(lookup);
  }
  return lookup;
};
/**
 * Convert a binary body to a string; anything that is not binary is
 * returned untouched.
 *
 * @param {Buffer|ArrayBufferView|ArrayBuffer|*} body value to convert
 * @param {string} [encoding] encoding used for decoding, 'utf8' when falsy
 * @returns {string|*} decoded string for Buffer / typed array / DataView /
 *   ArrayBuffer input, otherwise `body` itself
 */
export const toString = (body, encoding) => {
  const charset = encoding || 'utf8';
  if (Buffer.isBuffer(body)) {
    return body.toString(charset);
  }
  if (ArrayBuffer.isView(body)) {
    // Wrap the exact window of the view; the underlying memory is shared, not copied.
    const view = Buffer.from(body.buffer, body.byteOffset, body.byteLength);
    return view.toString(charset);
  }
  if (body instanceof ArrayBuffer) {
    // Buffer.from(ArrayBuffer) shares the memory as well, no copy is made.
    return Buffer.from(body).toString(charset);
  }
  return body;
};
/**
 * Dynamically import a module and hand back its default export.
 *
 * @param {string} path module specifier passed to `import()`
 * @returns {Promise<*>} the module's `default` export when it is truthy,
 *   otherwise the module namespace object itself
 */
export const importDefaultFromPath = async (path) => {
  const loaded = await import(path);
  return loaded.default || loaded;
};
/**
 * Normalize a URL query string so its parameters appear in sorted key order.
 *
 * One leading '?' is stripped, the remainder is split on '&', and each part
 * is split at its first '='. Values that share a key stay grouped together
 * in their original relative order. Keys are sorted with the default
 * Array#sort ordering. Every parameter is emitted as `key=value`, so a bare
 * `key` becomes `key=`. No percent-encoding or decoding is performed.
 *
 * Values are grouped in a Map instead of a plain object, so keys that
 * happen to be named like Object.prototype members (`constructor`,
 * `toString`, `__proto__`, ...) are treated like any other key rather than
 * causing a TypeError.
 *
 * @param {string} search query string, with or without the leading '?'
 * @returns {string} normalized query string, always starting with '?'
 */
export const orderUrlSearch = (search) => {
  const query = search[0] === '?' ? search.slice(1) : search;
  const valuesByKey = new Map();
  for (const part of query.split('&')) {
    const pieces = part.split('=');
    // When the key is empty (part is '' or starts with '='), the whole part
    // is used as the key; kept for backward-compatible output.
    const key = pieces.shift() || part;
    // Re-join so that '=' characters inside the value survive.
    const value = pieces.join('=');
    const values = valuesByKey.get(key);
    if (values) {
      values.push(value);
    } else {
      valuesByKey.set(key, [value]);
    }
  }
  const ordered = [...valuesByKey.keys()]
    .sort()
    .map((key) => valuesByKey.get(key).map((value) => `${key}=${value}`).join('&'));
  return `?${ordered.join('&')}`;
};
/**
 * Hash a string with SHA-256 and return the digest as URL-safe base64:
 * '+' becomes '-', '/' becomes '_' and '=' padding is removed.
 *
 * @param {string} str input passed to the hash's `update`
 * @returns {string} URL-safe, unpadded base64 digest
 */
export const simpleHashString = (str) => {
  // Substitutions that turn standard base64 into its url-safe form.
  const urlSafe = { '+': '-', '/': '_', '=': '' };
  const base64 = createHash('sha256').update(str).digest('base64');
  return base64.replace(/[+/=]/g, (ch) => urlSafe[ch]);
};
// Shared reference to Object.prototype.hasOwnProperty. Invoke it as
// `hasOwnProperty.call(obj, key)` so the check does not depend on `obj`
// exposing (or overriding) a `hasOwnProperty` method of its own.
export const hasOwnProperty = Object.prototype.hasOwnProperty;
/**
 * Copy own enumerable properties from `source` onto `target`, but only for
 * keys that `target` does not already own. Existing target values are never
 * overwritten, even when they are undefined.
 *
 * Note that using this function against incompatible types or
 * null | undefined may lead to typescript parser errors.
 *
 * @param {object} target object to fill in; mutated in place
 * @param {object} source object providing the fallback values
 * @returns {object} `target` itself, or a fresh shallow copy of `source`
 *   when `target` is falsy
 */
export const weakAssign = (target, source) => {
  if (!target) {
    return Object.assign({}, source);
  }
  if (source) {
    for (const key of Object.keys(source)) {
      if (hasOwnProperty.call(target, key)) {
        // target already defines this key, keep its value
        continue;
      }
      Reflect.set(target, key, source[key]);
    }
  }
  return target;
};
/**
 * Tell whether the given url starts with 'http://' or 'https://'.
 * The comparison is case-sensitive.
 *
 * @param {string} url url to test
 * @returns {boolean} true when the url begins with one of the two prefixes
 */
export const isUrlHttp = (url) => {
  const prefixes = ['http://', 'https://'];
  return prefixes.some((prefix) => url.startsWith(prefix));
};
//# sourceMappingURL=util.js.map