// shopify-product-scraper
// Version:
// A lightweight JavaScript library for scraping products from Shopify stores. It provides a simple and efficient way to extract product data from Shopify websites.
// 69 lines (66 loc) • 1.81 kB
// JavaScript
/**
 * Runs a generator function as if it were an async function.
 *
 * The generator is invoked with `__this` as receiver and `__arguments` as its
 * argument list. Every yielded value is treated as a promise: its fulfilment
 * value is sent back into the generator, its rejection reason is thrown into
 * the generator. The returned promise fulfils with the generator's return
 * value and rejects with any error that escapes it.
 *
 * Kept as a `var` binding, like the rest of this bundled output.
 *
 * @param {*} __this - Receiver used when calling `generator`.
 * @param {ArrayLike<*>|null|undefined} __arguments - Arguments forwarded to `generator`.
 * @param {GeneratorFunction} generator - Generator function to drive.
 * @returns {Promise<*>} Promise for the generator's return value.
 */
var __async = (__this, __arguments, generator) => {
  return new Promise((resolve, reject) => {
    // A throw while creating the iterator happens inside the executor and
    // therefore rejects the promise.
    const iterator = generator.apply(__this, __arguments);

    // Resume the generator through `method` ("next" or "throw"), then either
    // finish or wait on the value it yielded.
    const advance = (method, input) => {
      try {
        const result = iterator[method](input);
        if (result.done) {
          resolve(result.value);
          return;
        }
        Promise.resolve(result.value).then(
          (value) => advance("next", value),
          (reason) => advance("throw", reason)
        );
      } catch (e) {
        reject(e);
      }
    };

    advance("next", undefined);
  });
};
// src/url.ts
/**
 * Patterns whose presence in a page's HTML is taken as a sign that the page
 * is served by Shopify. A page counts as a match when any one pattern hits.
 */
var shopifyIndicators = [
  // Any reference to a shopify.com host.
  /shopify\.com/i,
  // Shopify CDN host (already covered by the pattern above; kept explicit).
  /cdn\.shopify\.com/i,
  // Reference to the Buy SDK.
  /Shopify\.Buy\.SDK/i,
  // Declaration of the global `Shopify` object.
  /var Shopify =/i,
  // Liquid `asset_url` filter expression, e.g. {{ 'theme.css' | asset_url }}.
  // The pipe and braces are escaped: an unescaped `|` is regex alternation,
  // which made either half of the expression match on its own.
  /\{\{ '.*' \| asset_url \}\}/i
];
/**
 * Checks whether the page at `url` looks like a Shopify storefront.
 *
 * The page body is downloaded and tested against every entry of
 * `shopifyIndicators`; one hit is enough. Any failure along the way (network
 * error, unreadable body, ...) is reported as `false` rather than thrown.
 *
 * @param {string} url - Address of the page to inspect.
 * @returns {Promise<boolean>} `true` if at least one indicator matches.
 */
function isValidURL(url) {
  return __async(this, null, function* () {
    try {
      const response = yield fetch(url);
      const markup = yield response.text();
      for (const indicator of shopifyIndicators) {
        if (indicator.test(markup)) {
          return true;
        }
      }
      return false;
    } catch (e) {
      // Best-effort probe: an unreachable or unreadable page is "not valid".
      return false;
    }
  });
}
// src/index.ts
/**
 * Collects products from a Shopify store through its `products.json` endpoint.
 *
 * The store is first checked with `isValidURL`; if that check fails the result
 * is `null`. Otherwise pages are requested one after another until `limit`
 * products were collected or a page comes back without products.
 *
 * Paging uses a constant page size and a page counter starting at 1. Changing
 * the page size between requests (or starting at page 0) shifts the page
 * offsets and yields duplicated products, so the last page is trimmed on the
 * client instead of requesting a smaller page.
 *
 * @param {string} url - Base URL of the store; trailing slashes are ignored.
 * @param {object} [options]
 * @param {number} [options.limit=Infinity] - Maximum number of products to return.
 * @param {function({progress: number, products: Array}): void} [options.onProgress]
 *   Called after every page with the fraction of `limit` reached so far (0 when
 *   `limit` is infinite) and the array of products collected so far.
 * @returns {Promise<Array|null>} The collected products, or `null` when the URL
 *   is not recognised as a Shopify store. Rejects if a page request fails or
 *   its body is not JSON.
 */
function scrape(url, options) {
  return __async(this, [url, options], function* (storeUrl, { limit = Infinity, onProgress } = {}) {
    const isValid = yield isValidURL(storeUrl);
    if (!isValid) return null;

    // Avoid "//products.json" when the caller passes "https://shop/".
    const baseUrl = String(storeUrl).replace(/\/+$/, "");
    // 250 is the per-request cap this library has always used (presumably the
    // endpoint's maximum page size). Math.ceil keeps the query parameter an
    // integer when a fractional limit is passed.
    const pageSize = Math.min(250, Math.ceil(limit));

    const data = [];
    let page = 1;
    while (data.length < limit) {
      const requestedData = yield fetch(
        `${baseUrl}/products.json/?limit=${pageSize}&page=${page}`
      ).then((res) => res.json());

      const products = requestedData == null ? void 0 : requestedData.products;
      if (!Array.isArray(products) || products.length === 0) break;

      // Only take what is still missing so the result never exceeds `limit`.
      data.push(...products.slice(0, Math.ceil(limit - data.length)));

      if (onProgress) {
        onProgress({
          progress: Math.min(1, data.length / limit),
          products: data
        });
      }
      page++;
    }
    return data;
  });
}
export {
scrape
};