@internwave/scrapers-api
Version:
A helper API for web scrapers in the InternWave desktop application
798 lines (787 loc) • 20.2 kB
JavaScript
;
// Short aliases for the reflection primitives used by the module helpers below.
var {
  defineProperty: __defProp,
  getOwnPropertyDescriptor: __getOwnPropDesc,
  getOwnPropertyNames: __getOwnPropNames
} = Object;
var { hasOwnProperty: __hasOwnProp } = Object.prototype;
/**
 * Defines every enumerable entry of `all` on `target` as an enumerable getter.
 *
 * @param {object} target - Object that receives the accessor properties.
 * @param {object} all - Map of property name to zero-argument getter function.
 */
var __export = (target, all) => {
  for (var exportName in all) {
    const descriptor = { get: all[exportName], enumerable: true };
    __defProp(target, exportName, descriptor);
  }
};
// Copies the own properties of `from` onto `to` as getters that read through to
// `from`, so later changes to `from[key]` stay visible on `to`.
// - to: object that receives the accessors; it is also the return value.
// - from: source; ignored unless it is a non-null object or a function.
// - except: a single key that is never copied.
// - desc: scratch slot for the source property descriptor (callers in this file
//   do not pass it).
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
// Keys that `to` already owns are left untouched.
if (!__hasOwnProp.call(to, key) && key !== except)
// Enumerability mirrors the source descriptor, defaulting to true when no
// descriptor is found.
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
/**
 * Wraps an exports table in a fresh object flagged with a non-enumerable
 * `__esModule: true` marker and returns it with the table's properties copied on.
 *
 * @param {object} mod - Exports table to expose.
 * @returns {object} The flagged object returned by `__copyProps`.
 */
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
// Runs a generator function as if it were an async function (presumably the
// transpiled form of `async`/`await` for this build target).
// - __this / __arguments: receiver and argument list applied to `generator`.
// - generator: generator function; each yielded value is awaited.
// Returns a Promise that resolves with the generator's return value and rejects
// with any error the generator throws.
var __async = (__this, __arguments, generator) => {
return new Promise((resolve, reject) => {
// Resumes the generator with the settled value of the last yielded promise.
var fulfilled = (value) => {
try {
step(generator.next(value));
} catch (e) {
reject(e);
}
};
// Throws the rejection reason into the generator so its try/catch can see it.
var rejected = (value) => {
try {
step(generator.throw(value));
} catch (e) {
reject(e);
}
};
// Finishes when the generator is done; otherwise waits on the yielded value.
var step = (x) => x.done ? resolve(x.value) : Promise.resolve(x.value).then(fulfilled, rejected);
// `generator` is rebound from the generator function to its iterator here.
step((generator = generator.apply(__this, __arguments)).next());
});
};
// src/index.ts
// Public export table. Each entry is a getter, so the bindings are only read
// when accessed; that lets this table be built before the `var` declarations
// further down have been assigned.
var src_exports = {};
__export(src_exports, {
API: () => API,
ChartType: () => ChartType,
MessageType: () => MessageType,
ProgressReporter: () => ProgressReporter,
ScraperInputType: () => ScraperInputType,
getRandomJob: () => getRandomJob,
onStartScraping: () => onStartScraping
});
// The CommonJS export object is the table wrapped with an `__esModule` marker.
module.exports = __toCommonJS(src_exports);
// src/ipc/types/message/message.ts
/**
 * Message `type` values exchanged over the process IPC channel.
 *
 * Note that SEND_REPORT_SCRAPING_INPUT_REQUEST maps to the string
 * "REPORT_SCRAPING_INPUT_REQUEST" (no "SEND_" prefix on the wire).
 */
var MessageType = {
  START_SCRAPING: "START_SCRAPING",
  REPORT_SCRAPING_ERROR: "REPORT_SCRAPING_ERROR",
  REPORT_SCRAPING_ACTION_REQUEST: "REPORT_SCRAPING_ACTION_REQUEST",
  REPORT_SCRAPING_PROGRESS: "REPORT_SCRAPING_PROGRESS",
  SEND_REPORT_SCRAPING_INPUT_REQUEST: "REPORT_SCRAPING_INPUT_REQUEST",
  RCV_REPORT_SCRAPING_INPUT_REQUEST: "RCV_REPORT_SCRAPING_INPUT_REQUEST",
  DONE_SCRAPING: "DONE_SCRAPING"
};
// src/ipc/reportScraping/reportDone.ts
/**
 * Sends the final job list to the parent process as a DONE_SCRAPING message.
 * Does nothing when the process has no IPC `send` function.
 *
 * @param {Array} jobs - Scraped jobs to hand over.
 */
var reportDone = (jobs) => {
  const send = process.send;
  if (send == null) {
    return;
  }
  send.call(process, {
    type: "DONE_SCRAPING" /* DONE_SCRAPING */,
    payload: { jobs }
  });
};
// src/ipc/reportScraping/reportError.ts
/**
 * Sends an error description to the parent process as a REPORT_SCRAPING_ERROR
 * message. Does nothing when the process has no IPC `send` function.
 *
 * @param {string} message - Human-readable error text.
 */
var reportError = (message) => {
  const send = process.send;
  if (send == null) {
    return;
  }
  send.call(process, {
    type: "REPORT_SCRAPING_ERROR" /* REPORT_SCRAPING_ERROR */,
    payload: { message }
  });
};
// src/ipc/utils/messageApp.ts
/**
 * Sends `initMessage` to the parent process and settles with whatever
 * `handleResponse` decides for the next incoming "message" event.
 *
 * @param {Function} handleResponse - Called as (message, resolve, reject) with
 *   the first message received after the request was sent.
 * @param {*} initMessage - Message passed to `process.send`.
 * @returns {Promise} Rejects with "No IPC channel available" when the process
 *   has no `send` function.
 */
var messageApp = (handleResponse, initMessage) => __async(void 0, null, function* () {
  return new Promise((settle, fail) => {
    if (!process.send) {
      fail(new Error("No IPC channel available"));
      return;
    }
    // One-shot listener: it is removed before the response is handled.
    process.once("message", (reply) => {
      handleResponse(reply, settle, fail);
    });
    process.send(initMessage);
  });
});
// src/ipc/ProgressReporter/ProgressReporter.ts
/**
 * Reports multi-step scraping progress to the parent process over IPC.
 *
 * Every outgoing message is skipped silently when the process has no IPC
 * `send` function.
 */
var ProgressReporter = class {
  /**
   * @param {number} _totalSteps - Number of steps included in progress messages.
   */
  constructor(_totalSteps) {
    this._totalSteps = _totalSteps;
    this._currentStep = 0;
  }

  /**
   * Advances to the next step and starts a new work tracker for it.
   *
   * @param {string} message - Default text for this step's progress messages.
   * @param {number} workUnits - Total work units in the step.
   * @param {number} [initWorkUnits] - Work units already completed.
   */
  nextStep(message, workUnits, initWorkUnits) {
    this._currentStep += 1;
    const publish = (percentage, callbackMsg) => {
      const send = process.send;
      if (send == null) {
        return;
      }
      const text = callbackMsg != null ? callbackMsg : message;
      send.call(process, {
        type: "REPORT_SCRAPING_PROGRESS" /* REPORT_SCRAPING_PROGRESS */,
        payload: {
          message: text,
          percentage,
          totalSteps: this._totalSteps,
          currentStep: this._currentStep
        }
      });
    };
    this._workReporter = new WorkReporter(workUnits, publish, initWorkUnits);
  }

  /**
   * Records one unit of work on the current step; a no-op before `nextStep`.
   *
   * @param {string} [message] - Overrides the step's default message.
   */
  reportProgress(message) {
    const tracker = this._workReporter;
    if (tracker != null) {
      tracker.reportProgress(message);
    }
  }

  /**
   * Sends a REPORT_SCRAPING_ACTION_REQUEST message carrying `message`.
   *
   * @param {string} message - Text describing the requested action.
   */
  requestAction(message) {
    const send = process.send;
    if (send == null) {
      return;
    }
    send.call(process, {
      type: "REPORT_SCRAPING_ACTION_REQUEST" /* REPORT_SCRAPING_ACTION_REQUEST */,
      payload: { message }
    });
  }

  /**
   * Sends an input request and waits for the reply.
   *
   * @param {*} inputs - Input descriptors forwarded in the request payload.
   * @param {string} message - Text forwarded in the request payload.
   * @returns {Promise} Resolves with `payload.values` of a reply typed
   *   "REPORT_SCRAPING_INPUT_REQUEST"; rejects with "Unexpected message type"
   *   for any other reply.
   */
  requestInput(inputs, message) {
    const request = {
      type: "RCV_REPORT_SCRAPING_INPUT_REQUEST" /* RCV_REPORT_SCRAPING_INPUT_REQUEST */,
      payload: { inputs, message }
    };
    const onReply = (reply, resolve, reject) => {
      if (reply.type !== "REPORT_SCRAPING_INPUT_REQUEST" /* SEND_REPORT_SCRAPING_INPUT_REQUEST */) {
        reject(new Error("Unexpected message type"));
        return;
      }
      resolve(reply.payload.values);
    };
    return messageApp(onReply, request);
  }
};
/**
 * Tracks completed work units for one step and reports whole-percent progress.
 *
 * The callback fires once on construction with the starting percentage, then
 * only when the floored percentage increases.
 */
var WorkReporter = class {
  /**
   * @param {number} _totalWork - Total work units in the step.
   * @param {Function} _callback - Called as (percentage, message?).
   * @param {number} [initWorkUnits] - Work units already done; defaults to 0.
   */
  constructor(_totalWork, _callback, initWorkUnits) {
    this._totalWork = _totalWork;
    this._callback = _callback;
    this._currentWork = initWorkUnits != null ? initWorkUnits : 0;
    // Store the starting percentage so the first reportProgress call does not
    // re-send a value that was already reported here.
    this._currentPercentage = this._computePercentage();
    this._callback(this._currentPercentage);
  }

  /**
   * Completed work as a whole percentage capped at 100.
   * A 0/0 ratio (no work defined and none done) is reported as 0, not NaN.
   */
  _computePercentage() {
    const floored = Math.floor(this._currentWork / this._totalWork * 100);
    return Number.isNaN(floored) ? 0 : Math.min(100, floored);
  }

  /**
   * Counts one more unit of work and notifies the callback when the whole
   * percentage has grown.
   *
   * @param {string} [message] - Optional text forwarded to the callback.
   */
  reportProgress(message) {
    this._currentWork += 1;
    const newPercentage = this._computePercentage();
    if (newPercentage > this._currentPercentage) {
      this._currentPercentage = newPercentage;
      this._callback(newPercentage, message);
    }
  }
};
// src/ipc/onStartScraping/onStartScraping.ts
/**
 * Creates the registrar a scraper uses to hook its entry point to the IPC
 * channel.
 *
 * `onStartScraping(totalSteps)(callback)` listens for "message" events on the
 * process. For each start request it calls
 * `callback(message.payload.optionValues, progressReporter)`, awaits the result
 * and reports it with `reportDone`. Any failure is reported with `reportError`.
 *
 * @param {number} totalSteps - Step count given to the shared ProgressReporter.
 * @returns {Function} Registrar taking the scraper callback.
 */
var onStartScraping = (totalSteps) => (callback) => {
  const progressReporter = new ProgressReporter(totalSteps);
  process.on("message", (message) => __async(void 0, null, function* () {
    // This listener is permanent, so it also sees replies meant for other
    // listeners (for example the answer to ProgressReporter.requestInput).
    // A message that declares a different type is not a start request and must
    // not re-run the scraper. Untyped messages are still accepted as before.
    if (message != null && message.type != null && message.type !== "START_SCRAPING" /* START_SCRAPING */) {
      return;
    }
    try {
      reportDone(yield callback(message.payload.optionValues, progressReporter));
    } catch (e) {
      if (e instanceof Error) {
        reportError(e.message);
      } else if (typeof e === "string") {
        reportError(e);
      } else {
        // Report non-Error, non-string throws too, so the parent always gets a
        // terminal message instead of waiting forever.
        let description;
        try {
          description = String(e);
        } catch (conversionError) {
          description = "Unknown scraping error";
        }
        reportError(description);
      }
    }
  }));
};
// src/jobs/types/Charts.ts
/**
 * Numeric chart kinds with a reverse lookup from value back to name
 * (`ChartType.PIE === 0` and `ChartType[0] === "PIE"`).
 */
var ChartType = { PIE: 0 };
ChartType[ChartType.PIE] = "PIE";
// src/jobs/utils/getRandomJob.ts
/**
 * Builds a job record filled with random sample data.
 *
 * @param {number} index - Used (via `toString()`) as the job id.
 * @returns {object} A job whose optional fields are randomly present or
 *   undefined; `skills`, `industries` and `compensationAndBenefits` are always
 *   empty arrays.
 */
var getRandomJob = (index) => {
  // The pieces are produced in the same order as the fields of the result so
  // the sequence of random draws stays the same.
  const id = index.toString();
  const url = Math.random() > 0.5 ? "uwwave.ca" : void 0;
  const company = getRandomCompany();
  const jobTitle = getRandomJobTitle();
  const openings = Math.floor(Math.random() * 100);
  const applications = Math.floor(Math.random() * 1e4);
  const location = getRandomLocation();

  const postedAt = getRandomDatetime();
  const deadlineAt = getRandomDatetime();
  const startAt = getRandomDatetime();
  const endAt = getRandomDatetime();
  const duration = getRandomDuration();
  const dates = { postedAt, deadlineAt, startAt, endAt, duration };

  const technologies = getRandomTechnologies();
  const applicationDocuments = getRandomApplicationDocuments();
  const specialRequirements = getRandomSpecialRequirements();
  const categorizations = {
    technologies,
    skills: [],
    industries: [],
    applicationDocuments,
    specialRequirements,
    compensationAndBenefits: []
  };

  const descriptions = getRandomDescriptions();
  const charts = getRandomCharts();
  const jobType = getRandomJobType();
  const salaries = getRandomSalaries();

  return {
    id,
    url,
    company,
    jobTitle,
    openings,
    applications,
    location,
    dates,
    categorizations,
    descriptions,
    charts,
    jobType,
    salaries
  };
};
/**
 * Picks a random company from a fixed list.
 *
 * @returns {{name: string}} The company, falling back to "Facebook" if the
 *   random index yields no entry.
 */
var getRandomCompany = () => {
  const companyNames = [
    "Facebook", "Shopify", "Faire", "Google", "Workday", "Bloomberg",
    "Blackberry", "IBM", "Amazon", "Microsoft", "Tesla", "Apple",
    "Netflix", "Twitter", "Uber", "Lyft", "Airbnb", "Pinterest",
    "Reddit", "Slack", "Zoom", "Salesforce", "Dropbox"
  ];
  const position = Math.floor(Math.random() * companyNames.length);
  const chosen = companyNames[position];
  return { name: chosen == null ? "Facebook" : chosen };
};
/**
 * Picks a random job title from a fixed list.
 *
 * @returns {string} The title, falling back to "Software Engineer" if the
 *   random index yields no entry.
 */
var getRandomJobTitle = () => {
  const titles = [
    "Software Engineer", "Product Manager", "Data Scientist", "UX Designer",
    "UI Designer", "Frontend Developer", "Backend Developer",
    "Fullstack Developer", "DevOps Engineer", "QA Engineer",
    "Technical Writer", "Technical Support", "Technical Recruiter",
    "Technical Program Manager", "Technical Account Manager",
    "Technical Sales", "Technical Marketing", "Technical Trainer",
    "Technical Consultant", "Technical Architect", "Technical Analyst",
    "Technical Specialist", "Technical Director", "Technical Lead",
    "Technical Advisor"
  ];
  const chosen = titles[Math.floor(Math.random() * titles.length)];
  if (chosen == null) {
    return "Software Engineer";
  }
  return chosen;
};
/**
 * Builds a random location from independent fixed lists.
 *
 * Every list ends with an `undefined` entry, so any field may be undefined.
 * The fields are drawn independently and need not be geographically consistent.
 *
 * @returns {{type, country, city, region, state, address}}
 */
var getRandomLocation = () => {
  // One uniform draw per call; callers below keep the draw order
  // type, country, city, region, state, address.
  const pick = (options) => options[Math.floor(Math.random() * options.length)];

  const locationTypes = ["Onsite", "remote", "hybrid", void 0];
  const countries = [
    "Canada", "United States", "United Kingdom", "Germany", "France",
    "Italy", "Spain", "Australia", "Japan", "China", "India", "Mexico",
    "Russia", "South Korea", "Egypt",
    void 0
  ];
  const cities = [
    "Toronto", "Vancouver", "Montreal", "Calgary", "Ottawa", "Edmonton",
    "Winnipeg", "Quebec City", "Halifax", "Victoria", "Regina", "Saskatoon",
    "Charlottetown", "St. John's", "Whitehorse", "Yellowknife", "Iqaluit",
    void 0
  ];
  const regions = [
    "Ontario", "British Columbia", "Quebec", "Alberta", "Manitoba",
    "Saskatchewan", "Nova Scotia", "New Brunswick", "Prince Edward Island",
    "Newfoundland and Labrador", "Northwest Territories", "Yukon", "Nunavut",
    void 0
  ];
  const states = [
    "California", "Texas", "Florida", "New York", "Pennsylvania", "Illinois",
    "Ohio", "Georgia", "North Carolina", "Michigan", "New Jersey", "Virginia",
    "Washington", "Arizona", "Massachusetts", "Tennessee", "Indiana",
    "Missouri", "Maryland", "Wisconsin", "Colorado", "Minnesota",
    "South Carolina", "Alabama", "Louisiana", "Kentucky", "Oregon",
    "Oklahoma", "Connecticut", "Iowa", "Mississippi", "Arkansas", "Utah",
    "Nevada", "Kansas", "New Mexico", "Nebraska", "West Virginia", "Idaho",
    "Hawaii", "Maine", "New Hampshire", "Montana", "Rhode Island",
    "Delaware", "South Dakota", "North Dakota", "Alaska",
    void 0
  ];
  const addresses = [
    "123 Main St", "456 Elm St", "789 Oak St", "101 Pine St", "202 Maple St",
    "303 Cedar St", "404 Birch St", "505 Walnut St", "606 Spruce St",
    "707 Pine St", "808 Elm St", "909 Oak St", "1000 Main St",
    "1111 Maple St", "1222 Cedar St", "1333 Birch St", "1444 Walnut St",
    "1555 Spruce St", "1666 Pine St", "1777 Elm St", "1888 Oak St",
    "1999 Main St",
    void 0
  ];

  const location = {};
  location.type = pick(locationTypes);
  location.country = pick(countries);
  location.city = pick(cities);
  location.region = pick(regions);
  location.state = pick(states);
  location.address = pick(addresses);
  return location;
};
/**
 * Produces a random millisecond timestamp between the epoch and now, or
 * undefined roughly half the time.
 *
 * @returns {number|undefined}
 */
var getRandomDatetime = () => {
  const upperBound = new Date().getTime();
  // The timestamp is drawn first, then the presence check, in that order.
  const timestamp = Math.floor(Math.random() * upperBound);
  if (Math.random() > 0.5) {
    return void 0;
  }
  return new Date(timestamp).getTime();
};
/**
 * Returns a random subset of the durations "8 months", "4 months" and
 * "12 months" (in that order), each included with an independent coin flip.
 *
 * @returns {string[]}
 */
var getRandomDuration = () => {
  const candidates = ["8 months", "4 months", "12 months"];
  return candidates.filter(() => Math.random() > 0.5);
};
/**
 * Returns a random subset of a fixed technology list, keeping list order.
 * Each entry is kept when its own random draw exceeds 0.7.
 *
 * @returns {string[]}
 */
var getRandomTechnologies = () => {
  const catalogue = [
    "React", "Vue", "Angular", "Svelte", "Ember", "Backbone", "jQuery",
    "Vanilla JS", "Node", "Express", "Koa", "Nest", "Fastify", "Hapi",
    "Sails", "Meteor", "Next", "Nuxt", "Gatsby", "Gridsome", "Sapper",
    "Blitz", "Redwood", "React Native", "Flutter", "Swift", "Kotlin",
    "Java", "C#", "C++", "Python", "Ruby", "PHP", "Go", "Rust", "Scala",
    "Haskell", "Erlang", "Clojure", "F#", "OCaml", "Elm", "Reason",
    "PureScript", "TypeScript", "JavaScript", "HTML", "CSS", "SASS",
    "LESS", "Stylus", "PostCSS", "Tailwind", "Bootstrap", "Material UI",
    "Ant Design", "Chakra UI", "Styled Components", "Emotion",
    "Framer Motion", "GSAP", "D3", "Three", "P5", "Canvas", "WebGL",
    "WebRTC", "WebSockets", "GraphQL", "REST", "gRPC", "SOAP", "JSON",
    "XML", "YAML", "TOML", "CSV", "Markdown", "LaTeX", "SQL", "NoSQL",
    "MongoDB", "Cassandra", "CouchDB", "Firebase", "DynamoDB", "Postgres",
    "MySQL", "MariaDB", "SQLite", "Redis", "Memcached", "Elasticsearch",
    "Solr", "Lucene", "Algolia", "Azure Search", "AWS Lambda",
    "Google Cloud Functions", "Azure Functions", "AWS EC2",
    "Google Compute Engine", "Azure Virtual Machines", "AWS S3"
  ];
  return catalogue.filter(() => Math.random() > 0.7);
};
/**
 * Returns a random subset of application document names, or undefined.
 *
 * Each document is kept when its draw exceeds 0.6; a final draw must exceed
 * 0.7 for the list to be returned at all.
 *
 * @returns {string[]|undefined}
 */
function getRandomApplicationDocuments() {
  const documentNames = [
    "Resume",
    "Cover Letter",
    "Portfolio",
    "Transcript",
    "Criminal record",
    "Drivers License"
  ];
  // The subset is always drawn, even when the result ends up undefined.
  const selected = documentNames.filter(() => Math.random() > 0.6);
  if (Math.random() > 0.7) {
    return selected;
  }
  return void 0;
}
/**
 * Returns a random subset of special requirement labels, or undefined.
 *
 * Each label is kept when its draw exceeds 0.6; a final draw must exceed 0.7
 * for the list to be returned at all.
 *
 * @returns {string[]|undefined}
 */
function getRandomSpecialRequirements() {
  const labels = ["Canadian Citizen", "LGTBQAIPIESF+ or minority group"];
  // The subset is always drawn, even when the result ends up undefined.
  const selected = labels.filter(() => Math.random() > 0.6);
  if (Math.random() > 0.7) {
    return selected;
  }
  return void 0;
}
/**
 * Builds a random list of description sections.
 *
 * One coin flip is made per slot (as many slots as the longer of the two
 * lists). A kept slot pairs the title at that position with a filler text
 * chosen by cycling through the text list. Every section has `type: "type"`.
 *
 * @returns {Array<{title: string, content: string, type: string}>}
 */
var getRandomDescriptions = () => {
  const sectionTitles = [
    "Description",
    "Requirements",
    "Responsibilities",
    "Qualifications",
    "Expectations",
    "Perks",
    "Benefits",
    "Compensation",
    "Culture",
    "Team"
  ];
  const fillerTexts = [
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed in magna id nulla molestie suscipit. Nullam nec semper turpis. Sed nec felis nec justo ultricies ultricies. Donec nec ex nec libero lacinia euismod. Nullam in ex auctor, sagittis nisl sit amet, tincidunt eros. Nulla facilisi. Nam nec nunc nec tortor aliquet venenatis. Nullam auctor, libero nec fermentum tincidunt, nunc eros tincidunt nunc, nec fermentum nisl nunc et eros. Quisque nec orci nec turpis tincidunt fermentum. Nullam nec ex nec libero lacinia euismod. Nullam in ex auctor, sagittis nisl sit amet, tincidunt eros. Nulla facilisi. Nam nec nunc nec tortor aliquet venenatis. Nullam auctor, libero nec fermentum tincidunt, nunc eros tincidunt nunc, nec fermentum nisl nunc et eros. Quisque nec orci nec turpis tincidunt fermentum.",
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed in magna id nulla molestie suscipit. Nullam nec semper turpis. Sed nec felis nec justo ultricies ultricies. Donec nec ex nec libero lacinia euismod. Nullam in ex auctor, sagittis nisl sit amet, tincidunt eros. Nulla facilisi. Nam nec nunc nec tortor aliquet venenatis. Nullam auctor, libero nec fermentum tincidunt, nunc eros tincidunt nunc, nec fermentum nisl nunc et eros. Quisque nec orci nec turpis tincidunt fermentum. Nullam nec ex nec libero lacinia euismod. Nullam in ex auctor, sagittis nisl sit amet, tincidunt eros. Nulla facilisi. Nam nec nunc nec tortor aliquet venenatis. Nullam auctor, libero nec fermentum tincidunt, nunc eros tincidunt nunc, nec fermentum nisl nunc et eros. Quisque nec orci nec turpis tincidunt fermentum.",
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed in magna id nulla molestie suscipit. Nullam nec sem"
  ];
  const slots = Math.max(sectionTitles.length, fillerTexts.length);
  const sections = [];
  for (let slot = 0; slot < slots; slot += 1) {
    if (!(Math.random() > 0.5)) {
      continue;
    }
    const title = sectionTitles[slot % sectionTitles.length];
    const content = fillerTexts[slot % fillerTexts.length];
    if (title && content) {
      sections.push({ title, content, type: "type" });
    }
  }
  return sections;
};
/**
 * Builds a random set of pie charts keyed by chart title.
 *
 * Each of the titles "Co-op term", "Faculty" and "Program" is dropped when its
 * draw exceeds 0.7. For a kept chart, each key A–E is dropped when its draw
 * exceeds 0.8 and otherwise gets a random integer value below 200.
 *
 * @returns {Object<string, {title: string, data: Object<string, number>, type: number}>}
 */
var getRandomCharts = () => {
  const titles = ["Co-op term", "Faculty", "Program"];
  const sliceKeys = ["A", "B", "C", "D", "E"];
  const charts = {};
  for (const title of titles) {
    if (Math.random() > 0.7) {
      continue;
    }
    const data = {};
    for (const sliceKey of sliceKeys) {
      // The inclusion draw always happens before the key check.
      const skipped = Math.random() > 0.8 || !sliceKey;
      if (!skipped) {
        data[sliceKey] = Math.floor(Math.random() * 200);
      }
    }
    if (title) {
      charts[title] = { title, data, type: 0 /* PIE */ };
    }
  }
  return charts;
};
/**
 * Returns a random subset of job type labels, or undefined.
 *
 * Each label is kept when its draw exceeds 0.7; a final draw must also exceed
 * 0.7 for the list to be returned at all.
 *
 * @returns {string[]|undefined}
 */
var getRandomJobType = () => {
  const labels = [
    "Full-time",
    "Part-time",
    "Contract",
    "Internship",
    "Freelance",
    "Volunteer",
    "Apprenticeship"
  ];
  // The subset is always drawn, even when the result ends up undefined.
  const selected = labels.filter(() => Math.random() > 0.7);
  if (Math.random() > 0.7) {
    return selected;
  }
  return void 0;
};
/**
 * Builds a random salary table, or undefined when the first draw exceeds 0.7.
 *
 * @returns {object|undefined} One `getRandomSalary()` result per field:
 *   salary, salaryMin, salaryMax, coop1Salary … coop6Salary, coop7PlusSalary.
 */
var getRandomSalaries = () => {
  if (Math.random() > 0.7) {
    return void 0;
  }
  const fieldNames = [
    "salary",
    "salaryMin",
    "salaryMax",
    "coop1Salary",
    "coop2Salary",
    "coop3Salary",
    "coop4Salary",
    "coop5Salary",
    "coop6Salary",
    "coop7PlusSalary"
  ];
  const table = {};
  for (const fieldName of fieldNames) {
    table[fieldName] = getRandomSalary();
  }
  return table;
};
/**
 * Builds one random salary entry, or undefined when the first draw exceeds 0.7.
 *
 * The amount is scaled to the chosen pay period. Previously the period was
 * compared against labels ("Hourly", "Biweekly", …) that never matched the
 * generated values ("hourly", "bi-weekly", …), so every amount was below 50.
 *
 * @returns {{amount: string, currency: (string|undefined), period: (string|undefined)}|undefined}
 *   `amount` is a non-negative integer rendered as a string.
 */
var getRandomSalary = () => {
  if (Math.random() > 0.7) {
    return void 0;
  }
  // Trailing undefined entries make a missing currency/period fairly likely.
  const currencies = [
    "CAD", "USD", "EUR", "GBP", "JPY", "CNY", "INR", "MXN", "RUB", "KRW", "EGP",
    void 0, void 0, void 0, void 0, void 0
  ];
  // "uhoh" is kept as-is: it has no cap below and so uses the default.
  const periods = [
    "hourly", "weekly", "bi-weekly", "monthly", "quarterly", "uhoh", "yearly",
    void 0, void 0, void 0, void 0
  ];
  // Exclusive upper bound of the amount for each recognised period.
  const amountCapByPeriod = {
    "hourly": 100,
    "weekly": 2e3,
    "bi-weekly": 4e3,
    "monthly": 8e3,
    "quarterly": 24e3,
    "yearly": 96e3
  };
  const defaultAmountCap = 50;

  const currency = currencies[Math.floor(Math.random() * currencies.length)];
  const period = periods[Math.floor(Math.random() * periods.length)];
  const isKnownPeriod = typeof period === "string" && Object.prototype.hasOwnProperty.call(amountCapByPeriod, period);
  const amountCap = isKnownPeriod ? amountCapByPeriod[period] : defaultAmountCap;
  const amount = Math.floor(Math.random() * amountCap).toString();
  return {
    amount,
    currency,
    period
  };
};
// src/scrapers/types/ScraperInput/ScraperInput.ts
/**
 * Kinds of input a scraper can declare; the values are the capitalised labels
 * "Select", "Text", "Password" and "Checkbox".
 */
var ScraperInputType = {
  SELECT: "Select",
  TEXT: "Text",
  PASSWORD: "Password",
  CHECKBOX: "Checkbox"
};
// src/api/API.ts
/**
 * Namespace object bundling the scraper entry points; currently it only
 * exposes `onStartScraping`.
 */
var API = {
  onStartScraping: onStartScraping
};
// Annotate the CommonJS export names for ESM import in node.
// The `0 &&` guard means this assignment never runs; the object literal is
// only here so the export names can be read from the source text.
0 && (module.exports = {
API,
ChartType,
MessageType,
ProgressReporter,
ScraperInputType,
getRandomJob,
onStartScraping
});
//# sourceMappingURL=index.js.map