UNPKG

hotels-scraper-js

Version:

Parser for Airbnb, Booking, Hotels.com in JavaScript. Sponsored by SerpApi.

369 lines (346 loc) 16.3 kB
import moment from "moment"; import { getBrowserInstance } from "../helpers/browserInstance.cjs"; import getHotelsComFilters from "./getHotelsComFilters.js"; let multiplier = 1; const scrollPage = async (page, resultsLimit) => { const getFullHeight = async () => await page.evaluate(`document.body.scrollHeight`); const getCurrentHeight = async () => (await page.evaluate(`window.scrollY`)) + (await page.evaluate(`window.innerHeight`)); const getResultsLength = async () => Array.from(await page.$$('[data-stid="section-results"] [data-stid="lodging-card-responsive"]')).length; let resultsLength = await getResultsLength(); while (resultsLimit >= resultsLength) { let fullHeight = await getFullHeight(); let currentHeight = await getCurrentHeight(); while (fullHeight !== currentHeight) { await page.waitForTimeout(500 * multiplier); await page.keyboard.press("PageDown"); fullHeight = await getFullHeight(); currentHeight = await getCurrentHeight(); } await page.waitForTimeout(5000 * multiplier); resultsLength = await getResultsLength(); if (resultsLimit <= resultsLength) break; const isNextPage = await page.$('[data-stid="show-more-results"]'); if (!isNextPage) break; await isNextPage.click(); await page.waitForTimeout(2000 * multiplier); let isLoader = await page.$(".uitk-loader.is-visible"); while (isLoader) { await page.waitForTimeout(2000 * multiplier); isLoader = await page.$(".uitk-loader.is-visible"); } const newResultsLength = await getResultsLength(); if (newResultsLength === resultsLength) break; } }; const getHotelsInfo = async (page) => { return await page.evaluate(() => { return Array.from(document.querySelectorAll('[data-stid="section-results"] [data-stid="lodging-card-responsive"]')).map((el) => { const priceBlock = el.querySelector(".uitk-layout-grid-item .uitk-layout-grid-item.uitk-layout-flex > .uitk-layout-flex"); const rawLink = el.querySelector('[data-stid="open-hotel-information"]').getAttribute("href").slice(1); const link = `${window.location.href.slice(0, window.location.href.indexOf("Hotel-Search"))}${rawLink.slice(0, rawLink.indexOf("?"))}`; return { title: el.querySelector("h4").textContent.trim(), isAd: Boolean(el.querySelector(".uitk-badge-promoted")), location: el.querySelector("h4+div").textContent.trim(), snippet: { title: el .querySelectorAll( ".uitk-layout-grid-item .uitk-layout-grid-item > .uitk-layout-flex > .uitk-layout-flex-item > .uitk-text:not(.uitk-type-300)" )[0] ?.textContent.trim(), text: el .querySelectorAll( ".uitk-layout-grid-item .uitk-layout-grid-item > .uitk-layout-flex > .uitk-layout-flex-item > .uitk-text:not(.uitk-type-300)" )[1] ?.textContent.trim(), }, paymentOptions: Array.from( el.querySelectorAll( ".uitk-layout-grid-item .uitk-layout-grid-item > .uitk-layout-flex > .uitk-layout-flex-item > div:not(.uitk-text) > .uitk-text" ) ).map((el) => el.textContent.trim()), highlightedAmenities: Array.from(el.querySelectorAll("h4+div+div.uitk-layout-flex > div")).map((el) => el.textContent.trim()), price: priceBlock ? { currency: priceBlock .querySelector("div:first-child > div > span") ?.textContent.trim() .replace(/[\d|,|.]+/gm, "") .replace(/\s/gm, ""), value: parseFloat( priceBlock .querySelector("div:first-child > div > span") ?.textContent.trim() .match(/[\d|,|.]+/gm)?.[0] .replace(",", "") ), withTaxesAndCharges: parseFloat( priceBlock .querySelector("div.uitk-layout-flex:nth-child(2)") ?.textContent.trim() .match(/[\d|,|.]+/gm)?.[0] .replace(",", "") ), } : "Sold out", rating: { score: parseFloat(el.querySelector(".uitk-layout-grid-item .uitk-layout-grid-item .uitk-layout-flex > span:nth-child(1)")?.textContent.trim()) || "No rating", reviews: parseInt( el .querySelector(".uitk-layout-grid-item .uitk-layout-grid-item .uitk-layout-flex > span:last-child") ?.textContent.trim() .match(/[\d|,|.]+/gm)?.[0] .replace(",", "") ) || "No rating", }, link, }; }); }); }; const getHotelsComHotels = async ( multiplierArgument, appliedFilters, priceFrom, priceTo, country, language, limit, selectedLocation, selectedCheckIn, selectedCheckOut, selectedAdults, selectedChildren ) => { const resultsLimit = limit || 20; const location = selectedLocation || "paris"; const checkInDateArray = selectedCheckIn?.replace(/(\d+)\/(\d+)\/(\d+)/, "$3,$1,$2").split(","); const checkOutDateArray = selectedCheckOut?.replace(/(\d+)\/(\d+)\/(\d+)/, "$3,$1,$2").split(","); const adults = selectedAdults || 2; const children = selectedChildren || 0; multiplier = multiplierArgument; const { locales, filters } = getHotelsComFilters(true); let parsedFilters = []; const badFilters = []; if (appliedFilters) { if (!Array.isArray(appliedFilters)) { throw new Error(`"filters" value is not valid. It must be an array with available filters. Use "getFilters" method to get available filters.`); } appliedFilters.forEach((el) => { let parsedFilter; for (const filtersArray in filters) { if (parsedFilter) break; parsedFilter = filters[filtersArray].find( (filter) => filter.value.toLowerCase() === el.toLowerCase() || filter.name.toLowerCase() === el.toLowerCase() ); } if (parsedFilter) parsedFilters.push(parsedFilter.value); else badFilters.push(el); }); } const parsedLocale = country ? locales.find((el) => el.country.toLowerCase() === country.toLowerCase()) : true; const parsedlanguage = language ? parsedLocale.languages?.find((el) => el.name.toLowerCase() === language.toLowerCase() || el.value.toLowerCase() === language.toLowerCase()) : true; if (appliedFilters && badFilters.length && !parsedLocale && !parsedlanguage) { throw new Error( `Provided filters "${badFilters}", country "${country}" and language "${language}" are not valid. Use "getFilters" method to get available filters.` ); } if (!parsedLocale && !parsedlanguage) { throw new Error(`Provided country "${country}" and language "${language}" are not valid. Use "getFilters" method to get available filters.`); } if (appliedFilters && badFilters.length && !parsedLocale) { throw new Error(`Provided filters "${badFilters}" and country "${country}" are not valid. Use "getFilters" method to get available filters.`); } if (appliedFilters && badFilters.length && !parsedlanguage) { throw new Error(`Provided filters "${badFilters}" and language "${language}" are not valid. Use "getFilters" method to get available filters.`); } if (appliedFilters && badFilters.length) { throw new Error(`Provided filters "${badFilters}" is not valid. Use "getFilters" method to get available filters.`); } if (!parsedLocale) { throw new Error(`Provided country "${country}" is not valid. Use "getFilters" method to get available filters.`); } if (!parsedlanguage) { throw new Error(`Provided language "${language}" is not valid for this country "${country}". Use "getFilters" method to get available filters.`); } if ((checkInDateArray && !checkOutDateArray) || (!checkInDateArray && checkOutDateArray)) { throw new Error(`If you want to define dates, it must be both check-in and check-out dates.`); } let checkInMonthAfterNow; let checkOutMonthAfterNow; if (checkInDateArray && checkOutDateArray) { checkInDateArray[1] = checkInDateArray[1] - 1; checkOutDateArray[1] = checkOutDateArray[1] - 1; const momentCheckInDate = moment(checkInDateArray); const momentCheckOutDate = moment(checkOutDateArray); if (momentCheckInDate.toString() === "Invalid date" || momentCheckOutDate.toString() === "Invalid date") { throw new Error(`Invalid date format. Correct format is "12/31/2023"`); } if (momentCheckInDate.diff(moment(), "days") < 0) { throw new Error(`Check-in date can't be in past.`); } if (momentCheckOutDate.diff(momentCheckInDate, "days") < 1) { throw new Error(`Check-out date must be greater than check-in date.`); } if (momentCheckOutDate.diff(momentCheckInDate, "days") >= 29) { throw new Error(`Check-out date must be within 29 days of check-in date.`); } const currentMonth = parseInt(moment().format("M")) + parseInt(moment().format("YYYY")) * 12; const checkInMonth = parseInt(momentCheckInDate.format("M")) + parseInt(momentCheckInDate.format("YYYY")) * 12; const checkOutMonth = parseInt(momentCheckOutDate.format("M")) + parseInt(momentCheckOutDate.format("YYYY")) * 12; checkInMonthAfterNow = checkInMonth - currentMonth; checkOutMonthAfterNow = checkOutMonth - currentMonth; } if (priceFrom || priceFrom) { if ((priceFrom && typeof priceFrom !== "number") || (priceTo && typeof priceTo !== "number") || (priceFrom && priceFrom < 0)) { throw new Error(`Provided price is not valid. Price must be a number greater than 0.`); } if (priceFrom && priceFrom) { if (priceFrom > priceTo) { throw new Error(`Provided prices are not valid. "Price to" value must be greater than "price from" value.`); } } } const url = `https://hotels.com/`; const { page, closeBrowser } = await getBrowserInstance(); await page.goto(url); try { await page.waitForSelector('[data-stid="destination_form_field-menu-trigger"]'); } catch (e) { await page.goto(url); await page.waitForSelector('[data-stid="destination_form_field-menu-trigger"]'); } if (country) { await page.click('[data-stid="button-type-picker-trigger"]'); await page.waitForTimeout(2000 * multiplier); const regionSelect = await page.$("#site-selector"); await regionSelect.select(parsedLocale.code); await page.waitForTimeout(1000 * multiplier); if (language) { const languageSelect = await page.$("#language-selector"); await languageSelect.select(parsedlanguage.value); await page.waitForTimeout(1000 * multiplier); } await page.click(`#app-layer-disp-settings-picker button.uitk-button`); await page.waitForTimeout(5000 * multiplier); } await page.waitForSelector('[data-stid="destination_form_field-menu-trigger"]'); await page.click('[data-stid="destination_form_field-menu-trigger"]'); await page.waitForTimeout(1000 * multiplier); await page.keyboard.type(location); await page.waitForTimeout(5000 * multiplier); await page.click('[data-stid="destination_form_field-result-item-button"]'); await page.waitForTimeout(1000 * multiplier); if (selectedCheckIn && selectedCheckOut) { await page.click("#date_form_field-btn"); await page.waitForTimeout(1000 * multiplier); for (let i = 0; i < checkInMonthAfterNow; i++) { await page.click('[data-stid="date-picker-paging"]:last-child'); await page.waitForTimeout(1000 * multiplier); } await page.click(`[data-stid="date-picker-month"] [data-day="${checkInDateArray[2]}"]`); await page.waitForTimeout(1000 * multiplier); for (let i = 0; i < checkOutMonthAfterNow - checkInMonthAfterNow; i++) { await page.click('[data-stid="date-picker-paging"]:last-child'); await page.waitForTimeout(1000 * multiplier); } await page.click(`[data-stid="date-picker-month"] [data-day="${checkOutDateArray[2]}"]`); await page.waitForTimeout(1000 * multiplier); await page.click('[data-stid="apply-date-picker"]'); await page.waitForTimeout(1000 * multiplier); } if (adults !== 2 && children !== 0) { await page.click('[data-stid="open-room-picker"]'); await page.waitForTimeout(1000 * multiplier); if (adults === 1) { await page.keyboard.press("Tab"); await page.waitForTimeout(500 * multiplier); await page.keyboard.press("Enter"); await page.waitForTimeout(500 * multiplier); await page.keyboard.press("Tab"); await page.waitForTimeout(500 * multiplier); } else if (adults !== 2) { await page.keyboard.press("Tab"); await page.waitForTimeout(500 * multiplier); await page.keyboard.press("Tab"); await page.waitForTimeout(500 * multiplier); for (let i = 0; i < adults - 2; i++) { await page.keyboard.press("Enter"); await page.waitForTimeout(500 * multiplier); } } await page.keyboard.press("Tab"); await page.waitForTimeout(500 * multiplier); for (let i = 0; i < children; i++) { await page.keyboard.press("Enter"); await page.waitForTimeout(500 * multiplier); } for (let i = 0; i < children; i++) { await page.keyboard.press("Tab"); await page.waitForTimeout(500 * multiplier); await page.keyboard.type("10"); await page.waitForTimeout(500 * multiplier); } await page.click("#traveler_selector_done_button"); await page.waitForTimeout(1000 * multiplier); } await page.click("#search_button"); await page.waitForSelector('[data-stid="section-results"] [data-stid="lodging-card-responsive"]'); await page.waitForTimeout(1000 * multiplier); if (parsedFilters.length) { for (const filter of parsedFilters) { if (filter.includes("guestRating")) { await page.evaluate( (filter) => Array.from(document.querySelectorAll("label")).forEach((el) => { if (el.getAttribute("for")?.includes(filter)) el.click(); }), filter ); await page.waitForTimeout(2000 * multiplier); } else { await page.click(`[for="${filter}"]`); await page.waitForTimeout(2000 * multiplier); } } } if (priceFrom) { await page.focus('[for="price-primary"]'); await page.waitForTimeout(2000 * multiplier); const step = await page.evaluate(() => parseInt(document.querySelector("#price-primary")?.getAttribute("step"))); const fromClickTimes = Math.floor(priceFrom / step); for (let i = 0; i < fromClickTimes; i++) { await page.keyboard.press("ArrowRight"); await page.waitForTimeout(3000 * multiplier); await page.keyboard.press("Tab"); await page.waitForTimeout(500 * multiplier); } if (priceTo) { const max = await page.evaluate(() => parseInt(document.querySelector("#price-primary")?.getAttribute("max"))); const toClickTimes = Math.floor((max - priceTo) / step); for (let i = 0; i < toClickTimes; i++) { await page.keyboard.press("Tab"); await page.waitForTimeout(500 * multiplier); await page.keyboard.press("ArrowLeft"); await page.waitForTimeout(3000 * multiplier); await page.keyboard.press("Tab"); await page.waitForTimeout(500 * multiplier); } } } await page.waitForTimeout(1000 * multiplier); await page.focus("header a"); await page.waitForTimeout(3000 * multiplier); await page.waitForSelector('[data-stid="section-results"] [data-stid="lodging-card-responsive"]'); await page.waitForTimeout(1000 * multiplier); await scrollPage(page, resultsLimit); const results = await getHotelsInfo(page); await closeBrowser(); return results.filter((el, i) => i < resultsLimit); }; export default getHotelsComHotels;