mercari
Version:
Scraper for the Japanese site Mercari (mercari.com/jp) that searches item listings by keyword
86 lines (73 loc) • 2.15 kB
text/typescript
import Xray from "x-ray"
import qs from "qs"
/**
 * Shared x-ray instance with the value filters used by the selector strings
 * in this file (e.g. `"a@href | parseProductCode | makeProductUrl"`).
 *
 * Except for `formImageUrl`, every filter passes non-string values through
 * unchanged.
 */
const x = Xray({
  filters: {
    // Removes leading and trailing whitespace.
    trim: value => {
      if (typeof value !== "string") return value
      return value.trim()
    },

    // Drops every character that is not an ASCII digit, e.g. currency
    // symbols and thousands separators in a price label.
    priceFind: value => {
      if (typeof value !== "string") return value
      return value.replace(/[^0-9]/g, "")
    },

    // Converts with Number(), so an empty string becomes 0 and a string that
    // is not numeric becomes NaN.
    parseInt: value => {
      if (typeof value !== "string") return value
      return Number(value)
    },

    // Captures the path segment following `/jp/items/` (a trailing slash is
    // required); when the pattern is absent the input is returned as is.
    parseProductCode: value => {
      const found =
        typeof value === "string" ? value.match(/\/jp\/items\/(.*?)\//) : null
      return found ? found[1] : value
    },

    // Rewrites a URL so that it is requested through the proxy.
    proxy: value => {
      if (typeof value !== "string") return value
      return makeProxyURL(value)
    },

    // NOTE(review): this builds a suruga-ya.jp box-art URL and is not
    // referenced by `search` in this file; it looks like a leftover from a
    // different scraper. Unlike its siblings it does not check the input type.
    formImageUrl: code =>
      `https://www.suruga-ya.jp/pics/boxart_m/${code}m.jpg`,

    // Turns a product code into an item page URL under `baseURL`.
    makeProductUrl: value =>
      typeof value === "string" ? `${baseURL}items/${value}` : value,
  },
})
/** Settings accepted by `search`. */
type Options = {
  /** Passed to x-ray's `.limit()` to cap how many pages are crawled. */
  maxPages: number
}

/** Used when the caller supplies no options: no cap on the page count. */
const defaultOptions: Options = { maxPages: Number.POSITIVE_INFINITY }

/** Root of the Japanese Mercari site; item URLs are built from it. */
const baseURL = "https://www.mercari.com/jp/"

/** Search endpoint; the query string is appended by `search`. */
const searchURL = baseURL + "search/"
/**
 * Searches Mercari JP for a keyword and scrapes every `.items-box` element of
 * the result page into an `Item`.
 *
 * The search page is requested through `makeProxyURL`, and the crawl is
 * capped with `.limit(options.maxPages)`.
 *
 * NOTE(review): the pagination selector is the same `.items-box a@href` that
 * matches item links, so it appears to follow the first listing's link rather
 * than a "next page" link. Confirm against the live markup.
 *
 * @param query Keyword sent as the `keyword` search parameter.
 * @param options Crawl settings; defaults to `defaultOptions`, which places no
 *   cap on the number of pages.
 * @returns The scraped items.
 */
export async function search(
  query: string,
  options: Options = defaultOptions
): Promise<Item[]> {
  // Start from page 1; `status_on_sale: 1` presumably restricts the results
  // to listings that are still for sale. TODO confirm.
  const params = { keyword: query, page: 1, status_on_sale: 1 }
  const proxiedSearchURL = makeProxyURL(
    searchURL + "?" + qs.stringify(params)
  )

  // x-ray selectors, evaluated inside each `.items-box` element.
  const itemSelectors = {
    productURL: "a@href | parseProductCode | makeProductUrl",
    imageURL: "img@data-src",
    productName: ".items-box-name",
    price: ".items-box-price | priceFind | parseInt",
    productCode: "a@href | parseProductCode",
  }

  const items: Item[] = await x(proxiedSearchURL, ".items-box", [
    itemSelectors,
  ])
    .paginate(".items-box a@href | proxy")
    .limit(options.maxPages)
  return items
}
/** One scraped listing, as produced by the selectors in `search`. */
type Item = {
// Item page URL rebuilt from the product code via the `makeProductUrl` filter.
productURL: string
// Value of the listing image's `data-src` attribute.
imageURL: string
// Text content of `.items-box-name`.
productName: string
// Digits of `.items-box-price` converted to a number (presumably JPY; confirm).
price: number
// Code captured from the `/jp/items/<code>/` segment of the listing link.
productCode: string
}
/**
 * Wraps a URL so that it is fetched through a randomly selected Google
 * gadget proxy.
 *
 * The target is percent-encoded with `encodeURIComponent` so it survives
 * intact as the proxy's single `url` query value. The deprecated `escape()`
 * used previously left `+`, `/` and `@` unencoded and turned non-Latin-1
 * characters (common on a Japanese site) into non-standard `%uXXXX`
 * sequences.
 *
 * @param url Absolute URL of the page to request via the proxy.
 * @returns The proxy prefix followed by the encoded target URL.
 */
const makeProxyURL = (url: string): string => {
  return getRandomGoogleProxyURL() + encodeURIComponent(url)
}
/**
 * Builds the URL prefix of a Google gadget-proxy host chosen with
 * `Math.random()`.
 *
 * The host index is an integer from 0 to 31 inclusive. The returned string
 * ends in `url=`, so the caller appends the encoded target URL directly.
 */
const getRandomGoogleProxyURL = () => {
  const hostCount = 32
  const hostIndex = Math.floor(hostCount * Math.random())
  const host = `images${hostIndex}-focus-opensocial.googleusercontent.com`
  return `https://${host}/gadgets/proxy?container=none&url=`
}