UNPKG

focus-product-extractor2

Version:

Extract product information from chat/order data

133 lines (128 loc) 4.88 kB
"use strict";

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.default = exports.RuleMatcher = void 0;

/**
 * Looks up the pattern list registered for `platform` as an OWN property of
 * `rules`, so inherited names such as "constructor" or "toString" are never
 * mistaken for a platform key.
 *
 * @param {Object<string, RegExp[]>} rules - Platform key to pattern list.
 * @param {string} platform - Platform key to look up.
 * @returns {RegExp[]|undefined} The registered patterns, or undefined.
 */
function getPlatformPatterns(rules, platform) {
  return Object.prototype.hasOwnProperty.call(rules, platform) ? rules[platform] : undefined;
}

/**
 * Returns `flags` with the global flag guaranteed to be present.
 * `String.prototype.matchAll` throws a TypeError for a non-global RegExp.
 *
 * @param {string} flags - RegExp flag string.
 * @returns {string} The same flags, with "g" appended when it was missing.
 */
function withGlobalFlag(flags) {
  return flags.includes('g') ? flags : `${flags}g`;
}

/**
 * Extracts product / SKU identifiers from URLs and free text using
 * per-platform regular expressions, with optional caller-supplied rules.
 */
class RuleMatcher {
  constructor() {
    // Platform key -> ordered list of patterns. In every pattern the first
    // capture group is the product ID. Order matters: extractSkuIdFromUrl
    // returns the first pattern that matches.
    this.builtInRules = {
      jd: [
        /item\.jd\.com\/(\d+)\.html/ig,
        /item\.m\.jd\.com\/product\/(\d+)\.html/ig
      ],
      tb: [
        // Taobao product detail page
        /item\.taobao\.com\/item\.htm\?id=(\d+)/ig,
        // Tmall product detail page
        /detail\.tmall\.com\/item\.htm\?id=(\d+)/ig,
        // Generic rule - matches the id query parameter
        /[?&]id=(\d+)/ig,
        // Short-link format
        /taobao\.com\/item\/\?id=(\d+)/ig,
        // URLs that carry other parameters before id
        /(?:item|detail)\.(?:taobao|tmall)\.com\/item\.htm\?.*\bid=(\d+)/ig
      ],
      pdd: [
        // The various PDD product URL formats
        /(?:yangkeduo\.com|pinduoduo\.com)\/group\.html\?goods_id=(\d+)/ig,
        /(?:m|mobile)\.(?:yangkeduo|pinduoduo)\.com\/goods\.html\?goods_id=(\d+)/ig,
        /(?:www\.)?(?:yangkeduo|pinduoduo)\.com\/goods\/\?goods_id=(\d+)/ig,
        // More generic rule - matches the goods_id query parameter
        /[?&]goods_id=(\d+)/ig,
        // Short-link format
        /(?:yangkeduo\.com|pinduoduo\.com)\/g\/(\d+)/ig
      ],
      // Douyin rules
      dy: [
        // Douyin e-commerce product detail page URL
        /haohuo\.jinritemai\.com\/ecommerce\/trade\/detail\/index\.html\?id=(\d+)/ig,
        // Douyin e-commerce mobile URL
        /haohuo\.jinritemai\.com\/m\/trade\/detail\?id=(\d+)/ig,
        // Generic rule
        /[?&]id=(\d+)/ig
      ],
      ks: [
        // Kuaishou e-commerce product detail page URL
        /kwaishop\.com\/merchant\/shop\/detail\?id=(\d+)/ig,
        // Kuaishou e-commerce mobile URL
        /kwaishop\.com\/m\/shop\/detail\?id=(\d+)/ig,
        // Generic rule
        /[?&]id=(\d+)/ig
      ],
      xhs: [
        // Xiaohongshu product detail page URL
        /xiaohongshu\.com\/goods-detail\/([a-f0-9]{24})/ig,
        // URL followed by query parameters
        /xiaohongshu\.com\/goods-detail\/([a-f0-9]{24})\?/ig,
        // Generic ID rule (any run of 24 hex characters)
        /([a-f0-9]{24})/ig
      ],
      lazada: [
        // Make sure domains such as .co.th and .com are matched
        /lazada\.[a-z]{2,3}(?:\.[a-z]{2})?\/(?:products\/)?[^\/]+-i(\d+)(?:-s\d+)?/ig,
        /lazada\.[a-z]{2,3}(?:\.[a-z]{2})?\/products\/[^\/]+\/i(\d+)(?:-s\d+)?/ig,
        // Looser rule
        /lazada[^\/]+\/[^\/]+-i(\d+)/ig
      ],
      shopee: [
        // Loosely match the product-name part of the path
        /shopee\.[a-z]{2,3}\/(?:[^?]+\/)?[^?]+i\.\d+\.(\d+)(?:\?|$)/ig,
        /xiapibuy\.com\/[^\/]+-i\.\d+\.(\d+)/ig,
        // Simple rule
        /i\.\d+\.(\d+)/ig
      ],
      tiktok: [
        // TikTok product ID
        /tiktok\.com\/view\/product\/(\d+)/ig
      ]
    };
  }

  /**
   * Extracts a single SKU ID from a URL.
   *
   * The built-in patterns of `platform` are tried in order and the first
   * capture group of the first match wins. If none match (or the platform is
   * unknown) a generic `sku=<digits>` / `sku:<digits>` parameter is tried.
   *
   * @param {string} url - URL to inspect.
   * @param {string} platform - Key into `builtInRules` (e.g. "jd", "tb").
   * @returns {string|null} The extracted ID, or null when `url` is empty,
   *   not a string, or nothing matches.
   */
  extractSkuIdFromUrl(url, platform) {
    if (!url || typeof url !== 'string') return null;

    // Try the rules of the requested platform first.
    const platformPatterns = getPlatformPatterns(this.builtInRules, platform);
    if (platformPatterns) {
      for (const pattern of platformPatterns) {
        // matchAll works on a clone of the regex, so the shared /g literals
        // never carry lastIndex state between calls.
        const [firstMatch] = url.matchAll(pattern);
        if (firstMatch) return firstMatch[1];
      }
    }

    // Generic skuId fallback.
    const generalPattern = /[?&;]sku[=:]([\d]+)/ig;
    const [generalMatch] = url.matchAll(generalPattern);
    return generalMatch ? generalMatch[1] : null;
  }

  /**
   * Finds product references in `text`.
   *
   * Custom rules take priority: if any of them matches, only the custom
   * matches are returned (as detail objects). Otherwise the built-in rules of
   * `platform` are applied and the captured IDs are returned as strings.
   * Built-in patterns overlap, so the same ID may appear more than once.
   *
   * @param {string} text - Text to scan.
   * @param {string} platform - Key into `builtInRules`.
   * @param {Array<{pattern: (string|RegExp), flags: (string|undefined)}>} [customRules=[]]
   *   Caller-supplied rules. The global flag is always applied when matching.
   * @returns {Array<Object>|Array<string>} Custom-match objects of the form
   *   `{ matchedText, metadata: { pattern, flags, groups, text } }`, or the
   *   IDs captured by the built-in rules, or an empty array.
   */
  match(text, platform, customRules = []) {
    // Nothing can be matched in a non-string; avoid a TypeError from matchAll.
    if (typeof text !== 'string') return [];

    // Custom rules are tried first.
    if (Array.isArray(customRules) && customRules.length > 0) {
      const customMatches = [];
      for (const rule of customRules) {
        try {
          // Use the supplied flags (default 'g'), forcing the global flag so
          // that a rule such as { flags: 'i' } is not rejected by matchAll.
          const regex = new RegExp(rule.pattern, withGlobalFlag(rule.flags || 'g'));
          const found = [...text.matchAll(regex)];
          for (const hit of found) {
            customMatches.push({
              // The matched text
              matchedText: hit[0],
              metadata: {
                pattern: rule.pattern,
                // Reported exactly as the caller supplied them.
                flags: rule.flags,
                // Capture groups, starting from group 1
                groups: hit.slice(1),
                // The original text
                text: text
              }
            });
          }
        } catch (error) {
          // Best effort: a broken rule is reported and skipped, not fatal.
          console.error('Invalid custom rule:', rule, error);
        }
      }
      if (customMatches.length > 0) return customMatches;
    }

    // Built-in rules.
    const platformPatterns = getPlatformPatterns(this.builtInRules, platform);
    if (platformPatterns) {
      const results = [];
      for (const pattern of platformPatterns) {
        for (const hit of text.matchAll(pattern)) {
          // Capture group 1 is the SKU ID.
          results.push(hit[1]);
        }
      }
      return results;
    }
    return [];
  }
}

exports.RuleMatcher = RuleMatcher;
var _default = exports.default = new RuleMatcher();