focus-product-extractor2
Version:
Extract product information from chat/order data
342 lines (320 loc) • 11.8 kB
JavaScript
"use strict";
// Flag the exports object as transpiled ES-module output. The local
// _interopRequireDefault helper checks this same `__esModule` flag on the
// modules it wraps.
Object.defineProperty(exports, "__esModule", {
value: true
});
// Declare both export slots up front (as undefined); the real values are
// assigned after the class definition at the end of the file.
exports.default = exports.CoreExtractor = void 0;
// lodash: this file uses `omit` (in _createSkuResult).
var _lodash = _interopRequireDefault(require("lodash"));
// Rule matcher: this file calls its `match` and `extractSkuIdFromUrl` methods.
var _ruleMatcher = _interopRequireDefault(require("./ruleMatcher.js"));
// Message constants: this file reads `OVERSEAS_PLATFORMS` and calls
// `.includes(platform)` on it.
var _messageTypes = require("../constants/messageTypes.js");
/**
 * Normalizes the result of `require()` so callers can always read `.default`.
 *
 * A module already flagged with `__esModule` is returned untouched; any other
 * value (including null/undefined) is wrapped as `{ default: value }`.
 *
 * @param {*} mod - Value returned by `require()`.
 * @returns {*} `mod` itself, or a `{ default: mod }` wrapper.
 */
function _interopRequireDefault(mod) {
  if (mod && mod.__esModule) {
    return mod;
  }
  return { default: mod };
}
/**
 * Extracts product identifiers (SKU / SPU / order ids) or custom-rule matches
 * from a list of scene-tagged records.
 *
 * Only records whose `scene` is 'chat' are processed today.
 * TODO: 'order' scene records are recognised but ignored — no order extractor
 * is implemented yet.
 */
class CoreExtractor {
  constructor() {
    this.ruleMatcher = _ruleMatcher.default;
  }

  /**
   * Runs extraction over every record in `data`.
   *
   * @param {Array<Object>} data - Records to scan. Anything that is not an
   *   array is treated as empty; null/undefined entries are skipped.
   * @param {Object} [rules] - Optional settings (null is treated like `{}`).
   * @param {string[]} [rules.sources] - Allowed `sender` values; defaults to
   *   ['buyer', 'service', 'system'].
   * @param {Array} [rules.customRules] - Custom rules handed to the rule
   *   matcher; defaults to [].
   * @param {string} [scene] - When given, only records with this `scene` are
   *   considered.
   * @returns {Array<Object>} Results that carry a `goodsId` or a defined
   *   `matchedText`. When custom rules are supplied and at least one result has
   *   a truthy `matchedText`, only those custom-rule results are returned.
   */
  extract(data, rules = {}, scene) {
    // The default parameter only covers `undefined`; guard against null too.
    const settings = rules || {};
    const sources = settings.sources || ['buyer', 'service', 'system'];
    const customRules = settings.customRules || [];
    const records = Array.isArray(data) ? data : [];

    const filteredResults = records
      .flatMap(item => {
        if (!item) return [];
        // Not read anywhere in this class; kept in case external code
        // inspects it on the instance.
        this._currentTimestamp = item.timestamp;
        if (scene && item.scene !== scene) return [];
        if (item.scene === 'chat') {
          return this._extractFromChat(item, sources, customRules);
        }
        return [];
      })
      .filter(result => result && (result.goodsId || result.matchedText !== undefined));

    // With custom rules in play, custom matches take precedence over
    // built-in id results.
    if (customRules.length > 0 && filteredResults.some(result => result.matchedText)) {
      return filteredResults.filter(result => result.matchedText);
    }
    return filteredResults;
  }

  /**
   * Extracts results from a single chat record.
   *
   * @param {Object} chatItem - Chat record (reads `sender`, `timestamp`,
   *   `platform`, `messageSubType`, `cardData`, `orderId`, plus whatever
   *   `_getTextFromMessage` reads).
   * @param {string[]} sources - Allowed senders; an empty/missing list
   *   disables sender filtering.
   * @param {Array} customRules - Custom rules for the rule matcher.
   * @returns {Array<Object>|Object} Normally an array of results. For text made
   *   only of digits a single result object is returned instead of an array;
   *   `extract()` handles both shapes because it goes through `flatMap`.
   */
  _extractFromChat(chatItem, sources, customRules) {
    // Sender filter.
    if (sources && sources.length && !sources.includes(chatItem.sender)) return [];

    // customRules must be forwarded: card extraction returns the complete
    // ware URL (instead of an id parsed from it) when custom rules exist.
    const textResult = this._getTextFromMessage(chatItem, customRules);
    if (!textResult) return [];

    // _getTextFromMessage yields either a plain value or a {value, type} pair.
    let text;
    let detectedType;
    if (typeof textResult === 'object' && textResult.value) {
      text = textResult.value;
      detectedType = textResult.type;
    } else {
      text = textResult;
      detectedType = 'sku'; // default type
    }

    // Product name: title of the first card that has one.
    const cards = chatItem.cardData;
    const titledCard = cards == null ? undefined : cards.find(card => card && card.title);
    const productName = titledCard == null ? undefined : titledCard.title;

    const platform = chatItem.platform || '';
    const baseMetadata = {
      text,
      sender: chatItem.sender,
      timestamp: chatItem.timestamp
    };

    // Card message (sub type 4) with custom rules: always answer with a
    // custom-rule style result, falling back to the raw text when no rule hits.
    if (chatItem.messageSubType === 4 && customRules && customRules.length) {
      const cardMetadata = { ...baseMetadata, cardData: chatItem.cardData };
      const cardMatches = this.ruleMatcher.match(text, platform, customRules);
      if (cardMatches[0] != null && cardMatches[0].matchedText) {
        return cardMatches.map(match => ({
          matchedText: match.matchedText,
          metadata: { ...match.metadata, ...cardMetadata }
        }));
      }
      return [{ matchedText: text, metadata: cardMetadata }];
    }

    // Text consisting only of digits is taken as an id of the detected type.
    if (/^\d+$/.test(text)) {
      return this._createSkuResult(text, productName, chatItem, detectedType);
    }

    // Comma-separated digit runs: several ids in one value (seen on overseas
    // order cards). On an overseas platform with an orderId present, every id
    // is tagged as 'order'.
    if (/^[\d,]+$/.test(text)) {
      const isOverseasOrder =
        _messageTypes.OVERSEAS_PLATFORMS.includes(chatItem.platform) && chatItem.orderId;
      const finalType = isOverseasOrder ? 'order' : detectedType;
      const ids = String(text).match(/\d+/g) || [];
      return ids.map(id => this._createSkuResult(id, productName, chatItem, finalType));
    }

    const matches = this.ruleMatcher.match(text, platform, customRules);

    // Custom-rule matches are objects carrying `matchedText`.
    if (matches[0] != null && matches[0].matchedText) {
      return matches.map(match => ({
        matchedText: match.matchedText,
        metadata: { ...match.metadata, ...baseMetadata }
      }));
    }

    // Otherwise the matches are treated as ids from the preset rules; an
    // order card in the message overrides the detected type.
    if (matches.length > 0) {
      const hasOrderCard =
        cards == null ? undefined : cards.some(card => card && card.type === 'order');
      const resultType = hasOrderCard ? 'order' : detectedType;
      return matches.map(skuId => this._createSkuResult(skuId, productName, chatItem, resultType));
    }
    return [];
  }

  /**
   * Builds one id result.
   *
   * @param {*} skuId - The id; a falsy value yields null (filtered out by
   *   `extract`).
   * @param {string} [productName] - Display name; falls back to the id as text.
   * @param {Object} chatItem - Source record, stored as `metadata` without its
   *   `scene` key.
   * @param {string} [type='sku'] - 'sku', 'order' or 'spu'. 'order' and 'spu'
   *   additionally mirror the id into `orderId` / `spuId`.
   * @returns {Object|null} The result object, or null without a valid id.
   */
  _createSkuResult(skuId, productName, chatItem, type = 'sku') {
    if (!skuId) {
      return null;
    }
    const result = {
      goodsId: skuId,
      name: productName || `${skuId}`,
      source: 'chat',
      metadata: _lodash.default.omit(chatItem, ['scene'])
    };
    // Mirror the id into the type-specific field; 'sku' needs only goodsId.
    if (type === 'order') {
      result.orderId = skuId;
    } else if (type === 'spu') {
      result.spuId = skuId;
    }
    result.type = type;
    return result;
  }

  /**
   * Picks the first available id among the three id fields.
   * Precedence is skuId, then orderId, then spuId.
   *
   * @param {*} skuId
   * @param {*} orderId
   * @param {*} spuId
   * @returns {{value: *, type: string}|null} Typed id, or null if all are falsy.
   */
  _pickPrimaryId(skuId, orderId, spuId) {
    if (skuId) return { value: skuId, type: 'sku' };
    if (orderId) return { value: orderId, type: 'order' };
    if (spuId) return { value: spuId, type: 'spu' };
    return null;
  }

  /**
   * Selects the value to analyse for a chat record, based on `messageSubType`.
   *
   * Handled sub types: 1 text, 2 image, 4 card, 6 buyer-entry,
   * 7 transfer-out, 8 transfer-in. Every other sub type yields null.
   *
   * @param {Object} chatItem - Chat record.
   * @param {Array} [customRules] - Custom rules, forwarded to card extraction.
   * @returns {string|{value: *, type: string}|null|undefined} Raw text / image
   *   URL, a typed id, or a nullish value when nothing usable is present.
   */
  _getTextFromMessage(chatItem, customRules) {
    const {
      messageSubType,
      cardData = [],
      skuId,
      spuId,
      orderId,
      transferInfo,
      platform
    } = chatItem;
    switch (messageSubType) {
      case 1:
        // Text message.
        return chatItem.text;
      case 2:
        // Image message.
        return chatItem.imageUrl;
      case 4:
        // Card message.
        return this._extractFromCardData(cardData, skuId, spuId, orderId, platform, customRules);
      case 6:
        // Buyer-entry message: skuId, then orderId, then spuId.
        return this._pickPrimaryId(skuId, orderId, spuId);
      case 7: // Transfer-out message.
      case 8:
        // Transfer-in message.
        return transferInfo != null && transferInfo.platGoodsId
          ? { value: transferInfo.platGoodsId, type: 'sku' }
          : null;
      default:
        return null;
    }
  }

  /**
   * Derives the value to analyse from a card message.
   *
   * @param {Array<Object>} cardData - Cards of the message.
   * @param {*} skuId - Record-level sku id.
   * @param {*} spuId - Record-level spu id.
   * @param {*} orderId - Record-level order id.
   * @param {string} platform - Platform identifier.
   * @param {Array} [customRules] - When non-empty, a ware card's complete
   *   `wareUrl` is returned rather than an id parsed out of it.
   * @returns {{value: *, type: string}|null} Typed value, or null when the
   *   cards provide nothing usable (including when only URLs were collected).
   */
  _extractFromCardData(cardData, skuId, spuId, orderId, platform, customRules) {
    // Overseas platforms: record-level ids win, even with no cards at all
    // (skuId, then orderId, then spuId).
    if (_messageTypes.OVERSEAS_PLATFORMS.includes(platform)) {
      const primary = this._pickPrimaryId(skuId, orderId, spuId);
      if (primary) return primary;
    }
    if (!cardData || !cardData.length) return null;

    const results = [];
    for (const card of cardData) {
      if (!card) continue;
      switch (card.type) {
        case 'ware':
          // A ware card resolves immediately when any id source is available.
          if (skuId) return { value: skuId, type: 'sku' };
          if (spuId) return { value: spuId, type: 'spu' };
          if (card.wareUrl) {
            // Custom rules are matched against the complete URL.
            if (customRules && customRules.length > 0) {
              return { value: card.wareUrl, type: 'sku' };
            }
            // Otherwise try to parse the sku id out of the URL.
            const skuIdFromUrl = this.ruleMatcher.extractSkuIdFromUrl(card.wareUrl, platform);
            if (skuIdFromUrl) return { value: skuIdFromUrl, type: 'sku' };
          }
          break;
        case 'order':
          if (card.wareUrl) results.push(card.wareUrl);
          if (card.imageUrl) results.push(card.imageUrl);
          break;
        case 'header':
          if (card.title) results.push(card.title);
          break;
        case 'content':
          if (card.content) results.push(card.content);
          break;
        case 'img':
          if (card.url) results.push(card.url);
          break;
        case 'info':
          if (card.desc) results.push(card.desc);
          break;
        case 'progress':
          if (card.desc) results.push(card.desc);
          if (card.title) results.push(card.title);
          break;
      }
    }
    if (results.length === 0) return null;

    // If only URLs were collected there is no id to extract. Non-string
    // values (e.g. a numeric desc) count as non-URL rather than throwing.
    const allUrls = results.every(item => typeof item === 'string' && item.includes('http'));
    if (allUrls) return null;

    const hasOrderCard = cardData.some(card => card && card.type === 'order');
    return {
      value: results.join(' '),
      type: hasOrderCard ? 'order' : 'sku'
    };
  }
}
// Named export: the class itself, for callers that want their own instance.
exports.CoreExtractor = CoreExtractor;
// Default export: a single CoreExtractor instance created when this module is
// first evaluated.
var _default = exports.default = new CoreExtractor();