@plust/datasleuth
Version:
Build LLM-powered research pipelines and output structured data.
217 lines • 8.85 kB
JavaScript
/** Confidence assumed for a track whose metadata does not report one. */
const DEFAULT_TRACK_CONFIDENCE = 0.5;

/**
 * Builds the metadata record that accompanies one track value during
 * conflict resolution.
 *
 * @param {string} trackName Key of the track inside the `tracks` object
 * @param {{metadata?: {confidence?: number, completedAt?: *}}} track Track the value came from
 * @returns {{trackName: string, confidence: number, timestamp: *}} Fresh metadata record
 */
function describeTrack(trackName, track) {
    return {
        trackName,
        // `??` instead of `||`: an explicitly reported confidence of 0 must stay 0
        // rather than being promoted to the default.
        confidence: track.metadata?.confidence ?? DEFAULT_TRACK_CONFIDENCE,
        timestamp: track.metadata?.completedAt,
    };
}

/**
 * Appends a value and its metadata to the bucket stored under `key`,
 * creating the bucket on first use.
 *
 * @param {Map<string, {values: Array, metadata: Array}>} buckets Buckets keyed by data key / result type
 * @param {string} key Bucket to append to
 * @param {*} value Value contributed by a track
 * @param {Object} meta Metadata describing the contributing track
 */
function addToBucket(buckets, key, value, meta) {
    let bucket = buckets.get(key);
    if (!bucket) {
        bucket = { values: [], metadata: [] };
        buckets.set(key, bucket);
    }
    bucket.values.push(value);
    bucket.metadata.push(meta);
}

/**
 * Result merger utility to combine results from multiple research tracks.
 *
 * All members are static; the class is used as a namespace.
 */
export class ResultMerger {
    /**
     * Merges the `data` objects of multiple tracks into one object.
     *
     * Every own key found in any track's `data` (except `'tracks'`, which is
     * skipped) is resolved across the tracks that define it via
     * {@link ResultMerger.resolveConflict}. Tracks that are null/undefined or
     * have no `data` are ignored.
     *
     * @param {Object<string, Object>} tracks Track results to merge, keyed by track name
     * @param {Object} [options] Conflict resolution options (defaults to the `'last'` strategy)
     * @returns {Object} Merged data object
     */
    static mergeTrackData(tracks, options = { strategy: 'last' }) {
        // A Map keeps first-seen key order and cannot collide with
        // Object.prototype members the way a plain object lookup can.
        const buckets = new Map();
        for (const [trackName, track] of Object.entries(tracks ?? {})) {
            const data = track?.data;
            if (!data) {
                continue;
            }
            // Object.keys yields own keys only, so inherited properties are never merged.
            for (const key of Object.keys(data)) {
                // The 'tracks' key is handled specially elsewhere and is not merged here.
                if (key === 'tracks') {
                    continue;
                }
                addToBucket(buckets, key, data[key], describeTrack(trackName, track));
            }
        }
        const merged = {};
        for (const [key, { values, metadata }] of buckets) {
            merged[key] = ResultMerger.resolveConflict(values, metadata, options);
        }
        return merged;
    }

    /**
     * Merges the `results` arrays of multiple tracks into one object.
     *
     * Each result item is categorised by its first own key (its "type"); only
     * the value stored under that first key is used. Items that are not
     * objects, or that have no keys, are skipped. Values of the same type are
     * resolved via {@link ResultMerger.resolveConflict}.
     *
     * @param {Object<string, Object>} tracks Track results to merge, keyed by track name
     * @param {*} state Current research state (accepted for interface compatibility; not read here)
     * @param {Object} [options] Conflict resolution options (defaults to the `'mostConfident'` strategy)
     * @returns {Object} Merged results object, keyed by result type
     */
    static mergeTrackResults(tracks, state, options = { strategy: 'mostConfident' }) {
        // Map instead of a plain object: a result type such as 'constructor'
        // would otherwise be mistaken for an existing bucket.
        const buckets = new Map();
        for (const [trackName, track] of Object.entries(tracks ?? {})) {
            const results = track?.results;
            if (!Array.isArray(results)) {
                continue;
            }
            for (const result of results) {
                if (result === null || typeof result !== 'object') {
                    continue;
                }
                const [type] = Object.keys(result);
                if (type === undefined) {
                    continue;
                }
                addToBucket(buckets, type, result[type], describeTrack(trackName, track));
            }
        }
        const mergedResults = {};
        for (const [type, { values, metadata }] of buckets) {
            // resolveConflict returns a lone value unchanged, so no special case is needed.
            mergedResults[type] = ResultMerger.resolveConflict(values, metadata, options);
        }
        return mergedResults;
    }

    /**
     * Resolves conflicts between multiple values using the specified strategy.
     *
     * Strategies:
     * - `'first'` / `'last'`: positional pick.
     * - `'mostConfident'`: value with the highest confidence (from
     *   `confidenceExtractor(meta)` if given, else `meta.confidence`); ties go
     *   to the earliest value.
     * - `'majority'`: most frequent value, compared by `JSON.stringify`; ties
     *   go to the value seen first.
     * - `'weighted'`: if the first value is a number, the weighted mean using
     *   `weights[trackName]` (missing weights count as 1); otherwise the value
     *   from the highest-weighted track.
     * - `'custom'`: delegates to `customResolver(values, metadata)`.
     * - anything else: the last value.
     *
     * @param {Array} values Array of values to resolve
     * @param {Array<Object>} metadata Metadata for each value (same order as `values`)
     * @param {Object} [options] Conflict resolution options
     * @param {string} [options.strategy] Strategy name
     * @param {Object<string, number>} [options.weights] Per-track weights for `'weighted'`
     * @param {Function} [options.customResolver] Resolver for `'custom'`
     * @param {Function} [options.confidenceExtractor] Maps a metadata record to a confidence
     * @returns {*} Resolved value, or `undefined` when `values` is empty
     * @throws {Error} If `'weighted'` is used without weights or `'custom'` without a resolver
     */
    static resolveConflict(values, metadata, options) {
        const { strategy, weights, customResolver, confidenceExtractor } = options ?? {};
        if (values.length === 0) {
            return undefined;
        }
        if (values.length === 1) {
            return values[0];
        }
        switch (strategy) {
            case 'first':
                return values[0];
            case 'last':
                return values[values.length - 1];
            case 'mostConfident': {
                // `|| 0` is deliberate here: a missing, zero or NaN confidence all rank as 0.
                const confidenceOf = typeof confidenceExtractor === 'function'
                    ? confidenceExtractor
                    : (meta) => meta.confidence || 0;
                let winner = values[0];
                let winnerConfidence = confidenceOf(metadata[0]);
                for (let i = 1; i < values.length; i++) {
                    const confidence = confidenceOf(metadata[i]);
                    // Strict comparison keeps the earliest value on ties.
                    if (confidence > winnerConfidence) {
                        winner = values[i];
                        winnerConfidence = confidence;
                    }
                }
                return winner;
            }
            case 'majority': {
                // Values are compared by their JSON serialisation, so objects with the
                // same properties in a different order count as different values.
                const counts = new Map();
                for (const value of values) {
                    const key = JSON.stringify(value);
                    if (!counts.has(key)) {
                        counts.set(key, { count: 0, value });
                    }
                    counts.get(key).count++;
                }
                let maxCount = 0;
                let maxValue;
                for (const { count, value } of counts.values()) {
                    if (count > maxCount) {
                        maxCount = count;
                        maxValue = value;
                    }
                }
                return maxValue;
            }
            case 'weighted': {
                if (!weights) {
                    throw new Error('Weights required for weighted conflict resolution strategy');
                }
                // `??` instead of `||`: an explicit weight of 0 excludes a track
                // instead of silently counting as 1.
                const weightAt = (i) => weights[metadata[i].trackName] ?? 1;
                // Averaging only makes sense for numeric values.
                if (typeof values[0] === 'number') {
                    let weightedSum = 0;
                    let totalWeight = 0;
                    values.forEach((value, i) => {
                        const weight = weightAt(i);
                        weightedSum += value * weight;
                        totalWeight += weight;
                    });
                    if (totalWeight === 0) {
                        // No usable weights: fall back to the plain mean rather than dividing by zero.
                        return values.reduce((sum, value) => sum + value, 0) / values.length;
                    }
                    return weightedSum / totalWeight;
                }
                // For non-numeric values, use the value with the highest weight
                // (earliest value wins ties).
                let highestWeightValue;
                let highestWeight = -Infinity;
                values.forEach((value, i) => {
                    const weight = weightAt(i);
                    if (weight > highestWeight) {
                        highestWeight = weight;
                        highestWeightValue = value;
                    }
                });
                return highestWeightValue;
            }
            case 'custom':
                if (!customResolver) {
                    throw new Error('Custom resolver required for custom conflict resolution strategy');
                }
                return customResolver(values, metadata);
            default:
                // Unknown or missing strategy: default to the last value.
                return values[values.length - 1];
        }
    }

    /**
     * Creates a merge function for use with parallel research.
     *
     * The returned function merges both track data and track results with the
     * same options and stamps the output with merge metadata.
     *
     * @param {Object} [options] Conflict resolution options (defaults to the `'mostConfident'` strategy)
     * @returns {Function} `(tracks, state) => { data, results, metadata }`
     */
    static createMergeFunction(options = { strategy: 'mostConfident' }) {
        return function (tracks, state) {
            const mergedData = ResultMerger.mergeTrackData(tracks, options);
            const mergedResults = ResultMerger.mergeTrackResults(tracks, state, options);
            return {
                data: mergedData,
                results: mergedResults,
                metadata: {
                    mergeStrategy: options.strategy,
                    tracksCount: Object.keys(tracks).length,
                    mergedAt: new Date().toISOString(),
                },
            };
        };
    }
}
//# sourceMappingURL=merge.js.map