guessit-js
Version:
GuessIt JS (WASM) - Extract metadata from video filenames with WebAssembly performance
663 lines (581 loc) • 22.8 kB
JavaScript
/**
* JavaScript implementation of Rebulk pattern matching engine
* Simplified version focusing on the core pattern matching functionality
*/
/**
* Represents a single match found in the input string
*/
export class Match {
constructor(start, end, value = null, name = null, options = {}) {
this.start = start;
this.end = end;
this.value = value !== null ? value : '';
this.name = name;
this.tags = options.tags || [];
this.private = options.private || false;
this.children = options.children || [];
this.parent = options.parent || null;
this.raw = options.raw || '';
this.initiator = options.initiator || null;
this.formatter = options.formatter || null;
this.validator = options.validator || null;
}
/**
* Get the span (start, end) of this match
*/
get span() {
return [this.start, this.end];
}
/**
* Get the length of this match
*/
get length() {
return this.end - this.start;
}
/**
* Apply formatter to the match value
*/
format() {
if (this.formatter && typeof this.formatter === 'function') {
try {
this.value = this.formatter(this.value);
} catch (error) {
// If formatting fails, keep original value
console.warn('Formatting failed for match:', this, error);
}
}
}
/**
* Validate the match
*/
validate() {
if (this.validator && typeof this.validator === 'function') {
try {
return this.validator(this);
} catch (error) {
console.warn('Validation failed for match:', this, error);
return false;
}
}
return true;
}
/**
* Split this match using separators
*/
split(separators, valueFunction = null) {
const parts = [];
let currentStart = this.start;
for (let i = this.start; i < this.end; i++) {
const char = this.raw[i - this.start];
if (separators.includes(char)) {
if (currentStart < i) {
const part = new Match(currentStart, i);
part.raw = this.raw.slice(currentStart - this.start, i - this.start);
part.value = valueFunction ? valueFunction(part) : part.raw;
parts.push(part);
}
currentStart = i + 1;
}
}
// Add final part
if (currentStart < this.end) {
const part = new Match(currentStart, this.end);
part.raw = this.raw.slice(currentStart - this.start);
part.value = valueFunction ? valueFunction(part) : part.raw;
parts.push(part);
}
return parts;
}
}
/**
* Collection of matches with utility methods
*/
export class Matches {
constructor(inputString = '') {
this.inputString = inputString;
this.matches = [];
this.markers = new Markers();
}
/**
* Add a match to the collection
*/
add(match) {
if (match instanceof Match) {
this.matches.push(match);
}
}
/**
* Get matches by name
*/
named(name, predicate = null) {
const filtered = this.matches.filter(match => match.name === name);
return predicate ? filtered.filter(predicate) : filtered;
}
/**
* Get matches with specific tags
*/
tagged(tag, predicate = null) {
const filtered = this.matches.filter(match => match.tags.includes(tag));
return predicate ? filtered.filter(predicate) : filtered;
}
/**
* Get matches in a specific range
*/
range(start, end, predicate = null, index = null) {
let filtered = this.matches.filter(match =>
match.start >= start && match.end <= end
);
if (predicate) {
filtered = filtered.filter(predicate);
}
if (index !== null) {
return filtered[index] || null;
}
return filtered;
}
/**
* Get previous match
*/
previous(match, predicate = null, index = 0) {
let candidates = this.matches.filter(m => m.end <= match.start);
if (predicate) {
candidates = candidates.filter(predicate);
}
candidates.sort((a, b) => b.end - a.end); // Sort by end position descending
return candidates[index] || null;
}
/**
* Get next match
*/
next(match, predicate = null, index = 0) {
let candidates = this.matches.filter(m => m.start >= match.end);
if (predicate) {
candidates = candidates.filter(predicate);
}
candidates.sort((a, b) => a.start - b.start); // Sort by start position ascending
return candidates[index] || null;
}
/**
* Find holes (unmatched parts) in the input string
*/
holes(start, end, options = {}) {
const holes = [];
const rangeMatches = this.range(start, end).sort((a, b) => a.start - b.start);
let currentPos = start;
for (const match of rangeMatches) {
if (match.start > currentPos) {
const hole = new Match(currentPos, match.start);
hole.raw = this.inputString.slice(currentPos, match.start);
hole.value = hole.raw;
holes.push(hole);
}
currentPos = Math.max(currentPos, match.end);
}
// Final hole
if (currentPos < end) {
const hole = new Match(currentPos, end);
hole.raw = this.inputString.slice(currentPos, end);
hole.value = hole.raw;
holes.push(hole);
}
return holes;
}
/**
* Convert matches to dictionary format
*/
toDict(advanced = false, singleValue = false, enforceList = false) {
const result = {};
const propertyGroups = {};
const matchesWithPriority = {};
// Group matches by property name with priority tracking
for (const match of this.matches) {
if (match.private) continue;
if (!propertyGroups[match.name]) {
propertyGroups[match.name] = [];
matchesWithPriority[match.name] = [];
}
propertyGroups[match.name].push(match.value);
matchesWithPriority[match.name].push(match);
}
// Process each property
for (const [property, values] of Object.entries(propertyGroups)) {
let uniqueValues = [...new Set(values)];
// Apply priority logic for specific properties
if (property === 'season' && uniqueValues.length > 1) {
// Prioritize season from filename over directory
const matches = matchesWithPriority[property];
const filenameMatches = matches.filter(m =>
m.tags && (m.tags.includes('season-SxE') || m.tags.includes('season-episode'))
);
if (filenameMatches.length > 0) {
uniqueValues = [filenameMatches[0].value];
} else {
// If no explicit filename matches, prefer the higher number (more specific)
const sortedValues = uniqueValues.sort((a, b) => b - a);
uniqueValues = [sortedValues[0]];
}
}
if (property === 'title' && Array.isArray(uniqueValues) && uniqueValues.length > 1) {
// For title, prioritize matches from beginning of filename
const matches = matchesWithPriority[property];
const titleMatches = matches.filter(m =>
m.tags && (m.tags.includes('title') || m.tags.includes('title-unicode'))
);
if (titleMatches.length > 0) {
// Join multiple title parts if they form a coherent title
const titleParts = titleMatches.map(m => m.value).filter(v => v && v.trim());
if (titleParts.length > 0) {
uniqueValues = [titleParts.join(' ')];
}
} else {
// Join array elements for title
uniqueValues = [uniqueValues.join(' ')];
}
}
if (singleValue && uniqueValues.length > 0) {
result[property] = uniqueValues[0];
} else if (enforceList || uniqueValues.length > 1) {
result[property] = uniqueValues;
} else if (uniqueValues.length === 1) {
result[property] = uniqueValues[0];
}
}
// Post-process: flatten season_episode objects into separate season and episode properties
if (result.season_episode && typeof result.season_episode === 'object') {
const seasonEpisode = result.season_episode;
if (seasonEpisode.season !== undefined) {
result.season = seasonEpisode.season;
}
if (seasonEpisode.episode !== undefined) {
result.episode = seasonEpisode.episode;
}
// Keep the season_episode object as well for backwards compatibility
}
return result;
}
}
/**
* Markers for structural elements like groups and paths
*/
export class Markers {
constructor() {
this.markerList = [];
}
/**
* Add a marker
*/
add(marker) {
this.markerList.push(marker);
}
/**
* Get markers by name
*/
named(name) {
return this.markerList.filter(marker => marker.name === name);
}
/**
* Get marker at specific match position
*/
atMatch(match, predicate = null, index = 0) {
let candidates = this.markerList.filter(marker =>
marker.start <= match.start && marker.end >= match.end
);
if (predicate) {
candidates = candidates.filter(predicate);
}
return candidates[index] || null;
}
/**
* Get markers starting at position
*/
starting(position, predicate = null) {
let candidates = this.markerList.filter(marker => marker.start === position);
if (predicate) {
candidates = candidates.filter(predicate);
}
return candidates;
}
}
/**
* Rule for pattern matching
*/
export class Rule {
constructor(pattern, options = {}) {
this.pattern = pattern;
this.name = options.name || null;
this.value = options.value || null;
this.tags = options.tags || [];
this.formatter = options.formatter || null;
this.validator = options.validator || null;
this.private = options.private || false;
this.children = options.children || false;
this.conflictSolver = options.conflictSolver || null;
}
/**
* Apply this rule to input string
*/
apply(inputString, matches, options = {}) {
let regex;
if (this.pattern instanceof RegExp) {
regex = this.pattern;
} else if (typeof this.pattern === 'string') {
// Handle case insensitive matching
const flags = options.ignoreCase ? 'gi' : 'g';
regex = new RegExp(this.pattern, flags);
} else {
return [];
}
// Debug logging (remove in production)
const isDebugging = (typeof process !== 'undefined' && process.env && process.env.DEBUG_RULES === 'true') || false;
if (isDebugging && this.name === 'container') {
console.log(`[DEBUG] Applying ${this.name} rule with pattern ${regex} to "${inputString}"`);
}
const newMatches = [];
let match;
let lastIndex = 0;
let iterations = 0;
const maxIterations = 1000; // Prevent infinite loops
while ((match = regex.exec(inputString)) !== null && iterations < maxIterations) {
iterations++;
// Prevent infinite loop on zero-length matches
if (match.index === lastIndex && match[0].length === 0) {
regex.lastIndex = lastIndex + 1;
continue;
}
lastIndex = match.index + match[0].length;
const matchObj = new Match(
match.index,
match.index + match[0].length,
this.value || match[0],
this.name,
{
tags: [...this.tags],
private: this.private,
raw: match[0],
formatter: this.formatter,
validator: this.validator
}
);
// Debug logging
if (isDebugging && this.name === 'container' && match) {
console.log(`[DEBUG] Found match: ${JSON.stringify(match)} -> matchObj: ${JSON.stringify({start: matchObj.start, end: matchObj.end, name: matchObj.name, value: matchObj.value})}`);
}
// Apply formatting
matchObj.format();
// Validate
const isValid = matchObj.validate();
if (isDebugging && this.name === 'container' && match) {
console.log(`[DEBUG] Validation result: ${isValid}`);
}
if (isValid) {
newMatches.push(matchObj);
if (isDebugging && this.name === 'container') {
console.log(`[DEBUG] Added match to newMatches, total: ${newMatches.length}`);
}
}
// If regex doesn't have global flag, break after first match
if (!regex.global) {
break;
}
}
if (isDebugging && this.name === 'container') {
console.log(`[DEBUG] Returning ${newMatches.length} matches from ${this.name} rule`);
}
return newMatches;
}
}
/**
* Main Rebulk class - coordinates pattern matching
*/
export class Rebulk {
constructor(options = {}) {
this.rules = [];
this.options = {
ignoreCase: options.ignoreCase || false,
...options
};
}
/**
* Add rules to this Rebulk instance
*/
addRules(rules) {
if (Array.isArray(rules)) {
this.rules.push(...rules);
} else if (rules) {
this.rules.push(rules);
}
}
/**
* Add a string pattern rule
*/
string(pattern, options = {}) {
const rule = new Rule(pattern, options);
this.rules.push(rule);
return this;
}
/**
* Add a regex pattern rule
*/
regex(pattern, options = {}) {
const rule = new Rule(new RegExp(pattern, this.options.ignoreCase ? 'gi' : 'g'), options);
this.rules.push(rule);
return this;
}
/**
* Apply all rules to input string and return matches
*/
matches(inputString, options = {}) {
const matches = new Matches(inputString);
const mergedOptions = { ...this.options, ...options };
// Add path markers (simplified)
this.addPathMarkers(matches, inputString);
// Apply all rules
for (const rule of this.rules) {
const ruleMatches = rule.apply(inputString, matches, mergedOptions);
const isDebugging = (typeof process !== 'undefined' && process.env && process.env.DEBUG_RULES === 'true') || false;
if (isDebugging && rule.name === 'container' && ruleMatches.length > 0) {
console.log(`[DEBUG] Rule ${rule.name} returned ${ruleMatches.length} matches`);
}
for (const match of ruleMatches) {
matches.add(match);
if (isDebugging && rule.name === 'container') {
console.log(`[DEBUG] Added match to collection, total matches: ${matches.matches.length}`);
}
}
}
// Post-process matches (remove conflicts, apply final formatting, etc.)
this.postProcessMatches(matches);
return matches;
}
/**
* Add basic path markers for file structure
*/
addPathMarkers(matches, inputString) {
// Split by common path separators and file extensions
const pathSeparators = /[\/\\]/g;
const parts = inputString.split(pathSeparators);
let currentPos = 0;
for (let i = 0; i < parts.length; i++) {
const part = parts[i];
if (part.length > 0) {
const marker = new Match(currentPos, currentPos + part.length);
marker.name = 'path';
marker.private = true;
matches.markers.add(marker);
}
currentPos += part.length + 1; // +1 for separator
}
}
/**
* Post-process matches to resolve conflicts and clean up
*/
postProcessMatches(matches) {
const isDebugging = (typeof process !== 'undefined' && process.env && process.env.DEBUG_RULES === 'true') || false;
if (isDebugging) {
console.log(`[DEBUG] Post-processing ${matches.matches.length} matches`);
matches.matches.forEach((match, i) => {
console.log(`[DEBUG] ${i}: ${match.start}-${match.end} "${match.name}": "${match.value}" (private: ${match.private})`);
});
}
// Separate private and non-private matches
const privateMatches = matches.matches.filter(m => m.private);
const publicMatches = matches.matches.filter(m => !m.private);
if (isDebugging) {
console.log(`[DEBUG] Separated into ${privateMatches.length} private and ${publicMatches.length} public matches`);
}
// Only resolve conflicts among non-private matches
// Sort matches by start position
publicMatches.sort((a, b) => a.start - b.start || (b.end - b.start) - (a.end - a.start));
// Smart conflict resolution - prioritize specific matches over generic ones
const getMatchPriority = (match) => {
// Higher number = higher priority
const priorities = {
'container': 100,
'video_codec': 90,
'audio_codec': 90,
'source': 80,
'screen_size': 80,
'year': 70,
'episode': 60,
'season': 60,
'title': 10, // Title should have low priority as it's often very broad
'cleanup': 5,
'path': 1
};
return priorities[match.name] || 50; // Default priority for unknown types
};
const filtered = [];
for (const match of publicMatches) {
const overlapping = filtered.filter(existing =>
!(match.end <= existing.start || match.start >= existing.end)
);
if (overlapping.length === 0) {
filtered.push(match);
if (isDebugging) {
console.log(`[DEBUG] Keeping non-overlapping match: ${match.name} (${match.start}-${match.end})`);
}
} else {
if (isDebugging) {
console.log(`[DEBUG] Found ${overlapping.length} overlapping matches for ${match.name} (${match.start}-${match.end})`);
}
const currentPriority = getMatchPriority(match);
let shouldReplace = false;
let toReplace = [];
for (const existing of overlapping) {
const existingPriority = getMatchPriority(existing);
if (currentPriority > existingPriority) {
shouldReplace = true;
toReplace.push(existing);
} else if (currentPriority === existingPriority && match.length > existing.length) {
// Same priority, prefer longer match
shouldReplace = true;
toReplace.push(existing);
}
}
if (shouldReplace) {
// Remove all overlapping matches with lower priority
for (const existing of toReplace) {
const index = filtered.indexOf(existing);
if (index !== -1) {
filtered.splice(index, 1);
}
}
filtered.push(match);
if (isDebugging) {
console.log(`[DEBUG] Replaced ${toReplace.length} lower priority matches with ${match.name} (priority: ${currentPriority})`);
}
} else {
if (isDebugging) {
console.log(`[DEBUG] Discarding ${match.name} (priority: ${currentPriority}) in favor of higher priority matches`);
}
}
}
}
// Combine filtered public matches with all private matches
const finalMatches = [...filtered, ...privateMatches];
if (isDebugging) {
console.log(`[DEBUG] After post-processing: ${finalMatches.length} matches (${filtered.length} public + ${privateMatches.length} private)`);
finalMatches.forEach((match, i) => {
console.log(`[DEBUG] ${i}: ${match.start}-${match.end} "${match.name}": "${match.value}" (private: ${match.private})`);
});
}
matches.matches = finalMatches;
}
/**
* Introspect the rebulk to get available properties
*/
introspect(options = {}) {
const properties = {};
for (const rule of this.rules) {
if (rule.name && !rule.private) {
if (!properties[rule.name]) {
properties[rule.name] = new Set();
}
if (rule.value) {
properties[rule.name].add(rule.value);
}
}
}
return { properties };
}
}