guessit-js
Version:
GuessIt JS (WASM) - Extract metadata from video filenames with WebAssembly performance
1,596 lines (1,383 loc) • 60.1 kB
JavaScript
/**
* GuessIt JS - Bundled Version
* Generated at 2025-07-14T13:28:46.385Z
*/
// === Exceptions Module ===
/**
* Exception classes for GuessIt JS
*/
/**
 * Exception raised when guessit fails to perform a guess because of an internal error.
 *
 * The message is a multi-line report containing the library version, the input
 * string, the serialized options and the stack (or message) of the original error.
 * The raw `inputString`, `options` and `originalError` are also kept as properties.
 */
class GuessItException extends Error {
  /**
   * @param {string} inputString - The string that was being guessed
   * @param {Object} options - The options in effect for the failed guess
   * @param {Error|null} [originalError=null] - The underlying error, if any
   */
  constructor(inputString, options, originalError = null) {
    const version = "1.0.0"; // TODO: Get from package.json

    // JSON.stringify throws on circular references and BigInt values. Building
    // the report must never fail, otherwise the real error would be masked.
    let serializedOptions;
    try {
      serializedOptions = JSON.stringify(options);
    } catch (serializationError) {
      serializedOptions = `<unserializable options: ${serializationError.message}>`;
    }

    // Anything can be thrown in JavaScript, so fall back to String() when the
    // original error has neither a stack nor a message.
    const errorDetails = originalError
      ? originalError.stack || originalError.message || String(originalError)
      : "Unknown error";

    const message = [
      "An internal error has occurred in guessit-js.",
      "===================== Guessit Exception Report =====================",
      `version=${version}`,
      `string=${inputString}`,
      `options=${serializedOptions}`,
      "--------------------------------------------------------------------",
      errorDetails,
      "--------------------------------------------------------------------",
      "Please report at https://github.com/guessit-io/guessit/issues.",
      "===================================================================="
    ].join('\n');

    super(message);
    this.name = 'GuessItException';
    this.inputString = inputString;
    this.options = options;
    this.originalError = originalError;

    // Maintains proper stack trace for where our error was thrown (only available on V8)
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, GuessItException);
    }
  }
}
/**
 * Error type used for configuration problems.
 * Carries the supplied message unchanged and reports its name as
 * 'ConfigurationException'.
 */
class ConfigurationException extends Error {
  /**
   * @param {string} message - Description of the configuration problem
   */
  constructor(message) {
    super(message);
    Object.assign(this, { name: 'ConfigurationException' });

    // V8-only API: drop the constructor frame from the captured stack
    const canCaptureStack = Boolean(Error.captureStackTrace);
    if (canCaptureStack) {
      Error.captureStackTrace(this, ConfigurationException);
    }
  }
}
// === Configuration Module ===
/**
* Default configuration for GuessIt JS
* Ported from guessit/config/options.json
*
* Top-level keys hold user-facing defaults (expected titles, allowed
* countries/languages); "advanced_config" groups per-property settings.
* loadConfig() pushes this object as the first configuration source unless
* options.no_default_config is set, and falls back to its "advanced_config"
* section when the merged configuration has none.
*/
const defaultConfig = {
// Titles listed explicitly as expected
"expected_title": [
"OSS 117",
"This is Us"
],
// Country codes accepted by default
"allowed_countries": [
"au", "gb", "us"
],
// Language codes accepted by default
"allowed_languages": [
"ca", "cs", "de", "en", "es", "fr", "he", "hi", "hu", "it",
"ja", "ko", "mul", "nl", "no", "pl", "pt", "ro", "ru", "sv",
"te", "uk", "und"
],
// Per-property settings, keyed by property module name
"advanced_config": {
// NOTE(review): presumably short tokens that are ambiguous with language/country
// codes — confirm against the language/country rule modules
"common_words": [
"ca", "cat", "de", "he", "it", "no", "por", "rum", "se", "st", "sub"
],
// Opening and closing group characters, position-aligned: ( ) [ ] { }
"groups": {
"starting": "([{",
"ending": ")]}"
},
// Canonical audio codec name -> literal "patterns" and/or "regex" sources
"audio_codec": {
"audio_codec": {
"MP3": { "patterns": ["MP3", "LAME"], "regex": ["LAME(?:\\d)+-?(?:\\d)+"] },
"MP2": { "patterns": ["MP2"] },
"Dolby Digital": { "patterns": ["Dolby", "DolbyDigital"], "regex": ["Dolby-Digital", "DD", "AC-?3D?"] },
"Dolby Atmos": { "patterns": ["Atmos"], "regex": ["Dolby-?Atmos"] },
"AAC": { "patterns": ["AAC"] },
"Dolby Digital Plus": { "patterns": ["DDP", "DD+"], "regex": ["E-?AC-?3"] },
"FLAC": { "patterns": ["Flac"] },
"DTS": { "patterns": ["DTS"] },
"DTS-HD": { "regex": ["DTS-?HD", "DTS(?=-?MA)"] },
"DTS:X": { "patterns": ["DTS:X", "DTS-X", "DTSX"] },
"Dolby TrueHD": { "regex": ["True-?HD"] },
"Opus": { "patterns": ["Opus"] },
"Vorbis": { "patterns": ["Vorbis"] },
"PCM": { "patterns": ["PCM"] },
"LPCM": { "patterns": ["LPCM"] }
}
},
// File extensions grouped by container type
"container": {
"subtitles": ["srt", "idx", "sub", "ssa", "ass"],
"info": ["nfo"],
"videos": [
"3g2", "3gp", "3gp2", "asf", "avi", "divx", "flv", "iso", "m4v",
"mk2", "mk3d", "mka", "mkv", "mov", "mp4", "mp4a", "mpeg", "mpg",
"ogg", "ogm", "ogv", "qt", "ra", "ram", "rm", "ts", "m2ts", "vob",
"wav", "webm", "wma", "wmv"
],
"torrent": ["torrent"],
"nzb": ["nzb"]
},
// Season/episode markers, separators and localized words
"episodes": {
"season_max_range": 100,
"episode_max_range": 100,
"max_range_gap": 1,
"season_markers": ["s"],
"season_ep_markers": ["x"],
"disc_markers": ["d"],
"episode_markers": ["xe", "ex", "ep", "e", "x"],
"range_separators": ["-", "~", "to", "a"],
"discrete_separators": ["+", "&", "and", "et"],
"season_words": [
"season", "saison", "seizoen", "seasons", "saisons", "tem",
"temp", "temporada", "temporadas", "stagione"
],
"episode_words": [
"episode", "episodes", "eps", "ep", "episodio", "episodios",
"capitulo", "capitulos"
],
"of_words": ["of", "sur"],
"all_words": ["All"]
},
// Language identifier -> alternative spellings found in filenames
"language": {
"synonyms": {
"ell": ["gr", "greek"],
"spa": ["esp", "español", "espanol"],
"fra": ["français", "vf", "vff", "vfi", "vfq"],
"swe": ["se"],
"por_BR": ["po", "pb", "pob", "ptbr", "br", "brazilian"],
"deu_CH": ["swissgerman", "swiss german"],
"nld_BE": ["flemish"],
"cat": ["català", "castellano", "espanol castellano", "español castellano"],
"ces": ["cz"],
"ukr": ["ua"],
"zho": ["cn"],
"jpn": ["jp"],
"hrv": ["scr"],
"mul": ["multi", "multiple", "dl"]
}
},
// Frame-rate regex sources, aspect-ratio bounds and known vertical resolutions
"screen_size": {
"frame_rates": [
"23\\.976", "24(?:\\.0{1,3})?", "25(?:\\.0{1,3})?", "29\\.970",
"30(?:\\.0{1,3})?", "48(?:\\.0{1,3})?", "50(?:\\.0{1,3})?",
"60(?:\\.0{1,3})?", "120(?:\\.0{1,3})?"
],
"min_ar": 1.333,
"max_ar": 1.898,
"interlaced": ["360", "480", "540", "576", "900", "1080"],
"progressive": ["360", "480", "540", "576", "900", "1080", "368", "720", "1440", "2160", "4320"]
},
// NOTE(review): "(?P<name>...)" is Python named-group syntax carried over from the
// original options.json; JavaScript's RegExp rejects it (JS spells it "(?<name>...)"),
// so these strings need translating before being passed to new RegExp — confirm
// how the consuming code handles them.
"source": {
"rip_prefix": "(?P<other>Rip)-?",
"rip_suffix": "-?(?P<other>Rip)"
},
// Website detection: TLDs, subdomains and prefixes
"website": {
"safe_tlds": ["com", "net", "org"],
"safe_subdomains": ["www"],
"safe_prefixes": ["co", "com", "net", "org"],
"prefixes": ["from"]
}
}
};
// === Options Module ===
/**
* Options parsing and configuration management
*/
/**
 * Normalize user-supplied options into a plain options object.
 *
 * - string: split on whitespace and parsed as CLI-style arguments
 * - array: parsed as CLI-style arguments
 * - object: shallow-copied
 * - null / undefined / anything else: empty object
 *
 * @param {string|Object|Array} options - Options to parse
 * @param {boolean} api - Whether this is for API use (does not affect the result)
 * @returns {Object} Parsed options object
 */
function parseOptions(options = null, api = false) {
  if (options === null || options === undefined) {
    return {};
  }

  switch (typeof options) {
    case 'string': {
      // Simple string parsing - in real implementation you'd want a proper CLI parser
      const tokens = options.split(/\s+/).filter((token) => token.length > 0);
      return parseArgs(tokens);
    }
    case 'object':
      return Array.isArray(options) ? parseArgs(options) : { ...options };
    default:
      return {};
  }
}
/**
 * Simple argument parser for basic CLI-style options.
 *
 * - `--long-name [value]` is stored under `long_name`
 * - `-x [value]` is stored under the key returned by getShortOptionKey('x');
 *   unknown short options are ignored (their would-be value is not consumed)
 * - an option followed by a token that does not start with '-' takes that token
 *   as its value, otherwise it is stored as `true`
 * - any other token not starting with '-' is a positional filename, collected
 *   into the `filename` array
 *
 * @param {Array} args - Array of argument strings
 * @returns {Object} Parsed options
 */
function parseArgs(args) {
  const options = {};

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    const isLong = arg.startsWith('--');
    const isShort = !isLong && arg.startsWith('-') && arg.length === 2;

    if (isLong || isShort) {
      const key = isLong
        ? arg.slice(2).replace(/-/g, '_')
        : getShortOptionKey(arg[1]);
      if (isShort && !key) {
        continue;
      }
      const hasValue = i + 1 < args.length && !args[i + 1].startsWith('-');
      options[key] = hasValue ? args[++i] : true;
      continue;
    }

    if (arg.startsWith('-')) {
      // A lone "-" or bundled short flags ("-abc") are not supported; skip them
      continue;
    }

    // Positional argument (filename). `filename` may already hold a non-array
    // value when it was set through `--filename <value>`; calling push() on it
    // would throw, so normalize it to an array first.
    if (Array.isArray(options.filename)) {
      options.filename.push(arg);
    } else if (typeof options.filename === 'string') {
      options.filename = [options.filename, arg];
    } else {
      options.filename = [arg];
    }
  }

  return options;
}
/**
 * Map short options to their full names.
 *
 * Only the short options listed in the table are recognized; anything else,
 * including names inherited from Object.prototype such as 'constructor',
 * yields undefined.
 *
 * @param {string} shortOpt - Single character option
 * @returns {string|undefined} Full option name, or undefined when unknown
 */
function getShortOptionKey(shortOpt) {
  const mapping = {
    't': 'type',
    'n': 'name_only',
    'Y': 'date_year_first',
    'D': 'date_day_first',
    'L': 'allowed_languages',
    'C': 'allowed_countries',
    'E': 'episode_prefer_number',
    'T': 'expected_title',
    'G': 'expected_group',
    'f': 'input_file',
    'v': 'verbose',
    'P': 'show_property',
    'a': 'advanced',
    's': 'single_value',
    'l': 'enforce_list',
    'j': 'json',
    'y': 'yaml',
    'i': 'output_input_string',
    'c': 'config',
    'p': 'properties',
    'V': 'values'
  };
  // Own-property check: a bare `mapping[shortOpt]` would hand back inherited
  // members (e.g. the Object constructor) for unexpected inputs.
  return Object.prototype.hasOwnProperty.call(mapping, shortOpt)
    ? mapping[shortOpt]
    : undefined;
}
/**
 * Build the effective configuration.
 *
 * The built-in defaultConfig is the only source at the moment; it is skipped
 * when `options.no_default_config` is truthy. Whatever the result of the merge,
 * `advanced_config` is guaranteed to be present (taken from defaultConfig when
 * missing).
 *
 * @param {Object} options - Options that may specify config sources
 * @returns {Object} Merged configuration
 */
function loadConfig(options = {}) {
  // Default configuration comes first unless explicitly disabled
  const sources = options.no_default_config ? [] : [defaultConfig];

  // In a real implementation, you'd load from:
  // - ~/.guessit/options.json
  // - ~/.config/guessit/options.json
  // - Custom config files from options.config

  const config = sources.length > 0 ? mergeOptions(...sources) : {};

  // Rule modules rely on advanced_config, so always provide it
  const needsAdvancedFallback = !config.advanced_config && defaultConfig.advanced_config;
  if (needsAdvancedFallback) {
    config.advanced_config = defaultConfig.advanced_config;
  }

  return config;
}
/**
 * Merge several options objects, left to right, into a new object.
 *
 * A source may carry a `pristine` entry: `true` discards everything merged so
 * far, an array of keys discards just those keys. Falsy sources are skipped.
 * Individual values are combined by mergeOptionValue().
 *
 * @param {...Object} optionsArray - Multiple options objects to merge
 * @returns {Object} Merged options
 */
function mergeOptions(...optionsArray) {
  let accumulated = {};

  optionsArray.forEach((source) => {
    if (!source) {
      return;
    }

    const resetRequest = source.pristine;
    if (resetRequest === true) {
      accumulated = {};
    } else if (Array.isArray(resetRequest)) {
      resetRequest.forEach((key) => {
        delete accumulated[key];
      });
    }

    Object.entries(source).forEach(([key, value]) => {
      mergeOptionValue(key, value, accumulated);
    });
  });

  return accumulated;
}
/**
 * Merge a single option value into the merged object.
 *
 * - null/undefined values and the `pristine` control key are ignored
 * - the `__proto__` key is ignored (see comment below)
 * - if the target already holds an array, new values are appended (no duplicates)
 * - if both sides are objects, they are merged recursively via mergeOptions()
 * - arrays are copied, any other value is assigned as-is
 *
 * @param {string} option - Option key
 * @param {*} value - Option value
 * @param {Object} merged - Target object to merge into (mutated)
 */
function mergeOptionValue(option, value, merged) {
  if (value === null || value === undefined || option === 'pristine') {
    return;
  }

  // Objects produced by JSON.parse can carry an own "__proto__" key. Reading
  // merged['__proto__'] yields Object.prototype and assigning to it swaps the
  // target's prototype, so such keys must never be merged.
  if (option === '__proto__') {
    return;
  }

  const existing = merged[option];

  if (existing && Array.isArray(existing)) {
    const values = Array.isArray(value) ? value : [value];
    for (const val of values) {
      if (val !== null && val !== undefined && !existing.includes(val)) {
        existing.push(val);
      }
    }
  } else if (existing && typeof existing === 'object' && typeof value === 'object') {
    merged[option] = mergeOptions(existing, value);
  } else if (Array.isArray(value)) {
    // Copy so later appends do not mutate the caller's array
    merged[option] = [...value];
  } else {
    merged[option] = value;
  }
}
// === Rebulk Engine ===
/**
* JavaScript implementation of Rebulk pattern matching engine
* Simplified version focusing on the core pattern matching functionality
*/
/**
* Represents a single match found in the input string
*/
class Match {
constructor(start, end, value = null, name = null, options = {}) {
this.start = start;
this.end = end;
this.value = value !== null ? value : '';
this.name = name;
this.tags = options.tags || [];
this.private = options.private || false;
this.children = options.children || [];
this.parent = options.parent || null;
this.raw = options.raw || '';
this.initiator = options.initiator || null;
this.formatter = options.formatter || null;
this.validator = options.validator || null;
}
/**
* Get the span (start, end) of this match
*/
get span() {
return [this.start, this.end];
}
/**
* Get the length of this match
*/
get length() {
return this.end - this.start;
}
/**
* Apply formatter to the match value
*/
format() {
if (this.formatter && typeof this.formatter === 'function') {
try {
this.value = this.formatter(this.value);
} catch (error) {
// If formatting fails, keep original value
console.warn('Formatting failed for match:', this, error);
}
}
}
/**
* Validate the match
*/
validate() {
if (this.validator && typeof this.validator === 'function') {
try {
return this.validator(this);
} catch (error) {
console.warn('Validation failed for match:', this, error);
return false;
}
}
return true;
}
/**
* Split this match using separators
*/
split(separators, valueFunction = null) {
const parts = [];
let currentStart = this.start;
for (let i = this.start; i < this.end; i++) {
const char = this.raw[i - this.start];
if (separators.includes(char)) {
if (currentStart < i) {
const part = new Match(currentStart, i);
part.raw = this.raw.slice(currentStart - this.start, i - this.start);
part.value = valueFunction ? valueFunction(part) : part.raw;
parts.push(part);
}
currentStart = i + 1;
}
}
// Add final part
if (currentStart < this.end) {
const part = new Match(currentStart, this.end);
part.raw = this.raw.slice(currentStart - this.start);
part.value = valueFunction ? valueFunction(part) : part.raw;
parts.push(part);
}
return parts;
}
}
/**
 * Container for the matches found in one input string, with query helpers.
 */
class Matches {
  constructor(inputString = '') {
    this.inputString = inputString;
    this.matches = [];
    this.markers = new Markers();
  }

  /**
   * Append a match; anything that is not a Match instance is ignored
   */
  add(match) {
    if (!(match instanceof Match)) {
      return;
    }
    this.matches.push(match);
  }

  /**
   * Matches whose name equals `name`, optionally narrowed by a predicate
   */
  named(name, predicate = null) {
    const sameName = this.matches.filter((candidate) => candidate.name === name);
    if (!predicate) {
      return sameName;
    }
    return sameName.filter(predicate);
  }

  /**
   * Matches carrying `tag`, optionally narrowed by a predicate
   */
  tagged(tag, predicate = null) {
    const withTag = this.matches.filter((candidate) => candidate.tags.includes(tag));
    if (!predicate) {
      return withTag;
    }
    return withTag.filter(predicate);
  }

  /**
   * Matches lying entirely inside [start, end]. With `index`, returns the
   * element at that position (or null) instead of the list.
   */
  range(start, end, predicate = null, index = null) {
    const inside = this.matches.filter(
      (candidate) => candidate.start >= start && candidate.end <= end
    );
    const selected = predicate ? inside.filter(predicate) : inside;
    if (index === null) {
      return selected;
    }
    return selected[index] || null;
  }

  /**
   * The index-th closest match ending at or before `match.start`, or null
   */
  previous(match, predicate = null, index = 0) {
    const before = this.matches.filter((candidate) => candidate.end <= match.start);
    const eligible = predicate ? before.filter(predicate) : before;
    // Closest first: largest end offset at the front
    eligible.sort((left, right) => right.end - left.end);
    return eligible[index] || null;
  }

  /**
   * The index-th closest match starting at or after `match.end`, or null
   */
  next(match, predicate = null, index = 0) {
    const after = this.matches.filter((candidate) => candidate.start >= match.end);
    const eligible = predicate ? after.filter(predicate) : after;
    // Closest first: smallest start offset at the front
    eligible.sort((left, right) => left.start - right.start);
    return eligible[index] || null;
  }

  /**
   * Spans of [start, end] not covered by any match lying inside that range,
   * returned as Match objects whose raw/value are the uncovered text
   */
  holes(start, end, options = {}) {
    const gaps = [];
    const makeGap = (from, to) => {
      const gap = new Match(from, to);
      gap.raw = this.inputString.slice(from, to);
      gap.value = gap.raw;
      gaps.push(gap);
    };

    const ordered = this.range(start, end).sort((left, right) => left.start - right.start);
    let cursor = start;
    ordered.forEach((covered) => {
      if (covered.start > cursor) {
        makeGap(cursor, covered.start);
      }
      cursor = Math.max(cursor, covered.end);
    });

    // Whatever remains after the last covered span
    if (cursor < end) {
      makeGap(cursor, end);
    }
    return gaps;
  }

  /**
   * Flatten the non-private matches into a { property: value(s) } object.
   * Duplicate values are removed; a property with several distinct values
   * becomes an array unless `singleValue` is set, and `enforceList` forces
   * arrays everywhere.
   */
  toDict(advanced = false, singleValue = false, enforceList = false) {
    const result = {};

    // Collect values per property name, in match order
    const valuesByProperty = {};
    this.matches.forEach((match) => {
      if (match.private) {
        return;
      }
      const bucket = valuesByProperty[match.name] || (valuesByProperty[match.name] = []);
      bucket.push(match.value);
    });

    Object.entries(valuesByProperty).forEach(([property, values]) => {
      // Set keeps first-seen order while dropping duplicates
      const distinct = Array.from(new Set(values));
      if (singleValue && distinct.length > 0) {
        result[property] = distinct[0];
      } else if (enforceList || distinct.length > 1) {
        result[property] = distinct;
      } else if (distinct.length === 1) {
        result[property] = distinct[0];
      }
    });

    // Expose season/episode from a season_episode object as top-level properties.
    // The season_episode object itself is kept for backwards compatibility.
    const combined = result.season_episode;
    if (combined && typeof combined === 'object') {
      if (combined.season !== undefined) {
        result.season = combined.season;
      }
      if (combined.episode !== undefined) {
        result.episode = combined.episode;
      }
    }

    return result;
  }
}
/**
 * Ordered list of structural markers (for example path segments), with
 * lookup helpers by name and by position.
 */
class Markers {
  constructor() {
    this.markerList = [];
  }

  /**
   * Append a marker to the list
   */
  add(marker) {
    this.markerList.push(marker);
  }

  /**
   * All markers whose name equals `name`
   */
  named(name) {
    return this.markerList.filter(({ name: markerName }) => markerName === name);
  }

  /**
   * The index-th marker whose span fully contains `match`, or null.
   * An optional predicate narrows the candidates first.
   */
  atMatch(match, predicate = null, index = 0) {
    const enclosing = this.markerList.filter(
      (marker) => marker.start <= match.start && marker.end >= match.end
    );
    const eligible = predicate ? enclosing.filter(predicate) : enclosing;
    return eligible[index] || null;
  }

  /**
   * Markers that begin exactly at `position`, optionally narrowed by a predicate
   */
  starting(position, predicate = null) {
    const atPosition = this.markerList.filter((marker) => marker.start === position);
    if (!predicate) {
      return atPosition;
    }
    return atPosition.filter(predicate);
  }
}
/**
 * Rule for pattern matching.
 *
 * A rule pairs a pattern (RegExp, or a string used as RegExp source) with the
 * property name, fixed value, tags, formatter and validator to attach to every
 * match the pattern produces.
 */
class Rule {
  /**
   * @param {RegExp|string} pattern - Pattern to search for
   * @param {Object} options - name, value, tags, formatter, validator, private,
   *   children and conflictSolver settings for produced matches
   */
  constructor(pattern, options = {}) {
    this.pattern = pattern;
    this.name = options.name || null;
    this.value = options.value || null;
    this.tags = options.tags || [];
    this.formatter = options.formatter || null;
    this.validator = options.validator || null;
    this.private = options.private || false;
    this.children = options.children || false;
    this.conflictSolver = options.conflictSolver || null;
  }

  /**
   * Apply this rule to input string.
   *
   * Every non-empty occurrence of the pattern becomes a Match that is formatted
   * and then validated; only valid matches are returned. A RegExp without the
   * global flag yields at most one match. Patterns that are neither RegExp nor
   * string yield no matches.
   *
   * @param {string} inputString - Text to scan
   * @param {Object} matches - Collection being built (not used by this method)
   * @param {Object} options - `ignoreCase` applies to string patterns only
   * @returns {Array} Valid matches, in order of appearance
   */
  apply(inputString, matches, options = {}) {
    let regex;
    if (this.pattern instanceof RegExp) {
      regex = this.pattern;
      // The RegExp object is shared across calls. A global regex resumes from
      // lastIndex, so a leftover position (e.g. after the iteration cap was
      // hit, or when the caller used the regex elsewhere) would make this scan
      // skip the beginning of the input.
      regex.lastIndex = 0;
    } else if (typeof this.pattern === 'string') {
      // Handle case insensitive matching
      const flags = options.ignoreCase ? 'gi' : 'g';
      regex = new RegExp(this.pattern, flags);
    } else {
      return [];
    }

    // Debug logging (remove in production)
    const isDebugging = (typeof process !== 'undefined' && process.env && process.env.DEBUG_RULES === 'true') || false;
    const debugThisRule = isDebugging && this.name === 'container';
    if (debugThisRule) {
      console.log(`[DEBUG] Applying ${this.name} rule with pattern ${regex} to "${inputString}"`);
    }

    const newMatches = [];
    const maxIterations = 1000; // Prevent infinite loops
    let iterations = 0;
    let match;

    while (iterations < maxIterations && (match = regex.exec(inputString)) !== null) {
      iterations++;

      // Empty matches carry no information and, for global regexes, do not
      // advance lastIndex on their own: skip them and step forward manually.
      if (match[0].length === 0) {
        if (!regex.global) {
          break;
        }
        regex.lastIndex = match.index + 1;
        continue;
      }

      const matchObj = new Match(
        match.index,
        match.index + match[0].length,
        this.value || match[0],
        this.name,
        {
          tags: [...this.tags],
          private: this.private,
          raw: match[0],
          formatter: this.formatter,
          validator: this.validator
        }
      );

      if (debugThisRule) {
        console.log(`[DEBUG] Found match: ${JSON.stringify(match)} -> matchObj: ${JSON.stringify({start: matchObj.start, end: matchObj.end, name: matchObj.name, value: matchObj.value})}`);
      }

      // Format first so the validator sees the final value
      matchObj.format();
      const isValid = matchObj.validate();
      if (debugThisRule) {
        console.log(`[DEBUG] Validation result: ${isValid}`);
      }

      if (isValid) {
        newMatches.push(matchObj);
        if (debugThisRule) {
          console.log(`[DEBUG] Added match to newMatches, total: ${newMatches.length}`);
        }
      }

      // If regex doesn't have global flag, break after first match
      if (!regex.global) {
        break;
      }
    }

    // Leave the shared regex in a clean state for whoever uses it next
    regex.lastIndex = 0;

    if (debugThisRule) {
      console.log(`[DEBUG] Returning ${newMatches.length} matches from ${this.name} rule`);
    }
    return newMatches;
  }
}
/**
 * Pattern-matching coordinator: holds a list of rules, runs them over an input
 * string and resolves conflicts between the resulting matches.
 */
class Rebulk {
  /**
   * @param {Object} options - Engine options; `ignoreCase` defaults to false
   */
  constructor(options = {}) {
    this.rules = [];
    this.options = Object.assign({ ignoreCase: options.ignoreCase || false }, options);
  }

  /**
   * Register one rule or an array of rules; falsy input is ignored
   */
  addRules(rules) {
    if (Array.isArray(rules)) {
      this.rules.push(...rules);
      return;
    }
    if (rules) {
      this.rules.push(rules);
    }
  }

  /**
   * Register a rule built from a string pattern (chainable)
   */
  string(pattern, options = {}) {
    this.rules.push(new Rule(pattern, options));
    return this;
  }

  /**
   * Register a rule built from a regex pattern, compiled with the global flag
   * and, when this instance ignores case, the `i` flag (chainable)
   */
  regex(pattern, options = {}) {
    const flags = this.options.ignoreCase ? 'gi' : 'g';
    this.rules.push(new Rule(new RegExp(pattern, flags), options));
    return this;
  }

  /**
   * Run every rule against the input string and return the resulting
   * Matches collection after conflict resolution
   */
  matches(inputString, options = {}) {
    const collection = new Matches(inputString);
    const effectiveOptions = { ...this.options, ...options };

    // Add path markers (simplified)
    this.addPathMarkers(collection, inputString);

    this.rules.forEach((rule) => {
      const found = rule.apply(inputString, collection, effectiveOptions);
      const isDebugging = (typeof process !== 'undefined' && process.env && process.env.DEBUG_RULES === 'true') || false;
      const debugThisRule = isDebugging && rule.name === 'container';
      if (debugThisRule && found.length > 0) {
        console.log(`[DEBUG] Rule ${rule.name} returned ${found.length} matches`);
      }
      found.forEach((match) => {
        collection.add(match);
        if (debugThisRule) {
          console.log(`[DEBUG] Added match to collection, total matches: ${collection.matches.length}`);
        }
      });
    });

    // Post-process matches (remove conflicts, apply final formatting, etc.)
    this.postProcessMatches(collection);
    return collection;
  }

  /**
   * Record one private 'path' marker per non-empty segment of the input,
   * where segments are separated by '/' or '\'
   */
  addPathMarkers(matches, inputString) {
    let offset = 0;
    for (const segment of inputString.split(/[\/\\]/g)) {
      if (segment.length > 0) {
        const marker = new Match(offset, offset + segment.length, null, 'path', { private: true });
        matches.markers.add(marker);
      }
      offset += segment.length + 1; // +1 for separator
    }
  }

  /**
   * Resolve overlaps between public matches and store the surviving matches
   * (followed by all private matches) back on `matches.matches`.
   *
   * Public matches are visited by start offset, longer first. A match that
   * overlaps already-kept matches replaces those it outranks (higher priority,
   * or equal priority and longer) and is discarded if it outranks none.
   */
  postProcessMatches(matches) {
    const isDebugging = (typeof process !== 'undefined' && process.env && process.env.DEBUG_RULES === 'true') || false;
    const dumpMatches = (list) => {
      list.forEach((match, i) => {
        console.log(`[DEBUG] ${i}: ${match.start}-${match.end} "${match.name}": "${match.value}" (private: ${match.private})`);
      });
    };

    if (isDebugging) {
      console.log(`[DEBUG] Post-processing ${matches.matches.length} matches`);
      dumpMatches(matches.matches);
    }

    // Private matches bypass conflict resolution entirely
    const privateMatches = [];
    const publicMatches = [];
    for (const candidate of matches.matches) {
      (candidate.private ? privateMatches : publicMatches).push(candidate);
    }
    if (isDebugging) {
      console.log(`[DEBUG] Separated into ${privateMatches.length} private and ${publicMatches.length} public matches`);
    }

    // Earliest start first; on ties the longer span comes first
    publicMatches.sort((a, b) => a.start - b.start || (b.end - b.start) - (a.end - a.start));

    // Higher number = higher priority; specific properties beat generic ones
    const priorities = {
      'container': 100,
      'video_codec': 90,
      'audio_codec': 90,
      'source': 80,
      'screen_size': 80,
      'year': 70,
      'episode': 60,
      'season': 60,
      'title': 10, // Title should have low priority as it's often very broad
      'cleanup': 5,
      'path': 1
    };
    const priorityOf = (match) => priorities[match.name] || 50; // Default priority for unknown types

    const filtered = [];
    for (const match of publicMatches) {
      const overlapping = filtered.filter(
        (existing) => !(match.end <= existing.start || match.start >= existing.end)
      );

      if (overlapping.length === 0) {
        filtered.push(match);
        if (isDebugging) {
          console.log(`[DEBUG] Keeping non-overlapping match: ${match.name} (${match.start}-${match.end})`);
        }
        continue;
      }

      if (isDebugging) {
        console.log(`[DEBUG] Found ${overlapping.length} overlapping matches for ${match.name} (${match.start}-${match.end})`);
      }

      const currentPriority = priorityOf(match);
      // Kept matches that lose against the current one
      const toReplace = overlapping.filter((existing) => {
        const existingPriority = priorityOf(existing);
        if (currentPriority > existingPriority) {
          return true;
        }
        // Same priority, prefer longer match
        return currentPriority === existingPriority && match.length > existing.length;
      });

      if (toReplace.length === 0) {
        if (isDebugging) {
          console.log(`[DEBUG] Discarding ${match.name} (priority: ${currentPriority}) in favor of higher priority matches`);
        }
        continue;
      }

      toReplace.forEach((loser) => {
        const position = filtered.indexOf(loser);
        if (position !== -1) {
          filtered.splice(position, 1);
        }
      });
      filtered.push(match);
      if (isDebugging) {
        console.log(`[DEBUG] Replaced ${toReplace.length} lower priority matches with ${match.name} (priority: ${currentPriority})`);
      }
    }

    // Combine filtered public matches with all private matches
    const finalMatches = filtered.concat(privateMatches);
    if (isDebugging) {
      console.log(`[DEBUG] After post-processing: ${finalMatches.length} matches (${filtered.length} public + ${privateMatches.length} private)`);
      dumpMatches(finalMatches);
    }
    matches.matches = finalMatches;
  }

  /**
   * List the property names produced by named, non-private rules, each with
   * the set of fixed values those rules declare
   */
  introspect(options = {}) {
    const properties = {};
    this.rules
      .filter((rule) => rule.name && !rule.private)
      .forEach((rule) => {
        const values = properties[rule.name] || (properties[rule.name] = new Set());
        if (rule.value) {
          values.add(rule.value);
        }
      });
    return { properties };
  }
}
// === Rule Modules ===
/**
 * Episode and season detection rules.
 *
 * Builds rules for:
 * - combined season/episode markers (S01E02, S01.E02, 1x02), producing a
 *   `season_episode` value of the form { season, episode }
 * - standalone season (S02, "Season 2") and episode (E05, "Episode 5") numbers
 * - episode details keywords (Special, Pilot, Unaired, Final)
 *
 * The patterns are hard-coded: marker, separator and word lists from the
 * configuration are not consulted, so `config` may be omitted.
 *
 * @param {Object} [config={}] - Episodes configuration (currently unused)
 * @returns {Array} Rules in evaluation order
 */
function episodeRules(config = {}) {
  const rules = [];

  // Shared formatter: first run of digits as an integer, raw text if none
  const firstNumber = (value) => {
    const digits = value.match(/(\d+)/);
    return digits ? parseInt(digits[1], 10) : value;
  };

  // SxxExx patterns (S01E02, S01.E02, s1e2, etc.)
  rules.push(new Rule(
    /([Ss])(\d{1,2})[\s\-\.]*([Ee])(\d{1,3})/g,
    {
      name: 'season_episode',
      formatter: (value) => {
        const match = value.match(/([Ss])(\d{1,2})[\s\-\.]*([Ee])(\d{1,3})/);
        if (match) {
          return {
            season: parseInt(match[2], 10),
            episode: parseInt(match[4], 10)
          };
        }
        return value;
      },
      tags: ['SxxExx']
    }
  ));

  // NxNN patterns (1x02, 2x10, etc.)
  rules.push(new Rule(
    /(\d{1,2})x(\d{1,3})/gi,
    {
      name: 'season_episode',
      formatter: (value) => {
        const match = value.match(/(\d{1,2})x(\d{1,3})/i);
        if (match) {
          return {
            season: parseInt(match[1], 10),
            episode: parseInt(match[2], 10)
          };
        }
        return value;
      },
      tags: ['NxNN']
    }
  ));

  // Season only patterns (S02, etc.)
  rules.push(new Rule(
    /[Ss](\d{1,2})/g,
    { name: 'season', formatter: firstNumber, tags: ['season-only'] }
  ));

  // Episode only patterns (E02, etc.)
  rules.push(new Rule(
    /[Ee](\d{1,3})/g,
    { name: 'episode', formatter: firstNumber, tags: ['episode-only'] }
  ));

  // Episode words (Episode 5, Episodes 12, etc.) - English only for now
  rules.push(new Rule(
    /\b(?:episode|episodes)\s*(\d{1,3})\b/gi,
    { name: 'episode', formatter: firstNumber, tags: ['episode-word'] }
  ));

  // Season words (Season 1, Seasons 2, etc.) - English only for now
  rules.push(new Rule(
    /\b(?:season|seasons)\s*(\d{1,2})\b/gi,
    { name: 'season', formatter: firstNumber, tags: ['season-word'] }
  ));

  // Episode details: each keyword is its own rule with the keyword as value
  const episodeDetails = ['Special', 'Pilot', 'Unaired', 'Final'];
  for (const detail of episodeDetails) {
    rules.push(new Rule(
      detail,
      {
        name: 'episode_details',
        value: detail,
        tags: ['episode-detail']
      }
    ));
  }

  return rules;
}
/**
 * Title detection rules.
 *
 * Three rules, all named 'title':
 * 1. a title preceded by a bracketed prefix at the start of the name
 *    ("[group].Title.2020...")
 * 2. a title at the start of the name, up to a year, a resolution or a known
 *    technical keyword
 * 3. a fallback that picks up any run of words anywhere in the name
 *
 * Values are cleaned by turning '.', '-' and '_' into spaces and collapsing
 * whitespace.
 *
 * @param {Object} config - Title configuration (currently unused)
 * @returns {Array} Rules in evaluation order
 */
function titleRules(config) {
  const rules = [];

  // Separators to spaces, whitespace collapsed, ends trimmed
  const cleanTitle = (value) => value
    .replace(/[\.\-_]/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();

  // Handle unicode and bracketed titles: [unicode].Title.year.etc
  rules.push(new Rule(
    /^(?:\[[^\]]*\]\.?)([a-zA-Z\u00C0-\u017F\u4e00-\u9fff][a-zA-Z0-9\u00C0-\u017F\u4e00-\u9fff\s\-\.\'\:]*?)(?=[\.\s\-](19|20)\d{2}|[\.\s\-]\d{3,4}p|[\.\s\-](?:bluray|hdtv|web|dvd|cam|x264|x265|h264|h265|xvid|divx|dts|aac|mkv|avi|mp4|french|english|german|spanish|italian))/gi,
    {
      name: 'title',
      // The formatter receives the whole matched text, which includes the
      // leading "[...]" group; strip it so only the title itself remains.
      formatter: (value) => cleanTitle(value.replace(/^\[[^\]]*\]\.?/, '')),
      validator: (match) => {
        const value = match.value.trim();
        return value.length >= 2 && !/^\d+$/.test(value);
      },
      tags: ['title-unicode']
    }
  ));

  // Standard title pattern for regular filenames
  rules.push(new Rule(
    /^([a-zA-Z\u00C0-\u017F\u4e00-\u9fff][a-zA-Z0-9\u00C0-\u017F\u4e00-\u9fff\s\-\.\'\:]*?)(?=[\.\s\-](19|20)\d{2}|[\.\s\-]\d{3,4}p|[\.\s\-](?:bluray|hdtv|web|dvd|cam|x264|x265|h264|h265|xvid|divx|dts|aac|mkv|avi|mp4|french|english|german|spanish|italian))/gi,
    {
      name: 'title',
      formatter: (value) => cleanTitle(value),
      validator: (match) => {
        const value = match.value.trim();
        return value.length >= 3 && !/^\d+$/.test(value);
      },
      tags: ['title']
    }
  ));

  // Fallback: extract word sequences from anywhere in the filename
  rules.push(new Rule(
    /([a-zA-Z\u00C0-\u017F\u4e00-\u9fff][a-zA-Z0-9\u00C0-\u017F\u4e00-\u9fff\s]+[a-zA-Z0-9\u00C0-\u017F\u4e00-\u9fff])/g,
    {
      name: 'title',
      formatter: (value) => cleanTitle(value),
      validator: (match) => {
        const value = match.value.trim();
        // Avoid common technical terms
        const skipTerms = ['bluray', 'hdtv', 'web', 'dvd', 'x264', 'x265', 'h264', 'h265', 'xvid', 'divx', 'aac', 'dts', 'french', 'english'];
        return value.length >= 4 && !/^\d+$/.test(value) && !skipTerms.includes(value.toLowerCase());
      },
      tags: ['title-fallback'],
      private: false
    }
  ));

  return rules;
}
/**
 * Video codec detection rules.
 *
 * Each known spelling becomes one case-insensitive, word-bounded rule whose
 * value is the canonical codec name.
 *
 * @param {Object} config - Video codec configuration (currently unused)
 * @returns {Array} One rule per spelling, grouped by codec
 */
function videoCodecRules(config) {
  // [canonical name, spellings] in evaluation order
  const codecTable = [
    ['H.264', ['h264', 'h.264', 'x264', 'avc']],
    ['H.265', ['h265', 'h.265', 'x265', 'hevc']],
    ['XviD', ['xvid']],
    ['DivX', ['divx']],
    ['VP9', ['vp9']],
    ['AV1', ['av1']],
    ['MPEG-2', ['mpeg2', 'mpeg-2']],
    ['VC-1', ['vc1', 'vc-1']],
    ['WMV', ['wmv']]
  ];

  const rules = [];
  codecTable.forEach(([codecName, spellings]) => {
    spellings.forEach((spelling) => {
      const matcher = new RegExp(`\\b${spelling}\\b`, 'i');
      rules.push(new Rule(matcher, {
        name: 'video_codec',
        value: codecName,
        tags: ['video-codec']
      }));
    });
  });
  return rules;
}
/**
 * Screen size/resolution detection rules.
 *
 * Produces one case-insensitive, word-bounded rule per known resolution alias,
 * followed by a WIDTHxHEIGHT rule that maps common dimensions to their
 * resolution label and otherwise reports "WIDTHxHEIGHT".
 *
 * @param {Object} config - Screen size configuration (currently unused)
 * @returns {Array} Rules in evaluation order
 */
function screenSizeRules(config) {
  // [resolution label, aliases] in evaluation order
  const aliasTable = [
    ['240p', ['240p']],
    ['360p', ['360p']],
    ['480p', ['480p', 'sd']],
    ['720p', ['720p', 'hd']],
    ['1080p', ['1080p', '1080i', 'fhd', 'fullhd', 'full hd']],
    ['1440p', ['1440p', '2k']],
    ['2160p', ['2160p', '4k', 'uhd', 'ultra hd']],
    ['4320p', ['4320p', '8k']]
  ];

  const rules = [];
  aliasTable.forEach(([size, aliases]) => {
    aliases.forEach((alias) => {
      rules.push(new Rule(new RegExp(`\\b${alias}\\b`, 'i'), {
        name: 'screen_size',
        value: size,
        tags: ['resolution']
      }));
    });
  });

  // Width x Height patterns (1920x1080, 1280x720, etc.)
  const describeDimensions = (value) => {
    const parsed = value.match(/(\d{3,4})x(\d{3,4})/i);
    if (!parsed) {
      return value;
    }
    const width = parseInt(parsed[1], 10);
    const height = parseInt(parsed[2], 10);
    const dimensions = `${width}x${height}`;
    // Map common resolutions
    switch (dimensions) {
      case '1920x1080':
        return '1080p';
      case '1280x720':
        return '720p';
      case '3840x2160':
        return '2160p';
      case '2560x1440':
        return '1440p';
      default:
        return dimensions;
    }
  };
  rules.push(new Rule(/(\d{3,4})x(\d{3,4})/gi, {
    name: 'screen_size',
    formatter: describeDimensions,
    tags: ['resolution', 'dimensions']
  }));

  return rules;
}
/**
 * Container/file extension detection rules.
 *
 * For every extension of every container type, builds a case-insensitive rule
 * matching ".ext" at the very end of the input. The rule's value is the
 * lower-cased extension and it is tagged with 'container' plus its type.
 * Extension lists come from `config` (videos, subtitles, info, torrent, nzb),
 * with built-in lists as fallback.
 *
 * @param {Object} config - Container configuration
 * @returns {Array} Rules ordered by type, then by extension
 */
function containerRules(config) {
  // [type, extensions] in evaluation order
  const extensionsByType = [
    ['videos', config.videos || [
      '3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'iso', 'm4v',
      'mk2', 'mk3d', 'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg',
      'ogg', 'ogm', 'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'm2ts', 'vob',
      'wav', 'webm', 'wma', 'wmv'
    ]],
    ['subtitles', config.subtitles || ['srt', 'idx', 'sub', 'ssa', 'ass']],
    ['info', config.info || ['nfo']],
    ['torrent', config.torrent || ['torrent']],
    ['nzb', config.nzb || ['nzb']]
  ];

  const rules = [];
  for (const [type, extensions] of extensionsByType) {
    for (const ext of extensions) {
      const atEndOfName = new RegExp(`\\.${ext}$`, 'i');
      rules.push(new Rule(atEndOfName, {
        name: 'container',
        value: ext.toLowerCase(),
        tags: ['container', type]
      }));
    }
  }
  return rules;
}
/**
 * Source detection rules (BluRay, HDTV, WEB, etc.).
 *
 * Each known spelling becomes one case-insensitive, word-bounded rule whose
 * value is the canonical source name.
 *
 * @param {Object} config - Source configuration (currently unused)
 * @returns {Array} One rule per spelling, grouped by source
 */
function sourceRules(config) {
  // [canonical name, spellings] in evaluation order
  const sourceTable = [
    ['BluRay', ['bluray']],
    ['Blu-ray', ['blu-ray', 'bdrip', 'brrip']],
    ['HD-DVD', ['hddvd', 'hd-dvd']],
    ['HDTV', ['hdtv']],
    ['WEB', ['web', 'webrip', 'web-dl', 'webdl']],
    ['DVD', ['dvd', 'dvdrip']],
    ['CAM', ['cam', 'camrip']],
    ['Telesync', ['ts', 'telesync']],
    ['Telecine', ['tc', 'telecine']],
    ['Screener', ['scr', 'screener']],
    ['VHS', ['vhs']]
  ];

  const rules = [];
  sourceTable.forEach(([sourceName, spellings]) => {
    spellings.forEach((spelling) => {
      const matcher = new RegExp(`\\b${spelling}\\b`, 'i');
      rules.push(new Rule(matcher, {
        name: 'source',
        value: sourceName,
        tags: ['source']
      }));
    });
  });
  return rules;
}
/**
 * Build the audio codec detection rules.
 *
 * Every alias becomes its own case-insensitive rule whose value is the codec
 * name the alias is listed under.
 *
 * Aliases are matched literally: regex metacharacters are escaped before the
 * pattern is compiled. Without that, the alias `dd+` compiled to `\bdd+\b`
 * ("d followed by one or more d"), which tagged a plain `DD` as
 * "Dolby Digital Plus" and never consumed the literal `+`.
 *
 * A `\b` word boundary is only added on a side where the alias starts/ends
 * with a word character. An alias ending in punctuation (such as `dd+`) is
 * already delimited by that character, and a trailing `\b` there would demand
 * that a word character follows.
 *
 * @param {Object} config - Not read by this function.
 * @returns {Rule[]} Rules named `audio_codec`, tagged `audio-codec`.
 */
function audioCodecRules(config) {
  const rules = [];
  const audioCodecs = {
    'DTS': ['dts'],
    'DTS-HD': ['dts-hd', 'dtshd'],
    'DTS:X': ['dts:x', 'dts-x', 'dtsx'],
    'Dolby Digital': ['dd', 'ac3', 'dolby'],
    'Dolby Digital Plus': ['dd+', 'ddp', 'e-ac3'],
    'Dolby Atmos': ['atmos'],
    'Dolby TrueHD': ['truehd', 'true-hd'],
    'AAC': ['aac'],
    'MP3': ['mp3'],
    'FLAC': ['flac'],
    'PCM': ['pcm'],
    'LPCM': ['lpcm']
  };

  // Backslash-escape every character that has a special meaning in a regex.
  const escapeRegExp = (literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Compile an alias into a literal, case-insensitive, word-delimited pattern.
  const toAliasPattern = (alias) => {
    const leading = /^\w/.test(alias) ? '\\b' : '';
    const trailing = /\w$/.test(alias) ? '\\b' : '';
    return new RegExp(`${leading}${escapeRegExp(alias)}${trailing}`, 'i');
  };

  for (const [codecName, patterns] of Object.entries(audioCodecs)) {
    for (const pattern of patterns) {
      rules.push(new Rule(
        toAliasPattern(pattern),
        {
          name: 'audio_codec',
          value: codecName,
          tags: ['audio-codec']
        }
      ));
    }
  }
  return rules;
}
/**
 * Remaining property modules.
 * Most of the functions below are stubs that return an empty rules array;
 * dateRules and releaseGroupRules provide basic implementations.
 */
/**
 * Stub: no website rules are defined; always returns a new empty array.
 *
 * @param {Object} config - Not read by this function.
 * @returns {Array} An empty array.
 */
function websiteRules(config) {
  const noRules = [];
  return noRules;
}
/**
 * Build the date-related rules. Currently this is a single `year` rule.
 *
 * The pattern finds stand-alone four-digit numbers starting with 19 or 20;
 * the formatter converts the text to an integer and the validator accepts
 * values from 1900 through 2099 inclusive.
 *
 * @param {Object} config - Not read by this function.
 * @returns {Rule[]} A one-element array holding the `year` rule.
 */
function dateRules(config) {
  const toInteger = (text) => parseInt(text, 10);

  const isPlausibleYear = (match) => {
    const candidate = parseInt(match.value, 10);
    return 1900 <= candidate && candidate <= 2099;
  };

  const yearRule = new Rule(/\b(19[0-9]{2}|20[0-9]{2})\b/g, {
    name: 'year',
    formatter: toInteger,
    validator: isPlausibleYear,
    tags: ['date']
  });

  return [yearRule];
}
/**
 * Stub: no episode-title rules are defined; always returns a new empty array.
 *
 * @param {Object} config - Not read by this function.
 * @returns {Array} An empty array.
 */
function episodeTitleRules(config) {
  const noRules = [];
  return noRules;
}

/**
 * Stub: no language rules are defined; always returns a new empty array.
 *
 * @param {Object} config - Not read by this function.
 * @param {*} commonWords - Not read by this function.
 * @returns {Array} An empty array.
 */
function languageRules(config, commonWords) {
  const noRules = [];
  return noRules;
}

/**
 * Stub: no country rules are defined; always returns a new empty array.
 *
 * @param {Object} config - Not read by this function.
 * @param {*} commonWords - Not read by this function.
 * @returns {Array} An empty array.
 */
function countryRules(config, commonWords) {
  const noRules = [];
  return noRules;
}
/**
 * Build the release-group detection rules.
 *
 * Two rules are produced, both named `release_group`:
 *  1. A run of letters/digits directly after a dash and directly before a
 *     known file extension at the end of the input (e.g. `-GRP.mkv`).
 *  2. A bracketed or parenthesised token such as `[GROUP]` or `(GROUP)`; its
 *     formatter returns the text between the delimiters.
 *
 * The second rule's validator strips one surrounding bracket pair before
 * checking. Whether the engine hands validators the raw or the formatted
 * text is not visible from this function; on raw text such as `[2019]` the
 * anchored year/number exclusions would never fire and the length bounds
 * would count the brackets. Stripping is a no-op on already formatted values.
 *
 * @param {Object} config - Not read by this function.
 * @returns {Rule[]} The two release-group rules, in the order listed above.
 */
function releaseGroupRules(config) {
  const rules = [];

  // Tokens that may follow a dash but are not accepted as a group name.
  const excludedWords = new Set([
    'REPACK', 'PROPER', 'REAL', 'FINAL', 'COMPLETE', 'UNCUT', 'EXTENDED', 'DIRECTORS', 'CUT'
  ]);

  // Group names must be 2 to 20 characters long.
  const hasGroupLength = (text) => text.length >= 2 && text.length <= 20;

  // Release groups before file extensions: match just the group name between dash and dot
  rules.push(new Rule(
    /(?<=-)[A-Z0-9]+(?=\.(?:mkv|avi|mp4|mov|wmv|flv|webm|m4v|3gp|ts|m2ts|vob|iso|img|bin|mdf|nrg|cue|rar|zip|7z|tar|gz|bz2|xz)$)/gi,
    {
      name: 'release_group',
      validator: (match) => {
        const group = match.value;
        return hasGroupLength(group) && !excludedWords.has(group.toUpperCase());
      },
      tags: ['release-group']
    }
  ));

  // Bracketed release groups [GROUP] or (GROUP) - but exclude years
  rules.push(new Rule(
    /[\[\(]([A-Z0-9\-_.]+)[\]\)]/gi,
    {
      name: 'release_group',
      formatter: (value) => {
        const match = value.match(/[\[\(]([A-Z0-9\-_.]+)[\]\)]/i);
        return match ? match[1] : value;
      },
      validator: (match) => {
        // Accept both "[GROUP]" (raw) and "GROUP" (formatted) as input.
        const group = String(match.value).replace(/^[\[\(]|[\]\)]$/g, '');
        // Exclude years (1900-2099) and short numeric sequences
        if (/^(19|20)\d{2}$/.test(group)) return false;
        if (/^\d{1,4}$/.test(group)) return false;
        return hasGroupLength(group);
      },
      tags: ['release-group-bracket']
    }
  ));

  return rules;
}
// The twelve functions below are stubs: each ignores its argument and returns
// a new empty array, i.e. no rules are defined for that module.

/** @param {Object} config - Unused. @returns {Array} An empty array. */
function streamingServiceRules(config) {
  const noRules = [];
  return noRules;
}

/** @param {Object} config - Unused. @returns {Array} An empty array. */
function otherRules(config) {
  const noRules = [];
  return noRules;
}

/** @param {Object} config - Unused. @returns {Array} An empty array. */
function sizeRules(config) {
  const noRules = [];
  return noRules;
}

/** @param {Object} config - Unused. @returns {Array} An empty array. */
function bitRateRules(config) {
  const noRules = [];
  return noRules;
}

/** @param {Object} config - Unused. @returns {Array} An empty array. */
function editionRules(config) {
  const noRules = [];
  return noRules;
}

/** @param {Object} config - Unused. @returns {Array} An empty array. */
function cdRules(config) {
  const noRules = [];
  return noRules;
}

/** @param {Object} config - Unused. @returns {Array} An empty array. */
function bonusRules(config) {
  const noRules = [];
  return noRules;
}

/** @param {Object} config - Unused. @returns {Array} An empty array. */
function filmRules(config) {
  const noRules = [];
  return noRules;
}

/** @param {Object} config - Unused. @returns {Array} An empty array. */
function partRules(config) {
  const noRules = [];
  return noRules;
}

/** @param {Object} config - Unused. @returns {Array} An empty array. */
function crcRules(config) {
  const noRules = [];
  return noRules;
}

/** @param {Object} config - Unused. @returns {Array} An empty array. */
function mimetypeRules(config) {
  const noRules = [];
  return noRules;
}

/** @param {Object} config - Unused. @returns {Array} An empty array. */
function typeRules(config) {
  const noRules = [];
  return noRules;
}
/**
 * Build the path-structure marker rules.
 *
 * A single private rule is produced whose global pattern matches each
 * maximal run of characters that contains neither `/` nor `\`.
 *
 * @param {Object} config - Not read by this function.
 * @returns {Rule[]} A one-element array holding the `path` rule.
 */
function pathRules(config) {
  const segmentPattern = /([^\/\\]+)/g;
  const segmentRule = new Rule(segmentPattern, {
    name: 'path',
    private: true,
    tags: ['path-segment']
  });
  return [segmentRule];
}
/**
 * Build the group-marker rules for text enclosed in brackets, parentheses,
 * braces, etc.
 *
 * Opening and closing delimiters are paired by position: the i-th character
 * of `starting` with the i-th character of `ending`. Surplus characters in
 * the longer of the two are ignored. For every pair one private rule is
 * produced whose global pattern matches the opener, at least one character
 * that is neither delimiter, then the closer.
 *
 * @param {Object} config - May provide `starting` (default `([{`) and
 *   `ending` (default `)]}`).
 * @returns {Rule[]} One `group` rule per delimiter pair, in pair order.
 */
function groupRules(config) {
  const openers = config.starting || '([{';
  const closers = config.ending || ')]}';
  const pairCount = Math.min(openers.length, closers.length);

  const rules = [];
  for (let index = 0; index < pairCount; index += 1) {
    const open = openers[index];
    const close = closers[index];
    // Each delimiter is prefixed with a backslash so it is taken literally.
    const enclosed = new RegExp(`\\${open}([^\\${open}\\${close}]+)\\${close}`, 'g');
    rules.push(new Rule(enclosed, {
      name: 'group',
      private: true,
      tags: ['group-marker']
    }));
  }
  return rules;
}
/**
 * Build the post-processing rules.
 *
 * A single private processor rule named `cleanup` is produced. Its `apply`
 * callback removes duplicate entries from `matches.matches`: two entries are
 * duplicates when their `name`, `start` and `end` are equal once rendered
 * into a string key. The first occurrence is kept and the container object
 * is updated in place and returned.
 *
 * @param {Object} config - Not read by this function.
 * @returns {Rule[]} A one-element array holding the `cleanup` rule.
 */
function processorsRules(config) {
  // Keeps only the first match for every (name, start, end) combination.
  const dropDuplicateMatches = (matches) => {
    const seenKeys = new Set();
    matches.matches = matches.matches.filter(({ name, start, end }) => {
      const key = `${name}-${start}-${end}`;
      const isFirstOccurrence = !seenKeys.has(key);
      seenKeys.add(key);
      return isFirstOccurrence;
    });
    return matches;
  };

  const cleanupRule = new Rule(/.*/, {
    name: 'cleanup',
    private: true,
    processor: true,
    apply: dropDuplicateMatches
  });

  return [cleanupRule];
}
// === Rules Builder ===
/**
* Main rules builder - JavaScript port of rebulk pattern matching
*/
// Import rule modules
/**
* Main rebulk builder function
* @param {Object} config - Configuration object
* @returns {Rebulk} Configured Rebulk instance
*/
function RebulkBuilder(config) {
function getConfig(name)