UNPKG

guessit-js

Version:

GuessIt JS (WASM) - Extract metadata from video filenames with WebAssembly performance

1,596 lines (1,383 loc) 60.1 kB
/** * GuessIt JS - Bundled Version * Generated at 2025-07-14T13:28:46.385Z */ // === Exceptions Module === /** * Exception classes for GuessIt JS */ /** * Exception raised when guessit fails to perform a guess because of an internal error */ class GuessItException extends Error { constructor(inputString, options, originalError = null) { const version = "1.0.0"; // TODO: Get from package.json const message = [ "An internal error has occurred in guessit-js.", "===================== Guessit Exception Report =====================", `version=${version}`, `string=${inputString}`, `options=${JSON.stringify(options)}`, "--------------------------------------------------------------------", originalError ? originalError.stack || originalError.message : "Unknown error", "--------------------------------------------------------------------", "Please report at https://github.com/guessit-io/guessit/issues.", "====================================================================" ].join('\n'); super(message); this.name = 'GuessItException'; this.inputString = inputString; this.options = options; this.originalError = originalError; // Maintains proper stack trace for where our error was thrown (only available on V8) if (Error.captureStackTrace) { Error.captureStackTrace(this, GuessItException); } } } /** * Exception related to configuration */ class ConfigurationException extends Error { constructor(message) { super(message); this.name = 'ConfigurationException'; if (Error.captureStackTrace) { Error.captureStackTrace(this, ConfigurationException); } } } // === Configuration Module === /** * Default configuration for GuessIt JS * Ported from guessit/config/options.json */ const defaultConfig = { "expected_title": [ "OSS 117", "This is Us" ], "allowed_countries": [ "au", "gb", "us" ], "allowed_languages": [ "ca", "cs", "de", "en", "es", "fr", "he", "hi", "hu", "it", "ja", "ko", "mul", "nl", "no", "pl", "pt", "ro", "ru", "sv", "te", "uk", "und" ], "advanced_config": { "common_words": [ "ca", "cat", "de", "he", "it", "no", "por", "rum", "se", "st", "sub" ], "groups": { "starting": "([{", "ending": ")]}" }, "audio_codec": { "audio_codec": { "MP3": { "patterns": ["MP3", "LAME"], "regex": ["LAME(?:\\d)+-?(?:\\d)+"] }, "MP2": { "patterns": ["MP2"] }, "Dolby Digital": { "patterns": ["Dolby", "DolbyDigital"], "regex": ["Dolby-Digital", "DD", "AC-?3D?"] }, "Dolby Atmos": { "patterns": ["Atmos"], "regex": ["Dolby-?Atmos"] }, "AAC": { "patterns": ["AAC"] }, "Dolby Digital Plus": { "patterns": ["DDP", "DD+"], "regex": ["E-?AC-?3"] }, "FLAC": { "patterns": ["Flac"] }, "DTS": { "patterns": ["DTS"] }, "DTS-HD": { "regex": ["DTS-?HD", "DTS(?=-?MA)"] }, "DTS:X": { "patterns": ["DTS:X", "DTS-X", "DTSX"] }, "Dolby TrueHD": { "regex": ["True-?HD"] }, "Opus": { "patterns": ["Opus"] }, "Vorbis": { "patterns": ["Vorbis"] }, "PCM": { "patterns": ["PCM"] }, "LPCM": { "patterns": ["LPCM"] } } }, "container": { "subtitles": ["srt", "idx", "sub", "ssa", "ass"], "info": ["nfo"], "videos": [ "3g2", "3gp", "3gp2", "asf", "avi", "divx", "flv", "iso", "m4v", "mk2", "mk3d", "mka", "mkv", "mov", "mp4", "mp4a", "mpeg", "mpg", "ogg", "ogm", "ogv", "qt", "ra", "ram", "rm", "ts", "m2ts", "vob", "wav", "webm", "wma", "wmv" ], "torrent": ["torrent"], "nzb": ["nzb"] }, "episodes": { "season_max_range": 100, "episode_max_range": 100, "max_range_gap": 1, "season_markers": ["s"], "season_ep_markers": ["x"], "disc_markers": ["d"], "episode_markers": ["xe", "ex", "ep", "e", "x"], "range_separators": ["-", "~", "to", "a"], "discrete_separators": ["+", "&", "and", "et"], "season_words": [ "season", "saison", "seizoen", "seasons", "saisons", "tem", "temp", "temporada", "temporadas", "stagione" ], "episode_words": [ "episode", "episodes", "eps", "ep", "episodio", "episodios", "capitulo", "capitulos" ], "of_words": ["of", "sur"], "all_words": ["All"] }, "language": { "synonyms": { "ell": ["gr", "greek"], "spa": ["esp", "español", "espanol"], "fra": ["français", "vf", "vff", "vfi", "vfq"], "swe": ["se"], "por_BR": ["po", "pb", "pob", "ptbr", "br", "brazilian"], "deu_CH": ["swissgerman", "swiss german"], "nld_BE": ["flemish"], "cat": ["català", "castellano", "espanol castellano", "español castellano"], "ces": ["cz"], "ukr": ["ua"], "zho": ["cn"], "jpn": ["jp"], "hrv": ["scr"], "mul": ["multi", "multiple", "dl"] } }, "screen_size": { "frame_rates": [ "23\\.976", "24(?:\\.0{1,3})?", "25(?:\\.0{1,3})?", "29\\.970", "30(?:\\.0{1,3})?", "48(?:\\.0{1,3})?", "50(?:\\.0{1,3})?", "60(?:\\.0{1,3})?", "120(?:\\.0{1,3})?" ], "min_ar": 1.333, "max_ar": 1.898, "interlaced": ["360", "480", "540", "576", "900", "1080"], "progressive": ["360", "480", "540", "576", "900", "1080", "368", "720", "1440", "2160", "4320"] }, "source": { "rip_prefix": "(?P<other>Rip)-?", "rip_suffix": "-?(?P<other>Rip)" }, "website": { "safe_tlds": ["com", "net", "org"], "safe_subdomains": ["www"], "safe_prefixes": ["co", "com", "net", "org"], "prefixes": ["from"] } } }; // === Options Module === /** * Options parsing and configuration management */ /** * Parse command line style options string or object * @param {string|Object|Array} options - Options to parse * @param {boolean} api - Whether this is for API use * @returns {Object} Parsed options object */ function parseOptions(options = null, api = false) { if (typeof options === 'string') { // Simple string parsing - in real implementation you'd want a proper CLI parser const args = options.split(/\s+/).filter(arg => arg.length > 0); return parseArgs(args); } else if (options === null || options === undefined) { return api ? {} : {}; } else if (Array.isArray(options)) { return parseArgs(options); } else if (typeof options === 'object') { return { ...options }; } return {}; } /** * Simple argument parser for basic CLI-style options * @param {Array} args - Array of argument strings * @returns {Object} Parsed options */ function parseArgs(args) { const options = {}; for (let i = 0; i < args.length; i++) { const arg = args[i]; if (arg.startsWith('--')) { const key = arg.slice(2).replace(/-/g, '_'); if (i + 1 < args.length && !args[i + 1].startsWith('-')) { options[key] = args[++i]; } else { options[key] = true; } } else if (arg.startsWith('-') && arg.length === 2) { const key = getShortOptionKey(arg[1]); if (key) { if (i + 1 < args.length && !args[i + 1].startsWith('-')) { options[key] = args[++i]; } else { options[key] = true; } } } else if (!arg.startsWith('-')) { // Positional argument (filename) if (!options.filename) { options.filename = [arg]; } else { options.filename.push(arg); } } } return options; } /** * Map short options to their full names * @param {string} shortOpt - Single character option * @returns {string} Full option name */ function getShortOptionKey(shortOpt) { const mapping = { 't': 'type', 'n': 'name_only', 'Y': 'date_year_first', 'D': 'date_day_first', 'L': 'allowed_languages', 'C': 'allowed_countries', 'E': 'episode_prefer_number', 'T': 'expected_title', 'G': 'expected_group', 'f': 'input_file', 'v': 'verbose', 'P': 'show_property', 'a': 'advanced', 's': 'single_value', 'l': 'enforce_list', 'j': 'json', 'y': 'yaml', 'i': 'output_input_string', 'c': 'config', 'p': 'properties', 'V': 'values' }; return mapping[shortOpt]; } /** * Load configuration from various sources * @param {Object} options - Options that may specify config sources * @returns {Object} Merged configuration */ function loadConfig(options = {}) { const configurations = []; // Load default configuration unless disabled if (!options.no_default_config) { configurations.push(defaultConfig); } // In a real implementation, you'd load from: // - ~/.guessit/options.json // - ~/.config/guessit/options.json // - Custom config files from options.config let config = {}; if (configurations.length > 0) { config = mergeOptions(...configurations); } // Ensure advanced_config is always present if (!config.advanced_config && defaultConfig.advanced_config) { config.advanced_config = defaultConfig.advanced_config; } return config; } /** * Merge multiple options objects * @param {...Object} optionsArray - Multiple options objects to merge * @returns {Object} Merged options */ function mergeOptions(...optionsArray) { let merged = {}; for (const options of optionsArray) { if (!options) continue; const pristine = options.pristine; if (pristine === true) { merged = {}; } else if (Array.isArray(pristine)) { for (const key of pristine) { delete merged[key]; } } for (const [key, value] of Object.entries(options)) { mergeOptionValue(key, value, merged); } } return merged; } /** * Merge a single option value into the merged object * @param {string} option - Option key * @param {*} value - Option value * @param {Object} merged - Target object to merge into */ function mergeOptionValue(option, value, merged) { if (value !== null && value !== undefined && option !== 'pristine') { if (merged[option] && Array.isArray(merged[option])) { const values = Array.isArray(value) ? value : [value]; for (const val of values) { if (val !== null && val !== undefined && !merged[option].includes(val)) { merged[option].push(val); } } } else if (merged[option] && typeof merged[option] === 'object' && typeof value === 'object') { merged[option] = mergeOptions(merged[option], value); } else if (Array.isArray(value)) { merged[option] = [...value]; } else { merged[option] = value; } } } // === Rebulk Engine === /** * JavaScript implementation of Rebulk pattern matching engine * Simplified version focusing on the core pattern matching functionality */ /** * Represents a single match found in the input string */ class Match { constructor(start, end, value = null, name = null, options = {}) { this.start = start; this.end = end; this.value = value !== null ? value : ''; this.name = name; this.tags = options.tags || []; this.private = options.private || false; this.children = options.children || []; this.parent = options.parent || null; this.raw = options.raw || ''; this.initiator = options.initiator || null; this.formatter = options.formatter || null; this.validator = options.validator || null; } /** * Get the span (start, end) of this match */ get span() { return [this.start, this.end]; } /** * Get the length of this match */ get length() { return this.end - this.start; } /** * Apply formatter to the match value */ format() { if (this.formatter && typeof this.formatter === 'function') { try { this.value = this.formatter(this.value); } catch (error) { // If formatting fails, keep original value console.warn('Formatting failed for match:', this, error); } } } /** * Validate the match */ validate() { if (this.validator && typeof this.validator === 'function') { try { return this.validator(this); } catch (error) { console.warn('Validation failed for match:', this, error); return false; } } return true; } /** * Split this match using separators */ split(separators, valueFunction = null) { const parts = []; let currentStart = this.start; for (let i = this.start; i < this.end; i++) { const char = this.raw[i - this.start]; if (separators.includes(char)) { if (currentStart < i) { const part = new Match(currentStart, i); part.raw = this.raw.slice(currentStart - this.start, i - this.start); part.value = valueFunction ? valueFunction(part) : part.raw; parts.push(part); } currentStart = i + 1; } } // Add final part if (currentStart < this.end) { const part = new Match(currentStart, this.end); part.raw = this.raw.slice(currentStart - this.start); part.value = valueFunction ? valueFunction(part) : part.raw; parts.push(part); } return parts; } } /** * Collection of matches with utility methods */ class Matches { constructor(inputString = '') { this.inputString = inputString; this.matches = []; this.markers = new Markers(); } /** * Add a match to the collection */ add(match) { if (match instanceof Match) { this.matches.push(match); } } /** * Get matches by name */ named(name, predicate = null) { const filtered = this.matches.filter(match => match.name === name); return predicate ? filtered.filter(predicate) : filtered; } /** * Get matches with specific tags */ tagged(tag, predicate = null) { const filtered = this.matches.filter(match => match.tags.includes(tag)); return predicate ? filtered.filter(predicate) : filtered; } /** * Get matches in a specific range */ range(start, end, predicate = null, index = null) { let filtered = this.matches.filter(match => match.start >= start && match.end <= end ); if (predicate) { filtered = filtered.filter(predicate); } if (index !== null) { return filtered[index] || null; } return filtered; } /** * Get previous match */ previous(match, predicate = null, index = 0) { let candidates = this.matches.filter(m => m.end <= match.start); if (predicate) { candidates = candidates.filter(predicate); } candidates.sort((a, b) => b.end - a.end); // Sort by end position descending return candidates[index] || null; } /** * Get next match */ next(match, predicate = null, index = 0) { let candidates = this.matches.filter(m => m.start >= match.end); if (predicate) { candidates = candidates.filter(predicate); } candidates.sort((a, b) => a.start - b.start); // Sort by start position ascending return candidates[index] || null; } /** * Find holes (unmatched parts) in the input string */ holes(start, end, options = {}) { const holes = []; const rangeMatches = this.range(start, end).sort((a, b) => a.start - b.start); let currentPos = start; for (const match of rangeMatches) { if (match.start > currentPos) { const hole = new Match(currentPos, match.start); hole.raw = this.inputString.slice(currentPos, match.start); hole.value = hole.raw; holes.push(hole); } currentPos = Math.max(currentPos, match.end); } // Final hole if (currentPos < end) { const hole = new Match(currentPos, end); hole.raw = this.inputString.slice(currentPos, end); hole.value = hole.raw; holes.push(hole); } return holes; } /** * Convert matches to dictionary format */ toDict(advanced = false, singleValue = false, enforceList = false) { const result = {}; const propertyGroups = {}; // Group matches by property name for (const match of this.matches) { if (match.private) continue; if (!propertyGroups[match.name]) { propertyGroups[match.name] = []; } propertyGroups[match.name].push(match.value); } // Process each property for (const [property, values] of Object.entries(propertyGroups)) { // Remove duplicates while preserving order const uniqueValues = [...new Set(values)]; if (singleValue && uniqueValues.length > 0) { result[property] = uniqueValues[0]; } else if (enforceList || uniqueValues.length > 1) { result[property] = uniqueValues; } else if (uniqueValues.length === 1) { result[property] = uniqueValues[0]; } } // Post-process: flatten season_episode objects into separate season and episode properties if (result.season_episode && typeof result.season_episode === 'object') { const seasonEpisode = result.season_episode; if (seasonEpisode.season !== undefined) { result.season = seasonEpisode.season; } if (seasonEpisode.episode !== undefined) { result.episode = seasonEpisode.episode; } // Keep the season_episode object as well for backwards compatibility } return result; } } /** * Markers for structural elements like groups and paths */ class Markers { constructor() { this.markerList = []; } /** * Add a marker */ add(marker) { this.markerList.push(marker); } /** * Get markers by name */ named(name) { return this.markerList.filter(marker => marker.name === name); } /** * Get marker at specific match position */ atMatch(match, predicate = null, index = 0) { let candidates = this.markerList.filter(marker => marker.start <= match.start && marker.end >= match.end ); if (predicate) { candidates = candidates.filter(predicate); } return candidates[index] || null; } /** * Get markers starting at position */ starting(position, predicate = null) { let candidates = this.markerList.filter(marker => marker.start === position); if (predicate) { candidates = candidates.filter(predicate); } return candidates; } } /** * Rule for pattern matching */ class Rule { constructor(pattern, options = {}) { this.pattern = pattern; this.name = options.name || null; this.value = options.value || null; this.tags = options.tags || []; this.formatter = options.formatter || null; this.validator = options.validator || null; this.private = options.private || false; this.children = options.children || false; this.conflictSolver = options.conflictSolver || null; } /** * Apply this rule to input string */ apply(inputString, matches, options = {}) { let regex; if (this.pattern instanceof RegExp) { regex = this.pattern; } else if (typeof this.pattern === 'string') { // Handle case insensitive matching const flags = options.ignoreCase ? 'gi' : 'g'; regex = new RegExp(this.pattern, flags); } else { return []; } // Debug logging (remove in production) const isDebugging = (typeof process !== 'undefined' && process.env && process.env.DEBUG_RULES === 'true') || false; if (isDebugging && this.name === 'container') { console.log(`[DEBUG] Applying ${this.name} rule with pattern ${regex} to "${inputString}"`); } const newMatches = []; let match; let lastIndex = 0; let iterations = 0; const maxIterations = 1000; // Prevent infinite loops while ((match = regex.exec(inputString)) !== null && iterations < maxIterations) { iterations++; // Prevent infinite loop on zero-length matches if (match.index === lastIndex && match[0].length === 0) { regex.lastIndex = lastIndex + 1; continue; } lastIndex = match.index + match[0].length; const matchObj = new Match( match.index, match.index + match[0].length, this.value || match[0], this.name, { tags: [...this.tags], private: this.private, raw: match[0], formatter: this.formatter, validator: this.validator } ); // Debug logging if (isDebugging && this.name === 'container' && match) { console.log(`[DEBUG] Found match: ${JSON.stringify(match)} -> matchObj: ${JSON.stringify({start: matchObj.start, end: matchObj.end, name: matchObj.name, value: matchObj.value})}`); } // Apply formatting matchObj.format(); // Validate const isValid = matchObj.validate(); if (isDebugging && this.name === 'container' && match) { console.log(`[DEBUG] Validation result: ${isValid}`); } if (isValid) { newMatches.push(matchObj); if (isDebugging && this.name === 'container') { console.log(`[DEBUG] Added match to newMatches, total: ${newMatches.length}`); } } // If regex doesn't have global flag, break after first match if (!regex.global) { break; } } if (isDebugging && this.name === 'container') { console.log(`[DEBUG] Returning ${newMatches.length} matches from ${this.name} rule`); } return newMatches; } } /** * Main Rebulk class - coordinates pattern matching */ class Rebulk { constructor(options = {}) { this.rules = []; this.options = { ignoreCase: options.ignoreCase || false, ...options }; } /** * Add rules to this Rebulk instance */ addRules(rules) { if (Array.isArray(rules)) { this.rules.push(...rules); } else if (rules) { this.rules.push(rules); } } /** * Add a string pattern rule */ string(pattern, options = {}) { const rule = new Rule(pattern, options); this.rules.push(rule); return this; } /** * Add a regex pattern rule */ regex(pattern, options = {}) { const rule = new Rule(new RegExp(pattern, this.options.ignoreCase ? 'gi' : 'g'), options); this.rules.push(rule); return this; } /** * Apply all rules to input string and return matches */ matches(inputString, options = {}) { const matches = new Matches(inputString); const mergedOptions = { ...this.options, ...options }; // Add path markers (simplified) this.addPathMarkers(matches, inputString); // Apply all rules for (const rule of this.rules) { const ruleMatches = rule.apply(inputString, matches, mergedOptions); const isDebugging = (typeof process !== 'undefined' && process.env && process.env.DEBUG_RULES === 'true') || false; if (isDebugging && rule.name === 'container' && ruleMatches.length > 0) { console.log(`[DEBUG] Rule ${rule.name} returned ${ruleMatches.length} matches`); } for (const match of ruleMatches) { matches.add(match); if (isDebugging && rule.name === 'container') { console.log(`[DEBUG] Added match to collection, total matches: ${matches.matches.length}`); } } } // Post-process matches (remove conflicts, apply final formatting, etc.) this.postProcessMatches(matches); return matches; } /** * Add basic path markers for file structure */ addPathMarkers(matches, inputString) { // Split by common path separators and file extensions const pathSeparators = /[\/\\]/g; const parts = inputString.split(pathSeparators); let currentPos = 0; for (let i = 0; i < parts.length; i++) { const part = parts[i]; if (part.length > 0) { const marker = new Match(currentPos, currentPos + part.length); marker.name = 'path'; marker.private = true; matches.markers.add(marker); } currentPos += part.length + 1; // +1 for separator } } /** * Post-process matches to resolve conflicts and clean up */ postProcessMatches(matches) { const isDebugging = (typeof process !== 'undefined' && process.env && process.env.DEBUG_RULES === 'true') || false; if (isDebugging) { console.log(`[DEBUG] Post-processing ${matches.matches.length} matches`); matches.matches.forEach((match, i) => { console.log(`[DEBUG] ${i}: ${match.start}-${match.end} "${match.name}": "${match.value}" (private: ${match.private})`); }); } // Separate private and non-private matches const privateMatches = matches.matches.filter(m => m.private); const publicMatches = matches.matches.filter(m => !m.private); if (isDebugging) { console.log(`[DEBUG] Separated into ${privateMatches.length} private and ${publicMatches.length} public matches`); } // Only resolve conflicts among non-private matches // Sort matches by start position publicMatches.sort((a, b) => a.start - b.start || (b.end - b.start) - (a.end - a.start)); // Smart conflict resolution - prioritize specific matches over generic ones const getMatchPriority = (match) => { // Higher number = higher priority const priorities = { 'container': 100, 'video_codec': 90, 'audio_codec': 90, 'source': 80, 'screen_size': 80, 'year': 70, 'episode': 60, 'season': 60, 'title': 10, // Title should have low priority as it's often very broad 'cleanup': 5, 'path': 1 }; return priorities[match.name] || 50; // Default priority for unknown types }; const filtered = []; for (const match of publicMatches) { const overlapping = filtered.filter(existing => !(match.end <= existing.start || match.start >= existing.end) ); if (overlapping.length === 0) { filtered.push(match); if (isDebugging) { console.log(`[DEBUG] Keeping non-overlapping match: ${match.name} (${match.start}-${match.end})`); } } else { if (isDebugging) { console.log(`[DEBUG] Found ${overlapping.length} overlapping matches for ${match.name} (${match.start}-${match.end})`); } const currentPriority = getMatchPriority(match); let shouldReplace = false; let toReplace = []; for (const existing of overlapping) { const existingPriority = getMatchPriority(existing); if (currentPriority > existingPriority) { shouldReplace = true; toReplace.push(existing); } else if (currentPriority === existingPriority && match.length > existing.length) { // Same priority, prefer longer match shouldReplace = true; toReplace.push(existing); } } if (shouldReplace) { // Remove all overlapping matches with lower priority for (const existing of toReplace) { const index = filtered.indexOf(existing); if (index !== -1) { filtered.splice(index, 1); } } filtered.push(match); if (isDebugging) { console.log(`[DEBUG] Replaced ${toReplace.length} lower priority matches with ${match.name} (priority: ${currentPriority})`); } } else { if (isDebugging) { console.log(`[DEBUG] Discarding ${match.name} (priority: ${currentPriority}) in favor of higher priority matches`); } } } } // Combine filtered public matches with all private matches const finalMatches = [...filtered, ...privateMatches]; if (isDebugging) { console.log(`[DEBUG] After post-processing: ${finalMatches.length} matches (${filtered.length} public + ${privateMatches.length} private)`); finalMatches.forEach((match, i) => { console.log(`[DEBUG] ${i}: ${match.start}-${match.end} "${match.name}": "${match.value}" (private: ${match.private})`); }); } matches.matches = finalMatches; } /** * Introspect the rebulk to get available properties */ introspect(options = {}) { const properties = {}; for (const rule of this.rules) { if (rule.name && !rule.private) { if (!properties[rule.name]) { properties[rule.name] = new Set(); } if (rule.value) { properties[rule.name].add(rule.value); } } } return { properties }; } } // === Rule Modules === /** * Episode and season detection rules */ function episodeRules(config) { const rules = []; const seasonMarkers = config.season_markers || ['s']; const episodeMarkers = config.episode_markers || ['e', 'ep', 'x']; const rangeSeparators = config.range_separators || ['-', '~', 'to']; const discreteSeparators = config.discrete_separators || ['+', '&', 'and']; // SxxExx patterns (S01E02, S01E02, 1x02, etc.) rules.push(new Rule( /([Ss])(\d{1,2})[\s\-\.]*([Ee])(\d{1,3})/g, { name: 'season_episode', formatter: (value) => { const match = value.match(/([Ss])(\d{1,2})[\s\-\.]*([Ee])(\d{1,3})/); if (match) { return { season: parseInt(match[2], 10), episode: parseInt(match[4], 10) }; } return value; }, tags: ['SxxExx'] } )); // NxNN patterns (1x02, 2x10, etc.) rules.push(new Rule( /(\d{1,2})x(\d{1,3})/gi, { name: 'season_episode', formatter: (value) => { const match = value.match(/(\d{1,2})x(\d{1,3})/i); if (match) { return { season: parseInt(match[1], 10), episode: parseInt(match[2], 10) }; } return value; }, tags: ['NxNN'] } )); // Season only patterns (S02, etc.) rules.push(new Rule( /[Ss](\d{1,2})/g, { name: 'season', formatter: (value) => { const match = value.match(/(\d+)/); return match ? parseInt(match[1], 10) : value; }, tags: ['season-only'] } )); // Episode only patterns (E02, etc.) rules.push(new Rule( /[Ee](\d{1,3})/g, { name: 'episode', formatter: (value) => { const match = value.match(/(\d+)/); return match ? parseInt(match[1], 10) : value; }, tags: ['episode-only'] } )); // Episode words (Episode 5, Episodio 3, etc.) rules.push(new Rule( /\b(?:episode|episodes)\s*(\d{1,3})\b/gi, { name: 'episode', formatter: (value) => { const match = value.match(/(\d+)/); return match ? parseInt(match[1], 10) : value; }, tags: ['episode-word'] } )); // Season words (Season 1, Temporada 2, etc.) rules.push(new Rule( /\b(?:season|seasons)\s*(\d{1,2})\b/gi, { name: 'season', formatter: (value) => { const match = value.match(/(\d+)/); return match ? parseInt(match[1], 10) : value; }, tags: ['season-word'] } )); // Episode details const episodeDetails = ['Special', 'Pilot', 'Unaired', 'Final']; for (const detail of episodeDetails) { rules.push(new Rule( detail, { name: 'episode_details', value: detail, tags: ['episode-detail'] } )); } return rules; } /** * Title detection rules */ function titleRules(config) { const rules = []; // Handle unicode and bracketed titles: [unicode].Title.year.etc rules.push(new Rule( /^(?:\[[^\]]*\]\.?)([a-zA-Z\u00C0-\u017F\u4e00-\u9fff][a-zA-Z0-9\u00C0-\u017F\u4e00-\u9fff\s\-\.\'\:]*?)(?=[\.\s\-](19|20)\d{2}|[\.\s\-]\d{3,4}p|[\.\s\-](?:bluray|hdtv|web|dvd|cam|x264|x265|h264|h265|xvid|divx|dts|aac|mkv|avi|mp4|french|english|german|spanish|italian))/gi, { name: 'title', formatter: (value) => { return value .replace(/[\.\-_]/g, ' ') .replace(/\s+/g, ' ') .trim(); }, validator: (match) => { const value = match.value.trim(); return value.length >= 2 && !/^\d+$/.test(value); }, tags: ['title-unicode'] } )); // Standard title pattern for regular filenames rules.push(new Rule( /^([a-zA-Z\u00C0-\u017F\u4e00-\u9fff][a-zA-Z0-9\u00C0-\u017F\u4e00-\u9fff\s\-\.\'\:]*?)(?=[\.\s\-](19|20)\d{2}|[\.\s\-]\d{3,4}p|[\.\s\-](?:bluray|hdtv|web|dvd|cam|x264|x265|h264|h265|xvid|divx|dts|aac|mkv|avi|mp4|french|english|german|spanish|italian))/gi, { name: 'title', formatter: (value) => { return value .replace(/[\.\-_]/g, ' ') .replace(/\s+/g, ' ') .trim(); }, validator: (match) => { const value = match.value.trim(); return value.length >= 3 && !/^\d+$/.test(value); }, tags: ['title'] } )); // Fallback: extract word sequences from anywhere in the filename rules.push(new Rule( /([a-zA-Z\u00C0-\u017F\u4e00-\u9fff][a-zA-Z0-9\u00C0-\u017F\u4e00-\u9fff\s]+[a-zA-Z0-9\u00C0-\u017F\u4e00-\u9fff])/g, { name: 'title', formatter: (value) => { return value .replace(/[\.\-_]/g, ' ') .replace(/\s+/g, ' ') .trim(); }, validator: (match) => { const value = match.value.trim(); // Avoid common technical terms const skipTerms = ['bluray', 'hdtv', 'web', 'dvd', 'x264', 'x265', 'h264', 'h265', 'xvid', 'divx', 'aac', 'dts', 'french', 'english']; return value.length >= 4 && !/^\d+$/.test(value) && !skipTerms.includes(value.toLowerCase()); }, tags: ['title-fallback'], private: false } )); return rules; } /** * Video codec detection rules */ function videoCodecRules(config) { const rules = []; const videoCodecs = { 'H.264': ['h264', 'h.264', 'x264', 'avc'], 'H.265': ['h265', 'h.265', 'x265', 'hevc'], 'XviD': ['xvid'], 'DivX': ['divx'], 'VP9': ['vp9'], 'AV1': ['av1'], 'MPEG-2': ['mpeg2', 'mpeg-2'], 'VC-1': ['vc1', 'vc-1'], 'WMV': ['wmv'] }; for (const [codecName, patterns] of Object.entries(videoCodecs)) { for (const pattern of patterns) { rules.push(new Rule( new RegExp(`\\b${pattern}\\b`, 'i'), { name: 'video_codec', value: codecName, tags: ['video-codec'] } )); } } return rules; } /** * Screen size/resolution detection rules */ function screenSizeRules(config) { const rules = []; // Common resolution patterns const resolutions = { '240p': ['240p'], '360p': ['360p'], '480p': ['480p', 'sd'], '720p': ['720p', 'hd'], '1080p': ['1080p', '1080i', 'fhd', 'fullhd', 'full hd'], '1440p': ['1440p', '2k'], '2160p': ['2160p', '4k', 'uhd', 'ultra hd'], '4320p': ['4320p', '8k'] }; for (const [size, patterns] of Object.entries(resolutions)) { for (const pattern of patterns) { rules.push(new Rule( new RegExp(`\\b${pattern}\\b`, 'i'), { name: 'screen_size', value: size, tags: ['resolution'] } )); } } // Width x Height patterns (1920x1080, 1280x720, etc.) rules.push(new Rule( /(\d{3,4})x(\d{3,4})/gi, { name: 'screen_size', formatter: (value) => { const match = value.match(/(\d{3,4})x(\d{3,4})/i); if (match) { const width = parseInt(match[1], 10); const height = parseInt(match[2], 10); // Map common resolutions if (width === 1920 && height === 1080) return '1080p'; if (width === 1280 && height === 720) return '720p'; if (width === 3840 && height === 2160) return '2160p'; if (width === 2560 && height === 1440) return '1440p'; return `${width}x${height}`; } return value; }, tags: ['resolution', 'dimensions'] } )); return rules; } /** * Container/file extension detection rules */ function containerRules(config) { const rules = []; const containers = { videos: config.videos || [ '3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'iso', 'm4v', 'mk2', 'mk3d', 'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm', 'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'm2ts', 'vob', 'wav', 'webm', 'wma', 'wmv' ], subtitles: config.subtitles || ['srt', 'idx', 'sub', 'ssa', 'ass'], info: config.info || ['nfo'], torrent: config.torrent || ['torrent'], nzb: config.nzb || ['nzb'] }; for (const [type, extensions] of Object.entries(containers)) { for (const ext of extensions) { rules.push(new Rule( new RegExp(`\\.${ext}$`, 'i'), { name: 'container', value: ext.toLowerCase(), tags: ['container', type] } )); } } return rules; } /** * Source detection rules (BluRay, HDTV, WEB, etc.) */ function sourceRules(config) { const rules = []; const sources = { 'BluRay': ['bluray'], 'Blu-ray': ['blu-ray', 'bdrip', 'brrip'], 'HD-DVD': ['hddvd', 'hd-dvd'], 'HDTV': ['hdtv'], 'WEB': ['web', 'webrip', 'web-dl', 'webdl'], 'DVD': ['dvd', 'dvdrip'], 'CAM': ['cam', 'camrip'], 'Telesync': ['ts', 'telesync'], 'Telecine': ['tc', 'telecine'], 'Screener': ['scr', 'screener'], 'VHS': ['vhs'] }; for (const [sourceName, patterns] of Object.entries(sources)) { for (const pattern of patterns) { rules.push(new Rule( new RegExp(`\\b${pattern}\\b`, 'i'), { name: 'source', value: sourceName, tags: ['source'] } )); } } return rules; } /** * Audio codec detection rules */ function audioCodecRules(config) { const rules = []; const audioCodecs = { 'DTS': ['dts'], 'DTS-HD': ['dts-hd', 'dtshd'], 'DTS:X': ['dts:x', 'dts-x', 'dtsx'], 'Dolby Digital': ['dd', 'ac3', 'dolby'], 'Dolby Digital Plus': ['dd+', 'ddp', 'e-ac3'], 'Dolby Atmos': ['atmos'], 'Dolby TrueHD': ['truehd', 'true-hd'], 'AAC': ['aac'], 'MP3': ['mp3'], 'FLAC': ['flac'], 'PCM': ['pcm'], 'LPCM': ['lpcm'] }; for (const [codecName, patterns] of Object.entries(audioCodecs)) { for (const pattern of patterns) { rules.push(new Rule( new RegExp(`\\b${pattern}\\b`, 'i'), { name: 'audio_codec', value: codecName, tags: ['audio-codec'] } )); } } return rules; } /** * Stub implementations for remaining property modules */ // Stub implementations that return empty rules arrays function websiteRules(config) { return []; } function dateRules(config) { const rules = []; // Year detection - matches 4-digit years between 1900-2099 rules.push(new Rule( /\b(19[0-9]{2}|20[0-9]{2})\b/g, { name: 'year', formatter: (value) => parseInt(value, 10), validator: (match) => { const year = parseInt(match.value, 10); return year >= 1900 && year <= 2099; }, tags: ['date'] } )); return rules; } function episodeTitleRules(config) { return []; } function languageRules(config, commonWords) { return []; } function countryRules(config, commonWords) { return []; } function releaseGroupRules(config) { const rules = []; // Release groups before file extensions: match just the group name between dash and dot rules.push(new Rule( /(?<=-)[A-Z0-9]+(?=\.(?:mkv|avi|mp4|mov|wmv|flv|webm|m4v|3gp|ts|m2ts|vob|iso|img|bin|mdf|nrg|cue|rar|zip|7z|tar|gz|bz2|xz)$)/gi, { name: 'release_group', validator: (match) => { const group = match.value; const excludeWords = ['REPACK', 'PROPER', 'REAL', 'FINAL', 'COMPLETE', 'UNCUT', 'EXTENDED', 'DIRECTORS', 'CUT']; return group.length >= 2 && group.length <= 20 && !excludeWords.includes(group.toUpperCase()); }, tags: ['release-group'] } )); // Bracketed release groups [GROUP] or (GROUP) - but exclude years rules.push(new Rule( /[\[\(]([A-Z0-9\-_.]+)[\]\)]/gi, { name: 'release_group', formatter: (value) => { const match = value.match(/[\[\(]([A-Z0-9\-_.]+)[\]\)]/i); return match ? match[1] : value; }, validator: (match) => { const group = match.value; // Exclude years (1900-2099) and short numeric sequences if (/^(19|20)\d{2}$/.test(group)) return false; if (/^\d{1,4}$/.test(group)) return false; return group.length >= 2 && group.length <= 20; }, tags: ['release-group-bracket'] } )); return rules; } function streamingServiceRules(config) { return []; } function otherRules(config) { return []; } function sizeRules(config) { return []; } function bitRateRules(config) { return []; } function editionRules(config) { return []; } function cdRules(config) { return []; } function bonusRules(config) { return []; } function filmRules(config) { return []; } function partRules(config) { return []; } function crcRules(config) { return []; } function mimetypeRules(config) { return []; } function typeRules(config) { return []; } /** * Path structure markers */ function pathRules(config) { const rules = []; // Basic path structure detection rules.push(new Rule( /([^\/\\]+)/g, { name: 'path', private: true, tags: ['path-segment'] } )); return rules; } /** * Group markers for content in brackets, parentheses, etc. */ function groupRules(config) { const rules = []; const starting = config.starting || '([{'; const ending = config.ending || ')]}'; // Create pairs of opening/closing characters const pairs = []; for (let i = 0; i < Math.min(starting.length, ending.length); i++) { pairs.push([starting[i], ending[i]]); } for (const [open, close] of pairs) { rules.push(new Rule( new RegExp(`\\${open}([^\\${open}\\${close}]+)\\${close}`, 'g'), { name: 'group', private: true, tags: ['group-marker'] } )); } return rules; } /** * Post-processing rules */ function processorsRules(config) { const rules = []; // Basic cleanup and validation rules rules.push(new Rule( /.*/, { name: 'cleanup', private: true, processor: true, apply: (matches) => { // Remove duplicate matches const seen = new Set(); matches.matches = matches.matches.filter(match => { const key = `${match.name}-${match.start}-${match.end}`; if (seen.has(key)) { return false; } seen.add(key); return true; }); return matches; } } )); return rules; } // === Rules Builder === /** * Main rules builder - JavaScript port of rebulk pattern matching */ // Import rule modules /** * Main rebulk builder function * @param {Object} config - Configuration object * @returns {Rebulk} Configured Rebulk instance */ function RebulkBuilder(config) { function getConfig(name)