UNPKG

@ctrl/video-filename-parser

Version:
138 lines (137 loc) 6.3 kB
import { parseTitleAndYear } from './title/index.js';

/**
 * Enum-like map of language names; every key maps to the identical string.
 *
 * Declared with `var`, as in the published build, so the hoisting semantics
 * of the exported binding stay exactly as they were.
 */
export var Language = {
  English: 'English',
  French: 'French',
  Spanish: 'Spanish',
  German: 'German',
  Italian: 'Italian',
  Danish: 'Danish',
  Dutch: 'Dutch',
  Japanese: 'Japanese',
  Cantonese: 'Cantonese',
  Mandarin: 'Mandarin',
  Russian: 'Russian',
  Polish: 'Polish',
  Vietnamese: 'Vietnamese',
  Nordic: 'Nordic',
  Swedish: 'Swedish',
  Norwegian: 'Norwegian',
  Finnish: 'Finnish',
  Turkish: 'Turkish',
  Portuguese: 'Portuguese',
  Flemish: 'Flemish',
  Greek: 'Greek',
  Korean: 'Korean',
  Hungarian: 'Hungarian',
  Persian: 'Persian',
  Bengali: 'Bengali',
  Bulgarian: 'Bulgarian',
  Brazilian: 'Brazilian',
  Hebrew: 'Hebrew',
  Czech: 'Czech',
  Ukrainian: 'Ukrainian',
  Catalan: 'Catalan',
  Chinese: 'Chinese',
  Thai: 'Thai',
  Hindi: 'Hindi',
  Tamil: 'Tamil',
  Arabic: 'Arabic',
  Estonian: 'Estonian',
  Icelandic: 'Icelandic',
  Latvian: 'Latvian',
  Lithuanian: 'Lithuanian',
  Romanian: 'Romanian',
  Slovak: 'Slovak',
  Serbian: 'Serbian',
};

/**
 * Lower-case tokens that identify each language.
 *
 * The order of the rules is significant: `parseLanguage` reports detected
 * languages in this order.
 */
const languageAliasRules = [
  { language: Language.English, aliases: ['english', 'eng', 'en'] },
  { language: Language.Spanish, aliases: ['spanish'] },
  { language: Language.Danish, aliases: ['dk', 'dan', 'danish'] },
  { language: Language.Japanese, aliases: ['japanese'] },
  { language: Language.Cantonese, aliases: ['cantonese'] },
  { language: Language.Mandarin, aliases: ['mandarin'] },
  { language: Language.Korean, aliases: ['korean'] },
  { language: Language.Vietnamese, aliases: ['vietnamese'] },
  { language: Language.Swedish, aliases: ['se', 'swe', 'swedish'] },
  { language: Language.Finnish, aliases: ['fi', 'finnish'] },
  { language: Language.Turkish, aliases: ['turkish'] },
  { language: Language.Portuguese, aliases: ['portuguese'] },
  { language: Language.Hebrew, aliases: ['hebrew', 'hebdub'] },
  { language: Language.Czech, aliases: ['cz', 'czech'] },
  { language: Language.Ukrainian, aliases: ['ukr', 'ukrainian'] },
  { language: Language.Catalan, aliases: ['catalan'] },
  { language: Language.Estonian, aliases: ['estonian'] },
  { language: Language.Icelandic, aliases: ['ice', 'icelandic'] },
  { language: Language.Chinese, aliases: ['chi', 'chinese'] },
  { language: Language.Thai, aliases: ['thai'] },
  { language: Language.Italian, aliases: ['ita', 'italian'] },
  { language: Language.German, aliases: ['german', 'videomann'] },
  { language: Language.Flemish, aliases: ['flemish'] },
  { language: Language.Greek, aliases: ['greek'] },
  {
    language: Language.French,
    aliases: ['fr', 'french', 'vostfr', 'vo', 'vff', 'vfq', 'vf2', 'truefrench', 'subfrench'],
  },
  { language: Language.Russian, aliases: ['rus', 'russian'] },
  { language: Language.Norwegian, aliases: ['no', 'norwegian'] },
  { language: Language.Hungarian, aliases: ['hun', 'hundub', 'hungarian'] },
  { language: Language.Polish, aliases: ['pl', 'pldub', 'polish'] },
  { language: Language.Dutch, aliases: ['nl', 'dutch'] },
  { language: Language.Hindi, aliases: ['hin', 'hindi'] },
  { language: Language.Tamil, aliases: ['tam', 'tamil'] },
  { language: Language.Arabic, aliases: ['arabic'] },
  { language: Language.Latvian, aliases: ['latvian'] },
  { language: Language.Lithuanian, aliases: ['lithuanian'] },
  { language: Language.Romanian, aliases: ['ro', 'romanian', 'rodubbed'] },
  { language: Language.Slovak, aliases: ['sk', 'slovak'] },
  { language: Language.Brazilian, aliases: ['brazilian'] },
  { language: Language.Persian, aliases: ['persian'] },
  { language: Language.Bengali, aliases: ['bengali'] },
  { language: Language.Bulgarian, aliases: ['bulgarian'] },
  { language: Language.Serbian, aliases: ['serbian'] },
  { language: Language.Nordic, aliases: ['nordic', 'nordicsubs'] },
];

// A token is a maximal run of ASCII letters and digits; anything else separates tokens.
const tokenExp = /[a-z0-9]+/gi;

// Tokens that mark a release as carrying more than one language.
const multiTokens = new Set(['multi', 'dual', 'dl']);

/**
 * Detects the languages named in a release title.
 *
 * The tokens of the parsed title are removed first, so only the remaining
 * tokens are compared against the language aliases.
 *
 * @param {string} title - Full release title.
 * @param {string} [parsedTitle] - Title portion to ignore. When `null` or
 *   `undefined`, it is taken from `parseTitleAndYear(title).title`.
 * @returns {string[]} Unique `Language` values in alias-rule order. English is
 *   added when a multi-language token is present or when nothing matched.
 */
export function parseLanguage(title, parsedTitle) {
  const ignoredTitle = parsedTitle ?? parseTitleAndYear(title).title;
  const remainingTokens = removeParsedTitleTokens(tokenize(title), tokenize(ignoredTitle));
  const tokenLookup = new Set(remainingTokens);

  // A Set keeps first-insertion order, which gives de-duplication for free.
  const detected = new Set();
  for (const { language, aliases } of languageAliasRules) {
    if (aliases.some(alias => tokenLookup.has(alias))) {
      detected.add(language);
    }
  }

  if (hasMultiLanguageToken(remainingTokens) || detected.size === 0) {
    detected.add(Language.English);
  }

  return [...detected];
}

/**
 * Splits a string into lower-cased alphanumeric tokens.
 *
 * @param {string} title - Text to split.
 * @returns {string[]} Tokens in order of appearance.
 */
function tokenize(title) {
  const tokens = [];
  for (const [word] of title.matchAll(tokenExp)) {
    tokens.push(word.toLowerCase());
  }
  return tokens;
}

/**
 * Drops the first contiguous occurrence of the parsed-title tokens.
 *
 * @param {string[]} titleTokens - Tokens of the full title.
 * @param {string[]} parsedTitleTokens - Tokens of the parsed title.
 * @returns {string[]} A new array without the matched run, or `titleTokens`
 *   itself when there is nothing to remove.
 */
function removeParsedTitleTokens(titleTokens, parsedTitleTokens) {
  const runLength = parsedTitleTokens.length;
  if (runLength === 0 || runLength > titleTokens.length) {
    return titleTokens;
  }

  const runStart = findTokenSequence(titleTokens, parsedTitleTokens);
  if (runStart === -1) {
    return titleTokens;
  }

  const runEnd = runStart + runLength;
  return titleTokens.filter((_, position) => position < runStart || position >= runEnd);
}

/**
 * Finds where `sequence` first appears as a contiguous run inside `tokens`.
 *
 * @param {string[]} tokens - Tokens to search.
 * @param {string[]} sequence - Run of tokens to look for.
 * @returns {number} Index of the first match, or -1 when there is none.
 */
function findTokenSequence(tokens, sequence) {
  for (let start = 0; start < tokens.length; start += 1) {
    let isMatch = true;
    for (let offset = 0; offset < sequence.length; offset += 1) {
      if (tokens[start + offset] !== sequence[offset]) {
        isMatch = false;
        break;
      }
    }
    if (isMatch) {
      return start;
    }
  }
  return -1;
}

/**
 * Reports whether any token marks the release as multi-language.
 *
 * @param {string[]} tokens - Lower-cased tokens.
 * @returns {boolean} True when a multi-language token is present.
 */
function hasMultiLanguageToken(tokens) {
  for (const [position, token] of tokens.entries()) {
    if (!multiTokens.has(token)) {
      continue;
    }
    // 'dl' directly after 'web' (e.g. "WEB-DL") is not a language marker.
    const followsWeb = token === 'dl' && tokens[position - 1] === 'web';
    if (!followsWeb) {
      return true;
    }
  }
  return false;
}

/**
 * Reports whether a title carries a multi-language token.
 *
 * @param {string} title - Release title.
 * @returns {true | undefined} `true` when a marker is found, otherwise `undefined`.
 */
export function isMulti(title) {
  return hasMultiLanguageToken(tokenize(title)) ? true : undefined;
}