@ctrl/video-filename-parser
Version:
A radarr style release name parser
138 lines (137 loc) • 6.3 kB
JavaScript
import { parseTitleAndYear } from './title/index.js';
/**
 * Languages that can be reported for a release name.
 *
 * This is a string enum: every member's value is the same text as its key, so
 * a member can be compared against, or serialised as, its plain name.
 *
 * The binding is declared with `var` on purpose so that it stays a hoisted,
 * reassignable module binding.
 */
export var Language = {
  English: 'English',
  French: 'French',
  Spanish: 'Spanish',
  German: 'German',
  Italian: 'Italian',
  Danish: 'Danish',
  Dutch: 'Dutch',
  Japanese: 'Japanese',
  Cantonese: 'Cantonese',
  Mandarin: 'Mandarin',
  Russian: 'Russian',
  Polish: 'Polish',
  Vietnamese: 'Vietnamese',
  Nordic: 'Nordic',
  Swedish: 'Swedish',
  Norwegian: 'Norwegian',
  Finnish: 'Finnish',
  Turkish: 'Turkish',
  Portuguese: 'Portuguese',
  Flemish: 'Flemish',
  Greek: 'Greek',
  Korean: 'Korean',
  Hungarian: 'Hungarian',
  Persian: 'Persian',
  Bengali: 'Bengali',
  Bulgarian: 'Bulgarian',
  Brazilian: 'Brazilian',
  Hebrew: 'Hebrew',
  Czech: 'Czech',
  Ukrainian: 'Ukrainian',
  Catalan: 'Catalan',
  Chinese: 'Chinese',
  Thai: 'Thai',
  Hindi: 'Hindi',
  Tamil: 'Tamil',
  Arabic: 'Arabic',
  Estonian: 'Estonian',
  Icelandic: 'Icelandic',
  Latvian: 'Latvian',
  Lithuanian: 'Lithuanian',
  Romanian: 'Romanian',
  Slovak: 'Slovak',
  Serbian: 'Serbian',
};
/**
 * Lookup table from each language to the lower-case tokens that identify it in
 * a release name.
 *
 * Written as compact `[language, aliases]` pairs and expanded into
 * `{ language, aliases }` rule objects. The order of the rows is significant:
 * `parseLanguage` reports matching languages in table order.
 */
const languageAliasRules = [
  [Language.English, ['english', 'eng', 'en']],
  [Language.Spanish, ['spanish']],
  [Language.Danish, ['dk', 'dan', 'danish']],
  [Language.Japanese, ['japanese']],
  [Language.Cantonese, ['cantonese']],
  [Language.Mandarin, ['mandarin']],
  [Language.Korean, ['korean']],
  [Language.Vietnamese, ['vietnamese']],
  [Language.Swedish, ['se', 'swe', 'swedish']],
  [Language.Finnish, ['fi', 'finnish']],
  [Language.Turkish, ['turkish']],
  [Language.Portuguese, ['portuguese']],
  [Language.Hebrew, ['hebrew', 'hebdub']],
  [Language.Czech, ['cz', 'czech']],
  [Language.Ukrainian, ['ukr', 'ukrainian']],
  [Language.Catalan, ['catalan']],
  [Language.Estonian, ['estonian']],
  [Language.Icelandic, ['ice', 'icelandic']],
  [Language.Chinese, ['chi', 'chinese']],
  [Language.Thai, ['thai']],
  [Language.Italian, ['ita', 'italian']],
  [Language.German, ['german', 'videomann']],
  [Language.Flemish, ['flemish']],
  [Language.Greek, ['greek']],
  [
    Language.French,
    ['fr', 'french', 'vostfr', 'vo', 'vff', 'vfq', 'vf2', 'truefrench', 'subfrench'],
  ],
  [Language.Russian, ['rus', 'russian']],
  [Language.Norwegian, ['no', 'norwegian']],
  [Language.Hungarian, ['hun', 'hundub', 'hungarian']],
  [Language.Polish, ['pl', 'pldub', 'polish']],
  [Language.Dutch, ['nl', 'dutch']],
  [Language.Hindi, ['hin', 'hindi']],
  [Language.Tamil, ['tam', 'tamil']],
  [Language.Arabic, ['arabic']],
  [Language.Latvian, ['latvian']],
  [Language.Lithuanian, ['lithuanian']],
  [Language.Romanian, ['ro', 'romanian', 'rodubbed']],
  [Language.Slovak, ['sk', 'slovak']],
  [Language.Brazilian, ['brazilian']],
  [Language.Persian, ['persian']],
  [Language.Bengali, ['bengali']],
  [Language.Bulgarian, ['bulgarian']],
  [Language.Serbian, ['serbian']],
  [Language.Nordic, ['nordic', 'nordicsubs']],
].map(([language, aliases]) => ({ language, aliases }));
// Matches each maximal run of letters a-z (case-insensitive) and digits; any
// other character acts as a token separator. The `g` flag is required because
// `tokenize` passes this expression to `String.prototype.matchAll`.
const tokenExp = /[a-z0-9]+/gi;
// Lower-case tokens treated as multi-language markers. `dl` is ambiguous:
// `hasMultiLanguageToken` ignores it when it directly follows `web`.
const multiTokens = new Set(['multi', 'dual', 'dl']);
/**
 * Detects the languages named in a release title.
 *
 * The tokens that make up the parsed title are removed first, so only the
 * remaining tokens are checked against the alias table.
 *
 * @param {string} title - Full release name.
 * @param {string} [parsedTitle] - Title portion of the release name. When it is
 *   `null` or `undefined`, `parseTitleAndYear(title).title` is used instead.
 * @returns {string[]} Distinct `Language` values in alias-table order. English
 *   is added when a multi-language token is present or when nothing matched.
 */
export function parseLanguage(title, parsedTitle) {
  const titleToStrip = parsedTitle ?? parseTitleAndYear(title).title;
  const remainingTokens = removeParsedTitleTokens(tokenize(title), tokenize(titleToStrip));
  const remainingTokenSet = new Set(remainingTokens);

  // A Set keeps first-insertion order, so it both collects and de-duplicates.
  const found = new Set();
  for (const { language, aliases } of languageAliasRules) {
    if (aliases.some(alias => remainingTokenSet.has(alias))) {
      found.add(language);
    }
  }

  // English is implied by a multi-language release and is also the fallback.
  if (hasMultiLanguageToken(remainingTokens) || found.size === 0) {
    found.add(Language.English);
  }

  return [...found];
}
/**
 * Splits a release name into lower-case alphanumeric tokens.
 *
 * @param {string} title - Text to split.
 * @returns {string[]} Every `tokenExp` match, lower-cased, in order of
 *   appearance; empty when there is no match.
 */
function tokenize(title) {
  const tokens = [];
  for (const [token] of title.matchAll(tokenExp)) {
    tokens.push(token.toLowerCase());
  }
  return tokens;
}
/**
 * Removes the first occurrence of the parsed title's token sequence from the
 * release-name tokens.
 *
 * @param {string[]} titleTokens - Tokens of the full release name.
 * @param {string[]} parsedTitleTokens - Tokens of the parsed title.
 * @returns {string[]} A new array without the matched run. `titleTokens` itself
 *   is returned when the sequence is empty, longer than `titleTokens`, or not
 *   found.
 */
function removeParsedTitleTokens(titleTokens, parsedTitleTokens) {
  const sequenceLength = parsedTitleTokens.length;
  const canContainSequence = sequenceLength > 0 && sequenceLength <= titleTokens.length;
  if (!canContainSequence) {
    return titleTokens;
  }

  const matchStart = findTokenSequence(titleTokens, parsedTitleTokens);
  if (matchStart === -1) {
    return titleTokens;
  }

  // Work on a copy so the caller's array is left untouched.
  const remaining = [...titleTokens];
  remaining.splice(matchStart, sequenceLength);
  return remaining;
}
/**
 * Finds where a run of tokens first appears inside a token list.
 *
 * @param {string[]} tokens - Tokens to search.
 * @param {string[]} sequence - Consecutive tokens to look for.
 * @returns {number} Index in `tokens` where `sequence` first starts, or `-1`
 *   when it does not occur. An empty `sequence` matches at index 0 of a
 *   non-empty `tokens`.
 */
function findTokenSequence(tokens, sequence) {
  for (let start = 0; start < tokens.length; start += 1) {
    let matches = true;
    for (let offset = 0; offset < sequence.length; offset += 1) {
      // Reading past the end yields undefined, which never equals a token.
      if (tokens[start + offset] !== sequence[offset]) {
        matches = false;
        break;
      }
    }
    if (matches) {
      return start;
    }
  }
  return -1;
}
/**
 * Tells whether a token list contains a multi-language marker.
 *
 * @param {string[]} tokens - Lower-case tokens of a release name.
 * @returns {boolean} `true` when any token is in `multiTokens`, not counting a
 *   `dl` that directly follows `web`.
 */
function hasMultiLanguageToken(tokens) {
  for (let position = 0; position < tokens.length; position += 1) {
    const token = tokens[position];
    if (!multiTokens.has(token)) {
      continue;
    }
    // A `dl` right after `web` is skipped rather than counted as a marker.
    const followsWeb = tokens[position - 1] === 'web';
    if (token !== 'dl' || !followsWeb) {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether a release name carries a multi-language marker.
 *
 * @param {string} title - Full release name.
 * @returns {true | undefined} `true` when a marker is present; `undefined`
 *   (never `false`) otherwise.
 */
export function isMulti(title) {
  const tokens = tokenize(title);
  return hasMultiLanguageToken(tokens) ? true : undefined;
}