UNPKG

linguist-js

Version:

Analyse the programming languages used in a folder or from raw content, using the same rules that GitHub Linguist does.

68 lines (67 loc) 2.61 kB
import pcre from '../../program/utils/pcre.js';
import byFilename from './byFilename.js';
import byModeline from './byModeline.js';
import byShebang from './byShebang.js';

/**
 * Normalise a "single value or array of values" field into a flat array.
 * Falsy values (missing field, empty string) produce an empty array.
 *
 * @param {string | string[] | undefined | null} value
 * @returns {string[]}
 */
function toPatternList(value) {
  if (!value) return [];
  return Array.isArray(value) ? value : [value];
}

/**
 * Evaluate a single heuristic clause (either a rule itself or one entry of its `and` list)
 * against the file content.
 *
 * - `pattern` / `named_pattern`: the clause passes if ANY of these patterns matches.
 * - `negative_pattern`: the clause passes only if NONE of these patterns matches.
 * - A clause declaring no patterns at all passes unconditionally.
 *
 * @param {object} clause - Object that may carry `pattern`, `named_pattern`, `negative_pattern`.
 * @param {object | undefined} namedPatterns - Lookup table of named patterns.
 * @param {string} content - File content to test.
 * @returns {boolean}
 */
function clauseMatches(clause, namedPatterns, content) {
  const matches = (pattern) => pcre(pattern).test(content);

  const positives = toPatternList(clause.pattern);
  if (clause.named_pattern != null) {
    positives.push(...toPatternList(namedPatterns?.[clause.named_pattern]));
  }
  if (positives.length && !positives.some(matches)) return false;

  // Negative patterns veto the clause when any of them is found in the content
  return !toPatternList(clause.negative_pattern).some(matches);
}

/**
 * Decide whether a heuristic rule applies to the given content.
 * The rule's own patterns must pass, and every entry of its `and` list must pass as well.
 *
 * @param {object} heuristic - A rule from a disambiguation's `rules` list.
 * @param {object} heuristicsData - Heuristics data; `named_patterns` is read from it.
 * @param {string} content - File content to test.
 * @returns {boolean}
 */
function ruleMatches(heuristic, heuristicsData, content) {
  const namedPatterns = heuristicsData.named_patterns;
  if (!clauseMatches(heuristic, namedPatterns, content)) return false;
  if (!heuristic.and) return true;
  // `and` means all sub-clauses must hold, not just one of them
  for (const entry of heuristic.and) {
    if (!clauseMatches(entry, namedPatterns, content)) return false;
  }
  return true;
}

/**
 * Disambiguate a file between several candidate languages using content heuristics.
 *
 * Returns an empty array (no decision) when heuristics are disabled, the file has no content,
 * the file has an explicit language attribute, the filename/shebang/modeline helpers already
 * produce a result, or there are fewer than two candidates.
 *
 * @param {object} file - Reads `content`, `extension` and `attributes?.language`.
 * @param {string[]} candidateLanguages - Languages still under consideration for this file.
 * @param {object} heuristicsData - Reads `disambiguations` (each with `extensions` and `rules`)
 *   and `named_patterns`.
 * @param {object} langData - Language table keyed by language name; `group` is read from entries.
 * @param {object} opts - Reads `checkHeuristics`; also forwarded to the shebang/modeline helpers.
 * @returns {string[]} A single-element array with the chosen language, or an empty array.
 */
export default function byHeuristics(file, candidateLanguages, heuristicsData, langData, opts) {
  // Parse heuristics if applicable
  if (!opts.checkHeuristics || file.content === undefined) return [];
  // Skip if file has explicit association
  if (file.attributes?.language) return [];
  if (byFilename(file, langData).length) return [];
  if (byShebang(file, langData, opts).length) return [];
  if (byModeline(file, langData, opts).length) return [];
  // Nothing to disambiguate
  if (candidateLanguages.length <= 1) return [];

  const extension = file.extension ?? '';
  for (const heuristics of heuristicsData.disambiguations ?? []) {
    // Make sure the extension matches the current file
    if (!heuristics.extensions.includes(extension)) continue;
    // Load heuristic rules
    for (const heuristic of heuristics.rules) {
      // A rule may list several languages; only the first one is considered
      const language = Array.isArray(heuristic.language) ? heuristic.language[0] : heuristic.language;
      const languageGroup = langData[language]?.group;
      const matchesLang = candidateLanguages.includes(language);
      const matchesParent = languageGroup ? candidateLanguages.includes(languageGroup) : false;
      // Make sure the candidates include this language (or its group)
      if (!matchesLang && !matchesParent) continue;
      // Check file contents and apply the first rule that matches
      if (ruleMatches(heuristic, heuristicsData, file.content)) {
        return [language];
      }
    }
  }
  return [];
}