UNPKG

linguist-js

Version:

Analyse the programming languages used in a folder or from raw content, using the same rules that GitHub Linguist does.

112 lines (111 loc) 5.7 kB
import commonPrefix from 'common-path-prefix';
import ignore from 'ignore';
import FS from 'node:fs';
import Path from 'node:path';
import Attributes from './classes/attributes.js';
import { normPath } from './fs/normalisedPath.js';
import readFileChunk from './fs/readFile.js';
import walkTree from './fs/walkTree.js';
import parseGitattributes from './parsing/parseGitattributes.js';

// List loaded once at module load from the `binary-extensions` package; each entry is
// compared below as `'.' + ext` against the end of a file path.
const binaryData = JSON.parse(FS.readFileSync(new URL('../../node_modules/binary-extensions/binary-extensions.json', import.meta.url), 'utf-8'));

/**
 * Builds the list of files to analyse, applying ignore rules, vendored-path
 * patterns, `.gitattributes` overrides and binary-file filtering.
 *
 * @param {string[]} input - Folder paths to walk, or (when `useRawContent` is truthy)
 *   the file names themselves, which are then used as-is without touching the disc.
 * @param {object} opts - Options read here: `ignoredFiles`, `keepVendored`,
 *   `checkAttributes`, `keepBinary`.
 * @param {boolean} useRawContent - When truthy, `input` is treated as the file list and
 *   no path resolution or `.gitattributes` loading is performed.
 * @param {string[]} vendorPaths - Regular-expression sources (compiled case-insensitively)
 *   describing paths that are excluded by default.
 * @returns {Promise<{ files: string[], manualAttributes: Attributes, relPath: (file: string) => string }>}
 *   The remaining files, the attributes collected from `.gitattributes` files, and the
 *   helper that maps a file to its path relative to the common root.
 */
export default async function processFiles(input, opts, useRawContent, vendorPaths) {
    // Set a common root path so that vendor paths do not incorrectly match parent folders
    const resolvedInput = input.map((path) => normPath(Path.resolve(path)));
    const rootCandidate = input.length > 1 ? commonPrefix(resolvedInput) : resolvedInput[0];
    // `rootCandidate` is undefined for an empty `input`; fall back to '' instead of throwing
    const commonRoot = (rootCandidate ?? '').replace(/\/?$/, '');
    const relPath = (file) => (useRawContent ? file : normPath(Path.relative(commonRoot, file)));
    const unRelPath = (file) => (useRawContent ? file : normPath(Path.resolve(commonRoot, file)));

    // Other helper functions
    // Keeps the files that `ignored` does NOT match
    const filterOutIgnored = (files, ignored) => ignored.filter(files.map((file) => relPath(file))).map((file) => unRelPath(file));
    // Keeps the files that `matcher` DOES match
    const pickMatching = (files, matcher) => files.filter((file) => matcher.ignores(relPath(file)));

    //*PREPARE FILES AND DATA*//

    // Prepare list of ignored files
    const ignored = ignore();
    ignored.add('.git/');
    ignored.add(opts.ignoredFiles ?? []);
    // Compiled once and reused for every vendored-path check below
    const regexIgnores = opts.keepVendored ? [] : vendorPaths.map((path) => RegExp(path, 'i'));
    const isVendoredByDefault = (relFile) => regexIgnores.some((pattern) => pattern.test(relFile));

    // Load file paths and folders
    let files;
    if (useRawContent) {
        // Uses raw file content
        files = input;
    }
    else {
        // Uses directory on disc
        const data = walkTree({ init: true, commonRoot, folderRoots: resolvedInput, folders: resolvedInput, ignored });
        files = data.files;
    }

    // Fetch and normalise gitattributes data of all subfolders and save to metadata
    const manualAttributes = new Attributes();
    if (!useRawContent && opts.checkAttributes) {
        // NOTE(review): this suffix match also accepts names such as `foo.gitattributes` — confirm that is intended
        const nestedAttrFiles = files.filter((file) => file.endsWith('.gitattributes'));
        // Read sequentially so that attributes are registered in file-list order
        for (const attrFile of nestedAttrFiles) {
            const relAttrFile = relPath(attrFile);
            const relAttrFolder = Path.dirname(relAttrFile);
            const contents = await readFileChunk(attrFile);
            const parsed = parseGitattributes(contents, relAttrFolder);
            for (const { glob, attrs } of parsed) {
                manualAttributes.add(glob, attrs);
            }
        }
    }

    // Remove files that are linguist-ignored via regex by default unless explicitly unignored in gitattributes
    const filesToIgnore = new Set();
    for (const file of files) {
        const relFile = relPath(file);
        if (!isVendoredByDefault(relFile)) {
            // Checking overrides is moot if file is not even marked as ignored by default
            continue;
        }
        const fileAttrs = manualAttributes.findAttrsForPath(relFile);
        const isExplicitlyKept = fileAttrs?.generated === false || fileAttrs?.vendored === false;
        if (!isExplicitlyKept) {
            filesToIgnore.add(file);
        }
    }
    // Always produces a fresh array, so the caller's `input` is never mutated by later pushes
    files = files.filter((file) => !filesToIgnore.has(file));

    // Apply vendor file path matches and filter out vendored files
    if (!opts.keepVendored) {
        // Get data of files that have been manually marked with metadata
        const vendorTrueGlobs = [
            ...manualAttributes.getFlaggedGlobs('vendored', true),
            ...manualAttributes.getFlaggedGlobs('generated', true),
            ...manualAttributes.getFlaggedGlobs('documentation', true),
        ];
        const vendorFalseGlobs = [
            ...manualAttributes.getFlaggedGlobs('vendored', false),
            ...manualAttributes.getFlaggedGlobs('generated', false),
            ...manualAttributes.getFlaggedGlobs('documentation', false),
        ];
        // Set up glob ignore object to use for expanding globs to match files
        const vendorTrueIgnore = ignore().add(vendorTrueGlobs);
        const vendorFalseIgnore = ignore().add(vendorFalseGlobs);
        // Remove all files marked as vendored by default
        const excludedFiles = [];
        const includedFiles = [];
        for (const file of files) {
            (isVendoredByDefault(relPath(file)) ? excludedFiles : includedFiles).push(file);
        }
        files = includedFiles;
        // Re-add removed files that are overridden manually in gitattributes
        files.push(...pickMatching(excludedFiles, vendorFalseIgnore));
        // Remove files explicitly marked as vendored in gitattributes
        files = files.filter((file) => !vendorTrueIgnore.ignores(relPath(file)));
    }

    // Filter out binary files
    if (!opts.keepBinary) {
        const hasBinaryExtension = (file) => binaryData.some((ext) => file.endsWith('.' + ext));
        // Filter out files that are binary by default, remembering what was removed
        const removedAsBinary = files.filter((file) => hasBinaryExtension(file));
        files = files.filter((file) => !hasBinaryExtension(file));
        // Filter out manually specified binary files, remembering what was removed
        const binaryIgnored = ignore().add(manualAttributes.getFlaggedGlobs('binary', true));
        removedAsBinary.push(...pickMatching(files, binaryIgnored));
        files = filterOutIgnored(files, binaryIgnored);
        // Re-add files manually marked not as binary.
        // Only files removed above are candidates: re-filtering the surviving list would
        // append duplicates of kept files and never restore a removed one.
        const binaryUnignored = ignore().add(manualAttributes.getFlaggedGlobs('binary', false));
        files.push(...pickMatching(removedAsBinary, binaryUnignored));
    }

    return {
        files,
        manualAttributes,
        relPath,
    };
}