UNPKG

linguist-js

Version:

Analyse the programming languages used in a folder or from raw content, using the same rules that GitHub Linguist does.

116 lines (115 loc) 5.89 kB
import commonPrefix from 'common-path-prefix';
import ignore from 'ignore';
import FS from 'node:fs';
import Path from 'node:path';
import Attributes from '../program/classes/attributes.js';
import { getFileExtension, normPath } from '../program/fs/normalisedPath.js';
import walkTree from '../program/fs/walkTree.js';
import parseGitattributes from '../program/parsing/parseGitattributes.js';

const binaryData = JSON.parse(FS.readFileSync(new URL('../../node_modules/binary-extensions/binary-extensions.json', import.meta.url), 'utf-8'));

// Dotted suffixes (".ext") built once so the per-file check does no string building.
const binarySuffixes = binaryData.map((ext) => `.${ext}`);

/** Whether `file` ends (case-insensitively on the file side) with a known binary extension. */
function hasBinaryExtension(file) {
  const lowered = file.toLowerCase();
  return binarySuffixes.some((suffix) => lowered.endsWith(suffix));
}

/** Whether `path` exists on disk and is a directory (checked with lstat, so symlinks are not followed). */
function isExistingDirectory(path) {
  return FS.existsSync(path) && FS.lstatSync(path).isDirectory();
}

/**
 * Collects the files found under the given filesystem inputs, applies the
 * vendored/binary/gitattributes filters, and reads the survivors from disk.
 *
 * @param {string[]} input - Paths to analyse; each is resolved with `Path.resolve`.
 *   Directories are walked with `walkTree`, existing non-directories are used directly.
 * @param {object} opts - Options read here: `ignoredFiles` (patterns handed to `ignore`),
 *   `keepVendored`, `keepBinary` and `checkAttributes`.
 * @param {string[]} vendorPaths - Regular-expression sources, compiled case-insensitively
 *   and tested against each file's path relative to the common root.
 * @returns {Promise<object[]>} One record per remaining file with `path`, `content`,
 *   `firstLine`, `size`, `extension`, `isBinary`, `metadata` and `attributes`.
 */
export default async function fromFilesystem(input, opts, vendorPaths) {
  const resolvedInput = input.map((path) => normPath(Path.resolve(path)));
  // Nothing to analyse; previously this fell through to `undefined.replace(...)`.
  if (resolvedInput.length === 0) return [];

  const directoryInputs = resolvedInput.filter((path) => isExistingDirectory(path));
  const fileInputs = resolvedInput.filter((path) => FS.existsSync(path) && !FS.lstatSync(path).isDirectory());

  let root = resolvedInput.length > 1 ? commonPrefix(resolvedInput) : resolvedInput[0];
  // A lone file input would otherwise be its own root, so its relative path would be ''
  // (Path.relative of a path against itself), which the `ignore` matcher rejects.
  // Root it at its parent directory instead.
  if (resolvedInput.length === 1 && fileInputs.length === 1) {
    root = normPath(Path.dirname(root));
  }
  const commonRoot = root.replace(/\/?$/, '');

  // Each file's relative path is needed by several passes below; compute it once.
  const relPathCache = new Map();
  const relPath = (file) => {
    let relative = relPathCache.get(file);
    if (relative === undefined) {
      relative = normPath(Path.relative(commonRoot, file));
      relPathCache.set(file, relative);
    }
    return relative;
  };

  const ignored = ignore();
  ignored.add('.git/');
  ignored.add(opts.ignoredFiles ?? []);
  const regexIgnores = opts.keepVendored ? [] : vendorPaths.map((path) => RegExp(path, 'i'));
  const matchesVendorPattern = (relative) => regexIgnores.some((pattern) => pattern.test(relative));

  let files = [];
  if (directoryInputs.length) {
    const data = walkTree({ init: true, commonRoot, folderRoots: directoryInputs, folders: directoryInputs, ignored });
    files = [...data.files];
  }
  files.push(...fileInputs);
  files = [...new Set(files)];

  // Establish language overrides taken from gitattributes
  const manualAttributes = new Attributes();
  if (opts.checkAttributes) {
    const nestedAttrFiles = files.filter((file) => file.endsWith('.gitattributes'));
    for (const attrFile of nestedAttrFiles) {
      const relAttrFolder = Path.dirname(relPath(attrFile));
      const contents = FS.readFileSync(attrFile, 'utf-8');
      for (const { glob, attrs } of parseGitattributes(contents, relAttrFolder)) {
        manualAttributes.add(glob, attrs);
      }
    }
  }

  // Drop files matching a vendor pattern unless gitattributes explicitly marks
  // them as not generated / not vendored.
  const regexIgnoredFiles = new Set();
  for (const file of files) {
    const relative = relPath(file);
    if (!matchesVendorPattern(relative)) continue;
    const fileAttrs = manualAttributes.findAttrsForPath(relative);
    if (fileAttrs?.generated === false || fileAttrs?.vendored === false) continue;
    regexIgnoredFiles.add(file);
  }
  files = files.filter((file) => !regexIgnoredFiles.has(file));

  if (!opts.keepVendored) {
    const vendorTrueGlobs = [
      ...manualAttributes.getFlaggedGlobs('vendored', true),
      ...manualAttributes.getFlaggedGlobs('generated', true),
      ...manualAttributes.getFlaggedGlobs('documentation', true),
    ];
    const vendorFalseGlobs = [
      ...manualAttributes.getFlaggedGlobs('vendored', false),
      ...manualAttributes.getFlaggedGlobs('generated', false),
      ...manualAttributes.getFlaggedGlobs('documentation', false),
    ];
    const vendorTrueIgnore = ignore().add(vendorTrueGlobs);
    const vendorFalseIgnore = ignore().add(vendorFalseGlobs);

    // Remove pattern-matched files, then append back (at the end, as before) those
    // that a "false" glob overrides, and finally drop anything a "true" glob flags.
    const excludedFiles = files.filter((file) => matchesVendorPattern(relPath(file)));
    const excludedSet = new Set(excludedFiles);
    files = files.filter((file) => !excludedSet.has(file));
    const overriddenExcludedFiles = excludedFiles.filter((file) => vendorFalseIgnore.ignores(relPath(file)));
    files.push(...overriddenExcludedFiles);
    files = files.filter((file) => !vendorTrueIgnore.ignores(relPath(file)));
  }

  if (!opts.keepBinary) {
    files = files.filter((file) => !hasBinaryExtension(file));
    const binaryIgnored = ignore().add(manualAttributes.getFlaggedGlobs('binary', true));
    files = files.filter((file) => !binaryIgnored.ignores(relPath(file)));
    // NOTE(review): this looks intended to re-add files flagged binary=false, but the
    // candidates come from the already-filtered list, so every pushed entry is a
    // duplicate that the Set below discards. Left unchanged to preserve behaviour;
    // confirm the intended semantics before changing which files are restored.
    const binaryUnignored = ignore().add(manualAttributes.getFlaggedGlobs('binary', false));
    const binaryUnignoredList = files.filter((file) => binaryUnignored.ignores(relPath(file)));
    files.push(...binaryUnignoredList);
  }

  const virtualFiles = [];
  for (const file of new Set(files)) {
    if (!FS.existsSync(file) || FS.lstatSync(file).isDirectory()) continue;

    const content = FS.readFileSync(file, 'utf-8');
    // Take the text before the first LF / CRLF without splitting the whole file.
    const newlineIndex = content.search(/\r?\n/);
    const firstLine = newlineIndex === -1 ? content : content.slice(0, newlineIndex);

    const fileAttrs = manualAttributes.findAttrsForPath(relPath(file));

    const metadata = {};
    if (fileAttrs?.vendored === true) metadata.vendored = true;
    if (fileAttrs?.generated === true) metadata.generated = true;
    if (fileAttrs?.documentation === true) metadata.documentation = true;

    const attributes = fileAttrs
      ? {
          language: fileAttrs.language ?? undefined,
          binary: fileAttrs.binary === true ? true : undefined,
          detectable: fileAttrs.detectable === true ? true : undefined,
        }
      : undefined;

    virtualFiles.push({
      path: file,
      content,
      firstLine,
      size: Buffer.byteLength(content, 'utf-8'),
      extension: getFileExtension(file),
      isBinary: fileAttrs?.binary === true || hasBinaryExtension(file),
      metadata: Object.keys(metadata).length ? metadata : undefined,
      attributes,
    });
  }
  return virtualFiles;
}