UNPKG

lisense

Version:

A simple but working CLI tool to extract NPM package licenses reliably

669 lines (581 loc) 17.3 kB
const fs = require('fs'); const path = require('path'); var readdirRec = function(dir) { var results = []; var list = fs.readdirSync(dir); list.forEach(function(file) { file = path.join(dir, file); var stat = fs.statSync(file); if (stat && stat.isDirectory()) { /* Recurse into a subdirectory */ results = results.concat(readdirRec(file)); } else { /* Is a file */ results.push(file); } }); return results; } var readdir = function(dir) { var results = []; var list = fs.readdirSync(dir); list.forEach(function(file) { file = path.join(dir, file); results.push(file); }); return results; } var ioutil = { ReadFile: (path) => { try { return fs.readFileSync(path).toString() || ''; } catch (ex) { return null; } }, Readdir: readdir, }; // ---- const GIT_REF_STR_MATCHER = new RegExp('(.*?)@(.*?):(.*?)\\/(.*?)$', 'im'); const CUSTOM_COPYRIGHT_MATCHER = new RegExp('^copyright.*?\\(?c?\\)?.*?[0-9]?.*?$', 'i'); const README_MD_LICENSE_HEADING = new RegExp('^#*.*?licen[s|c]e$', 'i'); const URL_USER_IN_HOSTNAME_MATCHER = new RegExp('://.*?@(.*?)/', 'i'); const GITHUB_REPO_SHORT_REGEX_MATCHER = new RegExp('^[^\/]+\/[^\/]+$', 'i') const SCM_INFO_PARSERS = [ // GitHub ref: 'github:user/repo' { canApply: (str) => str.toLowerCase().startsWith('github:'), apply: (str) => { const parts = str.substring(7).split('/'); if (parts.length >= 2) { return { _valid: true, type: 'git', directory: '', url: `https://github.com/${parts[0]}/${parts.slice(1).join('/')}`, }; } } }, // Bitbucket ref: 'bitbucked:user/repo' { canApply: (str) => str.toLowerCase().startsWith('bitbucket:'), apply: (str) => { const parts = str.substring(10).split('/'); if (parts.length >= 2) { let fullPath = parts.slice(1).join('/'); if (!fullPath.endsWith('/')) { fullPath += '/' } return { _valid: true, type: 'git', directory: '', url: `https://bitbucket.org/${parts[0]}/${fullPath}`, }; } } }, // GitLab ref: 'gitlab:user/repo' { canApply: (str) => str.toLowerCase().startsWith('gitlab:'), apply: (str) => { const parts = str.substring(7).split('/'); if (parts.length >= 2) { return { _valid: true, type: 'git', directory: '', url: `https://gitlab.com/${parts[0]}/${parts.slice(1).join('/')}`, }; } } }, // Reference to a GIST { canApply: (str) => str.toLowerCase().startsWith('gist:'), apply: (str) => { return { _valid: true, type: 'gist', directory: '', url: `https://gist.github.com/${str.substring(5)}`, }; } }, // Reference to a npm package directly { canApply: (str) => str.startsWith('npm/'), apply: (str) => { return { _valid: true, type: 'npm', directory: '', url: `https://www.npmjs.com/package/${str.substring(4)}`, }; } }, // It might start with git:// { canApply: (str) => str.startsWith('git://') || str.startsWith('git+ssh://'), apply: (str) => { const offset = str.startsWith('git://') ? 3 : 7; // Only for github we know out of the box that https is supported let url = `${str.indexOf('github.com') > -1 ? 'https' : 'http'}${str.substring(offset)}`; const m = url.match(URL_USER_IN_HOSTNAME_MATCHER); if (m && m[0] && m[1] && m.index) { const lenOriginal = m[0].length; url = `${url.substring(0, m.index)}://${m[1]}/${url.substring(m.index+lenOriginal)}`; } return { _valid: true, type: 'git', directory: '', url, }; } }, // Just an URL string { canApply: (str) => { return (str.startsWith('http://') || str.startsWith('https://')) && ((str.indexOf('github.com') > -1) || str.endsWith('.git')); }, apply: (url) => { const m = url.match(URL_USER_IN_HOSTNAME_MATCHER); if (m && m[0] && m[1] && m.index) { const lenOriginal = m[0].length; url = `${url.substring(0, m.index)}://${m[1]}/${url.substring(m.index+lenOriginal)}`; } return { _valid: true, type: 'git', directory: '', url, }; } }, // Case: check for "git@PROVIER:USER/REPO_PATH" { canApply: (str) => !!str.match(GIT_REF_STR_MATCHER), apply: (str) => { const m = str.match(GIT_REF_STR_MATCHER); if (m && m.length === 5) { if (m[2].indexOf('github.com') > -1) { return { _valid: true, type: 'git', directory: '', url: `https://github.com/${m[3]}/${m[4]}`, }; } else { return { _valid: true, type: 'other', directory: '', url: `${tred.indexOf('github.com') > -1 ? 'https' : 'http'}${tred.substring(3)}`, }; } } } }, { canApply: (str) => { return !!str.match(GITHUB_REPO_SHORT_REGEX_MATCHER); }, apply: (str) => { return { _valid: true, type: 'git', directory: '', url: `https://github.com/${str}`, }; } }, { canApply: (str) => { // Either invalid string or already with a protocol prefix if (!str || str.startsWith('http')) { return false; } // Is it parseable? try { new URL(`http://${(str || '').trim()}`) } catch (ex) { return false; } return true; }, apply: (str) => { // We don't know if the target server supports https, so to be sure, we provide // http and the target server should have a https redirect or HSTS or something let scheme = 'http'; if ( str.indexOf('github.com') > -1 || str.indexOf('gitlab.com') > -1 || str.indexOf('bitbucket.com') > -1 ) { scheme = 'https'; } return { _valid: true, type: str.toLowerCase().indexOf('git') > -1 ? 'git' : 'other', directory: '', url: `${scheme}://${str.trim()}`, }; } }, ] function _extractSCMInfoFromString(repoInfo) { let tred = (`${repoInfo || ''}`).trim(); if (!tred) { return { _valid: false, }; } tred = _cleanupRepoUrl(tred); // Parse it for (const p of SCM_INFO_PARSERS) { if (p.canApply(tred)) { const r = p.apply(tred); if (r && r._valid === true) { return r; } } } return { _valid: false }; } function _extractSCMInfo(packageJson) { if (!packageJson || (typeof packageJson) !== 'object') { return { _valid: false }; } const repo = packageJson.repository; if (repo === null || repo === undefined) { return { _valid: false }; } // It might be "only" a string to parse on this field if ((typeof repo) === 'string') { return _extractSCMInfoFromString(repo); } if ((typeof repo) === 'object') { const keys = Object.getOwnPropertyNames(repo); if (keys.indexOf('type') > -1 && keys.indexOf('url') > -1) { const repoInfo = { _valid: true, type: repo['type'], directory: repo['directory'] || '', url: repo['url'], }; // Try to parse it const parsed = _extractSCMInfoFromString(repoInfo.url); if (parsed._valid) { // Return the parsed one: return { ...parsed, directory: repoInfo.directory || parsed.directory || '', }; } // Return the fallback value return repoInfo; } // Fallback: we're missing the if (keys.indexOf('url') > -1) { return _extractSCMInfoFromString(repo['url']); } } return { _valid: false }; } function _cleanupRepoUrl(bareUrl) { if (bareUrl.startsWith('git+http')) { bareUrl = bareUrl.substring(4); } return bareUrl; } function extractSCMInfo(packageJson) { const ret = _extractSCMInfo(packageJson); if (!ret._valid) { return ret; } const result = { ...ret, type: (`${ret.type || ''}`).toLowerCase(), url: _cleanupRepoUrl(ret.url), }; if (!['git', 'svn', 'gist', 'npm'].includes(result.type)) { result.type = 'other'; } return result; } function _parseSingleLicenseObject(obj) { if (!obj || (typeof obj) !== 'object') { return { _valid: false }; } if (obj['type'] && obj['url']) { return { _valid: true, type: (obj['type'] || '').trim(), url: obj['url'], }; } else if (obj['type']) { return { _valid: true, type: (obj['type'] || '').trim(), url: '', }; } return { _valid: false }; } function extractLicenseInfoFromPackageJson(packageJson) { // Handle field 'licenses' if (packageJson && Array.isArray(packageJson['licenses'])) { const objs = packageJson['licenses'].map(l => _parseSingleLicenseObject(l)) const valid = objs.reduce((p, c) => p && c._valid, true); if (valid) { return { _valid: true, licenses: objs }; } } else if (packageJson && (typeof packageJson['licenses']) === 'object') { const obj = _parseSingleLicenseObject(packageJson['licenses']); if (obj._valid) { return { _valid: true, licenses: [obj] }; } } // Handle field 'license' if (packageJson && (typeof packageJson['license']) === 'string' && packageJson['license'].length > 1) { return { _valid: true, licenses: [ { _valid: true, type: packageJson['license'], } ] }; } else if (packageJson && Array.isArray(packageJson['license'])) { const result = { _valid: true, licenses: packageJson['license'].map(l => { if ((typeof l) === 'string') { return { _valid: true, type: (`${l || ''}`).trim(), }; } else { return _parseSingleLicenseObject(l); } }).filter(l => l._valid) }; return { ...result, _valid: result.licenses.length > 0 }; } else if (packageJson && (typeof packageJson['license']) === 'object' && packageJson['license']) { if ((typeof packageJson['license']['type']) !== 'undefined') { return { _valid: true, licenses: [ { _valid: true, type: packageJson['license']['type'], ...( (typeof packageJson['license']['url']) !== 'undefined' ? { url: packageJson['license']['url'] } : {} ) } ] }; } } return { _valid: false, licenses: [] }; } function _extractLicenseInfoFromFile(filePath) { const data = ioutil.ReadFile(filePath); if (!data) { // Maybe file read error etc. return { _valid: false }; } return _extractLicenseInfoFromString(data); } function _extractLicenseInfoFromString(data) { if (!data) { return { _valid: false }; } const lines = data.split('\n'); if (!Array.isArray(lines) || lines.length < 1) { // Empty file return { _valid: false }; } // Check for the standard license header like: 'Copyright (C) 1969 ...' const m = lines[0].trim().match(CUSTOM_COPYRIGHT_MATCHER); if (m) { return { _valid: true, _confidence: 0.5, type: 'CUSTOM_LICENSE', licenseLine: lines[0], }; } const beginning = lines.slice(0, Math.min(2, lines.length-1)); // Check if MIT license const hasMitLicense = beginning.findIndex(p => { return ( (p.toLowerCase().indexOf('mit license') > -1) || (p.toLowerCase().indexOf('the mit license') > -1) || (p.toLowerCase().indexOf('the mit license (mit)') > -1) ) }); if (hasMitLicense > -1) { return { _valid: true, _confidence: 0.75, type: 'MIT', licenseLine: lines[hasMitLicense], }; } // Check for GNU GPL license const hasGnuGplLicense = beginning.findIndex(p => { return ( (p.toUpperCase().indexOf('GNU GENERAL PUBLIC LICENSE') > -1) || (p.toUpperCase().indexOf('GENERAL PUBLIC LICENSE') > -1) ) }); if (hasGnuGplLicense > -1) { return { _valid: true, _confidence: 0.75, type: 'GPL', licenseLine: lines[hasGnuGplLicense], }; } // @see https://github.com/Illumina/licenses // Sorry, could not find anything return { _valid: false }; } function isLicenseFileFilename(filename) { const pathl = (`${filename || ''}`).toLowerCase(); // For American English ;-) return ((pathl.endsWith('/license') || pathl.endsWith('/license.md') || pathl.endsWith('/license.txt') || pathl.indexOf('/license') > -1)) || // For British English ;-) ((pathl.endsWith('/licence') || pathl.endsWith('/licence.md') || pathl.endsWith('/licence.txt') || pathl.indexOf('/licence') > -1)); } function extractLicenseInfoFromFiles(modulePath) { let files = readdirRec(modulePath); if (!Array.isArray(files) || files.length < 1) { return { _valid: false, licenses: [] }; } // We need to exclude all node_modules wich are inside a module // itself and want to make sure, only to get the contents of the // requested node_module and its subdirectories files = files.filter(path => { const subpath = path.substring(modulePath.length) || ''; return subpath.indexOf('node_modules') < 0 }); // Let's try to filter out all license files files = files.filter(path => isLicenseFileFilename(path)); // Nothing found if (files.length < 1) { return { _valid: false, licenses: [] }; } // Try to process the found files const rets = files.map(path => { const extr = _extractLicenseInfoFromFile(path) return { ...extr, _source: path.substring(modulePath.length + 1), } }); return { _valid: rets.filter(r => r._valid).length > 0, licenses: rets.filter(r => r._valid) || [], }; } function extractLicenseInfoFromReadme(modulePath) { let files = ioutil.Readdir(modulePath); if (!Array.isArray(files) || files.length < 1) { return { _valid: false, licenses: [] }; } // Find the README file files = files.filter(path => { const lpath = (`${path || ''}`).toLowerCase(); return lpath.endsWith(`/readme.md`) || lpath.endsWith(`/readme`) || lpath.indexOf(`/readme`) > -1 }); // Nothing found if (files.length < 1) { return { _valid: false, licenses: [] }; } const README = ioutil.ReadFile(files[0]); if (!README) { return { _valid: false, licenses: [] }; } // Check if we have a markdown license heading const lines = README.split('\n'); const idxLicenseHeading = lines.findIndex(p => (`${p || ''}`).trim().match(README_MD_LICENSE_HEADING)) if (idxLicenseHeading > -1 && (idxLicenseHeading + 1) < lines.length) { const licenseTxt = lines.slice(idxLicenseHeading + 1).join('\n').trim(); const ret = _extractLicenseInfoFromString(licenseTxt); if (ret._valid) { return { _valid: true, licenses: [ { ...ret, _confidence: Math.max(0.01, ret._confidence - 0.5), _source: files[0].substring(modulePath.length + 1), } ] }; } } return { _valid: false, licenses: [] }; } function extractLicenseInfo(packageJson, moduleRootPath) { // First we try to find a license inside the package.json const ret = extractLicenseInfoFromPackageJson(packageJson); if (ret._valid) { return ret; } // Then we check the file system const ret2 = extractLicenseInfoFromFiles(moduleRootPath); if (ret2._valid) { return ret2; } // And finally, let's look into the README if there is a license line const ret3 = extractLicenseInfoFromReadme(moduleRootPath); if (ret3._valid) { return ret3; } // And only if we can't find info inside the package.json or // the filesystem, we give up return { _valid: false }; } function combineSubpathWithRepo(repoInfo, subPath) { if (!subPath || !repoInfo || !repoInfo._valid) { return null; } if (!repoInfo.url || !repoInfo.url.startsWith('http')) { return null; } // Get the URL and clean it up let url = repoInfo.url; if (url.endsWith('.git')) { url = url.substring(0, url.length - 4); } else if (url.endsWith('/')) { url = url.substring(0, url.length - 1); } if (subPath.startsWith('/')) { subPath = subPath.substring(1); } // For the mainstream git repo providers, we need to find a path into // the repo rather than providing the URL; furthermore we assume the // branch 'master' if (url.indexOf('github.com')) { return `${url}/blob/master/${subPath}`; } // e.g. https://bitbucket.org/multicoreware/x265_git/src/master/COPYING if (url.indexOf('bitbucket.com')) { return `${url}/src/master/${subPath}`; } // e.g. https://gitlab.com/gitlab-org/gitlab/-/blob/master/.gitlab-ci.yml if (url.indexOf('gitlab.com')) { return `${url}/-/blob/master/${subPath}`; } // Fallack return `${url}/${subPath}`; } module.exports = { extractLicenseInfo, extractLicenseInfoFromFiles, // Main functions combineSubpathWithRepo, extractSCMInfo, extractLicenseInfoFromPackageJson, };