UNPKG

seqparse

Version:

Parse sequence files (GenBank, FASTA, SnapGene, SBOL) and accession IDs (NCBI, iGEM) to a common format

github.com/Lattice-Automation/seqparse

Lattice-Automation/seqparse

1,406 lines (1,312 loc) • 78.6 kB

JavaScript

(function webpackUniversalModuleDefinition(root, factory) { if(typeof exports === 'object' && typeof module === 'object') module.exports = factory(); else if(typeof define === 'function' && define.amd) define("seqparse", [], factory); else if(typeof exports === 'object') exports["seqparse"] = factory(); else root["seqparse"] = factory(); })(this, () => { return /******/ (() => { // webpackBootstrap /******/ "use strict"; /******/ var __webpack_modules__ = ([ /* 0 */ /***/ (function(__unused_webpack_module, exports, __webpack_require__) { var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __generator = (this && this.__generator) || function (thisArg, body) { var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; function verb(n) { return function (v) { return step([n, v]); }; } function step(op) { if (f) throw new TypeError("Generator is already executing."); while (_) try { if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; if (y = 0, t) op = [op[0] & 2, t.value]; switch (op[0]) { case 0: case 1: t = op; break; case 4: _.label++; return { value: op[1], done: false }; case 5: _.label++; y = op[1]; op = [0]; continue; case 7: op = _.ops.pop(); _.trys.pop(); continue; default: if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } if (t[2]) _.ops.pop(); _.trys.pop(); continue; } op = body.call(thisArg, _); } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true }; } }; Object.defineProperty(exports, "__esModule", ({ value: true })); exports.parseFile = void 0; var fetchFile_1 = __webpack_require__(1); var parseFile_1 = __webpack_require__(3); exports.parseFile = parseFile_1.default; /* Parse a sequence file. Or download a sequence with an Accession ID. */ exports["default"] = (function (input, options) { return __awaiter(void 0, void 0, void 0, function () { return __generator(this, function (_a) { switch (_a.label) { case 0: if (!(!(options === null || options === void 0 ? void 0 : options.fileName) && (0, fetchFile_1.isAccession)(input))) return [3 /*break*/, 2]; return [4 /*yield*/, (0, fetchFile_1.default)(input, options)]; case 1: return [2 /*return*/, _a.sent()]; case 2: return [2 /*return*/, (0, parseFile_1.default)(input, options)[0]]; } }); }); }); /***/ }), /* 1 */ /***/ (function(__unused_webpack_module, exports, __webpack_require__) { var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __generator = (this && this.__generator) || function (thisArg, body) { var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; function verb(n) { return function (v) { return step([n, v]); }; } function step(op) { if (f) throw new TypeError("Generator is already executing."); while (_) try { if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; if (y = 0, t) op = [op[0] & 2, t.value]; switch (op[0]) { case 0: case 1: t = op; break; case 4: _.label++; return { value: op[1], done: false }; case 5: _.label++; y = op[1]; op = [0]; continue; case 7: op = _.ops.pop(); _.trys.pop(); continue; default: if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } if (t[2]) _.ops.pop(); _.trys.pop(); continue; } op = body.call(thisArg, _); } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true }; } }; Object.defineProperty(exports, "__esModule", ({ value: true })); exports.isAccession = void 0; var node_fetch_1 = __webpack_require__(2); var parseFile_1 = __webpack_require__(3); /** * Get a remote sequence from NCBI or the iGEM registry. */ exports["default"] = (function (accession, options) { return __awaiter(void 0, void 0, void 0, function () { var url, body, response, err_1; return __generator(this, function (_a) { switch (_a.label) { case 0: url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=".concat(accession.trim(), "&rettype=gbwithparts&retmode=text"); if (accession.startsWith("BB")) { // it's a BioBrick... target the iGEM repo if ((typeof window !== "undefined" && typeof process === "undefined") || (options === null || options === void 0 ? void 0 : options.cors)) { // use this hack to get around a no-CORS setting on iGEM webserver, pending fix on their side url = "https://cors-anywhere.herokuapp.com/http://parts.igem.org/cgi/xml/part.cgi?part=".concat(accession.trim()); } else { url = "http://parts.igem.org/cgi/xml/part.cgi?part=".concat(accession.trim()); } } body = ""; _a.label = 1; case 1: _a.trys.push([1, 4, , 5]); return [4 /*yield*/, (0, node_fetch_1.default)(url)]; case 2: response = _a.sent(); return [4 /*yield*/, response.text()]; case 3: body = _a.sent(); return [3 /*break*/, 5]; case 4: err_1 = _a.sent(); throw new Error("Failed to get part: accession=".concat(accession, " url=").concat(url, " err=").concat(err_1)); case 5: if (!response.ok || !body.length) { throw new Error("Failed to get part, no body returned: accession=".concat(accession, " url=").concat(url)); } return [4 /*yield*/, (0, parseFile_1.default)(body)]; case 6: return [2 /*return*/, (_a.sent())[0]]; } }); }); }); /** returns whether the passed ID is an accession in iGEM or NCBI */ var isAccession = function (accession) { if (accession.startsWith("BB")) { return true; // biobrick } if (accession.length < 14 && accession.match(/^[a-z0-9_\-.]+$/i)) { return true; } return false; }; exports.isAccession = isAccession; /***/ }), /* 2 */ /***/ ((module) => { module.exports = require("node-fetch"); /***/ }), /* 3 */ /***/ ((__unused_webpack_module, exports, __webpack_require__) => { Object.defineProperty(exports, "__esModule", ({ value: true })); var path_1 = __webpack_require__(4); var benchling_1 = __webpack_require__(5); var biobrick_1 = __webpack_require__(7); var fasta_1 = __webpack_require__(9); var genbank_1 = __webpack_require__(10); var jbei_1 = __webpack_require__(11); var sbol_1 = __webpack_require__(12); var seqbuilder_1 = __webpack_require__(15); var snapgene_1 = __webpack_require__(16); var utils_1 = __webpack_require__(6); /** * parseFile converts the contents of a sequence file to a an array of Seq */ exports["default"] = (function (file, opts) { var fileName = (opts === null || opts === void 0 ? void 0 : opts.fileName) || ""; var sourceName = fileName.split(path_1.sep).pop() || fileName; if (!file) { throw Error("cannot parse null or empty string"); } // this is a check for an edge case, where the user uploads come kind // of file that's full of bps but doesn't fit into a defined type var firstLine = file.substring(0, file.search("\n")); var dnaCharLength = firstLine.replace(/[^atcgATCG]/, "").length; var dnaOnlyFile = dnaCharLength / firstLine.length > 0.8; // is it >80% dna? var name = fileName && sourceName ? sourceName.substring(0, sourceName.search("\\.")) : "Untitled"; // another edge case check for whether the seq is a JSON seq from Benchling // just a heuristic that says 1) yes it can be parsed 2) it contains a list of // fields that are common to Benchling files var isBenchling = false; try { var benchlingJSON_1 = JSON.parse(file); // will err out if not JSON if (["bases", "annotations", "primers"].every(function (k) { return typeof benchlingJSON_1[k] !== "undefined"; })) { isBenchling = true; } } catch (ex) { // expected } var prefix = file.substring(0, 200); var seqs; switch (true) { // JBEI case prefix.includes(':seq="http://jbei.org/sequence"'): case file.startsWith("<seq:seq"): seqs = (0, jbei_1.default)(file); break; // FASTA case file.startsWith(">"): case file.startsWith(";"): case fileName.endsWith(".seq"): case fileName.endsWith(".fa"): case fileName.endsWith(".fas"): case fileName.endsWith(".fasta"): seqs = (0, fasta_1.default)(file, fileName); break; // Genbank case file.includes("LOCUS") && file.includes("ORIGIN"): case fileName.endsWith(".gb"): case fileName.endsWith(".gbk"): case fileName.endsWith(".genbank"): case fileName.endsWith(".ape"): seqs = (0, genbank_1.default)(file, fileName); break; // SnapGene case fileName.endsWith(".dna"): seqs = (0, snapgene_1.default)(opts); break; // SeqBuilder case prefix.includes("Written by SeqBuilder"): case fileName.endsWith(".sbd"): seqs = (0, seqbuilder_1.default)(file, fileName); break; // BioBrick XML case prefix.includes("Parts from the iGEM"): case prefix.includes("<part_list>"): seqs = (0, biobrick_1.default)(file); break; // Benchling JSON case isBenchling: seqs = (0, benchling_1.default)(file); break; // SBOL case prefix.includes("RDF"): seqs = (0, sbol_1.default)(file, fileName); break; // a DNA text file without an official formatting case dnaOnlyFile: { var seq = (0, utils_1.complement)(file).seq; seqs = [{ annotations: [], name: name, seq: seq, type: (0, utils_1.guessType)(seq) }]; break; } default: throw Error("".concat(fileName, " File type not recognized: ").concat(file)); } // bit of clean up to: only return the fields in a Seq and reorder to match expectations. return seqs.map(function (p) { return ({ annotations: p.annotations .sort(function (a, b) { return a.start - b.start || a.end - b.end; }) .map(function (a) { return ({ color: a.color, direction: a.direction, end: a.end, name: a.name, start: a.start, type: a.type, }); }), name: p.name, seq: p.seq, type: p.type, }); }); }); /***/ }), /* 4 */ /***/ ((module) => { // 'path' module extracted from Node.js v8.11.1 (only the posix part) // transplited with Babel // Copyright Joyent, Inc. and other Node contributors. // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the // "Software"), to deal in the Software without restriction, including // without limitation the rights to use, copy, modify, merge, publish, // distribute, sublicense, and/or sell copies of the Software, and to permit // persons to whom the Software is furnished to do so, subject to the // following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE // USE OR OTHER DEALINGS IN THE SOFTWARE. function assertPath(path) { if (typeof path !== 'string') { throw new TypeError('Path must be a string. Received ' + JSON.stringify(path)); } } // Resolves . and .. elements in a path with directory names function normalizeStringPosix(path, allowAboveRoot) { var res = ''; var lastSegmentLength = 0; var lastSlash = -1; var dots = 0; var code; for (var i = 0; i <= path.length; ++i) { if (i < path.length) code = path.charCodeAt(i); else if (code === 47 /*/*/) break; else code = 47 /*/*/; if (code === 47 /*/*/) { if (lastSlash === i - 1 || dots === 1) { // NOOP } else if (lastSlash !== i - 1 && dots === 2) { if (res.length < 2 || lastSegmentLength !== 2 || res.charCodeAt(res.length - 1) !== 46 /*.*/ || res.charCodeAt(res.length - 2) !== 46 /*.*/) { if (res.length > 2) { var lastSlashIndex = res.lastIndexOf('/'); if (lastSlashIndex !== res.length - 1) { if (lastSlashIndex === -1) { res = ''; lastSegmentLength = 0; } else { res = res.slice(0, lastSlashIndex); lastSegmentLength = res.length - 1 - res.lastIndexOf('/'); } lastSlash = i; dots = 0; continue; } } else if (res.length === 2 || res.length === 1) { res = ''; lastSegmentLength = 0; lastSlash = i; dots = 0; continue; } } if (allowAboveRoot) { if (res.length > 0) res += '/..'; else res = '..'; lastSegmentLength = 2; } } else { if (res.length > 0) res += '/' + path.slice(lastSlash + 1, i); else res = path.slice(lastSlash + 1, i); lastSegmentLength = i - lastSlash - 1; } lastSlash = i; dots = 0; } else if (code === 46 /*.*/ && dots !== -1) { ++dots; } else { dots = -1; } } return res; } function _format(sep, pathObject) { var dir = pathObject.dir || pathObject.root; var base = pathObject.base || (pathObject.name || '') + (pathObject.ext || ''); if (!dir) { return base; } if (dir === pathObject.root) { return dir + base; } return dir + sep + base; } var posix = { // path.resolve([from ...], to) resolve: function resolve() { var resolvedPath = ''; var resolvedAbsolute = false; var cwd; for (var i = arguments.length - 1; i >= -1 && !resolvedAbsolute; i--) { var path; if (i >= 0) path = arguments[i]; else { if (cwd === undefined) cwd = process.cwd(); path = cwd; } assertPath(path); // Skip empty entries if (path.length === 0) { continue; } resolvedPath = path + '/' + resolvedPath; resolvedAbsolute = path.charCodeAt(0) === 47 /*/*/; } // At this point the path should be resolved to a full absolute path, but // handle relative paths to be safe (might happen when process.cwd() fails) // Normalize the path resolvedPath = normalizeStringPosix(resolvedPath, !resolvedAbsolute); if (resolvedAbsolute) { if (resolvedPath.length > 0) return '/' + resolvedPath; else return '/'; } else if (resolvedPath.length > 0) { return resolvedPath; } else { return '.'; } }, normalize: function normalize(path) { assertPath(path); if (path.length === 0) return '.'; var isAbsolute = path.charCodeAt(0) === 47 /*/*/; var trailingSeparator = path.charCodeAt(path.length - 1) === 47 /*/*/; // Normalize the path path = normalizeStringPosix(path, !isAbsolute); if (path.length === 0 && !isAbsolute) path = '.'; if (path.length > 0 && trailingSeparator) path += '/'; if (isAbsolute) return '/' + path; return path; }, isAbsolute: function isAbsolute(path) { assertPath(path); return path.length > 0 && path.charCodeAt(0) === 47 /*/*/; }, join: function join() { if (arguments.length === 0) return '.'; var joined; for (var i = 0; i < arguments.length; ++i) { var arg = arguments[i]; assertPath(arg); if (arg.length > 0) { if (joined === undefined) joined = arg; else joined += '/' + arg; } } if (joined === undefined) return '.'; return posix.normalize(joined); }, relative: function relative(from, to) { assertPath(from); assertPath(to); if (from === to) return ''; from = posix.resolve(from); to = posix.resolve(to); if (from === to) return ''; // Trim any leading backslashes var fromStart = 1; for (; fromStart < from.length; ++fromStart) { if (from.charCodeAt(fromStart) !== 47 /*/*/) break; } var fromEnd = from.length; var fromLen = fromEnd - fromStart; // Trim any leading backslashes var toStart = 1; for (; toStart < to.length; ++toStart) { if (to.charCodeAt(toStart) !== 47 /*/*/) break; } var toEnd = to.length; var toLen = toEnd - toStart; // Compare paths to find the longest common path from root var length = fromLen < toLen ? fromLen : toLen; var lastCommonSep = -1; var i = 0; for (; i <= length; ++i) { if (i === length) { if (toLen > length) { if (to.charCodeAt(toStart + i) === 47 /*/*/) { // We get here if `from` is the exact base path for `to`. // For example: from='/foo/bar'; to='/foo/bar/baz' return to.slice(toStart + i + 1); } else if (i === 0) { // We get here if `from` is the root // For example: from='/'; to='/foo' return to.slice(toStart + i); } } else if (fromLen > length) { if (from.charCodeAt(fromStart + i) === 47 /*/*/) { // We get here if `to` is the exact base path for `from`. // For example: from='/foo/bar/baz'; to='/foo/bar' lastCommonSep = i; } else if (i === 0) { // We get here if `to` is the root. // For example: from='/foo'; to='/' lastCommonSep = 0; } } break; } var fromCode = from.charCodeAt(fromStart + i); var toCode = to.charCodeAt(toStart + i); if (fromCode !== toCode) break; else if (fromCode === 47 /*/*/) lastCommonSep = i; } var out = ''; // Generate the relative path based on the path difference between `to` // and `from` for (i = fromStart + lastCommonSep + 1; i <= fromEnd; ++i) { if (i === fromEnd || from.charCodeAt(i) === 47 /*/*/) { if (out.length === 0) out += '..'; else out += '/..'; } } // Lastly, append the rest of the destination (`to`) path that comes after // the common path parts if (out.length > 0) return out + to.slice(toStart + lastCommonSep); else { toStart += lastCommonSep; if (to.charCodeAt(toStart) === 47 /*/*/) ++toStart; return to.slice(toStart); } }, _makeLong: function _makeLong(path) { return path; }, dirname: function dirname(path) { assertPath(path); if (path.length === 0) return '.'; var code = path.charCodeAt(0); var hasRoot = code === 47 /*/*/; var end = -1; var matchedSlash = true; for (var i = path.length - 1; i >= 1; --i) { code = path.charCodeAt(i); if (code === 47 /*/*/) { if (!matchedSlash) { end = i; break; } } else { // We saw the first non-path separator matchedSlash = false; } } if (end === -1) return hasRoot ? '/' : '.'; if (hasRoot && end === 1) return '//'; return path.slice(0, end); }, basename: function basename(path, ext) { if (ext !== undefined && typeof ext !== 'string') throw new TypeError('"ext" argument must be a string'); assertPath(path); var start = 0; var end = -1; var matchedSlash = true; var i; if (ext !== undefined && ext.length > 0 && ext.length <= path.length) { if (ext.length === path.length && ext === path) return ''; var extIdx = ext.length - 1; var firstNonSlashEnd = -1; for (i = path.length - 1; i >= 0; --i) { var code = path.charCodeAt(i); if (code === 47 /*/*/) { // If we reached a path separator that was not part of a set of path // separators at the end of the string, stop now if (!matchedSlash) { start = i + 1; break; } } else { if (firstNonSlashEnd === -1) { // We saw the first non-path separator, remember this index in case // we need it if the extension ends up not matching matchedSlash = false; firstNonSlashEnd = i + 1; } if (extIdx >= 0) { // Try to match the explicit extension if (code === ext.charCodeAt(extIdx)) { if (--extIdx === -1) { // We matched the extension, so mark this as the end of our path // component end = i; } } else { // Extension does not match, so our result is the entire path // component extIdx = -1; end = firstNonSlashEnd; } } } } if (start === end) end = firstNonSlashEnd;else if (end === -1) end = path.length; return path.slice(start, end); } else { for (i = path.length - 1; i >= 0; --i) { if (path.charCodeAt(i) === 47 /*/*/) { // If we reached a path separator that was not part of a set of path // separators at the end of the string, stop now if (!matchedSlash) { start = i + 1; break; } } else if (end === -1) { // We saw the first non-path separator, mark this as the end of our // path component matchedSlash = false; end = i + 1; } } if (end === -1) return ''; return path.slice(start, end); } }, extname: function extname(path) { assertPath(path); var startDot = -1; var startPart = 0; var end = -1; var matchedSlash = true; // Track the state of characters (if any) we see before our first dot and // after any path separator we find var preDotState = 0; for (var i = path.length - 1; i >= 0; --i) { var code = path.charCodeAt(i); if (code === 47 /*/*/) { // If we reached a path separator that was not part of a set of path // separators at the end of the string, stop now if (!matchedSlash) { startPart = i + 1; break; } continue; } if (end === -1) { // We saw the first non-path separator, mark this as the end of our // extension matchedSlash = false; end = i + 1; } if (code === 46 /*.*/) { // If this is our first dot, mark it as the start of our extension if (startDot === -1) startDot = i; else if (preDotState !== 1) preDotState = 1; } else if (startDot !== -1) { // We saw a non-dot and non-path separator before our dot, so we should // have a good chance at having a non-empty extension preDotState = -1; } } if (startDot === -1 || end === -1 || // We saw a non-dot character immediately before the dot preDotState === 0 || // The (right-most) trimmed path component is exactly '..' preDotState === 1 && startDot === end - 1 && startDot === startPart + 1) { return ''; } return path.slice(startDot, end); }, format: function format(pathObject) { if (pathObject === null || typeof pathObject !== 'object') { throw new TypeError('The "pathObject" argument must be of type Object. Received type ' + typeof pathObject); } return _format('/', pathObject); }, parse: function parse(path) { assertPath(path); var ret = { root: '', dir: '', base: '', ext: '', name: '' }; if (path.length === 0) return ret; var code = path.charCodeAt(0); var isAbsolute = code === 47 /*/*/; var start; if (isAbsolute) { ret.root = '/'; start = 1; } else { start = 0; } var startDot = -1; var startPart = 0; var end = -1; var matchedSlash = true; var i = path.length - 1; // Track the state of characters (if any) we see before our first dot and // after any path separator we find var preDotState = 0; // Get non-dir info for (; i >= start; --i) { code = path.charCodeAt(i); if (code === 47 /*/*/) { // If we reached a path separator that was not part of a set of path // separators at the end of the string, stop now if (!matchedSlash) { startPart = i + 1; break; } continue; } if (end === -1) { // We saw the first non-path separator, mark this as the end of our // extension matchedSlash = false; end = i + 1; } if (code === 46 /*.*/) { // If this is our first dot, mark it as the start of our extension if (startDot === -1) startDot = i;else if (preDotState !== 1) preDotState = 1; } else if (startDot !== -1) { // We saw a non-dot and non-path separator before our dot, so we should // have a good chance at having a non-empty extension preDotState = -1; } } if (startDot === -1 || end === -1 || // We saw a non-dot character immediately before the dot preDotState === 0 || // The (right-most) trimmed path component is exactly '..' preDotState === 1 && startDot === end - 1 && startDot === startPart + 1) { if (end !== -1) { if (startPart === 0 && isAbsolute) ret.base = ret.name = path.slice(1, end);else ret.base = ret.name = path.slice(startPart, end); } } else { if (startPart === 0 && isAbsolute) { ret.name = path.slice(1, startDot); ret.base = path.slice(1, end); } else { ret.name = path.slice(startPart, startDot); ret.base = path.slice(startPart, end); } ret.ext = path.slice(startDot, end); } if (startPart > 0) ret.dir = path.slice(0, startPart - 1);else if (isAbsolute) ret.dir = '/'; return ret; }, sep: '/', delimiter: ':', win32: null, posix: null }; posix.posix = posix; module.exports = posix; /***/ }), /* 5 */ /***/ (function(__unused_webpack_module, exports, __webpack_require__) { var __assign = (this && this.__assign) || function () { __assign = Object.assign || function(t) { for (var s, i = 1, n = arguments.length; i < n; i++) { s = arguments[i]; for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p)) t[p] = s[p]; } return t; }; return __assign.apply(this, arguments); }; Object.defineProperty(exports, "__esModule", ({ value: true })); var utils_1 = __webpack_require__(6); /** * Benchling format is just JSON. It's virtually the same format. */ exports["default"] = (function (text) { var partJSON = JSON.parse(text); var seq = (0, utils_1.complement)(partJSON.bases).seq; // throw an error if the sequence is empty if (seq.length < 1) { throw new Error("Invalid Benchling part: empty sequence"); } return [ { annotations: partJSON.annotations.map(function (a) { return (__assign(__assign({}, a), { direction: (0, utils_1.parseDirection)(a.strand) })); }), name: partJSON.name || partJSON._id, seq: seq, type: (0, utils_1.guessType)(seq), }, ]; }); /***/ }), /* 6 */ /***/ ((__unused_webpack_module, exports) => { Object.defineProperty(exports, "__esModule", ({ value: true })); exports.guessType = exports.parseDirection = exports.firstElement = exports.reverseComplement = exports.complement = void 0; // from http://arep.med.harvard.edu/labgc/adnan/projects/Utilities/revcomp.html var comp = { A: "T", B: "V", C: "G", D: "H", G: "C", H: "D", K: "M", M: "K", N: "N", R: "Y", S: "S", T: "A", U: "A", V: "B", W: "W", X: "X", Y: "R", a: "t", b: "v", c: "g", d: "h", g: "c", h: "d", k: "m", m: "k", n: "n", r: "y", s: "s", t: "a", u: "a", v: "b", w: "w", x: "x", y: "r", }; /** * Return the filtered sequence and its complement if its an empty string, return the same for both. */ var complement = function (origSeq) { if (!origSeq) { return { compSeq: "", seq: "" }; } // filter out unrecognized basepairs and build up the complement var seq = ""; var compSeq = ""; for (var i = 0, origLength = origSeq.length; i < origLength; i += 1) { if (comp[origSeq[i]]) { seq += origSeq[i]; compSeq += comp[origSeq[i]]; } } return { compSeq: compSeq, seq: seq }; }; exports.complement = complement; /** * Return the reverse complement of a DNA sequence */ var reverseComplement = function (inputSeq) { var compSeq = (0, exports.complement)(inputSeq).compSeq; return compSeq.split("").reverse().join(""); }; exports.reverseComplement = reverseComplement; var firstElement = function (arr) { if (!Array.isArray(arr)) return undefined; return arr[0]; }; exports.firstElement = firstElement; var fwd = new Set(["FWD", "fwd", "FORWARD", "forward", "FOR", "for", "TOP", "top", "1", 1]); var rev = new Set(["REV", "rev", "REVERSE", "reverse", "BOTTOM", "bottom", "-1", -1]); /** * Parse the user defined direction, estimate the direction of the element * * ```js * parseDirection("FWD") => 1 * parseDirection("FORWARD") => 1 * ``` */ var parseDirection = function (direction) { if (!direction) { return 0; } if (fwd.has(direction)) { return 1; } if (rev.has(direction)) { return -1; } return 0; }; exports.parseDirection = parseDirection; /** * mapping the 64 standard codons to amino acids * no synth AA's * * adapted from: "https://github.com/keithwhor/NtSeq/blob/master/lib/nt.js */ var codon2AA = { AAA: "K", AAC: "N", AAG: "K", AAT: "N", ACA: "T", ACC: "T", ACG: "T", ACT: "T", AGA: "R", AGC: "S", AGG: "R", AGT: "S", ATA: "I", ATC: "I", ATG: "M", ATT: "I", CAA: "Q", CAC: "H", CAG: "Q", CAT: "H", CCA: "P", CCC: "P", CCG: "P", CCT: "P", CGA: "R", CGC: "R", CGG: "R", CGT: "R", CTA: "L", CTC: "L", CTG: "L", CTT: "L", GAA: "E", GAC: "D", GAG: "E", GAT: "D", GCA: "A", GCC: "A", GCG: "A", GCT: "A", GGA: "G", GGC: "G", GGG: "G", GGT: "G", GTA: "V", GTC: "V", GTG: "V", GTT: "V", TAA: "*", TAC: "Y", TAG: "*", TAT: "Y", TCA: "S", TCC: "S", TCG: "S", TCT: "S", TGA: "*", TGC: "C", TGG: "W", TGT: "C", TTA: "L", TTC: "F", TTG: "L", TTT: "F", }; var aminoAcids = Array.from(new Set(Object.values(codon2AA)).values()).join(""); var aminoAcidRegex = new RegExp("^[".concat(aminoAcids, "]+$"), "i"); /** Infer the type of a sequence. This only allows a couple wildcard characters so may be overly strict. */ var guessType = function (seq) { if (/^[atgcn.]+$/i.test(seq)) { return "dna"; } else if (/^[augcn.]+$/i.test(seq)) { return "rna"; } else if (aminoAcidRegex.test(seq)) { return "aa"; } return "unknown"; }; exports.guessType = guessType; /***/ }), /* 7 */ /***/ ((__unused_webpack_module, exports, __webpack_require__) => { Object.defineProperty(exports, "__esModule", ({ value: true })); var fast_xml_parser_1 = __webpack_require__(8); var utils_1 = __webpack_require__(6); /** * Parse a BioBrick in XML format to Seq[] * * Eg: https://parts.igem.org/cgi/xml/part.cgi?part=BBa_J23100 */ exports["default"] = (function (file) { var bail = function (err) { throw new Error("Failed on BioBrick: ".concat(err)); }; // parse var parsedBiobrick = new fast_xml_parser_1.XMLParser({ isArray: function (name) { return ["features", "part_name", "sequences"].includes(name); }, removeNSPrefix: true, }).parse(file); // get the first part var part = parsedBiobrick.rsbpml.part_list.part; if (!part) bail("No part seen in part_list"); // extract the useful fields var features = part.features, part_name = part.part_name, sequences = part.sequences; var name = (0, utils_1.firstElement)(part_name); // parse the iGEM annotations var annotations = features .map(function (_a) { var feature = _a.feature; if (!feature) return null; var direction = feature.direction, endpos = feature.endpos, startpos = feature.startpos, type = feature.type; return { direction: (0, utils_1.parseDirection)(direction), end: +endpos, name: "".concat(direction, "-").concat(startpos), start: +startpos || 0, type: type || undefined, }; }) .filter(function (a) { return a; }); // parse the sequence var seq = (0, utils_1.complement)(sequences[0].seq_data).seq; return [ { annotations: annotations, name: name, seq: seq, type: (0, utils_1.guessType)(seq), }, ]; }); /***/ }), /* 8 */ /***/ ((module) => { module.exports = require("fast-xml-parser"); /***/ }), /* 9 */ /***/ ((__unused_webpack_module, exports, __webpack_require__) => { Object.defineProperty(exports, "__esModule", ({ value: true })); var utils_1 = __webpack_require__(6); exports["default"] = (function (text, fileName) { // partFactory returns a negative "circular" prop, we assume they're all linear if (text.trim().startsWith(">")) { return text .split(">") // split up if it's a multi-seq FASTA file .map(function (t) { // this starts at the end of the first line, grabs all other characters, // and removes any newlines (leaving only the original sequence) // sequence "cleaning" happens in complement (we don't support bps other than // the most common right now) var seq = t.substr(t.indexOf("\n"), t.length).replace(/\s/g, ""); // the first line contains the name, though there's lots of variability around // the information on this line... // >MCHU - Calmodulin - Human, rabbit, bovine, rat, and chicken var name = t.substring(0, t.search(/\n|\|/)).replace(/\//g, ""); return { annotations: [], name: name, seq: seq, type: (0, utils_1.guessType)(seq), }; }) .filter(function (p) { return p.name && p.seq; }); } if (text.trim().startsWith(";")) { // it's an old-school style FASTA that's punctuated with semi-colons // ;my|NAME // ;my comment // actGacgata var name_1 = text.substring(0, text.search(/\n|\|/)).replace(/\//g, ""); var newlineBeforeSeq = text.indexOf("\n", text.lastIndexOf(";")); var seq_1 = text.substring(newlineBeforeSeq, text.length); return [ { annotations: [], name: name_1, seq: seq_1, type: (0, utils_1.guessType)(seq_1), }, ]; } // assume that it's a no name FASTA. Ie it's just a file with dna and no header // try and get the name from the fileName var lastChar = fileName.lastIndexOf(".") || fileName.length; var name = fileName.substring(0, lastChar) || "Untitled"; var seq = text; return [ { annotations: [], name: name, seq: seq, type: (0, utils_1.guessType)(seq), }, ]; }); /***/ }), /* 10 */ /***/ ((__unused_webpack_module, exports, __webpack_require__) => { Object.defineProperty(exports, "__esModule", ({ value: true })); var utils_1 = __webpack_require__(6); // a list of recognized types that would constitute an annotation name var tagNameSet = new Set(["gene", "product", "note", "db_xref", "protein_id", "label", "lab_host", "locus_tag"]); // a list of tags that could represent colors var tagColorSet = new Set(["ApEinfo_fwdcolor", "ApEinfo_revcolor", "loom_color"]); /** * takes in a string representation of a GenBank file and outputs our * part representation of it. an example of a Genbank file can be found * at ./parsers/Gebank, though there is significant variability to the * format * * another official example can be found at: * https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html */ exports["default"] = (function (fileInput, fileName) { return fileInput .split(/\/\/\s/g) .filter(function (f) { return f.length > 5; }) .map(function (file) { // the first row contains the name of the part and its creation date // LOCUS SCU49845 5028 bp DNA PLN 21-JUN-1999 var HEADER_ROW = file.substring(file.indexOf("LOCUS"), file.search(/\\n|\n/)); var _a = HEADER_ROW.split(/\s{2,}/g).filter(function (h) { return h; }), name = _a[1]; // trying to avoid giving a stupid name like Exported which Snapgene has by default // also, if there is not name in header, the seq length will be used as name, which should // be corrected (Number.parseInt to check for this case) https://stackoverflow.com/a/175787/7541747 var parsedName = name; if ((parsedName === "Exported" && file.includes("SnapGene")) || // stupid Snapgene name Number.parseInt(parsedName, 10) // it thinks seq-length is the name ) { // first try and get the name from ACCESSION var accessionName = false; if (file.includes("ACCESSION")) { // this will be undefined is there is no var accession = file .substring(file.indexOf("ACCESSION"), file.indexOf("\n", file.indexOf("ACCESSION"))) .replace(".", "") .split(/\s{2,}/) .filter(function (a) { return a !== "ACCESSION"; }) .pop(); if (accession) { parsedName = accession; accessionName = true; } } // otherwise, revert to trying to get the part name from the file name if (!accessionName && fileName) { parsedName = fileName .substring(0, Math.max(fileName.search(/\n|\||\./), fileName.lastIndexOf("."))) .replace(/\/\s/g, ""); } else if (!accessionName) { parsedName = "Unnamed"; // give up } } // the part sequence is contained in and after the line that begins with ORIGIN // do this before annotations so we can calc seqlength // // ORIGIN // 1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg // 61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct var SEQ_ROWS = file.substring(file.lastIndexOf("ORIGIN") + "ORIGIN".length, file.length); var seq = SEQ_ROWS.replace(/[^gatc]/gi, ""); (seq = (0, utils_1.complement)(seq).seq); // seq and compSeq // the features are translated into annotations // region is FEATURES thru ORIGIN // FEATURES Location/Qualifiers // source 1..5028 // /organism="Saccharomyces cerevisiae" // /db_xref="taxon:4932" // /chromosome="IX" // /map="9" // // in the example above, source is the annotation "type" and name is "taxon:4932" // because "db_xref" is a recognized name type // the name depends on whether the tag type is in the reocgnized list of types var annotations = []; var primers = []; if (file.indexOf("FEATURES")) { var FEATURES_LINE = file.indexOf("FEATURES"); var FEATURES_NEW_LINE = file.indexOf("\n", FEATURES_LINE); var ORIGIN_LINE = file.lastIndexOf("ORIGIN"); // some files have a contig file line that needs to parsed out/ shouldn't be included in // the features parsing if (file.includes("CONTIG")) { ORIGIN_LINE = Math.min(ORIGIN_LINE, file.indexOf("CONTIG")); } var FEATURES_ROWS = file .substring(FEATURES_NEW_LINE, ORIGIN_LINE) .split(/\n/) .filter(function (r) { return r; }); FEATURES_ROWS.forEach(function (r) { // in the example above, the following converts it to ['source', '1..5028'] var currLine = r.split(/\s{2,}/g).filter(function (l) { return l; }); if (currLine.length > 1) { // it's the beginning of a new feature/annotation var type = currLine[0], rangeString = currLine[1]; var rangeRegex = /\d+/g; var direction = r.includes("complement") ? -1 : 1; // using the example above, this parses 1..5028 into 1 and 5028 var _a = [0, 0], start = _a[0], end = _a[1]; var startSearch = rangeRegex.exec(rangeString); if (startSearch) { // the - 1 is because genbank is 1-based while we're 0 start = +startSearch[0] - (1 % seq.length); // single bp annotations are a thing in Genbank: // https://github.com/Lattice-Automation/seqviz/issues/117 end = (start + 1) % seq.length; var endSearch = rangeRegex.exec(rangeString); if (endSearch) { end = +endSearch[0] % seq.length; } } if (type !== "source") { // create a new annotation around the properties in this line (type and range) annotations.push({ direction: direction, // set in next block end: end, name: "", start: start, type: type, }); } } else if (currLine.length === 1) { // it's a continuation of a prior feature/annotation // any updates (to name or color) to the last annotation should affect // the last annotation that's in the array if (currLine[0].startsWith("/")) { var tag = currLine[0]; tag = tag.replace(/[/"]/g, ""); // get rid of quotation marks and forward slaches // should now look like ['organism', 'Saccharomyces cerevisiae'] var _b = tag.split(/=/), tagName = _b[0], tagValue = _b[1]; // the two values that can be extracted are name or color var lastAnn = annotations.length - 1; if (tagNameSet.has(tagName.toLowerCase())) { // the key is something we recognize as an annotation name if (lastAnn >= 0 && !annotations[lastAnn].name) { annotations[lastAnn].name = tagValue.trim(); } } else if (tagColorSet.has(tagName)) { // the key is something we recognize as an annotation color if (lastAnn > -1) { annotations[lastAnn].color = tagValue; } } } } }); } return { annotations: annotations, name: parsedName.trim() || fileName, primers: primers, seq: seq, type: (0, utils_1.guessType)(seq), }; }); }); /***/ }), /* 11 */ /***/ ((__unused_webpack_module, exports, __webpack_require__) => { Object.defineProperty(exports, "__esModule", ({ value: true })); var fast_xml_parser_1 = __webpack_require__(8); var utils_1 = __webpack_require__(6); /** * Converts a JBEI file to a Seq * * https://j5.jbei.org/j5manual/pages/94.html */ exports["default"] = (function (JBEI) { // weird edge case with directed quotation characters var fileString = JBEI.replace(/“|”/g, '"'); // parse var parsedJbei = new fast_xml_parser_1.XMLParser({ removeNSPrefix: true, }).parse(fileString); // destructure the parameters from JBEI var seq = parsedJbei.seq; var features = seq.features, name = seq.name, sequence = seq.sequence; // attempt to get the name out of the JBEI var parsedName = "Unnamed"; if (name) { parsedName = name; } // attempt to get the sequence. fail if it's not findable var parsedSeq = (0, utils_1.complement)(sequence).seq; // seq and compSeq if (!parsedSeq) return []; // attempt to parse the JBEI annotations into our version of annotations var annotations = []; if (features && features.feature) { features.feature.forEach(function (feature) { if (!feature) return; var complement = feature.complement, label = feature.label, location = feature.location, type = feature.type; if (location && location.genbankStart && location.end) { annotations.push({ direction: complement ? -1 : 1, // JBEI is 1-based end: +location.end || 0, name: label || "Untitled",