seqparse
Version:
Parse sequence files (GenBank, FASTA, SnapGene, SBOL) and accession IDs (NCBI, iGEM) to a common format
1,406 lines (1,312 loc) • 78.6 kB
JavaScript
// Universal module definition emitted by webpack: publishes the value returned
// by `factory` under the name "seqparse" using whichever module system is present.
(function webpackUniversalModuleDefinition(root, factory) {
// CommonJS with a `module` object: replace module.exports with the library
if(typeof exports === 'object' && typeof module === 'object')
module.exports = factory();
// AMD loader: register a named module with no dependencies
else if(typeof define === 'function' && define.amd)
define("seqparse", [], factory);
// bare `exports` object: attach the library as a property
else if(typeof exports === 'object')
exports["seqparse"] = factory();
// no module system detected: attach to the object passed in as `root`
else
root["seqparse"] = factory();
})(this, () => {
return /******/ (() => { // webpackBootstrap
/******/ "use strict";
/******/ var __webpack_modules__ = ([
/* 0 */
/***/ (function(__unused_webpack_module, exports, __webpack_require__) {
// Helper for down-levelled async functions: drives the iterator produced by
// `generator` and returns a promise (of type P, defaulting to Promise) that
// settles with the iterator's final value or with the first uncaught error.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // wrap a yielded value in a P promise unless it already is one
    var toPromise = function (candidate) {
        if (candidate instanceof P) {
            return candidate;
        }
        return new P(function (settle) { settle(candidate); });
    };
    if (!P) {
        P = Promise;
    }
    return new P(function (resolve, reject) {
        // either finish with the iterator's return value or wait on the yielded one
        var advance = function (state) {
            if (state.done) {
                resolve(state.value);
                return;
            }
            toPromise(state.value).then(onFulfilled, onRejected);
        };
        var onFulfilled = function (value) {
            try {
                advance(generator.next(value));
            }
            catch (err) {
                reject(err);
            }
        };
        var onRejected = function (reason) {
            try {
                advance(generator["throw"](reason));
            }
            catch (err) {
                reject(err);
            }
        };
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
// Helper for down-levelled generators: returns an iterator-like object whose
// next/throw/return methods repeatedly call `body` (a switch-based state machine)
// with the shared state object `_` and interpret the [opcode, value] pair it returns.
var __generator = (this && this.__generator) || function (thisArg, body) {
// `_` is the state handed to `body`: `label` selects the next case, `sent()` returns
// (or rethrows) the value passed in by the caller, `trys`/`ops` track try regions.
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
// verb(n) builds the public method that feeds opcode n (0 next, 1 throw, 2 return) to step
function verb(n) { return function (v) { return step([n, v]); }; }
function step(op) {
// `f` is set while a step is running, so re-entrant calls are rejected
if (f) throw new TypeError("Generator is already executing.");
while (_) try {
// when `y` holds an inner iterator, forward the operation to it first
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
case 0: case 1: t = op; break;
// opcode 4: hand op[1] to the caller and resume at the next label later
case 4: _.label++; return { value: op[1], done: false };
// opcode 5: start iterating op[1] as an inner iterator
case 5: _.label++; y = op[1]; op = [0]; continue;
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
// opcodes 2 (return), 3 (break/jump) and 6 (error) are resolved against the innermost try region, if any
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
// run the next segment of the state machine
op = body.call(thisArg, _);
// an exception thrown by `body` is converted into opcode 6 and processed by the loop
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
};
Object.defineProperty(exports, "__esModule", ({ value: true }));
exports.parseFile = void 0;
var fetchFile_1 = __webpack_require__(1);
var parseFile_1 = __webpack_require__(3);
exports.parseFile = parseFile_1.default;
/**
 * Parse a sequence file, or download a sequence with an accession ID.
 *
 * When no `options.fileName` is given and `input` passes `isAccession`, the
 * sequence is fetched remotely; otherwise `input` is parsed as file contents
 * and the first parsed sequence is returned.
 *
 * Returns a promise (the body is a down-levelled async function).
 */
exports["default"] = (function (input, options) { return __awaiter(void 0, void 0, void 0, function () {
return __generator(this, function (_a) {
switch (_a.label) {
case 0:
// not an accession (or a fileName was supplied): jump to label 2 and parse locally
if (!(!(options === null || options === void 0 ? void 0 : options.fileName) && (0, fetchFile_1.isAccession)(input))) return [3 /*break*/, 2];
return [4 /*yield*/, (0, fetchFile_1.default)(input, options)];
// resume here with the fetched result
case 1: return [2 /*return*/, _a.sent()];
// only the first element of the parsed array is returned
case 2: return [2 /*return*/, (0, parseFile_1.default)(input, options)[0]];
}
});
}); });
/***/ }),
/* 1 */
/***/ (function(__unused_webpack_module, exports, __webpack_require__) {
// Helper for down-levelled async functions: drives the iterator produced by
// `generator` and returns a promise (of type P, defaulting to Promise) that
// settles with the iterator's final value or with the first uncaught error.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // wrap a yielded value in a P promise unless it already is one
    var toPromise = function (candidate) {
        if (candidate instanceof P) {
            return candidate;
        }
        return new P(function (settle) { settle(candidate); });
    };
    if (!P) {
        P = Promise;
    }
    return new P(function (resolve, reject) {
        // either finish with the iterator's return value or wait on the yielded one
        var advance = function (state) {
            if (state.done) {
                resolve(state.value);
                return;
            }
            toPromise(state.value).then(onFulfilled, onRejected);
        };
        var onFulfilled = function (value) {
            try {
                advance(generator.next(value));
            }
            catch (err) {
                reject(err);
            }
        };
        var onRejected = function (reason) {
            try {
                advance(generator["throw"](reason));
            }
            catch (err) {
                reject(err);
            }
        };
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
// Helper for down-levelled generators: returns an iterator-like object whose
// next/throw/return methods repeatedly call `body` (a switch-based state machine)
// with the shared state object `_` and interpret the [opcode, value] pair it returns.
var __generator = (this && this.__generator) || function (thisArg, body) {
// `_` is the state handed to `body`: `label` selects the next case, `sent()` returns
// (or rethrows) the value passed in by the caller, `trys`/`ops` track try regions.
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
// verb(n) builds the public method that feeds opcode n (0 next, 1 throw, 2 return) to step
function verb(n) { return function (v) { return step([n, v]); }; }
function step(op) {
// `f` is set while a step is running, so re-entrant calls are rejected
if (f) throw new TypeError("Generator is already executing.");
while (_) try {
// when `y` holds an inner iterator, forward the operation to it first
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
case 0: case 1: t = op; break;
// opcode 4: hand op[1] to the caller and resume at the next label later
case 4: _.label++; return { value: op[1], done: false };
// opcode 5: start iterating op[1] as an inner iterator
case 5: _.label++; y = op[1]; op = [0]; continue;
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
// opcodes 2 (return), 3 (break/jump) and 6 (error) are resolved against the innermost try region, if any
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
// run the next segment of the state machine
op = body.call(thisArg, _);
// an exception thrown by `body` is converted into opcode 6 and processed by the loop
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
};
Object.defineProperty(exports, "__esModule", ({ value: true }));
exports.isAccession = void 0;
var node_fetch_1 = __webpack_require__(2);
var parseFile_1 = __webpack_require__(3);
/**
 * Get a remote sequence from NCBI or the iGEM registry.
 *
 * Accessions starting with "BB" are requested from the iGEM parts server
 * (through a CORS proxy when running in a browser-like environment or when
 * `options.cors` is set); everything else is requested from NCBI efetch.
 * The response text is parsed and the first parsed sequence is returned.
 *
 * Throws an Error when the request fails, the response is not ok, or the
 * response body is empty.
 */
exports["default"] = (function (accession, options) { return __awaiter(void 0, void 0, void 0, function () {
var url, body, response, err_1;
return __generator(this, function (_a) {
switch (_a.label) {
case 0:
// default to NCBI; overridden below for BioBricks
url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=".concat(accession.trim(), "&rettype=gbwithparts&retmode=text");
// NOTE(review): the accession is interpolated into the URL without encoding, and the
// "BB" check runs on the untrimmed value while the URL uses the trimmed one
if (accession.startsWith("BB")) {
// it's a BioBrick... target the iGEM repo
if ((typeof window !== "undefined" && typeof process === "undefined") || (options === null || options === void 0 ? void 0 : options.cors)) {
// use this hack to get around a no-CORS setting on iGEM webserver, pending fix on their side
url = "https://cors-anywhere.herokuapp.com/http://parts.igem.org/cgi/xml/part.cgi?part=".concat(accession.trim());
}
else {
url = "http://parts.igem.org/cgi/xml/part.cgi?part=".concat(accession.trim());
}
}
body = "";
_a.label = 1;
case 1:
// try region: labels 1-3 are the guarded body, label 4 handles a thrown error, label 5 continues
_a.trys.push([1, 4, , 5]);
return [4 /*yield*/, (0, node_fetch_1.default)(url)];
case 2:
response = _a.sent();
return [4 /*yield*/, response.text()];
case 3:
body = _a.sent();
return [3 /*break*/, 5];
case 4:
// request or body read failed: rethrow with the accession and URL for context
err_1 = _a.sent();
throw new Error("Failed to get part: accession=".concat(accession, " url=").concat(url, " err=").concat(err_1));
case 5:
if (!response.ok || !body.length) {
throw new Error("Failed to get part, no body returned: accession=".concat(accession, " url=").concat(url));
}
// parseFile is called without a fileName, so the format is detected from the body alone
return [4 /*yield*/, (0, parseFile_1.default)(body)];
case 6: return [2 /*return*/, (_a.sent())[0]];
}
});
}); });
/**
 * Heuristic check for whether the passed ID is an accession in iGEM or NCBI.
 *
 * Anything starting with "BB" counts as a BioBrick; otherwise the ID must be
 * shorter than 14 characters and made up only of letters, digits, "_", "-" or ".".
 */
var isAccession = function (accession) {
    var looksLikeBioBrick = accession.startsWith("BB");
    if (looksLikeBioBrick) {
        return true;
    }
    var isShort = accession.length < 14;
    return isShort && /^[a-z0-9_\-.]+$/i.test(accession);
};
exports.isAccession = isAccession;
/***/ }),
/* 2 */
/***/ ((module) => {
// external dependency: not bundled, loaded through require when this module is first requested
module.exports = require("node-fetch");
/***/ }),
/* 3 */
/***/ ((__unused_webpack_module, exports, __webpack_require__) => {
Object.defineProperty(exports, "__esModule", ({ value: true }));
var path_1 = __webpack_require__(4);
var benchling_1 = __webpack_require__(5);
var biobrick_1 = __webpack_require__(7);
var fasta_1 = __webpack_require__(9);
var genbank_1 = __webpack_require__(10);
var jbei_1 = __webpack_require__(11);
var sbol_1 = __webpack_require__(12);
var seqbuilder_1 = __webpack_require__(15);
var snapgene_1 = __webpack_require__(16);
var utils_1 = __webpack_require__(6);
/**
 * parseFile converts the contents of a sequence file to an array of Seq.
 *
 * The format is detected from the file contents and, failing that, from the
 * extension of `opts.fileName`; the matching parser is then invoked.
 *
 * @param file the text contents of the sequence file
 * @param opts optional settings; `fileName` is used for format detection and
 *             naming, and the whole object is handed to the SnapGene parser
 * @returns an array of `{ annotations, name, seq, type }` objects, with each
 *          sequence's annotations sorted by start and then end
 * @throws Error if `file` is empty or its format is not recognized
 */
exports["default"] = (function (file, opts) {
    var fileName = (opts === null || opts === void 0 ? void 0 : opts.fileName) || "";
    var sourceName = fileName.split(path_1.sep).pop() || fileName;
    if (!file) {
        throw Error("cannot parse null or empty string");
    }
    // this is a check for an edge case, where the user uploads some kind
    // of file that's full of bps but doesn't fit into a defined type
    //
    // a file without any newline is treated as a single line (search/indexOf
    // return -1 in that case, which previously produced an empty first line)
    var newlineIndex = file.indexOf("\n");
    var firstLine = newlineIndex === -1 ? file : file.substring(0, newlineIndex);
    // the "g" flag is required: without it only the first non-nucleotide character
    // is removed and nearly every file passes the 80% threshold. U is counted too
    // so that plain RNA text (which guessType supports) is still accepted.
    var dnaCharLength = firstLine.replace(/[^atcgu]/gi, "").length;
    var dnaOnlyFile = dnaCharLength / firstLine.length > 0.8; // is it >80% dna?
    // name is the file's base name up to its first "."; names without any "."
    // are used whole, and an empty result falls back to "Untitled"
    var dotIndex = sourceName.indexOf(".");
    var baseName = dotIndex === -1 ? sourceName : sourceName.substring(0, dotIndex);
    var name = (fileName && baseName) || "Untitled";
    // another edge case check for whether the seq is a JSON seq from Benchling
    // just a heuristic that says 1) yes it can be parsed 2) it contains a list of
    // fields that are common to Benchling files
    var isBenchling = false;
    try {
        var benchlingJSON_1 = JSON.parse(file); // will err out if not JSON
        if (["bases", "annotations", "primers"].every(function (k) { return typeof benchlingJSON_1[k] !== "undefined"; })) {
            isBenchling = true;
        }
    }
    catch (ex) {
        // expected: most inputs are not JSON
    }
    var prefix = file.substring(0, 200);
    var seqs;
    // the first case whose expression evaluates to true wins, so order matters
    switch (true) {
        // JBEI
        case prefix.includes(':seq="http://jbei.org/sequence"'):
        case file.startsWith("<seq:seq"):
            seqs = (0, jbei_1.default)(file);
            break;
        // FASTA
        case file.startsWith(">"):
        case file.startsWith(";"):
        case fileName.endsWith(".seq"):
        case fileName.endsWith(".fa"):
        case fileName.endsWith(".fas"):
        case fileName.endsWith(".fasta"):
            seqs = (0, fasta_1.default)(file, fileName);
            break;
        // Genbank
        case file.includes("LOCUS") && file.includes("ORIGIN"):
        case fileName.endsWith(".gb"):
        case fileName.endsWith(".gbk"):
        case fileName.endsWith(".genbank"):
        case fileName.endsWith(".ape"):
            seqs = (0, genbank_1.default)(file, fileName);
            break;
        // SnapGene (receives the options object rather than the file text)
        case fileName.endsWith(".dna"):
            seqs = (0, snapgene_1.default)(opts);
            break;
        // SeqBuilder
        case prefix.includes("Written by SeqBuilder"):
        case fileName.endsWith(".sbd"):
            seqs = (0, seqbuilder_1.default)(file, fileName);
            break;
        // BioBrick XML
        case prefix.includes("Parts from the iGEM"):
        case prefix.includes("<part_list>"):
            seqs = (0, biobrick_1.default)(file);
            break;
        // Benchling JSON
        case isBenchling:
            seqs = (0, benchling_1.default)(file);
            break;
        // SBOL
        case prefix.includes("RDF"):
            seqs = (0, sbol_1.default)(file, fileName);
            break;
        // a DNA text file without an official formatting
        case dnaOnlyFile: {
            var seq = (0, utils_1.complement)(file).seq;
            seqs = [{ annotations: [], name: name, seq: seq, type: (0, utils_1.guessType)(seq) }];
            break;
        }
        default:
            throw Error("".concat(fileName, " File type not recognized: ").concat(file));
    }
    // bit of clean up to: only return the fields in a Seq and reorder to match expectations.
    return seqs.map(function (p) { return ({
        annotations: p.annotations
            .sort(function (a, b) { return a.start - b.start || a.end - b.end; })
            .map(function (a) { return ({
            color: a.color,
            direction: a.direction,
            end: a.end,
            name: a.name,
            start: a.start,
            type: a.type,
        }); }),
        name: p.name,
        seq: p.seq,
        type: p.type,
    }); });
});
/***/ }),
/* 4 */
/***/ ((module) => {
// 'path' module extracted from Node.js v8.11.1 (only the posix part)
// transpiled with Babel
// Copyright Joyent, Inc. and other Node contributors.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to permit
// persons to whom the Software is furnished to do so, subject to the
// following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
// USE OR OTHER DEALINGS IN THE SOFTWARE.
// Guard used by the path helpers: throws a TypeError unless `path` is a string.
function assertPath(path) {
  if (typeof path === 'string') return;
  throw new TypeError('Path must be a string. Received ' + JSON.stringify(path));
}
// Resolves . and .. elements in a path with directory names.
//
// Scans `path` one character code at a time (47 is '/', 46 is '.') and builds
// the result in `res` without a leading or trailing slash. When
// `allowAboveRoot` is truthy, '..' segments that cannot be resolved against an
// earlier segment are kept in the output; otherwise they are dropped.
function normalizeStringPosix(path, allowAboveRoot) {
var res = '';
var lastSegmentLength = 0;
var lastSlash = -1;
// number of consecutive dots seen in the current segment, or -1 once the
// segment contains any other character
var dots = 0;
var code;
// runs one step past the end so the final segment is flushed as if followed by '/'
for (var i = 0; i <= path.length; ++i) {
if (i < path.length)
code = path.charCodeAt(i);
else if (code === 47 /*/*/)
break;
else
code = 47 /*/*/;
if (code === 47 /*/*/) {
if (lastSlash === i - 1 || dots === 1) {
// NOOP: empty segment or '.'
} else if (lastSlash !== i - 1 && dots === 2) {
// '..' segment: drop the previous segment unless the result already ends in '..'
if (res.length < 2 || lastSegmentLength !== 2 || res.charCodeAt(res.length - 1) !== 46 /*.*/ || res.charCodeAt(res.length - 2) !== 46 /*.*/) {
if (res.length > 2) {
var lastSlashIndex = res.lastIndexOf('/');
if (lastSlashIndex !== res.length - 1) {
if (lastSlashIndex === -1) {
res = '';
lastSegmentLength = 0;
} else {
res = res.slice(0, lastSlashIndex);
lastSegmentLength = res.length - 1 - res.lastIndexOf('/');
}
lastSlash = i;
dots = 0;
continue;
}
} else if (res.length === 2 || res.length === 1) {
res = '';
lastSegmentLength = 0;
lastSlash = i;
dots = 0;
continue;
}
}
if (allowAboveRoot) {
if (res.length > 0)
res += '/..';
else
res = '..';
lastSegmentLength = 2;
}
} else {
// ordinary segment: append it to the result
if (res.length > 0)
res += '/' + path.slice(lastSlash + 1, i);
else
res = path.slice(lastSlash + 1, i);
lastSegmentLength = i - lastSlash - 1;
}
lastSlash = i;
dots = 0;
} else if (code === 46 /*.*/ && dots !== -1) {
++dots;
} else {
dots = -1;
}
}
return res;
}
// Builds a path string from a path object: `dir` (or `root`) joined to `base`
// (or `name` + `ext`). No separator is inserted when the directory is the root itself.
function _format(sep, pathObject) {
  var directory = pathObject.dir || pathObject.root;
  var fileBase = pathObject.base || (pathObject.name || '') + (pathObject.ext || '');
  if (!directory) return fileBase;
  return directory === pathObject.root
    ? directory + fileBase
    : directory + sep + fileBase;
}
// POSIX-only implementation of the Node.js `path` API. Every method works on
// character codes: 47 is '/' and 46 is '.'.
var posix = {
// path.resolve([from ...], to)
// Processes the arguments right to left until an absolute path is formed,
// falling back to process.cwd() when none of the arguments is absolute.
resolve: function resolve() {
var resolvedPath = '';
var resolvedAbsolute = false;
var cwd;
for (var i = arguments.length - 1; i >= -1 && !resolvedAbsolute; i--) {
var path;
if (i >= 0)
path = arguments[i];
else {
if (cwd === undefined)
cwd = process.cwd();
path = cwd;
}
assertPath(path);
// Skip empty entries
if (path.length === 0) {
continue;
}
resolvedPath = path + '/' + resolvedPath;
resolvedAbsolute = path.charCodeAt(0) === 47 /*/*/;
}
// At this point the path should be resolved to a full absolute path, but
// handle relative paths to be safe (might happen when process.cwd() fails)
// Normalize the path
resolvedPath = normalizeStringPosix(resolvedPath, !resolvedAbsolute);
if (resolvedAbsolute) {
if (resolvedPath.length > 0)
return '/' + resolvedPath;
else
return '/';
} else if (resolvedPath.length > 0) {
return resolvedPath;
} else {
return '.';
}
},
// Collapses '.', '..' and repeated slashes; keeps a leading and a trailing
// slash when the input had them and returns '.' for an empty result.
normalize: function normalize(path) {
assertPath(path);
if (path.length === 0) return '.';
var isAbsolute = path.charCodeAt(0) === 47 /*/*/;
var trailingSeparator = path.charCodeAt(path.length - 1) === 47 /*/*/;
// Normalize the path
path = normalizeStringPosix(path, !isAbsolute);
if (path.length === 0 && !isAbsolute) path = '.';
if (path.length > 0 && trailingSeparator) path += '/';
if (isAbsolute) return '/' + path;
return path;
},
// True when the path is non-empty and starts with '/'.
isAbsolute: function isAbsolute(path) {
assertPath(path);
return path.length > 0 && path.charCodeAt(0) === 47 /*/*/;
},
// Joins all non-empty arguments with '/' and normalizes the result;
// returns '.' when there is nothing to join.
join: function join() {
if (arguments.length === 0)
return '.';
var joined;
for (var i = 0; i < arguments.length; ++i) {
var arg = arguments[i];
assertPath(arg);
if (arg.length > 0) {
if (joined === undefined)
joined = arg;
else
joined += '/' + arg;
}
}
if (joined === undefined)
return '.';
return posix.normalize(joined);
},
// Computes the relative path from `from` to `to` after resolving both;
// returns '' when they resolve to the same path.
relative: function relative(from, to) {
assertPath(from);
assertPath(to);
if (from === to) return '';
from = posix.resolve(from);
to = posix.resolve(to);
if (from === to) return '';
// Skip any leading slashes
var fromStart = 1;
for (; fromStart < from.length; ++fromStart) {
if (from.charCodeAt(fromStart) !== 47 /*/*/)
break;
}
var fromEnd = from.length;
var fromLen = fromEnd - fromStart;
// Skip any leading slashes
var toStart = 1;
for (; toStart < to.length; ++toStart) {
if (to.charCodeAt(toStart) !== 47 /*/*/)
break;
}
var toEnd = to.length;
var toLen = toEnd - toStart;
// Compare paths to find the longest common path from root
var length = fromLen < toLen ? fromLen : toLen;
var lastCommonSep = -1;
var i = 0;
for (; i <= length; ++i) {
if (i === length) {
if (toLen > length) {
if (to.charCodeAt(toStart + i) === 47 /*/*/) {
// We get here if `from` is the exact base path for `to`.
// For example: from='/foo/bar'; to='/foo/bar/baz'
return to.slice(toStart + i + 1);
} else if (i === 0) {
// We get here if `from` is the root
// For example: from='/'; to='/foo'
return to.slice(toStart + i);
}
} else if (fromLen > length) {
if (from.charCodeAt(fromStart + i) === 47 /*/*/) {
// We get here if `to` is the exact base path for `from`.
// For example: from='/foo/bar/baz'; to='/foo/bar'
lastCommonSep = i;
} else if (i === 0) {
// We get here if `to` is the root.
// For example: from='/foo'; to='/'
lastCommonSep = 0;
}
}
break;
}
var fromCode = from.charCodeAt(fromStart + i);
var toCode = to.charCodeAt(toStart + i);
if (fromCode !== toCode)
break;
else if (fromCode === 47 /*/*/)
lastCommonSep = i;
}
var out = '';
// Generate the relative path based on the path difference between `to`
// and `from`
for (i = fromStart + lastCommonSep + 1; i <= fromEnd; ++i) {
if (i === fromEnd || from.charCodeAt(i) === 47 /*/*/) {
if (out.length === 0)
out += '..';
else
out += '/..';
}
}
// Lastly, append the rest of the destination (`to`) path that comes after
// the common path parts
if (out.length > 0)
return out + to.slice(toStart + lastCommonSep);
else {
toStart += lastCommonSep;
if (to.charCodeAt(toStart) === 47 /*/*/)
++toStart;
return to.slice(toStart);
}
},
// Identity function in this POSIX-only build: returns `path` unchanged.
_makeLong: function _makeLong(path) {
return path;
},
// Returns everything before the last path segment, ignoring trailing slashes;
// '.' for a relative path without a directory part and '/' for a root-level path.
dirname: function dirname(path) {
assertPath(path);
if (path.length === 0) return '.';
var code = path.charCodeAt(0);
var hasRoot = code === 47 /*/*/;
var end = -1;
var matchedSlash = true;
for (var i = path.length - 1; i >= 1; --i) {
code = path.charCodeAt(i);
if (code === 47 /*/*/) {
if (!matchedSlash) {
end = i;
break;
}
} else {
// We saw the first non-path separator
matchedSlash = false;
}
}
if (end === -1) return hasRoot ? '/' : '.';
if (hasRoot && end === 1) return '//';
return path.slice(0, end);
},
// Returns the last path segment, ignoring trailing slashes, with the suffix
// `ext` removed when it is given and the segment ends with it.
basename: function basename(path, ext) {
if (ext !== undefined && typeof ext !== 'string') throw new TypeError('"ext" argument must be a string');
assertPath(path);
var start = 0;
var end = -1;
var matchedSlash = true;
var i;
if (ext !== undefined && ext.length > 0 && ext.length <= path.length) {
if (ext.length === path.length && ext === path) return '';
var extIdx = ext.length - 1;
var firstNonSlashEnd = -1;
for (i = path.length - 1; i >= 0; --i) {
var code = path.charCodeAt(i);
if (code === 47 /*/*/) {
// If we reached a path separator that was not part of a set of path
// separators at the end of the string, stop now
if (!matchedSlash) {
start = i + 1;
break;
}
} else {
if (firstNonSlashEnd === -1) {
// We saw the first non-path separator, remember this index in case
// we need it if the extension ends up not matching
matchedSlash = false;
firstNonSlashEnd = i + 1;
}
if (extIdx >= 0) {
// Try to match the explicit extension
if (code === ext.charCodeAt(extIdx)) {
if (--extIdx === -1) {
// We matched the extension, so mark this as the end of our path
// component
end = i;
}
} else {
// Extension does not match, so our result is the entire path
// component
extIdx = -1;
end = firstNonSlashEnd;
}
}
}
}
if (start === end) end = firstNonSlashEnd;else if (end === -1) end = path.length;
return path.slice(start, end);
} else {
for (i = path.length - 1; i >= 0; --i) {
if (path.charCodeAt(i) === 47 /*/*/) {
// If we reached a path separator that was not part of a set of path
// separators at the end of the string, stop now
if (!matchedSlash) {
start = i + 1;
break;
}
} else if (end === -1) {
// We saw the first non-path separator, mark this as the end of our
// path component
matchedSlash = false;
end = i + 1;
}
}
if (end === -1) return '';
return path.slice(start, end);
}
},
// Returns the extension of the last path segment, from its last '.' to its end,
// or '' when there is none (including for segments such as '..').
extname: function extname(path) {
assertPath(path);
var startDot = -1;
var startPart = 0;
var end = -1;
var matchedSlash = true;
// Track the state of characters (if any) we see before our first dot and
// after any path separator we find
var preDotState = 0;
for (var i = path.length - 1; i >= 0; --i) {
var code = path.charCodeAt(i);
if (code === 47 /*/*/) {
// If we reached a path separator that was not part of a set of path
// separators at the end of the string, stop now
if (!matchedSlash) {
startPart = i + 1;
break;
}
continue;
}
if (end === -1) {
// We saw the first non-path separator, mark this as the end of our
// extension
matchedSlash = false;
end = i + 1;
}
if (code === 46 /*.*/) {
// If this is our first dot, mark it as the start of our extension
if (startDot === -1)
startDot = i;
else if (preDotState !== 1)
preDotState = 1;
} else if (startDot !== -1) {
// We saw a non-dot and non-path separator before our dot, so we should
// have a good chance at having a non-empty extension
preDotState = -1;
}
}
if (startDot === -1 || end === -1 ||
// We saw a non-dot character immediately before the dot
preDotState === 0 ||
// The (right-most) trimmed path component is exactly '..'
preDotState === 1 && startDot === end - 1 && startDot === startPart + 1) {
return '';
}
return path.slice(startDot, end);
},
// Builds a path string from a { root, dir, base, name, ext } object using '/'.
format: function format(pathObject) {
if (pathObject === null || typeof pathObject !== 'object') {
throw new TypeError('The "pathObject" argument must be of type Object. Received type ' + typeof pathObject);
}
return _format('/', pathObject);
},
// Splits a path into { root, dir, base, ext, name }; all fields are '' for an empty path.
parse: function parse(path) {
assertPath(path);
var ret = { root: '', dir: '', base: '', ext: '', name: '' };
if (path.length === 0) return ret;
var code = path.charCodeAt(0);
var isAbsolute = code === 47 /*/*/;
var start;
if (isAbsolute) {
ret.root = '/';
start = 1;
} else {
start = 0;
}
var startDot = -1;
var startPart = 0;
var end = -1;
var matchedSlash = true;
var i = path.length - 1;
// Track the state of characters (if any) we see before our first dot and
// after any path separator we find
var preDotState = 0;
// Get non-dir info
for (; i >= start; --i) {
code = path.charCodeAt(i);
if (code === 47 /*/*/) {
// If we reached a path separator that was not part of a set of path
// separators at the end of the string, stop now
if (!matchedSlash) {
startPart = i + 1;
break;
}
continue;
}
if (end === -1) {
// We saw the first non-path separator, mark this as the end of our
// extension
matchedSlash = false;
end = i + 1;
}
if (code === 46 /*.*/) {
// If this is our first dot, mark it as the start of our extension
if (startDot === -1) startDot = i;else if (preDotState !== 1) preDotState = 1;
} else if (startDot !== -1) {
// We saw a non-dot and non-path separator before our dot, so we should
// have a good chance at having a non-empty extension
preDotState = -1;
}
}
if (startDot === -1 || end === -1 ||
// We saw a non-dot character immediately before the dot
preDotState === 0 ||
// The (right-most) trimmed path component is exactly '..'
preDotState === 1 && startDot === end - 1 && startDot === startPart + 1) {
if (end !== -1) {
if (startPart === 0 && isAbsolute) ret.base = ret.name = path.slice(1, end);else ret.base = ret.name = path.slice(startPart, end);
}
} else {
if (startPart === 0 && isAbsolute) {
ret.name = path.slice(1, startDot);
ret.base = path.slice(1, end);
} else {
ret.name = path.slice(startPart, startDot);
ret.base = path.slice(startPart, end);
}
ret.ext = path.slice(startDot, end);
}
if (startPart > 0) ret.dir = path.slice(0, startPart - 1);else if (isAbsolute) ret.dir = '/';
return ret;
},
// path segment separator and PATH-list delimiter
sep: '/',
delimiter: ':',
// no win32 variant is provided; `posix` is pointed back at this object after the literal
win32: null,
posix: null
};
posix.posix = posix;
module.exports = posix;
/***/ }),
/* 5 */
/***/ (function(__unused_webpack_module, exports, __webpack_require__) {
// Helper for down-levelled object spread: copies own enumerable properties of
// every later argument onto the first and returns it. On first use it rebinds
// itself to Object.assign when available, or to the manual copy otherwise.
var __assign = (this && this.__assign) || function () {
    var copyOwnProperties = function (target) {
        for (var idx = 1; idx < arguments.length; idx++) {
            var source = arguments[idx];
            for (var key in source) {
                if (Object.prototype.hasOwnProperty.call(source, key)) {
                    target[key] = source[key];
                }
            }
        }
        return target;
    };
    __assign = Object.assign || copyOwnProperties;
    return __assign.apply(this, arguments);
};
Object.defineProperty(exports, "__esModule", ({ value: true }));
var utils_1 = __webpack_require__(6);
/**
 * Parse a Benchling part. Benchling's export is plain JSON that is already
 * close to the Seq shape: the bases are filtered through `complement`, and
 * each annotation gains a numeric `direction` derived from its `strand`.
 *
 * Throws an Error when no recognized bases remain.
 */
exports["default"] = (function (text) {
    var parsed = JSON.parse(text);
    var filtered = (0, utils_1.complement)(parsed.bases);
    var seq = filtered.seq;
    // throw an error if the sequence is empty
    if (seq.length < 1) {
        throw new Error("Invalid Benchling part: empty sequence");
    }
    var annotations = parsed.annotations.map(function (annotation) {
        var withDirection = { direction: (0, utils_1.parseDirection)(annotation.strand) };
        return __assign(__assign({}, annotation), withDirection);
    });
    var part = {
        annotations: annotations,
        name: parsed.name || parsed._id,
        seq: seq,
        type: (0, utils_1.guessType)(seq),
    };
    return [part];
});
/***/ }),
/* 6 */
/***/ ((__unused_webpack_module, exports) => {
Object.defineProperty(exports, "__esModule", ({ value: true }));
exports.guessType = exports.parseDirection = exports.firstElement = exports.reverseComplement = exports.complement = void 0;
// from http://arep.med.harvard.edu/labgc/adnan/projects/Utilities/revcomp.html
// Complement lookup for every recognized base symbol. The lowercase entries
// mirror the uppercase ones, so they are derived rather than spelled out.
var comp = (function () {
    var upperPairs = {
        A: "T", B: "V", C: "G", D: "H", G: "C", H: "D", K: "M", M: "K", N: "N",
        R: "Y", S: "S", T: "A", U: "A", V: "B", W: "W", X: "X", Y: "R",
    };
    var table = {};
    Object.keys(upperPairs).forEach(function (base) {
        table[base] = upperPairs[base];
        table[base.toLowerCase()] = upperPairs[base].toLowerCase();
    });
    return table;
})();
/**
 * Return the filtered sequence and its complement. Characters without an entry
 * in the lookup table are dropped from both. A falsy input yields two empty strings.
 */
var complement = function (origSeq) {
    if (!origSeq) {
        return { compSeq: "", seq: "" };
    }
    var kept = [];
    var paired = [];
    for (var i = 0; i < origSeq.length; i += 1) {
        var base = origSeq[i];
        var partner = comp[base];
        if (partner) {
            kept.push(base);
            paired.push(partner);
        }
    }
    return { compSeq: paired.join(""), seq: kept.join("") };
};
exports.complement = complement;
/**
 * Return the reverse complement of a DNA sequence: the complement of the
 * recognized bases, read back to front.
 */
var reverseComplement = function (inputSeq) {
    var forward = (0, exports.complement)(inputSeq).compSeq;
    var reversed = "";
    for (var i = forward.length - 1; i >= 0; i -= 1) {
        reversed += forward[i];
    }
    return reversed;
};
exports.reverseComplement = reverseComplement;
/** Return the first element of an array, or undefined when the input is not an array. */
var firstElement = function (arr) {
    return Array.isArray(arr) ? arr[0] : undefined;
};
exports.firstElement = firstElement;
// Recognized direction labels, stored lower-cased; string inputs are normalized
// before lookup so "Forward", "FORWARD" and " forward " all match.
var fwd = new Set(["fwd", "forward", "for", "top", "1", 1]);
var rev = new Set(["rev", "reverse", "bottom", "-1", -1]);
/**
 * Parse the user defined direction, estimate the direction of the element
 *
 * ```js
 * parseDirection("FWD") => 1
 * parseDirection("FORWARD") => 1
 * parseDirection("Reverse") => -1
 * parseDirection(-1) => -1
 * parseDirection("unknown") => 0
 * ```
 *
 * String inputs are matched ignoring case and surrounding whitespace; any other
 * value is matched as-is (so the numbers 1 and -1 are recognized).
 *
 * @returns 1 for forward, -1 for reverse, 0 when missing or unrecognized
 */
var parseDirection = function (direction) {
    if (!direction) {
        return 0;
    }
    var key = typeof direction === "string" ? direction.trim().toLowerCase() : direction;
    if (fwd.has(key)) {
        return 1;
    }
    if (rev.has(key)) {
        return -1;
    }
    return 0;
};
exports.parseDirection = parseDirection;
/**
 * mapping the 64 standard codons to amino acids
 * no synth AA's
 *
 * adapted from: "https://github.com/keithwhor/NtSeq/blob/master/lib/nt.js
 *
 * The table is generated: codons are enumerated with each position cycling
 * through T, C, A, G, and `residues` lists the amino acid (or "*" for a stop)
 * for each codon in that order.
 */
var codon2AA = (function () {
    var bases = "TCAG";
    var residues = "FFLLSSSSYY**CC*W" + // T..
        "LLLLPPPPHHQQRRRR" + // C..
        "IIIMTTTTNNKKSSRR" + // A..
        "VVVVAAAADDEEGGGG"; // G..
    var table = {};
    for (var i = 0; i < residues.length; i += 1) {
        var codon = bases[i >> 4] + bases[(i >> 2) & 3] + bases[i & 3];
        table[codon] = residues[i];
    }
    return table;
})();
// every distinct symbol the codon table can produce, each listed once
var aminoAcids = Object.keys(codon2AA).reduce(function (letters, codon) {
    var residue = codon2AA[codon];
    return letters.indexOf(residue) === -1 ? letters + residue : letters;
}, "");
var aminoAcidRegex = new RegExp("^[" + aminoAcids + "]+$", "i");
/** Infer the type of a sequence. This only allows a couple wildcard characters so may be overly strict. */
var guessType = function (seq) {
    // checked in order, so a sequence valid as both DNA and protein is reported as DNA
    var candidates = [
        ["dna", /^[atgcn.]+$/i],
        ["rna", /^[augcn.]+$/i],
        ["aa", aminoAcidRegex],
    ];
    for (var i = 0; i < candidates.length; i += 1) {
        if (candidates[i][1].test(seq)) {
            return candidates[i][0];
        }
    }
    return "unknown";
};
exports.guessType = guessType;
/***/ }),
/* 7 */
/***/ ((__unused_webpack_module, exports, __webpack_require__) => {
Object.defineProperty(exports, "__esModule", ({ value: true }));
var fast_xml_parser_1 = __webpack_require__(8);
var utils_1 = __webpack_require__(6);
/**
 * Parse a BioBrick in XML format to Seq[]
 *
 * Eg: https://parts.igem.org/cgi/xml/part.cgi?part=BBa_J23100
 *
 * Only the first part of the part list is converted. Every `<feature>` found
 * under the part's `<features>` element(s) becomes one annotation.
 *
 * @param file the BioBrick XML text
 * @returns a one-element array holding the parsed part
 * @throws Error ("Failed on BioBrick: ...") when the document holds no part or no sequence
 */
exports["default"] = (function (file) {
    var bail = function (err) {
        throw new Error("Failed on BioBrick: ".concat(err));
    };
    // parse
    var parsedBiobrick = new fast_xml_parser_1.XMLParser({
        isArray: function (name) {
            return ["features", "part_name", "sequences"].includes(name);
        },
        removeNSPrefix: true,
    }).parse(file);
    // get the first part; walk the document defensively so that an unexpected
    // document shape reports a BioBrick error instead of a raw TypeError
    var partList = parsedBiobrick && parsedBiobrick.rsbpml && parsedBiobrick.rsbpml.part_list;
    var part = partList && partList.part;
    if (Array.isArray(part)) {
        // repeated <part> elements are parsed into an array
        part = part[0];
    }
    if (!part)
        bail("No part seen in part_list");
    // extract the useful fields
    var features = part.features, part_name = part.part_name, sequences = part.sequences;
    var name = (0, utils_1.firstElement)(part_name);
    // parse the iGEM annotations
    var annotations = [];
    (Array.isArray(features) ? features : []).forEach(function (entry) {
        var feature = entry && entry.feature;
        if (!feature)
            return;
        // a <features> element with several <feature> children is parsed into an
        // array, one with a single child into a plain object: handle both
        [].concat(feature).forEach(function (f) {
            if (!f)
                return;
            var direction = f.direction, endpos = f.endpos, startpos = f.startpos, type = f.type;
            annotations.push({
                direction: (0, utils_1.parseDirection)(direction),
                end: +endpos,
                name: "".concat(direction, "-").concat(startpos),
                start: +startpos || 0,
                type: type || undefined,
            });
        });
    });
    // parse the sequence
    var firstSequence = Array.isArray(sequences) ? sequences[0] : undefined;
    if (!firstSequence)
        bail("No sequence seen in part");
    var seq = (0, utils_1.complement)(firstSequence.seq_data).seq;
    return [
        {
            annotations: annotations,
            name: name,
            seq: seq,
            type: (0, utils_1.guessType)(seq),
        },
    ];
});
/***/ }),
/* 8 */
/***/ ((module) => {
// external dependency: not bundled, loaded through require when this module is first requested
module.exports = require("fast-xml-parser");
/***/ }),
/* 9 */
/***/ ((__unused_webpack_module, exports, __webpack_require__) => {
Object.defineProperty(exports, "__esModule", ({ value: true }));
var utils_1 = __webpack_require__(6);
/**
 * Parse a FASTA file to Seq[]. Three layouts are handled:
 *
 * 1. ">" headers, possibly several records per file
 * 2. old-style ";" comment headers followed by a single sequence
 * 3. no header at all: the whole text is the sequence, named after the file
 *
 * Whitespace is removed from every sequence. All sequences are assumed linear
 * and no annotations are produced.
 *
 * @param text the FASTA text
 * @param fileName the source file name, used to name header-less sequences
 * @returns the parsed sequences; ">" records lacking a name or a sequence are skipped
 */
exports["default"] = (function (text, fileName) {
    var trimmed = text.trim();
    if (trimmed.startsWith(">")) {
        return text
            .split(">") // split up if it's a multi-seq FASTA file
            .map(function (t) {
            // everything after the first line is the sequence, with whitespace removed.
            // a record without any newline has no sequence at all (indexOf returns -1,
            // which must not be used as a start offset)
            var newlineIndex = t.indexOf("\n");
            var seq = newlineIndex === -1 ? "" : t.slice(newlineIndex).replace(/\s/g, "");
            // the first line contains the name, though there's lots of variability around
            // the information on this line...
            // >MCHU - Calmodulin - Human, rabbit, bovine, rat, and chicken
            // trim() drops the "\r" left behind by CRLF line endings
            var name = t.substring(0, t.search(/\n|\|/)).replace(/\//g, "").trim();
            return {
                annotations: [],
                name: name,
                seq: seq,
                type: (0, utils_1.guessType)(seq),
            };
        })
            .filter(function (p) { return p.name && p.seq; });
    }
    if (trimmed.startsWith(";")) {
        // it's an old-school style FASTA that's punctuated with semi-colons
        // ;my|NAME
        // ;my comment
        // actGacgata
        // the name is the first header line up to a "|", without the ";" marker
        var name_1 = trimmed
            .substring(0, trimmed.search(/\n|\|/))
            .replace(/\//g, "")
            .replace(/^;+/, "")
            .trim();
        // the sequence is everything after the last comment line; strip the line
        // breaks so guessType sees bases only
        var newlineBeforeSeq = text.indexOf("\n", text.lastIndexOf(";"));
        var seq_1 = newlineBeforeSeq === -1 ? "" : text.substring(newlineBeforeSeq).replace(/\s/g, "");
        return [
            {
                annotations: [],
                name: name_1,
                seq: seq_1,
                type: (0, utils_1.guessType)(seq_1),
            },
        ];
    }
    // assume that it's a no name FASTA. Ie it's just a file with dna and no header
    // try and get the name from the fileName: drop the extension when there is one
    // (lastIndexOf returns -1, not 0, when no "." is present)
    var extensionIndex = fileName ? fileName.lastIndexOf(".") : -1;
    var name = (extensionIndex > 0 ? fileName.substring(0, extensionIndex) : fileName) || "Untitled";
    var seq = text.replace(/\s/g, "");
    return [
        {
            annotations: [],
            name: name,
            seq: seq,
            type: (0, utils_1.guessType)(seq),
        },
    ];
});
/***/ }),
/* 10 */
/***/ ((__unused_webpack_module, exports, __webpack_require__) => {
Object.defineProperty(exports, "__esModule", ({ value: true }));
var utils_1 = __webpack_require__(6);
// GenBank qualifier keys (e.g. /gene="..."; the key is compared lower-cased) whose value
// is used as an annotation's name. The parser below only fills in a name that is still
// empty, so the first matching qualifier of a feature wins.
var tagNameSet = new Set(["gene", "product", "note", "db_xref", "protein_id", "label", "lab_host", "locus_tag"]);
// GenBank qualifier keys (compared case-sensitively) whose value is stored as the
// annotation's color
var tagColorSet = new Set(["ApEinfo_fwdcolor", "ApEinfo_revcolor", "loom_color"]);
/**
 * Works out the name of one GenBank record.
 *
 * The name is the second column of the LOCUS row. When that column is missing, is
 * SnapGene's default "Exported", or starts with a number (the sequence length was
 * picked up instead of a name), fall back to ACCESSION, then to the file name
 * without its extension, then to "Unnamed".
 *
 * @param {string} file - text of a single GenBank record
 * @param {string} [fileName] - name of the file the record came from
 * @returns {string|undefined} the name; `fileName` itself (possibly undefined) when nothing else is usable
 */
function genbankPartName(file, fileName) {
    // the first row contains the name of the part and its creation date
    // LOCUS       SCU49845     5028 bp    DNA             PLN       21-JUN-1999
    // look for the end of the row *after* LOCUS so leading blank lines don't matter;
    // the pattern also accepts a literal backslash-n, as the original did
    const fromLocus = file.slice(Math.max(file.indexOf("LOCUS"), 0));
    const headerEnd = fromLocus.search(/\\n|\n/);
    const headerRow = headerEnd === -1 ? fromLocus : fromLocus.slice(0, headerEnd);
    let parsedName = headerRow.split(/\s{2,}/g).filter((h) => h)[1];
    // trying to avoid giving a default name like Exported, which SnapGene uses
    const isSnapGeneDefault = parsedName === "Exported" && file.includes("SnapGene");
    // if there is no name in the header, the seq length ends up in the name column
    const looksLikeLength = Number.parseInt(parsedName, 10);
    if (!parsedName || isSnapGeneDefault || looksLikeLength) {
        // first try and get the name from ACCESSION
        let accession;
        const accessionIdx = file.indexOf("ACCESSION");
        if (accessionIdx !== -1) {
            const lineEnd = file.indexOf("\n", accessionIdx);
            accession = file
                .substring(accessionIdx, lineEnd === -1 ? file.length : lineEnd)
                .replace(".", "") // an empty accession is written as "."
                .split(/\s{2,}/)
                .filter((a) => a !== "ACCESSION")
                .pop();
        }
        if (accession) {
            parsedName = accession;
        }
        else if (fileName) {
            // otherwise, revert to trying to get the part name from the file name
            parsedName = fileName
                .substring(0, Math.max(fileName.search(/\n|\||\./), fileName.lastIndexOf(".")))
                .replace(/\/\s/g, "");
        }
        else {
            parsedName = "Unnamed"; // give up
        }
    }
    return (parsedName || "").trim() || fileName;
}
/**
 * Translates the FEATURES table of one GenBank record into annotations.
 *
 * FEATURES             Location/Qualifiers
 *      source          1..5028
 *                      /organism="Saccharomyces cerevisiae"
 *                      /db_xref="taxon:4932"
 *
 * A row with two or more columns starts a feature ("source" rows are not reported);
 * a single-column row starting with "/" is a qualifier that may name or color the
 * most recently reported annotation.
 *
 * @param {string} file - text of a single GenBank record
 * @param {number} seqLength - length of the record's sequence, used to wrap positions
 * @returns {Array<{direction: number, end: number, name: string, start: number, type: string, color?: string}>}
 */
function genbankAnnotations(file, seqLength) {
    const annotations = [];
    const featuresIdx = file.indexOf("FEATURES");
    // indexOf returns -1 when there is no FEATURES table; without this check the header
    // rows (DEFINITION, ACCESSION, ...) would be read as features
    if (featuresIdx === -1) {
        return annotations;
    }
    const featuresLineEnd = file.indexOf("\n", featuresIdx);
    if (featuresLineEnd === -1) {
        return annotations;
    }
    // region is FEATURES thru ORIGIN (or the end of the record if there is no ORIGIN)
    const originIdx = file.lastIndexOf("ORIGIN");
    let regionEnd = originIdx === -1 ? file.length : originIdx;
    // some files have a CONTIG line that shouldn't be included in the features parsing
    const contigIdx = file.indexOf("CONTIG");
    if (contigIdx !== -1) {
        regionEnd = Math.min(regionEnd, contigIdx);
    }
    // wrap positions around the sequence; leave them untouched for an empty sequence
    // instead of producing NaN from "% 0"
    const wrap = (n) => (seqLength > 0 ? n % seqLength : n);
    file
        .substring(featuresLineEnd, regionEnd)
        .split(/\n/)
        .filter((r) => r)
        .forEach((row) => {
            // in the example above, the following converts it to ['source', '1..5028']
            const columns = row.split(/\s{2,}/g).filter((c) => c);
            if (columns.length > 1) {
                // it's the beginning of a new feature/annotation
                const type = columns[0];
                const rangeString = columns[1];
                const rangeRegex = /\d+/g;
                const direction = row.includes("complement") ? -1 : 1;
                // using the example above, this parses 1..5028 into 1 and 5028
                let start = 0;
                let end = 0;
                const startSearch = rangeRegex.exec(rangeString);
                if (startSearch) {
                    // the - 1 is because genbank is 1-based while we're 0
                    start = wrap(+startSearch[0] - 1);
                    // single bp annotations are a thing in Genbank:
                    // https://github.com/Lattice-Automation/seqviz/issues/117
                    end = wrap(start + 1);
                    const endSearch = rangeRegex.exec(rangeString);
                    if (endSearch) {
                        end = wrap(+endSearch[0]);
                    }
                }
                if (type !== "source") {
                    // the name is filled in by a following qualifier row
                    annotations.push({
                        direction: direction,
                        end: end,
                        name: "",
                        start: start,
                        type: type,
                    });
                }
                return;
            }
            if (columns.length !== 1 || !columns[0].startsWith("/")) {
                return;
            }
            // it's a qualifier of a prior feature: any update (to name or color) affects
            // the last annotation that's in the array
            const lastAnnotation = annotations[annotations.length - 1];
            if (!lastAnnotation) {
                return;
            }
            // get rid of quotation marks and forward slashes: /gene="abc" -> gene=abc
            const tag = columns[0].replace(/[/"]/g, "");
            // split on the first "=" only so values that contain "=" stay intact
            const equalsIdx = tag.indexOf("=");
            const tagName = equalsIdx === -1 ? tag : tag.slice(0, equalsIdx);
            const tagValue = equalsIdx === -1 ? undefined : tag.slice(equalsIdx + 1);
            if (tagNameSet.has(tagName.toLowerCase())) {
                // the key is something we recognize as an annotation name; first one wins
                if (!lastAnnotation.name) {
                    lastAnnotation.name = (tagValue || "").trim();
                }
            }
            else if (tagColorSet.has(tagName)) {
                // the key is something we recognize as an annotation color
                lastAnnotation.color = tagValue;
            }
        });
    return annotations;
}
/**
 * Takes in a string representation of a GenBank file and outputs our part
 * representation of it: one part per record, where records are separated by
 * the "//" terminator line. There is significant variability to the format;
 * an official example can be found at:
 * https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html
 *
 * @param {string} fileInput - contents of the GenBank file (one or more records)
 * @param {string} [fileName] - file name, used as a fallback for the part name
 * @returns {Array<{annotations: Array, name: (string|undefined), primers: Array, seq: string, type: *}>}
 *   `primers` is always empty; `type` is whatever `utils_1.guessType` reports
 */
exports["default"] = (function (fileInput, fileName) {
    return fileInput
        .split(/\/\/\s/g)
        .filter((f) => f.length > 5)
        .map((file) => {
            // the part sequence is contained in and after the line that begins with ORIGIN
            //
            // ORIGIN
            //    1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg
            //   61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct
            const seqRows = file.substring(file.lastIndexOf("ORIGIN") + "ORIGIN".length, file.length);
            const seq = (0, utils_1.complement)(seqRows.replace(/[^gatc]/gi, "")).seq;
            return {
                annotations: genbankAnnotations(file, seq.length),
                name: genbankPartName(file, fileName),
                primers: [],
                seq: seq,
                type: (0, utils_1.guessType)(seq),
            };
        });
});
/***/ }),
/* 11 */
/***/ ((__unused_webpack_module, exports, __webpack_require__) => {
Object.defineProperty(exports, "__esModule", ({ value: true }));
var fast_xml_parser_1 = __webpack_require__(8);
var utils_1 = __webpack_require__(6);
/**
* Converts a JBEI file to a Seq
*
* https://j5.jbei.org/j5manual/pages/94.html
*/
exports["default"] = (function (JBEI) {
// weird edge case with directed quotation characters
var fileString = JBEI.replace(/“|”/g, '"');
// parse
var parsedJbei = new fast_xml_parser_1.XMLParser({
removeNSPrefix: true,
}).parse(fileString);
// destructure the parameters from JBEI
var seq = parsedJbei.seq;
var features = seq.features, name = seq.name, sequence = seq.sequence;
// attempt to get the name out of the JBEI
var parsedName = "Unnamed";
if (name) {
parsedName = name;
}
// attempt to get the sequence. fail if it's not findable
var parsedSeq = (0, utils_1.complement)(sequence).seq; // seq and compSeq
if (!parsedSeq)
return [];
// attempt to parse the JBEI annotations into our version of annotations
var annotations = [];
if (features && features.feature) {
features.feature.forEach(function (feature) {
if (!feature)
return;
var complement = feature.complement, label = feature.label, location = feature.location, type = feature.type;
if (location && location.genbankStart && location.end) {
annotations.push({
direction: complement ? -1 : 1,
// JBEI is 1-based
end: +location.end || 0,
name: label || "Untitled",