grnsight
Version:
Web app and service for visualizing models of gene regulatory networks
325 lines (306 loc) • 13.3 kB
JavaScript
// Parses the "optimization_parameters" and "optimization_diagnostics" sheets and the
// 2-column sheets from a GRNmap input or output workbook
var constants = require(__dirname + "/workbook-constants");
/**
 * Returns the header text expected in the first row of a known sheet.
 *
 * @param {string} sheetName - Name of the sheet being checked.
 * @param {number} column - Zero-based column index; only consulted for the
 *     "optimization_parameters" and "optimization_diagnostics" sheets.
 * @param {number} row - Zero-based row index; headers exist only in row 0.
 * @returns {string|undefined} The expected header, or undefined when the row
 *     is not 0 or the sheet name is not recognized.
 */
const getSheetHeader = (sheetName, column, row) => {
    // Headers live in the first row only.
    if (row !== 0) {
        return undefined;
    }
    const isFirstColumn = column === 0;
    switch (sheetName) {
    case "production_rates":
    case "optimized_production_rates":
        return "production_rate";
    case "degradation_rates":
        return "degradation_rate";
    case "threshold_b":
    case "optimized_threshold_b":
        return "threshold_b";
    case "optimization_parameters":
        return isFirstColumn ? "optimization_parameter" : "value";
    case "optimization_diagnostics":
        return isFirstColumn ? "Parameter" : "Value";
    default:
        return undefined;
    }
};
// Expected `typeof` result for each recognized optimization parameter.
// "object" marks parameters whose value is a list; the element type of each
// list is given in optimizationParametersObectKey.
const optimizationParametersTypeKey = {
    alpha: "number",
    "kk_max": "number",
    MaxIter: "number",
    TolFun: "number",
    MaxFunEval: "number",
    TolX: "number",
    "production_function": "string",
    "L_curve": "number",
    "estimate_params": "number",
    "make_graphs": "number",
    "fix_P": "number",
    "fix_b": "number",
    "expression_timepoints": "object",
    Strain: "object",
    species: "string",
    "taxon_id": "number",
    workbookType: "string",
    "simulation_timepoints": "object",
    "b_or_tau": "number"
};

// Parameter names accepted in the parameter section of the
// "optimization_diagnostics" sheet.
const optimizationDiagnosticsParameters = [
    "LSE",
    "Penalty",
    "min LSE",
    "iteration count"
];

// Expected `typeof` result for the elements of each list-valued parameter.
const optimizationParametersObectKey = {
    "expression_timepoints": "number",
    Strain: "string",
    "simulation_timepoints": "number"
};
/**
 * Appends a warning to a workbook-like result object, capping the list size.
 *
 * The `warnings` array is created on demand. Once it already holds
 * MAX_WARNINGS entries the message is dropped and
 * `constants.errors.warningsCountError` is pushed onto `errors` instead; the
 * `errors` array is likewise created on demand so that a result object without
 * one no longer causes a TypeError on overflow.
 *
 * @param {Object} workbook - Result object that owns `warnings` (and `errors`).
 * @param {*} message - Warning entry to record.
 * @returns {boolean|undefined} false when the warning limit was hit and the
 *     message was not stored; undefined when the message was stored.
 */
const addWarning = (workbook, message) => {
    const MAX_WARNINGS = 75;

    if (!Object.keys(workbook).includes("warnings")) {
        workbook.warnings = [];
    }

    if (workbook.warnings.length < MAX_WARNINGS) {
        workbook.warnings.push(message);
    } else {
        // The warnings list is lazily initialised above, so do the same for
        // errors rather than assuming the caller provided one.
        if (!Array.isArray(workbook.errors)) {
            workbook.errors = [];
        }
        workbook.errors.push(constants.errors.warningsCountError);
        return false;
    }
};
/**
 * Appends an error to `output.errors`, substituting a "too many errors" entry
 * once the list already holds MAX_ERRORS items.
 *
 * @param {Object} output - Result object with an `errors` array.
 * @param {*} message - Error entry to record.
 * @returns {boolean|undefined} false when the limit had been reached (and
 *     `constants.errors.errorsCountError` was pushed instead of `message`);
 *     undefined when `message` itself was stored.
 */
const addError = (output, message) => {
    const MAX_ERRORS = 20;
    const hasRoom = output.errors.length < MAX_ERRORS;

    if (hasRoom) {
        output.errors.push(message);
        return undefined;
    }

    output.errors.push(constants.errors.errorsCountError);
    return false;
};
// Names of the sheets that are parsed as 2-column sheets: an "id" column
// followed by a single value column.
const TWO_COL_SHEET_NAMES = [
    "production_rates", "degradation_rates", "threshold_b",
    "optimized_production_rates", "optimized_threshold_b"
];
/**
 * Validates a gene identifier and records an error on `output` when it fails.
 *
 * A valid name is a string of at most 12 characters made up only of letters,
 * digits, underscores and hyphens. Checks run in that order and stop at the
 * first failure.
 *
 * @param {Object} output - Result object passed to addError.
 * @param {string} sheetName - Sheet the gene came from (for the error message).
 * @param {*} gene - Cell value to validate.
 * @param {number} row - Row number reported in the error message.
 * @returns {boolean} true when the name is valid, false otherwise.
 */
const validGeneName = (output, sheetName, gene, row) => {
    const MAX_GENE_LENGTH = 12;
    const disallowedCharacters = /[^a-z0-9\_\-]/gi;

    if (typeof gene !== "string") {
        addError(output, constants.errors.invalidGeneTypeError(sheetName, gene, row));
        return false;
    }

    if (gene.length > MAX_GENE_LENGTH) {
        addError(output, constants.errors.invalidGeneLengthError(sheetName, gene, row));
        return false;
    }

    // search() reports the index of the first disallowed character, or -1.
    if (gene.search(disallowedCharacters) !== -1) {
        addError(output, constants.errors.specialCharacterError(sheetName, gene, row));
        return false;
    }

    return true;
};
// Optimization Parameters Parser
/**
 * Parses the "optimization_parameters" sheet into a key/value map and
 * validates the header row and each parameter's type.
 *
 * Every row after the header contributes one entry: the first cell is the
 * parameter name and the remaining cells are its value (a single cell is
 * stored as a scalar, several cells as an array).
 *
 * @param {{name: string, data: Array<Array>}} sheet - Sheet name plus its rows.
 * @returns {{data: Object, errors: Array, warnings: Array}} Parsed parameters
 *     with any header errors and type warnings collected along the way.
 */
const parseMetaDataSheet = (sheet) => {
    const meta = {
        data: {},
        errors: [],
        warnings: []
    };

    // A sheet with no rows has no header row at all. Treat it as an empty row
    // so both headers are reported missing instead of throwing a TypeError.
    const headerRow = sheet.data[0] === undefined ? [] : sheet.data[0];
    for (const column of [0, 1]) {
        const expectedHeader = getSheetHeader(sheet.name, column, 0);
        if (headerRow[column] === undefined) {
            addError(meta, constants.errors.missingColumnHeaderError(sheet.name,
                constants.numbersToLetters[column], expectedHeader));
        } else if (headerRow[column] !== expectedHeader) {
            addError(meta, constants.errors.incorrectColumnHeaderError(sheet.name,
                constants.numbersToLetters[column], expectedHeader));
        }
    }

    sheet.data.forEach((cells, index) => {
        if (index === 0) {
            return;
        }
        const values = cells.slice(1);
        // Extract element from array if array contains only 1 value
        meta.data[cells[0]] = values.length > 1 ? values : values[0];
    });

    for (const key of Object.keys(meta.data)) {
        const value = meta.data[key];
        const expectedType = optimizationParametersTypeKey[key];
        const elementType = optimizationParametersObectKey[key];
        // Human-readable type used in warning messages.
        const describedType = expectedType === "object" ? `list of ${elementType}s` : expectedType;

        if (value === undefined) {
            // NOTE(review): this fires when the parameter has no value, not when
            // the parameter name is unrecognized; behaviour kept as-is, confirm
            // that it matches the wording of unknownOptimizationParameter.
            addWarning(meta, constants.warnings.unknownOptimizationParameter(sheet.name, key));
        } else if (typeof value !== expectedType) {
            // A list-valued parameter with a single entry is stored as a scalar,
            // so a lone element of the right element type is acceptable.
            if (expectedType !== "object" || typeof value !== elementType) {
                addWarning(meta, constants.warnings.invalidOptimizationParameter(sheet.name, key, describedType));
            }
        } else if (expectedType === "object") {
            if (!Array.isArray(value)) {
                // typeof is also "object" for null, Dates, etc.; those are not
                // lists and cannot be iterated.
                addWarning(meta, constants.warnings.invalidOptimizationParameter(sheet.name, key, describedType));
            } else {
                for (const item of value) {
                    if (typeof item !== elementType) {
                        // warn once per list so the warnings are not flooded
                        addWarning(meta, constants.warnings.invalidOptimizationParameter(sheet.name, key,
                            describedType));
                        break;
                    }
                }
            }
        }
    }

    return meta;
};
/**
 * Parses the "optimization_diagnostics" sheet.
 *
 * The sheet is read in three stages:
 *   1. a header row ("Parameter" / "Value");
 *   2. a parameter section of name/value rows, which ends at the first empty
 *      row, at a row whose first two cells are both blank, at a row whose
 *      first cell is "Gene", or at the end of the sheet;
 *   3. an optional MSE section: a header row starting with "Gene" followed by
 *      one row per gene.
 *
 * Row numbers passed to warning and error builders are 1-based.
 *
 * @param {{name: string, data: Array<Array>}} sheet - Sheet name plus its rows.
 * @returns {{data: {Parameters: Object, MSE: Object}, errors: Array, warnings: Array}}
 *     Parsed parameters and per-gene MSE values with collected diagnostics.
 */
const parseOptimizationDiagnosticsSheet = (sheet) => {
    const output = {
        data: {
            Parameters: {},
            MSE: {
                "column-headers": [],
                Genes: {}
            },
        },
        errors: [],
        warnings: [],
    };
    const rows = sheet.data;
    const columnHeaders = output.data.MSE["column-headers"];

    // A cell is blank when it is absent or a whitespace-only string. Numbers and
    // other non-string values are never blank (and have no .replace method).
    const isBlankCell = (cell) => cell === undefined || cell === null ||
        (typeof cell === "string" && cell.replace(/\s+/g, "") === "");
    // A row is empty when it is absent from the sheet or has no cells.
    const isEmptyRow = (cells) => cells === undefined || cells === null || cells.length < 1;

    // Check Headers
    const headerRow = isEmptyRow(rows[0]) ? [] : rows[0];
    if (headerRow.length > 1) {
        for (const col of [0, 1]) {
            const expectedHeader = getSheetHeader(sheet.name, col, 0);
            if (headerRow[col] !== expectedHeader) {
                addError(output, constants.errors.incorrectColumnHeaderError(sheet.name,
                    constants.numbersToLetters[col], expectedHeader));
            }
        }
    } else {
        // Report every header column that is absent, from column B down to the
        // first column that is present.
        for (let col = 1; col >= headerRow.length; col--) {
            addError(output, constants.errors.missingColumnHeaderError(sheet.name,
                constants.numbersToLetters[col], getSheetHeader(sheet.name, col, 0)));
        }
    }

    // Check Parameter Section
    let row = 1;
    // an empty row (or the end of the sheet) is the indicator to move on to the MSE
    while (row < rows.length && !isEmptyRow(rows[row])) {
        const [parameter, value] = rows[row];

        if (isBlankCell(parameter) && isBlankCell(value)) {
            // no parameter and no value: assume that it is time to move on
            row++;
            break;
        }
        if (parameter === "Gene") {
            // The MSE header follows the parameters without a blank row. Stay on
            // this row so it is parsed as the MSE header below.
            break;
        }

        if (rows[row].length > 2) {
            addWarning(output, constants.warnings.extraneousDataWarning(sheet.name, row + 1));
        }
        if (!optimizationDiagnosticsParameters.includes(parameter)) {
            addWarning(output, constants.warnings.unknownOptimizationDiagnosticsParameter(sheet.name,
                parameter));
        } else if (typeof value !== "number") {
            addWarning(output, constants.warnings.invalidOptimizationDiagnosticsValue(sheet.name, parameter));
        } else {
            output.data.Parameters[parameter] = value;
        }
        row++;
    }

    // Skip until Gene section
    while (row < rows.length && isEmptyRow(rows[row])) {
        row++;
    }
    if (row >= rows.length) {
        // The sheet ends without an MSE section.
        return output;
    }

    // Check Gene section MSE's
    const mseHeaderRow = rows[row];
    if (mseHeaderRow.length > 1) {
        if (mseHeaderRow[0] !== "Gene") {
            addWarning(output, constants.warnings.incorrectMSEGeneHeaderWarning(sheet.name, row + 1));
        }
        for (let col = 1; col < mseHeaderRow.length; col++) {
            const header = mseHeaderRow[col];
            if (typeof header !== "string" || !header.includes("MSE")) {
                addWarning(output, constants.warnings.incorrectMSEHeaderWarning(sheet.name, header,
                    row + 1, constants.numbersToLetters[col]));
            }
            // the header is still recorded, even when suspicious, because the gene
            // MSE values depend on the order of the column headers
            columnHeaders.push(header);
        }
        row++;

        // on to the actual genes
        while (row < rows.length) {
            const geneRow = isEmptyRow(rows[row]) ? [] : rows[row];
            if (geneRow.length > columnHeaders.length + 1) {
                addWarning(output, constants.warnings.extraneousDataWarning(sheet.name, row + 1));
            }
            const gene = geneRow[0];
            // if it's a valid gene, set the key = MSE values
            if (validGeneName(output, sheet.name, gene, row + 1)) {
                const mseValues = [];
                for (let col = 1; col <= columnHeaders.length; col++) {
                    const cell = geneRow[col];
                    if (typeof cell === "number") {
                        mseValues.push(cell);
                    } else if (cell === undefined) {
                        addWarning(output, constants.warnings.missingMSEDataWarning(sheet.name, row + 1,
                            constants.numbersToLetters[col]));
                    } else {
                        addWarning(output, constants.warnings.invalidMSEDataWarning(sheet.name, row + 1,
                            constants.numbersToLetters[col]));
                    }
                }
                output.data.MSE.Genes[gene] = mseValues;
            }
            row++;
        }
    }
    return output;
};
/**
 * Parses a 2-column sheet (an "id" column followed by one value column) into
 * a gene -> number map.
 *
 * Row 0 is validated as the header row; every later row must hold a valid
 * gene name and a numeric value. Row numbers passed to warning and error
 * builders are 1-based.
 *
 * @param {{name: string, data: Array<Array>}} sheet - Sheet name plus its rows.
 * @returns {{data: Object, errors: Array, warnings: Array}} Parsed values with
 *     any errors and warnings collected along the way.
 */
const parseTwoColumnSheet = (sheet) => {
    const output = {
        data: {},
        errors: [],
        warnings: [],
    };
    // getSheetHeader only knows headers for row 0, so look the value column's
    // header up once here. Asking for it with a data row index yields undefined,
    // which is what invalid-value errors used to receive.
    const valueHeader = getSheetHeader(sheet.name, 1, 0);

    for (let row = 0; row < sheet.data.length; row++) {
        const cells = sheet.data[row];
        if (cells.length > 2) {
            addWarning(output, constants.warnings.extraneousDataWarning(sheet.name, row + 1));
        }

        if (row === 0) {
            // Header row: column A must be "id", column B the sheet's value header.
            if (cells.length > 0 && cells[0] !== "id") {
                addError(output, constants.errors.idLabelError(sheet.name));
            }
            if (cells.length <= 1) {
                addError(output, constants.errors.missingColumnHeaderError(sheet.name,
                    constants.numbersToLetters[1], valueHeader));
            } else if (cells[1] !== valueHeader) {
                addError(output, constants.errors.incorrectColumnHeaderError(sheet.name,
                    constants.numbersToLetters[1], valueHeader));
            }
            continue;
        }

        // Data row: the gene must be a valid name and the value a number.
        const [gene, value] = cells;
        if (!validGeneName(output, sheet.name, gene, row + 1)) {
            continue;
        }
        if (typeof value === "number") {
            output.data[gene] = value;
        } else {
            addError(output, constants.errors.invalidValueError(sheet.name, value, row + 1, valueHeader));
        }
    }
    return output;
};
module.exports = function (workbookFile) {
let output = {
meta: {
data: {},
errors: [],
warnings: []
}, // optimization_parameters only
twoColumnSheets: {}, // 2-column data
meta2: {} // optimation_diagnostics only //temporary until where it goes is decided
};
workbookFile.forEach(function (sheet) {
if (sheet.name === "optimization_parameters") {
output.meta = parseMetaDataSheet(sheet);
// above line creates an object from the optimization paramerters sheet
// these are part of the "meta" property
} else if (TWO_COL_SHEET_NAMES.includes(sheet.name)) {
output.twoColumnSheets[sheet.name] = parseTwoColumnSheet(sheet);
} else if (sheet.name === "optimization_diagnostics") {
output.meta2 = parseOptimizationDiagnosticsSheet(sheet);
}
});
return output;
};