UNPKG

por

Version:

A streaming parser for the SPSS / PSPP Portable File Format (.por).

436 lines (414 loc) 14.2 kB
var porLineParser = require("./line-parser"); var STATES = 0, STATE_FILE_HEADER = ++STATES, STATE_VERSION = ++STATES, STATE_CREATION_DATE = ++STATES, STATE_CREATION_TIME = ++STATES, STATE_TAG = ++STATES, STATE_PRODUCT_IDENTIFICATION = ++STATES, STATE_AUTHOR_IDENTIFICATION = ++STATES, STATE_SUBPRODUCT_IDENTIFICATION = ++STATES, STATE_VARIABLE_COUNT = ++STATES, STATE_AFTER_VARIABLE_COUNT = ++STATES, STATE_CASE_WEIGHT_VARIABLE = ++STATES, STATE_VARIABLE = ++STATES, STATE_VALUE_LABEL = ++STATES, STATE_DOCUMENT = ++STATES, STATE_FIRST_DATUM = ++STATES, STATE_DATA = ++STATES, STATE_EOF = ++STATES; var FIELD_STATE_DEFAULT = ++STATES, FIELD_STATE_AFTER_ASTERISK = ++STATES; var STRING_STATE_DEFAULT = ++STATES, STRING_STATE_AFTER_LENGTH = ++STATES; var VARIABLE_STATE_WIDTH = ++STATES, VARIABLE_STATE_NAME = ++STATES, VARIABLE_STATE_PRINT_FORMAT_TYPE = ++STATES, VARIABLE_STATE_PRINT_FORMAT_WIDTH = ++STATES, VARIABLE_STATE_PRINT_FORMAT_PRECISION = ++STATES, VARIABLE_STATE_WRITE_FORMAT_TYPE = ++STATES, VARIABLE_STATE_WRITE_FORMAT_WIDTH = ++STATES, VARIABLE_STATE_WRITE_FORMAT_PRECISION = ++STATES, VARIABLE_STATE_TAG = ++STATES, VARIABLE_STATE_MISSING_VALUE = ++STATES, VARIABLE_STATE_MISSING_VALUE_RANGE_LO = ++STATES, VARIABLE_STATE_MISSING_VALUE_RANGE_HI = ++STATES, VARIABLE_STATE_LO_THRU_X = ++STATES, VARIABLE_STATE_X_THRU_HI = ++STATES, VARIABLE_STATE_LABEL = ++STATES, VARIABLE_STATE_END = ++STATES; var VALUE_LABEL_STATE_VARIABLE_COUNT = ++STATES, VALUE_LABEL_STATE_VARIABLE_NAME = ++STATES, VALUE_LABEL_STATE_LABEL_COUNT = ++STATES, VALUE_LABEL_STATE_VALUE = ++STATES, VALUE_LABEL_STATE_LABEL = ++STATES, VALUE_LABEL_STATE_END = ++STATES; var CODE_FORWARD_SLASH = "/".charCodeAt(0), CODE_ASTERISK = "*".charCodeAt(0), CODE_SPACE = " ".charCodeAt(0), CODE_PERIOD = ".".charCodeAt(0), CODE_PLUS = "+".charCodeAt(0), CODE_MINUS = "-".charCodeAt(0); var STATE_BY_TAG = { "1": STATE_PRODUCT_IDENTIFICATION, "2": STATE_AUTHOR_IDENTIFICATION, "3": STATE_SUBPRODUCT_IDENTIFICATION, "4": STATE_VARIABLE_COUNT, "6": STATE_CASE_WEIGHT_VARIABLE, "7": STATE_VARIABLE, "D": STATE_VALUE_LABEL, "E": STATE_DOCUMENT, "F": STATE_FIRST_DATUM, "Z": STATE_EOF }; var VARIABLE_STATE_BY_TAG = { "8": VARIABLE_STATE_MISSING_VALUE, "9": VARIABLE_STATE_LO_THRU_X, "A": VARIABLE_STATE_X_THRU_HI, "B": VARIABLE_STATE_MISSING_VALUE_RANGE_LO, "C": VARIABLE_STATE_LABEL }; module.exports = function() { var parser = { push: parser_push, pop: parser_pop }, state = STATE_FILE_HEADER, encoding = "utf8", lineParser = porLineParser(), line = new Buffer(0), lineLength = 0, fieldState = FIELD_STATE_DEFAULT, fieldLength = 0, stringState = STRING_STATE_DEFAULT, stringLength = null, variableIndex = 0, variableState = VARIABLE_STATE_WIDTH, variableWidth = null, variableName = null, variablePrintFormatType = null, variablePrintFormatWidth = null, variablePrintFormatPrecision = null, variableWriteFormatType = null, variableWriteFormatWidth = null, variableWriteFormatPrecision = null, variableMissingValues = [], variableMissingValueRange = null, variableLabel = null, variables = [], valueLabelState = VALUE_LABEL_STATE_VARIABLE_COUNT, valueLabelVariables = [], valueLabelIndex = 0, valueLabelCount = 0, data = [], dataOffset = 0, rowIndex = -1; function parser_push(data) { lineParser.push(data); } function parser_pop() { var record; while (true) { switch (state) { case STATE_FILE_HEADER: { if ((record = popFixed(464)) == null) return null; state = STATE_VERSION; continue; } case STATE_VERSION: { if ((record = popFixed(1)) == null) return null; state = STATE_CREATION_DATE; continue; } case STATE_CREATION_DATE: { if ((record = popString()) == null) return null; state = STATE_CREATION_TIME; continue; } case STATE_CREATION_TIME: { if ((record = popString()) == null) return null; state = STATE_TAG; continue; } case STATE_TAG: { if ((record = popFixed(1)) == null) return null; state = STATE_BY_TAG[record.toString(encoding)]; if (state == null) throw new Error("unknown tag: " + record); continue; } case STATE_PRODUCT_IDENTIFICATION: { if ((record = popString()) == null) return null; state = STATE_TAG; continue; } case STATE_AUTHOR_IDENTIFICATION: { if ((record = popString()) == null) return null; state = STATE_TAG; continue; } case STATE_SUBPRODUCT_IDENTIFICATION: { if ((record = popString()) == null) return null; state = STATE_TAG; continue; } case STATE_VARIABLE_COUNT: { if ((record = popInteger()) == null) return null; state = STATE_AFTER_VARIABLE_COUNT; continue; } case STATE_AFTER_VARIABLE_COUNT: { if ((record = popInteger()) == null) return null; state = STATE_TAG; continue; } case STATE_CASE_WEIGHT_VARIABLE: { if ((record = popString()) == null) return null; state = STATE_TAG; continue; } case STATE_VARIABLE: { if ((record = popVariable()) == null) return null; state = STATE_TAG; variables.push(record); continue; } case STATE_VALUE_LABEL: { if ((record = popValueLabel()) == null) return null; state = STATE_TAG; continue; } case STATE_FIRST_DATUM: { state = STATE_DATA; return variables.map(function(v) { return v.name; }); } case STATE_DATA: { if ((record = popData()) == null) return null; return record; } default: throw new Error("not implemented: " + state); } } } function popData() { var record; while (dataOffset < variableIndex) { if ((record = (variables[dataOffset].width ? popString : popFloat)()) == null) return null; ++dataOffset; data.push(record); } var oldData = data; dataOffset = 0; data = []; return oldData; } function popValueLabel() { var record; while (true) { switch (valueLabelState) { case VALUE_LABEL_STATE_VARIABLE_COUNT: { if ((valueLabelCount = popInteger()) == null) return null; valueLabelState = valueLabelCount ? VALUE_LABEL_STATE_VARIABLE_NAME : VALUE_LABEL_STATE_LABEL_COUNT; break; } case VALUE_LABEL_STATE_VARIABLE_NAME: { if ((record = popString()) == null) return null; valueLabelVariables.push(variables.filter(function(v) { return v.name === record; })[0]); if (!--valueLabelCount) valueLabelState = VALUE_LABEL_STATE_LABEL_COUNT; break; } case VALUE_LABEL_STATE_LABEL_COUNT: { if ((valueLabelCount = popInteger()) == null) return null; valueLabelState = valueLabelCount ? VALUE_LABEL_STATE_VALUE : VALUE_LABEL_STATE_END; break; } case VALUE_LABEL_STATE_VALUE: { if ((record = (valueLabelVariables[0].width ? popString : popFloat)()) == null) return null; valueLabelState = VALUE_LABEL_STATE_LABEL; break; } case VALUE_LABEL_STATE_LABEL: { if ((record = popString()) == null) return null; if (--valueLabelCount) valueLabelState = VALUE_LABEL_STATE_VALUE; else valueLabelState = VALUE_LABEL_STATE_END; break; } case VALUE_LABEL_STATE_END: { valueLabelState = VALUE_LABEL_STATE_VARIABLE_COUNT; return {}; } } } } function popVariable() { var record; while (true) { switch (variableState) { case VARIABLE_STATE_WIDTH: { if ((variableWidth = popInteger()) == null) return null; variableState = VARIABLE_STATE_NAME; break; } case VARIABLE_STATE_NAME: { if ((variableName = popString()) == null) return null; variableState = VARIABLE_STATE_PRINT_FORMAT_TYPE; break; } case VARIABLE_STATE_PRINT_FORMAT_TYPE: { if ((variablePrintFormatType = popInteger()) == null) return null; variableState = VARIABLE_STATE_PRINT_FORMAT_WIDTH; break; } case VARIABLE_STATE_PRINT_FORMAT_WIDTH: { if ((variablePrintFormatWidth = popInteger()) == null) return null; variableState = VARIABLE_STATE_PRINT_FORMAT_PRECISION; break; } case VARIABLE_STATE_PRINT_FORMAT_PRECISION: { if ((variablePrintFormatPrecision = popInteger()) == null) return null; variableState = VARIABLE_STATE_WRITE_FORMAT_TYPE; break; } case VARIABLE_STATE_WRITE_FORMAT_TYPE: { if ((variableWriteFormatType = popInteger()) == null) return null; variableState = VARIABLE_STATE_WRITE_FORMAT_WIDTH; break; } case VARIABLE_STATE_WRITE_FORMAT_WIDTH: { if ((variableWriteFormatWidth = popInteger()) == null) return null; variableState = VARIABLE_STATE_WRITE_FORMAT_PRECISION; break; } case VARIABLE_STATE_WRITE_FORMAT_PRECISION: { if ((variableWriteFormatPrecision = popInteger()) == null) return null; variableState = VARIABLE_STATE_TAG; break; } case VARIABLE_STATE_TAG: { if ((record = popFixed(1)) == null) return null; variableState = VARIABLE_STATE_BY_TAG[record.toString(encoding)]; if (!variableState) { variableState = VARIABLE_STATE_END; state = STATE_BY_TAG[record.toString(encoding)]; if (state == null) throw new Error("unknown tag: " + record); } break; } case VARIABLE_STATE_MISSING_VALUE: { if ((record = (variableWidth ? popString : popFloat)()) == null) return null; variableMissingValues.push(record); variableState = VARIABLE_STATE_TAG; break; } case VARIABLE_STATE_MISSING_VALUE_RANGE_LO: { if ((record = (variableWidth ? popString : popFloat)()) == null) return null; variableMissingValueRange = [record, null]; variableState = VARIABLE_STATE_MISSING_VALUE_RANGE_HI; break; } case VARIABLE_STATE_MISSING_VALUE_RANGE_HI: { if ((record = (variableWidth ? popString : popFloat)()) == null) return null; variableMissingValueRange[1] = record; variableState = VARIABLE_STATE_TAG; break; } case VARIABLE_STATE_LABEL: { if ((variableLabel = popString()) == null) return null; variableState = VARIABLE_STATE_END; state = STATE_TAG; break; } case VARIABLE_STATE_END: { variableState = VARIABLE_STATE_WIDTH; var variable = { type: "variable", index: variableIndex++, width: variableWidth, name: variableName, label: variableLabel, printFormat: { type: variablePrintFormatType, width: variablePrintFormatWidth, precision: variablePrintFormatPrecision }, writeFormat: { type: variableWriteFormatType, width: variableWriteFormatWidth, precision: variableWriteFormatPrecision }, missingValues: variableMissingValues, missingValueRange: variableMissingValueRange }; variableLabel = null; variableMissingValues = []; variableMissingValueRange = null; return variable; } default: throw new Error("not implemented"); } } } function popLine() { var newLine = lineParser.pop(); if (newLine == null) return null; line = Buffer.concat([line, newLine]); // slow, but shouldn’t care lineLength += newLine.length; return line; } function popFixed(length) { while (length > lineLength) if (popLine() == null) return null; var oldLine = line.slice(0, length); line = line.slice(length); lineLength -= length; return oldLine; } function popField() { while (true) { if (fieldLength >= lineLength) if (popLine() == null) return null; var code = line[fieldLength++]; if (code === CODE_FORWARD_SLASH) { fieldState = FIELD_STATE_DEFAULT; break; } if (code === CODE_ASTERISK) { fieldState = FIELD_STATE_AFTER_ASTERISK; continue; } if (fieldState == FIELD_STATE_AFTER_ASTERISK) { fieldState = FIELD_STATE_DEFAULT; break; } } var oldLine = line.slice(0, fieldLength - 1); lineLength -= fieldLength; line = line.slice(fieldLength); fieldLength = 0; return oldLine; } // TODO support fractions // TODO support exponent function popFloat() { return popInteger(); } // TODO support exponent function popInteger() { var field = popField(); if (field == null) return null; var i = -1, n = field.length; while (++i < n && field[i] === CODE_SPACE); if (field[i] === CODE_ASTERISK) return NaN; return parseInt(field.toString(encoding, i), 30); } function popString() { if (stringState === STRING_STATE_AFTER_LENGTH) { var string = popFixed(stringLength); if (string == null) return null; stringState = STRING_STATE_DEFAULT; return string.toString(encoding); } stringLength = popInteger(); if (stringLength == null) return null; stringState = STRING_STATE_AFTER_LENGTH; return popString(); } return parser; }