node-nlp
Version:
Library for NLU (Natural Language Understanding) done in Node.js
240 lines (223 loc) • 6.5 kB
JavaScript
/*
* Copyright (c) AXA Shared Services Spain S.A.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
const XLSX = require('xlsx');
const XTable = require('./xtable');
const XTableUtils = require('./xtable-utils');
/**
 * Collection of tables extracted from the sheets of a spreadsheet workbook.
 *
 * Every sheet is materialised as a rectangular grid of cells, and the grid is
 * then repeatedly cut along fully empty rows and fully empty columns until
 * each remaining block contains no empty row or column. Each resulting block
 * is wrapped in an XTable and indexed by the table's name.
 */
class XDoc {
  /**
   * Creates an empty document with no tables.
   */
  constructor() {
    // Tables in the order they were discovered.
    this.tables = [];
    // Same tables indexed by `table.name`; a later table with the same name
    // replaces the earlier entry here (both remain in `tables`).
    this.tablesByName = {};
  }

  /**
   * Computes the bounding rectangle of the cells present in a sheet.
   * Keys starting with '!' are skipped; every other key is converted to a
   * coordinate with XTableUtils.excel2coord.
   *
   * @param {object} sheet Worksheet object keyed by cell reference.
   * @returns {{top: number, bottom: number, left: number, right: number}}
   *   Bounds of the used area. All four fields are undefined when the sheet
   *   has no cell keys.
   */
  getRect(sheet) {
    let minRow;
    let maxRow;
    let minColumn;
    let maxColumn;
    for (const key of Object.keys(sheet)) {
      if (key[0] !== '!') {
        const coord = XTableUtils.excel2coord(key);
        if (minColumn === undefined || minColumn > coord.column) {
          minColumn = coord.column;
        }
        if (maxColumn === undefined || maxColumn < coord.column) {
          maxColumn = coord.column;
        }
        if (minRow === undefined || minRow > coord.row) {
          minRow = coord.row;
        }
        if (maxRow === undefined || maxRow < coord.row) {
          maxRow = coord.row;
        }
      }
    }
    return {
      top: minRow,
      bottom: maxRow,
      left: minColumn,
      right: maxColumn,
    };
  }

  /**
   * Tells whether a row of a block has no truthy cell.
   *
   * @param {Array<Array<*>>} block Grid of cells (array of rows).
   * @param {number} index Row index to inspect.
   * @returns {boolean} True when the row is missing or all its cells are falsy.
   */
  isEmptyRow(block, index) {
    const row = block[index];
    if (!row) {
      return true;
    }
    for (let i = 0; i < row.length; i += 1) {
      if (row[i]) {
        return false;
      }
    }
    return true;
  }

  /**
   * Finds the first empty row of a block.
   *
   * @param {Array<Array<*>>} block Grid of cells (array of rows).
   * @returns {number} Index of the first empty row, or -1 when there is none.
   */
  findEmptyRow(block) {
    for (let i = 0; i < block.length; i += 1) {
      if (this.isEmptyRow(block, i)) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Tells whether a column of a block has no truthy cell.
   * The method name keeps its historical spelling so existing callers work.
   *
   * @param {Array<Array<*>>} block Grid of cells (array of rows).
   * @param {number} index Column index to inspect.
   * @returns {boolean} True when every row holds a falsy value at `index`.
   */
  isEmptyColum(block, index) {
    for (let i = 0; i < block.length; i += 1) {
      if (block[i][index]) {
        return false;
      }
    }
    return true;
  }

  /**
   * Finds the first empty column of a block. The width of the block is taken
   * from its first row.
   *
   * @param {Array<Array<*>>} block Grid of cells (array of rows).
   * @returns {number} Index of the first empty column, or -1 when there is
   *   none or the block has no rows.
   */
  findEmptyColumn(block) {
    if (!block || block.length === 0) {
      return -1;
    }
    const width = block[0].length;
    for (let i = 0; i < width; i += 1) {
      if (this.isEmptyColum(block, i)) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Cuts a block horizontally, discarding the rows in the inclusive range
   * [emptyRowIndex, nextEmptyRowIndex].
   *
   * @param {Array<Array<*>>} block Grid of cells (array of rows).
   * @param {number} emptyRowIndex First row of the discarded range.
   * @param {number} nextEmptyRowIndex Last row of the discarded range.
   * @returns {Array<Array<Array<*>>>} Always a pair [rowsAbove, rowsBelow];
   *   either half may be an empty array.
   */
  splitByRow(block, emptyRowIndex, nextEmptyRowIndex) {
    const above = [];
    const below = [];
    for (let i = 0; i < block.length; i += 1) {
      if (i < emptyRowIndex) {
        above.push(block[i]);
      } else if (i > nextEmptyRowIndex) {
        below.push(block[i]);
      }
    }
    return [above, below];
  }

  /**
   * Cuts a block vertically, discarding the columns in the inclusive range
   * [emptyColumnIndex, nextEmptyColumnIndex].
   *
   * @param {Array<Array<*>>} block Grid of cells (array of rows).
   * @param {number} emptyColumnIndex First column of the discarded range.
   * @param {number} nextEmptyColumnIndex Last column of the discarded range.
   * @returns {Array<Array<Array<*>>>} The left and right parts, in that order.
   *   A part without columns is omitted, unless both parts are without
   *   columns, in which case only the left one is returned. An empty array is
   *   returned when the block has no rows.
   */
  splitByColumn(block, emptyColumnIndex, nextEmptyColumnIndex) {
    // Without rows there is no first row to measure below, so stop early.
    if (!block || block.length === 0) {
      return [];
    }
    const left = [];
    const right = [];
    for (let i = 0; i < block.length; i += 1) {
      const row = block[i];
      const leftRow = [];
      const rightRow = [];
      left.push(leftRow);
      right.push(rightRow);
      for (let j = 0; j < row.length; j += 1) {
        if (j < emptyColumnIndex) {
          leftRow.push(row[j]);
        } else if (j > nextEmptyColumnIndex) {
          rightRow.push(row[j]);
        }
      }
    }
    if (right[0].length === 0) {
      return [left];
    }
    if (left[0].length === 0) {
      return [right];
    }
    return [left, right];
  }

  /**
   * Performs one splitting step on a block: cuts it at the first run of empty
   * rows or, when there is no empty row, at the first run of empty columns.
   *
   * @param {Array<Array<*>>} block Grid of cells (array of rows).
   * @returns {Array<Array<Array<*>>>} The non-empty parts produced by the cut
   *   (zero, one or two blocks). When the block has no empty row or column
   *   the result is a one-element array holding the very same `block` object,
   *   which callers can use to detect that nothing changed.
   */
  splitBlock(block) {
    const emptyRowIndex = this.findEmptyRow(block);
    if (emptyRowIndex > -1) {
      // Extend the cut over every consecutive empty row, staying in bounds.
      let nextEmptyRowIndex = emptyRowIndex;
      while (
        nextEmptyRowIndex + 1 < block.length &&
        this.isEmptyRow(block, nextEmptyRowIndex + 1)
      ) {
        nextEmptyRowIndex += 1;
      }
      // A leading or trailing run of empty rows leaves one half without rows;
      // such a half is not a table and must not be propagated.
      return this.splitByRow(block, emptyRowIndex, nextEmptyRowIndex).filter(
        (part) => part.length > 0
      );
    }
    const emptyColumnIndex = this.findEmptyColumn(block);
    if (emptyColumnIndex > -1) {
      // Extend the cut over every consecutive empty column, staying in bounds.
      const width = block[0].length;
      let nextEmptyColumnIndex = emptyColumnIndex;
      while (
        nextEmptyColumnIndex + 1 < width &&
        this.isEmptyColum(block, nextEmptyColumnIndex + 1)
      ) {
        nextEmptyColumnIndex += 1;
      }
      return this.splitByColumn(block, emptyColumnIndex, nextEmptyColumnIndex);
    }
    return [block];
  }

  /**
   * Applies splitBlock repeatedly until no block can be cut any further.
   *
   * @param {Array<Array<*>>} block Grid of cells (array of rows).
   * @returns {Array<Array<Array<*>>>} Blocks that contain at least one cell
   *   and no fully empty row or column, in discovery order.
   */
  splitIntoBlocks(block) {
    let pendingBlocks = [block];
    let modified = true;
    while (modified) {
      modified = false;
      const nextBlocks = [];
      for (const currentBlock of pendingBlocks) {
        const parts = this.splitBlock(currentBlock);
        // A step that only trims an empty border returns a single, different
        // block that may still need cutting, so compare by identity instead
        // of relying on the number of parts.
        if (parts.length !== 1 || parts[0] !== currentBlock) {
          modified = true;
        }
        nextBlocks.push(...parts);
      }
      pendingBlocks = nextBlocks;
    }
    // Blocks without rows or without columns hold no table (e.g. empty sheet).
    return pendingBlocks.filter(
      (candidate) => candidate.length > 0 && candidate[0].length > 0
    );
  }

  /**
   * Extracts every table found in a sheet and registers it in `tables` and
   * `tablesByName`.
   *
   * @param {object} sheet Worksheet object keyed by cell reference.
   */
  processSheet(sheet) {
    const rect = this.getRect(sheet);
    // Materialise the used area as a dense grid; absent cells are undefined.
    // For a sheet without cells the bounds are undefined and no row is built.
    const grid = [];
    for (let row = rect.top; row <= rect.bottom; row += 1) {
      const cells = [];
      for (let column = rect.left; column <= rect.right; column += 1) {
        cells.push(sheet[XTableUtils.coord2excel({ row, column })]);
      }
      grid.push(cells);
    }
    for (const block of this.splitIntoBlocks(grid)) {
      const table = new XTable(block);
      this.tables.push(table);
      this.tablesByName[table.name] = table;
    }
  }

  /**
   * Reads a workbook file with XLSX.readFile and processes each of its sheets
   * in the order given by `SheetNames`.
   *
   * @param {string} filename Path of the workbook to load.
   */
  read(filename) {
    const wb = XLSX.readFile(filename);
    for (const sheetName of wb.SheetNames) {
      this.processSheet(wb.Sheets[sheetName]);
    }
  }

  /**
   * Returns the table registered under a name.
   *
   * @param {string} name Table name.
   * @returns {object|undefined} The table, or undefined when no table with
   *   that name was registered.
   */
  getTable(name) {
    // Only own entries count: names such as "constructor" or "toString" would
    // otherwise resolve to members inherited from Object.prototype.
    return Object.prototype.hasOwnProperty.call(this.tablesByName, name)
      ? this.tablesByName[name]
      : undefined;
  }

  /**
   * Runs `find(query)` on the named table.
   *
   * @param {string} name Table name.
   * @param {*} query Query forwarded unchanged to the table.
   * @returns {*} Whatever the table's `find` returns, or an empty array when
   *   the table does not exist.
   */
  find(name, query) {
    const table = this.getTable(name);
    if (!table) {
      return [];
    }
    return table.find(query);
  }

  /**
   * Runs `findOne(query)` on the named table.
   *
   * @param {string} name Table name.
   * @param {*} query Query forwarded unchanged to the table.
   * @returns {*} Whatever the table's `findOne` returns, or undefined when
   *   the table does not exist.
   */
  findOne(name, query) {
    const table = this.getTable(name);
    if (!table) {
      return undefined;
    }
    return table.findOne(query);
  }
}
// Expose the XDoc class as this module's export.
module.exports = XDoc;