@infect/api
Version:
infect 2.0 api
371 lines (248 loc) • 10.4 kB
JavaScript
import SpreadSheet from 'google-spreadsheet';
import RainboConfig from '@rainbow-industries/rainbow-config';
import path from 'path';
import log from 'ee-log';
import type from 'ee-types';
import util from 'util';
import fs from 'fs';
import { fileURLToPath } from 'url';
import IDMapping from './GoogleIDMapping.js';
// Absolute directory containing this module. fileURLToPath() is used instead
// of URL#pathname because pathname keeps percent-escapes (a space stays
// "%20") and yields "/C:/..." on Windows, which breaks the path joins that
// are built on top of this value.
const dirname = path.dirname(fileURLToPath(import.meta.url));

// promise-returning variant of fs.writeFile
const writeFile = util.promisify(fs.writeFile);
/**
 * Downloads the configured sheets from a Google spreadsheet, normalizes the
 * rows (typed values, resolved foreign keys, nested set, mapping entity) and
 * writes one JSON file per entity into the data directory of the selected
 * environment.
 */
export default class DataLoader {

    /**
     * Prepares the sheet registry and the id mapping and derives the target
     * data environment from the command line flags.
     *
     * @throws {Error} if none of --to-dev, --to-beta, --to-production was passed
     */
    constructor() {
        // sheet mappings must be retrieved from the api
        this.sheets = new Map();

        // we're trying to get consistent ids that won't change if
        // rows are moved in the google sheet
        this.idMappings = new IDMapping();

        // the data env to use; when several flags are passed the first
        // entry of this list wins
        const envFlags = [
            ['--to-dev', 'development'],
            ['--to-beta', 'beta'],
            ['--to-production', 'production'],
        ];
        const match = envFlags.find(([flag]) => process.argv.includes(flag));
        this.env = match ? match[1] : '';

        if (!this.env) throw new Error(`Failed to identify the data env. Please specific it using one of the following flags: --to-dev, --to-beta, --to-production`);
    }


    /**
     * Converts a raw cell value into the value that gets stored.
     *
     *   'TRUE' / 'FALSE'           -> true / false
     *   ''                         -> null
     *   digits only                -> integer (leading zeros are dropped)
     *   digits with ONE decimal dot -> float
     *   anything else              -> returned unchanged
     *
     * Strings that merely consist of digits and dots but are not a number
     * ("1.2.3", "...", ".") are deliberately kept as strings; feeding them
     * to parseFloat would truncate them or produce NaN.
     *
     * @param {*} value - raw cell value
     * @returns {*} the converted value
     */
    static parseCellValue(value) {
        if (typeof value !== 'string') return value;
        if (value === 'TRUE') return true;
        if (value === 'FALSE') return false;
        if (value === '') return null;
        if (/^\d+$/.test(value)) return parseInt(value, 10);
        if (/^(?:\d+\.\d*|\.\d+)$/.test(value)) return parseFloat(value);
        return value;
    }


    /**
     * Loads the 'config/google-data-loader' configuration (resolved two
     * directories above this module) and creates the spreadsheet client for
     * the configured 'spreadsheetId'.
     *
     * @returns {Promise<void>}
     */
    async load() {
        const configPath = path.join(dirname, '../../');

        this.config = new RainboConfig(path.join(configPath, 'config/google-data-loader'), configPath);
        await this.config.load();

        this.sheet = new SpreadSheet(this.config.get('spreadsheetId'));
    }


    /**
     * get all data from the data master and store
     * them in the target environment storage directory
     *
     * @returns {Promise<void>}
     */
    async download() {
        await this.load();
        await this.authenticate();
        await this.loadInfo();
        await this.idMappings.load(this.env);

        this.data = new Map();

        // sheets are fetched one after the other
        for (const sheetConfig of this.config.get('sheets')) {
            const rows = await this.getRows(sheetConfig);
            this.data.set(sheetConfig.name, rows);
        }

        // resolve relations
        this.resolveForeignKeys();

        // convert the substance class entity to an
        // ordered nested set
        this.createOrderedNestedSet('substanceClass', 'id_parentSubstanceClass', 'id');

        // the references to the substance on the compound need
        // to be moved into a mapping
        this.data.set('compound_substance', this.createMappingEntity({
            rows: this.data.get('compound'),
            columns: ['id_substance1', 'id_substance2', 'id_substance3'],
            localColumn: 'id_compound',
            remoteColumn: 'id_substance',
        }));

        // all data is loaded and normalized, write it to the files
        await this.storeData();

        // store id mapping
        await this.idMappings.save();
    }


    /**
     * Writes every entity in this.data as JSON (4 space indent) to
     * data/<env>/<name>.json, two directories above this module. The target
     * directory is created if it does not exist yet.
     *
     * @returns {Promise<void>}
     */
    async storeData() {
        log.info(`Storing data files ...`);
        log.wtf(`Env: ${this.env}`);

        const targetDir = path.join(dirname, `../../data/${this.env}`);

        // writeFile does not create missing directories
        await fs.promises.mkdir(targetDir, { recursive: true });

        for (const [name, data] of this.data.entries()) {
            const fileName = path.join(targetDir, `${name}.json`);
            log.debug(`Storing ${data.length} records in file ${fileName} ...`);
            await writeFile(fileName, JSON.stringify(data, null, 4));
        }
    }


    /**
     * create a mapping entity from a multi-selection
     *
     * For every row and every listed column holding a number, one mapping
     * record `{ [localColumn]: row.id, [remoteColumn]: row[column] }` is
     * emitted. The listed columns are removed from the passed rows (the rows
     * are modified in place).
     *
     * @param {Object} options
     * @param {Object[]} options.rows - rows to extract the references from
     * @param {string[]} options.columns - columns holding the references
     * @param {string} options.localColumn - mapping property for the row id
     * @param {string} options.remoteColumn - mapping property for the reference
     * @returns {Object[]} the mapping records
     */
    createMappingEntity({
        rows,
        columns,
        localColumn,
        remoteColumn,
    }) {
        const mapping = [];

        for (const row of rows) {
            for (const column of columns) {
                if (type.number(row[column])) {
                    mapping.push({
                        [localColumn]: row.id,
                        [remoteColumn]: row[column],
                    });
                }

                delete row[column];
            }
        }

        return mapping;
    }


    /**
     * converts the rows of a sheet to a nested set data structure
     *
     * Rows without a parent reference (null or undefined) are the roots.
     * Siblings are ordered numerically by `orderKey`; every row gets a `left`
     * and a `right` property, counting from 1. Rows that are not reachable
     * from a root get neither. The parent reference is removed from all rows
     * afterwards.
     *
     * @param {string} sheetName - key of the entity in this.data
     * @param {string} parentReference - property holding the id of the parent
     * @param {string} orderKey - numeric property used to order siblings
     */
    createOrderedNestedSet(sheetName, parentReference, orderKey) {
        const data = this.data.get(sheetName);

        // null and undefined both mean "no parent"
        const toKey = parentId => (parentId == null ? null : parentId);

        // group the rows by parent once instead of scanning
        // all rows again for every single node
        const childrenByParent = new Map();
        for (const item of data) {
            const key = toKey(item[parentReference]);
            if (!childrenByParent.has(key)) childrenByParent.set(key, []);
            childrenByParent.get(key).push(item);
        }

        const getChildren = parentId => childrenByParent.get(toKey(parentId)) || [];

        // depth first numbering, returns the next free offset
        const nestify = (items, offset = 1) => {
            items.sort((a, b) => a[orderKey] - b[orderKey]);

            for (const item of items) {
                item.left = offset++;
                offset = nestify(getChildren(item.id), offset);
                item.right = offset++;
            }

            return offset;
        };

        nestify(getChildren());

        // remove parent reference
        data.forEach(item => delete item[parentReference]);
    }


    /**
     * resolve foreign keys to ids, aka emulate
     * a relational database
     *
     * For every configured column with a `foreignKey` the value of the row
     * is looked up (case insensitive, trimmed) in the `foreignKey.column`
     * of the `foreignKey.sheet` entity. On a match the id of the foreign row
     * is stored as `foreignKey.name`, or `id_<sheet>` if no name is
     * configured. The original reference columns are removed afterwards.
     *
     * @throws {Error} if a referenced sheet was not loaded
     */
    resolveForeignKeys() {
        log.info(`Resolving foreign keys ...`);

        // cell values may already have been converted to numbers or
        // booleans, so they need to be stringified before comparing them
        const normalize = value => String(value).toLowerCase().trim();

        for (const sheetConfig of this.config.get('sheets')) {
            const sheetName = sheetConfig.name;
            const rows = this.data.get(sheetName);

            for (const columnConfig of sheetConfig.columns) {
                if (!columnConfig.foreignKey) continue;

                const fk = columnConfig.foreignKey;
                const foreignData = this.data.get(fk.sheet);

                if (!foreignData) {
                    throw new Error(`Cannot resolve foreign key '${sheetName}'.'${columnConfig.name}': the sheet '${fk.sheet}' was not loaded`);
                }

                // create a map of the foreign property
                const foreignMap = new Map();
                for (const item of foreignData) {
                    if (item[fk.column]) foreignMap.set(normalize(item[fk.column]), item.id);
                }

                // set fk on our rows
                for (const row of rows) {
                    if (row[columnConfig.name]) {
                        const fkValue = normalize(row[columnConfig.name]);

                        if (foreignMap.has(fkValue)) {
                            row[fk.name || `id_${fk.sheet}`] = foreignMap.get(fkValue);
                        } else {
                            log.warn(`Missing foreign entity for '${sheetName}'.'${columnConfig.name}' -> '${fk.sheet}'.'${fk.column}'`);
                        }
                    } else {
                        // no value, remove reference anyway
                        delete row[columnConfig.name];
                    }
                }
            }
        }

        this.cleanupForeignKeys();
    }


    /**
     * Removes the original reference column of every configured foreign key
     * from all rows of its sheet. This runs after all keys were resolved, so
     * that a reference column can still be read while resolving other sheets.
     */
    cleanupForeignKeys() {
        for (const sheetConfig of this.config.get('sheets')) {
            const rows = this.data.get(sheetConfig.name);
            const fkColumns = sheetConfig.columns.filter(columnConfig => columnConfig.foreignKey);

            for (const columnConfig of fkColumns) {
                for (const row of rows) {
                    delete row[columnConfig.name];
                }
            }
        }
    }


    /**
     * get the structure of the sheet
     *
     * Registers every worksheet in this.sheets using its lower cased title.
     *
     * @returns {Promise<Object>} the info object passed by the spreadsheet client
     */
    loadInfo() {
        return new Promise((resolve, reject) => {
            log.debug(`Getting info for data master ...`);

            this.sheet.getInfo((err, info) => {
                if (err) {
                    reject(err);
                    return;
                }

                for (const sheet of info.worksheets) {
                    this.sheets.set(sheet.title.toLowerCase(), sheet);
                }

                resolve(info);
            });
        });
    }


    /**
     * get rows for a specific infect sheet
     *
     * The worksheet is looked up by `googleName` (or `name`), lower cased.
     * Every row is converted to a plain object holding the translated id,
     * the configured columns (see parseCellValue) and, if translations are
     * configured, a `translations` array with one `{ language, value }`
     * entry per non empty translation column.
     *
     * @param {Object} sheetConfig - sheet configuration
     * @returns {Promise<Object[]>} the converted rows; rejects if the
     *     worksheet is unknown, the request fails or a row cannot be converted
     */
    getRows(sheetConfig) {
        return new Promise((resolve, reject) => {
            const sheetName = sheetConfig.googleName || sheetConfig.name;
            log.debug(`Getting rows for '${sheetName}' ...`);

            const sheet = this.sheets.get(sheetName.toLowerCase());

            if (!sheet) {
                reject(new Error(`The sheet '${sheetName}' does not exist in the spreadsheet`));
                return;
            }

            // NOTE(review): at most 1000 rows are requested per sheet,
            // larger sheets would be cut off - confirm this is intended
            sheet.getRows({
                offset: 1,
                limit: 1000,
            }, (err, rows) => {
                if (err) {
                    reject(err);
                    return;
                }

                // an exception thrown inside of this callback would not
                // reject the promise on its own
                try {
                    resolve(rows.map((row) => {
                        const rowData = {
                            id: this.idMappings.translateId(row.id),
                        };

                        for (const column of sheetConfig.columns) {
                            rowData[column.name] = DataLoader.parseCellValue(row[column.googleName]);
                        }

                        if (sheetConfig.translations) {
                            rowData.translations = [];

                            for (const translation of sheetConfig.translations) {
                                const value = row[translation];

                                // skip empty cells and missing columns
                                if (value != null && value !== '') {
                                    rowData.translations.push({
                                        language: translation,
                                        value,
                                    });
                                }
                            }
                        }

                        return rowData;
                    }));
                } catch (conversionError) {
                    reject(conversionError);
                }
            });
        });
    }


    /**
     * gets an authentication token for the spreadsheet using the configured
     * 'googleApiEmail' and 'googleApiPrivateKey'
     *
     * @returns {Promise<*>} the token passed by the spreadsheet client
     */
    authenticate() {
        return new Promise((resolve, reject) => {
            log.info(`Authenticating ...`);

            this.sheet.useServiceAccountAuth({
                client_email: this.config.get('googleApiEmail'),
                private_key: this.config.get('googleApiPrivateKey'),
            }, (err, token) => {
                if (err) reject(err);
                else resolve(token);
            });
        });
    }
}