/**
 * @infect/infect-rda-sample-storage
 * INFECT Sample Storage for RDA
 */
import { Controller } from '@infect/rda-service';
import type from 'ee-types';
import log from 'ee-log';
export default class DataController extends Controller {

    /**
     * Controller storing and serving RDA sample data.
     *
     * @param {object} options
     * @param {object} options.db - related ORM database instance
     */
    constructor({db}) {
        super('data');

        this.db = db;

        this.enableAction('create');
        this.enableAction('list');

        // the number of records that gets assigned to a group.
        this.groupSize = 1000;
    }



    /**
     * returns data for a given filter. currently only the shard
     * filter is implemented; offset and limit are required string
     * query parameters when a shard is requested.
     *
     * @param {object} request - service request
     * @returns {Promise<Array|undefined>} flattened record objects, or
     *          undefined when a 400 response was sent instead
     */
    async list(request) {
        const query = request.query();

        if (query.shard) {
            // return the data for one given shard
            if (!type.string(query.offset)) request.response().status(400).send(`Missing offset query parameter!`);
            else if (!type.string(query.limit)) request.response().status(400).send(`Missing limit query parameter!`);
            else {
                // get the viable data versions. do this in two steps,
                // postgres has difficulties optimizing this if we're
                // getting everything in one query
                const dataGroups = await this.db.dataGroup().getShard({
                    identifier: query.shard
                }).raw().find();

                // get the actual data
                const rows = await this.db.data('*', {
                    id_dataGroup: this.db.getORM().in(dataGroups.map(g => g.id))
                }).order('id')
                    .offset(parseInt(query.offset, 10))
                    .limit(parseInt(query.limit, 10))
                    .getDataVersion('identifier')
                    .fetchDataVersionStatus('identifier')
                    .getDataSet('identifier')
                    .raw().find();

                // flatten each row: the stored payload plus metadata about
                // the data version and data set it belongs to
                // (local renamed from `data` to `rows` — it shadowed itself
                // in the map callback)
                return rows.map((row) => ({
                    ...row.data,
                    dataVersionId: row.dataVersion.id,
                    dataSetId: row.dataVersion.dataSet.id,
                    datasetIdentifier: row.dataVersion.dataSet.identifier,
                    dataVersionStatusIdentifier: row.dataVersion.dataVersionStatus.identifier,
                }));
            }
        } else throw new Error('not implemented (missing filter)');
    }



    /**
     * write data to the db. validates the request body, skips records
     * whose uniqueIdentifier already exists in a preview/active data
     * version, and stores the rest in size-limited data groups inside
     * one transaction.
     *
     * @param {object} request - service request
     * @returns {Promise<object|undefined>} import statistics
     *          ({importedRecordCount, duplicateRecordCount, runtime}),
     *          or undefined when a 400 response was sent instead
     */
    async create(request) {
        const start = Date.now();
        const data = await request.getData();

        if (!data) request.response().status(400).send(`Missing request body!`);
        else if (!type.object(data)) request.response().status(400).send(`Request body must be a json object!`);
        else if (!type.array(data.records)) request.response().status(400).send(`Missing records array on the request body!`);
        else if (!type.number(data.dataVersionId)) request.response().status(400).send(`Missing the property 'dataVersionId' on the request body!`);
        else {

            // basic schema validation for the data
            for (const record of data.records) {
                if (!type.object(record)) {
                    return request.response()
                        .status(400)
                        .send(`Got records that don't contain objects as data!`);
                }

                if (!type.string(record.uniqueIdentifier)) {
                    return request.response()
                        .status(400)
                        .send(`Missing the field uniqueIdentifier on a record!`);
                }
            }


            // load all records that have a uniqueIdentifier we're trying to import in order to
            // prevent duplicate key errors
            const existingRecords = await this.db.data('uniqueIdentifier', {
                uniqueIdentifier: this.db.getORM().in(data.records.map(r => r.uniqueIdentifier)),
            }).getDataVersion().getDataVersionStatus({
                identifier: this.db.getORM().in(['preview', 'active'])
            }).raw().find();

            const existingMap = new Set(existingRecords.map(record => record.uniqueIdentifier));
            const recordCount = data.records.length - existingMap.size;
            let importedRecordCount = 0;
            let duplicateRecordCount = 0;

            if (recordCount > 0) {

                // get data groups for adding records to
                const groups = await this.getDataGroups({
                    dataVersionId: data.dataVersionId,
                    recordCount,
                });

                const transaction = this.db.createTransaction();
                const dataVersion = await this.db.dataVersion({
                    id: data.dataVersionId
                }).findOne();

                let currentGroup = groups.shift();

                // store data
                for (const record of data.records) {
                    if (existingMap.has(record.uniqueIdentifier)) {
                        duplicateRecordCount++;
                        continue;
                    }

                    existingMap.add(record.uniqueIdentifier);

                    // switch to the next group BEFORE the current one
                    // overflows. fix: the original decremented first and
                    // switched on `< 0`, which stuffed one extra record
                    // into every group and could also throw after the
                    // last record had already been stored.
                    if (currentGroup.recordCount <= 0) {
                        currentGroup = groups.shift();
                        if (!currentGroup) throw new Error(`Failed to get group for records, created not enough groups!`);
                    }

                    await new transaction.data({
                        dataVersion: dataVersion,
                        id_dataGroup: currentGroup.groupId,
                        data: record,
                        uniqueIdentifier: record.uniqueIdentifier,
                    }).save();

                    currentGroup.recordCount--;
                    importedRecordCount++;
                }

                // persist changes
                await transaction.commit();
            } else {
                duplicateRecordCount = data.records.length;
            }


            return {
                importedRecordCount,
                duplicateRecordCount,
                runtime: Date.now() - start,
            };
        }
    }



    /**
     * since we're grouping records in groups we need
     * create data groups that can be assigned to the
     * records. currently groups have a fixed size of
     * not more than this.groupSize items. this method
     * gets a group that is not full yet or creates a
     * new one. it may also return multiple groups if
     * one will not be enough for the records currently
     * added.
     *
     * @param {object} options
     * @param {number} options.dataVersionId - version the groups belong to
     * @param {number} options.recordCount - records that need a slot
     * @returns {Promise<Array<{groupId, recordCount}>>} groups with their
     *          remaining free slot counts
     */
    async getDataGroups({
        dataVersionId,
        recordCount,
    }) {
        const Related = this.db.getORM();
        const groups = [];

        // get the one dataset that has the least records
        // for the given data group
        const dataGroup = await this.db.dataGroup([
            Related.select('recordCount').referenceCount('data.id')
        ]).order('recordCount').getDataVersion({
            id: dataVersionId
        }).raw().findOne();

        // check if we can use the existing data group.
        // fix: this read `this.grougSize` (typo), which is undefined, so
        // leftSlots was NaN and a partially filled group was never reused.
        if (dataGroup) {
            const leftSlots = this.groupSize - dataGroup.recordCount;

            if (leftSlots > 0) {
                groups.push({
                    groupId: dataGroup.id,
                    recordCount: leftSlots,
                });

                recordCount -= leftSlots;
            }
        }

        // create as many fresh groups as are still needed
        while (recordCount > 0) {
            const group = await new this.db.dataGroup({
                id_dataVersion: dataVersionId
            }).save();

            groups.push({
                groupId: group.id,
                recordCount: this.groupSize,
            });

            recordCount -= this.groupSize;
        }

        return groups;
    }
}