@rdfc/sds-storage-writer-ts
Version:
An RDF-Connect processor to write SDS streams into a given storage system
104 lines (103 loc) • 4.6 kB
JavaScript
import { Processor } from "@rdfc/js-runner";
import { PROV, RDF as RDFT, SDS } from "@treecg/types";
import { Writer } from "n3";
import { Extractor } from "./extractor.js";
import { getRepository, } from "./repositories/Repository.js";
import { filterMember, maybe_parse, pathString } from "./utils.js";
/**
 * Processor that ingests SDS records and SDS stream metadata into a
 * repository obtained from `getRepository(this.database)`.
 *
 * NOTE(review): `this.database`, `this.data`, `this.metadata` and
 * `this.logger` are not declared in this class; presumably they are supplied
 * by the `Processor` base class / runner configuration — confirm.
 */
export class IngestSDS extends Processor {
    /** Repository handle; assigned in `init()` and closed at the end of `transform()`. */
    repository;
    /** Number of data messages fully handled by `processData()`. */
    recordCount = 0;

    /**
     * Opens the repository, logs the fragmentations already stored in it and
     * creates the repository indices.
     *
     * @returns {Promise<void>}
     */
    async init() {
        this.repository = getRepository(this.database);
        await this.repository.open();
        const dbFragmentations = await this.repository.findMetadataFragmentations();
        this.logger.debug(`[init] Found ${dbFragmentations.length} fragmentations (${dbFragmentations.map((x) => x.id.value)})`);
        await this.repository.createIndices();
        this.logger.info(`[init] IngestSDS initialized over a ${this.repository.getStoreType()} data store`);
    }

    /**
     * Consumes the data and metadata channels concurrently and closes the
     * repository once both are done.
     *
     * The close happens in a `finally` so the repository is also released when
     * either consumer rejects; the original error still propagates to the
     * caller (unless `close()` itself throws).
     *
     * @returns {Promise<void>}
     */
    async transform() {
        try {
            await Promise.all([
                this.processData(this.data.strings()),
                this.processMetadata(this.metadata.strings()),
            ]);
        } finally {
            await this.repository.close();
        }
    }

    /**
     * Intentionally empty: this processor does not produce any output.
     *
     * @returns {Promise<void>}
     */
    async produce() {
    }

    /**
     * Handles every incoming data message: parses it, extracts records,
     * relations and buckets, and writes the resulting index operations in one
     * bulk call per message.
     *
     * @param {AsyncIterable<string>} iterable - Incoming data messages
     *   (presumably serialized RDF accepted by `maybe_parse` — confirm).
     * @returns {Promise<void>}
     */
    async processData(iterable) {
        const extractor = new Extractor();
        for await (const item of iterable) {
            const data = maybe_parse(item);
            this.logger.debug(`[processData] Handling ingest for record with payload id: <${data.find((q) => q.predicate.equals(SDS.terms.payload))?.object?.value}>`);
            const extract = extractor.extract_quads(data);
            // One index bulk per message, shared by records, relations and buckets.
            const indexOperations = this.repository.prepareIndexBulk();
            await this.handleRecords(extract, indexOperations);
            await this.handleRelations(extract, indexOperations);
            await this.handleBuckets(extract, indexOperations);
            await this.repository.ingestIndexBulk(indexOperations);
            this.recordCount++;
        }
        this.logger.info(`[processData] Ingested ${this.recordCount} records`);
    }

    /**
     * Handles every incoming metadata message: for each subject typed as
     * `sds:Stream`, collects its member quads, serializes them and stores
     * them as stream metadata.
     *
     * @param {AsyncIterable<string>} iterable - Incoming metadata messages.
     * @returns {Promise<void>}
     */
    async processMetadata(iterable) {
        for await (const item of iterable) {
            const meta = maybe_parse(item);
            const streams = meta
                .filter((q) => q.predicate.equals(RDFT.terms.type) &&
                    q.object.equals(SDS.terms.Stream))
                .map((q) => q.subject);
            for (const streamId of streams) {
                // NOTE(review): the two callbacks match quads that point at the
                // stream via prov:used / sds:dataset; how `filterMember` uses
                // them is defined in ./utils.js — confirm there.
                const streamMember = filterMember(meta, streamId, [
                    (q, id) => q.predicate.equals(PROV.terms.used) &&
                        q.object.equals(id),
                    (q, id) => q.predicate.equals(SDS.terms.dataset) &&
                        q.object.equals(id),
                ]);
                const ser = new Writer().quadsToString(streamMember);
                await this.repository.ingestMetadata(SDS.Stream, streamId.value, ser);
            }
        }
    }

    /**
     * Stores the extracted records and registers their bucket memberships.
     *
     * The extract's data quads are serialized once and passed along with every
     * record that is not flagged `dataless`. The data bulk is written before
     * any membership is added to the index operations.
     *
     * @param {object} extract - Result of `Extractor.extract_quads`.
     * @param {object} operations - Index bulk from `repository.prepareIndexBulk()`.
     * @returns {Promise<void>}
     */
    async handleRecords(extract, operations) {
        const dataSer = new Writer().quadsToString(extract.getData());
        const records = extract.getRecords();
        const dataOperations = this.repository.prepareDataBulk();
        for (const rec of records) {
            if (!rec.dataless) {
                await this.repository.handleRecord(rec, dataSer, dataOperations);
            }
        }
        await this.repository.ingestDataBulk(dataOperations);
        for (const rec of records) {
            for (const bucket of rec.buckets) {
                await this.repository.handleMember(rec, bucket, operations);
            }
        }
    }

    /**
     * Passes every extracted bucket to the repository.
     *
     * Falsy `root` / `immutable` properties are removed from the bucket object
     * first (this mutates the extracted bucket).
     * NOTE(review): presumably so that absent flags do not overwrite stored
     * values — confirm against the repository implementation.
     *
     * @param {object} extract - Result of `Extractor.extract_quads`.
     * @param {object} operations - Index bulk from `repository.prepareIndexBulk()`.
     * @returns {Promise<void>}
     */
    async handleBuckets(extract, operations) {
        const buckets = extract.getBuckets();
        for (const bucket of buckets) {
            if (!bucket.root) {
                delete bucket.root;
            }
            if (!bucket.immutable) {
                delete bucket.immutable;
            }
            await this.repository.handleBucket(bucket, operations);
        }
    }

    /**
     * Applies relation removals first, then relation additions, passing the
     * string forms of each relation's `path` and `value` (as returned by
     * `pathString`) to the repository.
     *
     * @param {object} extract - Result of `Extractor.extract_quads`.
     * @param {object} operations - Index bulk from `repository.prepareIndexBulk()`.
     * @returns {Promise<void>}
     */
    async handleRelations(extract, operations) {
        const removeRelations = extract.getRemoveRelations();
        for (const rel of removeRelations) {
            const pathValue = await pathString(rel.path);
            const valueValue = await pathString(rel.value);
            await this.repository.removeRelation(rel, pathValue, valueValue, operations);
        }
        const relations = extract.getRelations();
        for (const rel of relations) {
            const pathValue = await pathString(rel.path);
            const valueValue = await pathString(rel.value);
            await this.repository.handleRelation(rel, pathValue, valueValue, operations);
        }
    }
}