UNPKG

@rdfc/sds-storage-writer-ts

Version:

An RDF-Connect processor to write SDS streams into a given storage system

104 lines (103 loc) 4.6 kB
import { Processor } from "@rdfc/js-runner";
import { PROV, RDF as RDFT, SDS } from "@treecg/types";
import { Writer } from "n3";
import { Extractor } from "./extractor.js";
import { getRepository, } from "./repositories/Repository.js";
import { filterMember, maybe_parse, pathString } from "./utils.js";

/**
 * Processor that reads SDS data and metadata messages and writes them into a
 * repository obtained from `getRepository(this.database)`.
 *
 * NOTE(review): `this.database`, `this.data`, `this.metadata` and
 * `this.logger` are not declared in this file; presumably they are provided
 * by the `Processor` base class / runner configuration — confirm.
 */
export class IngestSDS extends Processor {
    /** Repository handle; assigned and opened in `init()`, closed in `transform()`. */
    repository;
    /** Number of data messages fully handled by `processData()`. */
    recordCount = 0;

    /**
     * Obtains and opens the repository, logs the fragmentations already
     * present in it, and creates the indices.
     *
     * @returns {Promise<void>}
     */
    async init() {
        this.repository = getRepository(this.database);
        await this.repository.open();
        const dbFragmentations = await this.repository.findMetadataFragmentations();
        this.logger.debug(`[init] Found ${dbFragmentations.length} fragmentations (${dbFragmentations.map((x) => x.id.value)})`);
        await this.repository.createIndices();
        this.logger.info(`[init] IngestSDS initialized over a ${this.repository.getStoreType()} data store`);
    }

    /**
     * Consumes the data and metadata inputs concurrently and closes the
     * repository once both are done.
     *
     * The repository is closed in a `finally` block so the connection opened
     * in `init()` is released even when one of the two consumers rejects; the
     * rejection still propagates to the caller.
     *
     * @returns {Promise<void>}
     */
    async transform() {
        try {
            await Promise.all([
                this.processData(this.data.strings()),
                this.processMetadata(this.metadata.strings()),
            ]);
        } finally {
            await this.repository.close();
        }
    }

    /**
     * Intentionally empty: this processor produces no output of its own.
     *
     * @returns {Promise<void>}
     */
    async produce() { }

    /**
     * Ingests every item of the data input.
     *
     * For each item: parse it with `maybe_parse`, extract its content with a
     * shared `Extractor`, then handle records, relations and buckets (in that
     * order) against one index bulk that is ingested at the end of the
     * iteration. `recordCount` is incremented once per item.
     *
     * @param {AsyncIterable<*>} iterable - Items accepted by `maybe_parse`
     *   (strings from `this.data.strings()` in `transform()`).
     * @returns {Promise<void>}
     */
    async processData(iterable) {
        const extractor = new Extractor();
        for await (const item of iterable) {
            const data = maybe_parse(item);
            this.logger.debug(`[processData] Handling ingest for record with payload id: <${data.find((q) => q.predicate.equals(SDS.terms.payload))?.object?.value}>`);
            const extract = extractor.extract_quads(data);
            const indexOperations = this.repository.prepareIndexBulk();
            await this.handleRecords(extract, indexOperations);
            await this.handleRelations(extract, indexOperations);
            await this.handleBuckets(extract, indexOperations);
            await this.repository.ingestIndexBulk(indexOperations);
            this.recordCount++;
        }
        this.logger.info(`[processData] Ingested ${this.recordCount} records`);
    }

    /**
     * Ingests every item of the metadata input.
     *
     * For each parsed item, every subject typed as `sds:Stream` is looked up;
     * the quads selected by `filterMember` for that stream are serialized
     * with an N3 `Writer` and stored through `repository.ingestMetadata`.
     *
     * @param {AsyncIterable<*>} iterable - Items accepted by `maybe_parse`
     *   (strings from `this.metadata.strings()` in `transform()`).
     * @returns {Promise<void>}
     */
    async processMetadata(iterable) {
        for await (const item of iterable) {
            const meta = maybe_parse(item);
            const streams = meta
                .filter((q) => q.predicate.equals(RDFT.terms.type) &&
                    q.object.equals(SDS.terms.Stream))
                .map((q) => q.subject);
            for (const streamId of streams) {
                // The two callbacks match quads whose object is the stream id
                // via prov:used or sds:dataset; how filterMember uses them is
                // defined in ./utils.js (not visible here).
                const streamMember = filterMember(meta, streamId, [
                    (q, id) => q.predicate.equals(PROV.terms.used) && q.object.equals(id),
                    (q, id) => q.predicate.equals(SDS.terms.dataset) && q.object.equals(id),
                ]);
                const ser = new Writer().quadsToString(streamMember);
                await this.repository.ingestMetadata(SDS.Stream, streamId.value, ser);
            }
        }
    }

    /**
     * Stores the records of one extract.
     *
     * Records that are not flagged `dataless` are handed to
     * `repository.handleRecord` together with the serialized data quads and
     * flushed through a dedicated data bulk. Afterwards every
     * (record, bucket) pair is registered on the shared index bulk.
     *
     * @param {*} extract - Result of `Extractor.extract_quads`.
     * @param {*} operations - Index bulk created by `repository.prepareIndexBulk()`.
     * @returns {Promise<void>}
     */
    async handleRecords(extract, operations) {
        const dataSer = new Writer().quadsToString(extract.getData());
        const records = extract.getRecords();
        const dataOperations = this.repository.prepareDataBulk();
        for (const rec of records) {
            if (!rec.dataless) {
                await this.repository.handleRecord(rec, dataSer, dataOperations);
            }
        }
        // Data is ingested before the membership entries are queued.
        await this.repository.ingestDataBulk(dataOperations);
        for (const rec of records) {
            for (const bucket of rec.buckets) {
                await this.repository.handleMember(rec, bucket, operations);
            }
        }
    }

    /**
     * Registers the buckets of one extract on the index bulk.
     *
     * Falsy `root` / `immutable` properties are deleted from the bucket
     * object before it is passed on (this mutates the extracted bucket).
     * NOTE(review): presumably so absent flags are not written to the store
     * as explicit falsy values — confirm against the repository.
     *
     * @param {*} extract - Result of `Extractor.extract_quads`.
     * @param {*} operations - Index bulk created by `repository.prepareIndexBulk()`.
     * @returns {Promise<void>}
     */
    async handleBuckets(extract, operations) {
        const buckets = extract.getBuckets();
        for (const bucket of buckets) {
            if (!bucket.root) {
                delete bucket.root;
            }
            if (!bucket.immutable) {
                delete bucket.immutable;
            }
            await this.repository.handleBucket(bucket, operations);
        }
    }

    /**
     * Applies the relation changes of one extract to the index bulk:
     * removals first, then additions. For each relation, `path` and `value`
     * are converted with `pathString` before being passed to the repository.
     *
     * @param {*} extract - Result of `Extractor.extract_quads`.
     * @param {*} operations - Index bulk created by `repository.prepareIndexBulk()`.
     * @returns {Promise<void>}
     */
    async handleRelations(extract, operations) {
        const removeRelations = extract.getRemoveRelations();
        for (const rel of removeRelations) {
            const pathValue = await pathString(rel.path);
            const valueValue = await pathString(rel.value);
            await this.repository.removeRelation(rel, pathValue, valueValue, operations);
        }
        const relations = extract.getRelations();
        for (const rel of relations) {
            const pathValue = await pathString(rel.path);
            const valueValue = await pathString(rel.value);
            await this.repository.handleRelation(rel, pathValue, valueValue, operations);
        }
    }
}