// @vulcan-sql/extension-driver-bq
// Version: (not specified)
// BigQuery driver for Vulcan SQL
// 191 lines • 9.1 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.BQDataSource = void 0;
const tslib_1 = require("tslib");
const core_1 = require("@vulcan-sql/core");
const stream_1 = require("stream");
const bqlSqlBuilder_1 = require("./bqlSqlBuilder");
const typeMapper_1 = require("./typeMapper");
const bigquery_1 = require("@google-cloud/bigquery");
const storage_1 = require("@google-cloud/storage");
const fs = require("fs");
const path = require("path");
/**
 * Vulcan data source backed by Google BigQuery.
 *
 * For every configured profile it keeps a BigQuery client, a Cloud Storage
 * client, the raw connection options and the cache settings in `bqMapping`,
 * keyed by profile name.
 */
let BQDataSource = class BQDataSource extends core_1.DataSource {
    constructor() {
        super(...arguments);
        this.logger = this.getLogger();
        // profile name -> { bigQuery, storage, options, cache }
        this.bqMapping = new Map();
    }
    /**
     * Creates the BigQuery / Storage clients for every profile and runs a
     * trivial query against each one to verify the connection.
     *
     * @returns {Promise<void>} rejects if any connection test query fails.
     */
    onActivate() {
        return tslib_1.__awaiter(this, void 0, void 0, function* () {
            const profiles = this.getProfiles().values();
            for (const profile of profiles) {
                this.logger.debug(`Initializing profile: ${profile.name} using bq driver`);
                const bigqueryClient = new bigquery_1.BigQuery(profile.connection);
                // https://cloud.google.com/nodejs/docs/reference/bigquery/latest
                const storage = new storage_1.Storage(profile.connection);
                this.bqMapping.set(profile.name, {
                    bigQuery: bigqueryClient,
                    storage: storage,
                    options: profile.connection,
                    cache: profile.cache,
                });
                // Testing connection
                yield bigqueryClient.query('SELECT 1;');
                this.logger.debug(`Profile ${profile.name} initialized`);
            }
        });
    }
    /**
     * Exports the result of `sql` as parquet files into the local `directory`.
     *
     * Uses the "EXPORT DATA" statement to store the query result in the GCS
     * bucket named by `cache.bucketName` of the profile, downloads every
     * exported object into `directory`, and deletes each object from the
     * bucket after it has been downloaded.
     * EXPORT DATA ref: https://cloud.google.com/bigquery/docs/reference/standard-sql/other-statements#export_data_statement
     *
     * @param {object} args
     * @param {string} args.sql - the SELECT statement to export.
     * @param {string} args.profileName - profile whose clients are used.
     * @param {string} args.directory - existing local directory; also used as
     *   the object-name prefix inside the bucket to avoid filename collisions.
     * @returns {Promise<void>}
     * @throws {InternalError} when the profile is unknown.
     * @throws {Error} when `directory` does not exist or the export job fails.
     * @throws {ConfigurationError} when the profile has no `cache.bucketName`.
     */
    export({ sql: statement, profileName, directory, }) {
        return tslib_1.__awaiter(this, void 0, void 0, function* () {
            this.checkProfileExist(profileName);
            if (!fs.existsSync(directory)) {
                throw new Error(`Directory "${directory}" does not exist`);
            }
            const { bigQuery, storage, options, cache } = this.bqMapping.get(profileName);
            const bucketName = cache === null || cache === void 0 ? void 0 : cache.bucketName;
            if (!bucketName)
                throw new core_1.ConfigurationError(`cache.bucketName in profile "${profileName}" is required when using cache feature.`);
            // Use the directory to avoid filename collision,
            // The wildcard indicates the partition,
            // ref: https://cloud.google.com/bigquery/docs/reference/standard-sql/other-statements#export_option_list
            // GCS object names always use '/', so join with the POSIX flavour
            // instead of the platform-specific one (which emits '\' on Windows).
            const bucketObjPrefix = path.posix.join(bucketName, directory);
            const uri = `gs://${path.posix.join(bucketObjPrefix, 'part*.parquet')}`;
            const queryOptions = {
                query: `EXPORT DATA OPTIONS( uri="${uri}", format='PARQUET') AS ${statement}`,
                location: (options === null || options === void 0 ? void 0 : options.location) || 'US',
            };
            // Object names never start with a slash, so drop a leading one.
            const relativeDirectory = directory[0] === '/' ? directory.slice(1) : directory;
            // Terminate the prefix with '/' so that listing "a/b" cannot also
            // match (and later delete) objects that belong to "a/bc/...".
            const objectPrefix = relativeDirectory.endsWith('/')
                ? relativeDirectory
                : `${relativeDirectory}/`;
            try {
                const [job] = yield bigQuery.createQueryJob(queryOptions);
                yield this.runJobAndWait(job);
                const getFilesResponse = yield storage.bucket(bucketName).getFiles({
                    prefix: objectPrefix,
                });
                yield Promise.all(getFilesResponse[0].map((file) => tslib_1.__awaiter(this, void 0, void 0, function* () {
                    const fileName = `${file.name.split('/').pop()}`;
                    yield file.download({
                        destination: path.resolve(process.cwd(), directory, fileName),
                    });
                    // delete file from GCS bucket
                    yield file.delete();
                })));
            }
            catch (e) {
                this.logger.debug(`Error when exporting parquet "${directory}"`, e);
                throw e;
            }
        });
    }
    /**
     * Runs a statement and returns its columns plus a row stream.
     *
     * @param {object} args
     * @param {string} args.statement - SQL text, possibly with `@name` parameters.
     * @param {Map<string, any>} args.bindParams - parameter identifier -> value;
     *   the first '@' of each key is removed to form the BigQuery parameter name.
     * @param {string} args.profileName - profile whose client is used.
     * @param {object} args.operations - passed to `buildSQL` together with the statement.
     * @returns {Promise<{getColumns: Function, getData: Function}>}
     * @throws {InternalError} when the profile is unknown.
     */
    execute({ statement: sql, bindParams, profileName, operations, }) {
        return tslib_1.__awaiter(this, void 0, void 0, function* () {
            this.checkProfileExist(profileName);
            const { bigQuery, options } = this.bqMapping.get(profileName);
            const params = {};
            bindParams.forEach((value, key) => {
                params[key.replace('@', '')] = value;
            });
            try {
                const builtSQL = (0, bqlSqlBuilder_1.buildSQL)(sql, operations);
                const queryOptions = {
                    query: builtSQL,
                    location: (options === null || options === void 0 ? void 0 : options.location) || 'US',
                    params,
                    maxResults: (options === null || options === void 0 ? void 0 : options.chunkSize) || 100,
                };
                const [job] = yield bigQuery.createQueryJob(queryOptions);
                return yield this.getResultFromQueryJob(job, options);
            }
            catch (e) {
                this.logger.debug(`Errors occurred, release connection from ${profileName}`);
                throw e;
            }
        });
    }
    /**
     * Returns the placeholder to embed in SQL for the given parameter index.
     *
     * @param {object} args
     * @param {number} args.parameterIndex
     * @returns {Promise<string>} `@p<parameterIndex>`
     */
    prepare({ parameterIndex }) {
        return tslib_1.__awaiter(this, void 0, void 0, function* () {
            return `@p${parameterIndex}`;
        });
    }
    /**
     * Fetches the first page of a query job and wraps the remaining pages in
     * an object-mode Readable that pulls further pages on demand.
     *
     * @param {object} queryJob - job returned by `createQueryJob`.
     * @param {object} [options] - profile connection options; `chunkSize`
     *   (default 100) is the page size requested per fetch.
     * @returns {Promise<{getColumns: Function, getData: Function}>}
     *   `getColumns()` maps the schema fields of the first response to
     *   `{ name, type }`; `getData()` returns the row stream.
     */
    getResultFromQueryJob(queryJob, options) {
        return tslib_1.__awaiter(this, void 0, void 0, function* () {
            const { chunkSize = 100 } = options || {};
            const fetchJobResult = this.fetchJobResult.bind(this);
            const firstChunk = yield fetchJobResult(queryJob, chunkSize);
            // save first chunk in buffer for incoming requests
            let bufferedRows = [...firstChunk.rows];
            let bufferReadIndex = 0;
            let nextQuery = firstChunk.nextQuery;
            const fetchNext = () => tslib_1.__awaiter(this, void 0, void 0, function* () {
                // Keep fetching while the buffer is exhausted: a page may come
                // back empty while still carrying a follow-up query, and that
                // must not be mistaken for the end of the result set.
                while (bufferReadIndex >= bufferedRows.length) {
                    if (nextQuery == null)
                        return null;
                    const fetchData = yield fetchJobResult(queryJob, chunkSize, nextQuery);
                    bufferedRows = fetchData.rows;
                    nextQuery = fetchData.nextQuery;
                    bufferReadIndex = 0;
                }
                const res = bufferedRows[bufferReadIndex] || null;
                bufferReadIndex += 1;
                return res;
            });
            const stream = new stream_1.Readable({
                objectMode: true,
                read() {
                    // Pushing null signals end-of-stream to the consumer.
                    fetchNext()
                        .then((row) => {
                        this.push(row);
                    })
                        .catch((error) => {
                        this.destroy(error);
                    });
                },
                // automatically destroy() the stream when it emits 'finish' or errors. Node > 10.16
                autoDestroy: true,
            });
            return {
                getColumns: () => {
                    var _a, _b;
                    const fields = ((_b = (_a = firstChunk.apiResponse) === null || _a === void 0 ? void 0 : _a.schema) === null || _b === void 0 ? void 0 : _b.fields) || [];
                    return fields.map((field) => ({
                        name: field.name || '',
                        type: (0, typeMapper_1.mapFromBQTypeId)(field.type || ''),
                    }));
                },
                getData: () => stream,
            };
        });
    }
    /**
     * Promise adapter around the callback form of `queryJob.getQueryResults`.
     *
     * @param {object} queryJob
     * @param {number} chunkSize - `maxResults` used when `nextQuery` is absent.
     * @param {object} [nextQuery] - follow-up query object from a previous page.
     * @returns {Promise<{rows: Array, nextQuery: any, apiResponse: any}>}
     *   `rows` is an empty array when the callback delivers none.
     */
    fetchJobResult(queryJob, chunkSize, nextQuery) {
        return tslib_1.__awaiter(this, void 0, void 0, function* () {
            return new Promise((resolve, reject) => {
                queryJob.getQueryResults(nextQuery || { maxResults: chunkSize }, (err, rows, nextQuery, apiResponse) => {
                    if (err) {
                        return reject(err);
                    }
                    resolve({ rows: rows || [], nextQuery, apiResponse });
                });
            });
        });
    }
    /**
     * Ensures a profile has been registered by `onActivate`.
     *
     * @param {string} profileName
     * @throws {InternalError} when no entry exists for `profileName`.
     */
    checkProfileExist(profileName) {
        if (!this.bqMapping.has(profileName)) {
            throw new core_1.InternalError(`Profile instance ${profileName} not found`);
        }
    }
    /**
     * Polls the job metadata until its state is 'DONE', then fails if the job
     * finished with an error.
     *
     * @param {object} job - job returned by `createQueryJob`.
     * @returns {Promise<void>}
     * @throws {Error} when the finished job reports `status.errorResult`.
     */
    runJobAndWait(job) {
        return tslib_1.__awaiter(this, void 0, void 0, function* () {
            // Pause between polls so the loop does not flood the API with
            // back-to-back metadata requests while the job is running.
            const pollIntervalMs = 500;
            let [jobResult] = yield job.getMetadata();
            while (jobResult.status.state !== 'DONE') {
                yield new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
                [jobResult] = yield job.getMetadata();
            }
            // 'DONE' only means the job stopped; a failure is reported through
            // status.errorResult and must not be treated as success.
            const errorResult = jobResult.status.errorResult;
            if (errorResult) {
                throw new Error(`BigQuery job failed: ${errorResult.message || errorResult.reason || 'unknown error'}`);
            }
        });
    }
};
// Apply the VulcanExtensionId('bq') class decorator through tslib's
// __decorate helper and rebind the class to the decorated result.
BQDataSource = tslib_1.__decorate([
(0, core_1.VulcanExtensionId)('bq')
], BQDataSource);
// Publish the decorated class as this module's named export.
exports.BQDataSource = BQDataSource;
//# sourceMappingURL=bqDataSource.js.map