UNPKG

@vulcan-sql/extension-driver-bq

Version:

BigQuery driver for Vulcan SQL

191 lines 9.1 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.BQDataSource = void 0; const tslib_1 = require("tslib"); const core_1 = require("@vulcan-sql/core"); const stream_1 = require("stream"); const bqlSqlBuilder_1 = require("./bqlSqlBuilder"); const typeMapper_1 = require("./typeMapper"); const bigquery_1 = require("@google-cloud/bigquery"); const storage_1 = require("@google-cloud/storage"); const fs = require("fs"); const path = require("path"); let BQDataSource = class BQDataSource extends core_1.DataSource { constructor() { super(...arguments); this.logger = this.getLogger(); this.bqMapping = new Map(); } onActivate() { return tslib_1.__awaiter(this, void 0, void 0, function* () { const profiles = this.getProfiles().values(); for (const profile of profiles) { this.logger.debug(`Initializing profile: ${profile.name} using bq driver`); const bigqueryClient = new bigquery_1.BigQuery(profile.connection); // https://cloud.google.com/nodejs/docs/reference/bigquery/latest const storage = new storage_1.Storage(profile.connection); this.bqMapping.set(profile.name, { bigQuery: bigqueryClient, storage: storage, options: profile.connection, cache: profile.cache, }); // Testing connection yield bigqueryClient.query('SELECT 1;'); this.logger.debug(`Profile ${profile.name} initialized`); } }); } export({ sql: statement, profileName, directory, }) { return tslib_1.__awaiter(this, void 0, void 0, function* () { // Use "EXPORT DATA" statement to store query result in the GCS bucket {specified in profile.cache.bucketName} temporarily, and will remove the stored data after download. // EXPORT DATA ref: https://cloud.google.com/bigquery/docs/reference/standard-sql/other-statements#export_data_statement this.checkProfileExist(profileName); if (!fs.existsSync(directory)) { throw new Error(`Directory "${directory}" does not exist`); } const { bigQuery, storage, options, cache } = this.bqMapping.get(profileName); const bucketName = cache === null || cache === void 0 ? void 0 : cache.bucketName; if (!bucketName) throw new core_1.ConfigurationError(`cache.bucketName in profile "${profileName}" is required when using cache feature.`); // Use the directory to avoid filename collision, // The wildcard indicates the partition, // ref: https://cloud.google.com/bigquery/docs/reference/standard-sql/other-statements#export_option_list const bucketObjPrefix = path.join(bucketName, directory); const uri = `gs://${path.join(bucketObjPrefix, 'part*.parquet')}`; const queryOptions = { query: `EXPORT DATA OPTIONS( uri="${uri}", format='PARQUET') AS ${statement}`, location: (options === null || options === void 0 ? void 0 : options.location) || 'US', }; try { const [job] = yield bigQuery.createQueryJob(queryOptions); yield this.runJobAndWait(job); const getFilesResponse = yield storage.bucket(bucketName).getFiles({ prefix: directory[0] === '/' ? directory.slice(1) : directory, // remove the first slash if needed }); yield Promise.all(getFilesResponse[0].map((file) => tslib_1.__awaiter(this, void 0, void 0, function* () { const fileName = `${file.name.split('/').pop()}`; yield file.download({ destination: path.resolve(process.cwd(), directory, fileName), }); // delete file from GCS bucket yield file.delete(); }))); } catch (e) { this.logger.debug(`Error when exporting parquet "${directory}"`, e); throw e; } }); } execute({ statement: sql, bindParams, profileName, operations, }) { return tslib_1.__awaiter(this, void 0, void 0, function* () { this.checkProfileExist(profileName); const { bigQuery, options } = this.bqMapping.get(profileName); const params = {}; bindParams.forEach((value, key) => { params[key.replace('@', '')] = value; }); try { const builtSQL = (0, bqlSqlBuilder_1.buildSQL)(sql, operations); const queryOptions = { query: builtSQL, location: (options === null || options === void 0 ? void 0 : options.location) || 'US', params, maxResults: (options === null || options === void 0 ? void 0 : options.chunkSize) || 100, }; const [job] = yield bigQuery.createQueryJob(queryOptions); return yield this.getResultFromQueryJob(job, options); } catch (e) { this.logger.debug(`Errors occurred, release connection from ${profileName}`); throw e; } }); } prepare({ parameterIndex }) { return tslib_1.__awaiter(this, void 0, void 0, function* () { return `@p${parameterIndex}`; }); } getResultFromQueryJob(queryJob, options) { return tslib_1.__awaiter(this, void 0, void 0, function* () { const { chunkSize = 100 } = options || {}; const fetchJobResult = this.fetchJobResult.bind(this); const firstChunk = yield fetchJobResult(queryJob, chunkSize); // save first chunk in buffer for incoming requests let bufferedRows = [...firstChunk.rows]; let bufferReadIndex = 0; let nextQuery = firstChunk.nextQuery; const fetchNext = () => tslib_1.__awaiter(this, void 0, void 0, function* () { if (bufferReadIndex >= bufferedRows.length) { if (nextQuery == null) return null; const fetchData = yield fetchJobResult(queryJob, chunkSize, nextQuery); bufferedRows = fetchData.rows; nextQuery = fetchData.nextQuery; bufferReadIndex = 0; } const res = bufferedRows[bufferReadIndex] || null; bufferReadIndex += 1; return res; }); const stream = new stream_1.Readable({ objectMode: true, read() { fetchNext() .then((row) => { this.push(row); }) .catch((error) => { this.destroy(error); }); }, // automatically destroy() the stream when it emits 'finish' or errors. Node > 10.16 autoDestroy: true, }); return { getColumns: () => { var _a, _b; const fields = ((_b = (_a = firstChunk.apiResponse) === null || _a === void 0 ? void 0 : _a.schema) === null || _b === void 0 ? void 0 : _b.fields) || []; return fields.map((field) => ({ name: field.name || '', type: (0, typeMapper_1.mapFromBQTypeId)(field.type || ''), })); }, getData: () => stream, }; }); } fetchJobResult(queryJob, chunkSize, nextQuery) { return tslib_1.__awaiter(this, void 0, void 0, function* () { return new Promise((resolve, reject) => { queryJob.getQueryResults(nextQuery || { maxResults: chunkSize }, (err, rows, nextQuery, apiResponse) => { if (err) { return reject(err); } resolve({ rows: rows || [], nextQuery, apiResponse }); }); }); }); } checkProfileExist(profileName) { if (!this.bqMapping.has(profileName)) { throw new core_1.InternalError(`Profile instance ${profileName} not found`); } } runJobAndWait(job) { return tslib_1.__awaiter(this, void 0, void 0, function* () { // Wait for the job to complete let [jobResult] = yield job.getMetadata(); while (jobResult.status.state !== 'DONE') { [jobResult] = yield job.getMetadata(); } }); } }; BQDataSource = tslib_1.__decorate([ (0, core_1.VulcanExtensionId)('bq') ], BQDataSource); exports.BQDataSource = BQDataSource; //# sourceMappingURL=bqDataSource.js.map