@genkit-ai/vertexai
Version:
Genkit AI framework plugin for Google Cloud Vertex AI APIs including Gemini APIs, Imagen, and more.
68 lines • 2.06 kB
JavaScript
import { randomUUID } from "node:crypto";
import { z } from "genkit";
import { logger } from "genkit/logging";
import { Document, DocumentDataSchema } from "genkit/retriever";
/**
 * Builds a document retriever that loads documents from a BigQuery table.
 *
 * The returned function collects the `datapoint.datapointId` of every
 * neighbor, selects the rows whose `id` column is in that list, and turns
 * each row into a `Document`. Each row's `content` column (and `metadata`
 * column, when truthy) is parsed as JSON and validated with
 * `DocumentDataSchema` before the `Document` is constructed.
 *
 * Failure handling is best-effort and never throws for data problems:
 * - a failing query is logged with `logger.error` and yields `[]`;
 * - a row that cannot be parsed or validated is logged with `logger.warn`
 *   and skipped, while the remaining rows are still returned.
 *
 * @param {object} bq - Client exposing `query(options)`, which must resolve
 *   to an array whose first element is the list of rows (presumably a
 *   `@google-cloud/bigquery` client — confirm against the caller).
 * @param {string} tableId - Table that holds the document rows.
 * @param {string} datasetId - Dataset that contains the table.
 * @returns {(neighbors: Array<object>) => Promise<Array<Document>>} Async
 *   retriever resolving to the successfully parsed documents.
 */
const getBigQueryDocumentRetriever = (bq, tableId, datasetId) => {
  const bigQueryRetriever = async (neighbors) => {
    // Neighbors without a datapoint ID cannot be looked up; drop them.
    const ids = neighbors
      .map((neighbor) => neighbor.datapoint?.datapointId)
      .filter(Boolean);

    // `id IN UNNEST([])` can never match a row, so skip the round trip.
    // This also avoids sending an untyped empty array parameter, which
    // would otherwise surface as a spurious "query failed" error log.
    if (ids.length === 0) {
      return [];
    }

    // The IDs are passed as a named query parameter rather than being
    // interpolated into the SQL text.
    const query = `
      SELECT * FROM \`${datasetId}.${tableId}\`
      WHERE id IN UNNEST(@ids)
    `;
    const options = {
      query,
      params: { ids },
    };

    let rows;
    try {
      [rows] = await bq.query(options);
    } catch (queryError) {
      logger.error("Failed to execute BigQuery query:", queryError);
      return [];
    }

    const documents = [];
    for (const row of rows) {
      try {
        const docData = {
          content: JSON.parse(row.content),
        };
        if (row.metadata) {
          docData.metadata = JSON.parse(row.metadata);
        }
        const parsedDocData = DocumentDataSchema.parse(docData);
        documents.push(new Document(parsedDocData));
      } catch (error) {
        // One malformed row must not discard the rest of the result set.
        const id = row.id;
        const errorPrefix = `Failed to parse document data for document with ID ${id}:`;
        if (error instanceof z.ZodError || error instanceof Error) {
          logger.warn(`${errorPrefix} ${error.message}`);
        } else {
          logger.warn(errorPrefix);
        }
      }
    }
    return documents;
  };
  return bigQueryRetriever;
};
/**
 * Builds a document indexer that stores documents in a BigQuery table.
 *
 * The returned function assigns every document a freshly generated UUID,
 * serializes its `content` and `metadata` with `JSON.stringify`, and inserts
 * the resulting `{ id, content, metadata }` rows in a single
 * `bq.dataset(datasetId).table(tableId).insert(rows)` call. Errors raised by
 * the insert propagate to the caller.
 *
 * @param {object} bq - Client exposing `dataset(id).table(id).insert(rows)`
 *   (presumably a `@google-cloud/bigquery` client — confirm against the
 *   caller).
 * @param {string} tableId - Table that receives the rows.
 * @param {string} datasetId - Dataset that contains the table.
 * @returns {(docs: Array<object>) => Promise<Array<string>>} Async indexer
 *   resolving to the generated IDs, in the same order as `docs`.
 */
const getBigQueryDocumentIndexer = (bq, tableId, datasetId) => {
  const bigQueryIndexer = async (docs) => {
    // Nothing to store: skip the insert call instead of sending zero rows.
    if (docs.length === 0) {
      return [];
    }

    const rows = docs.map((doc) => ({
      // A UUID replaces the former short Math.random()-derived suffix, which
      // was only a handful of base-36 characters and could collide.
      id: randomUUID(),
      content: JSON.stringify(doc.content),
      // JSON.stringify(undefined) is undefined, so a document without
      // metadata yields a row whose `metadata` value is undefined.
      metadata: JSON.stringify(doc.metadata),
    }));

    await bq.dataset(datasetId).table(tableId).insert(rows);
    return rows.map((row) => row.id);
  };
  return bigQueryIndexer;
};
// Public API of this module: the factory functions that build the
// BigQuery-backed document indexer and document retriever.
export {
getBigQueryDocumentIndexer,
getBigQueryDocumentRetriever
};
//# sourceMappingURL=bigquery.mjs.map