indinis
Version:
A storage library using LSM trees for storage and B-trees for indices with MVCC support
191 lines (151 loc) • 8.49 kB
text/typescript
//tssrc/test/analytical_queries.test.ts
import { Indinis, IndinisOptions, ColumnSchemaDefinition, ColumnType, sum, count } from '../index';
import * as fs from 'fs';
import * as path from 'path';
import { rimraf } from 'rimraf';
/**
 * Returns a promise that resolves (with no value) once `ms` milliseconds have
 * elapsed on a `setTimeout` timer.
 *
 * @param ms - Time to wait, in milliseconds.
 */
const delay = (ms: number) =>
  new Promise(resolve => {
    setTimeout(resolve, ms);
  });
/**
 * Serialises a value to a string for log output.
 *
 * Uses `JSON.stringify`, and falls back to `String(obj)` for inputs that
 * `JSON.stringify` maps to `undefined` (`undefined`, functions, symbols), so
 * the declared `string` return type always holds. Inputs on which
 * `JSON.stringify` throws (e.g. circular structures, BigInt) still throw.
 *
 * @param obj - Any value to render.
 * @returns The JSON text, or the `String()` rendering when no JSON text exists.
 */
const s = (obj: unknown): string => {
  // The lib typings claim `string`, but the runtime result can be undefined.
  const json: string | undefined = JSON.stringify(obj);
  return json ?? String(obj);
};
/**
 * Absolute path of the parent directory that holds every per-test data
 * directory of this suite: two levels above this file, under `.test-data`.
 */
const TEST_DATA_DIR_BASE = path.resolve(
  path.join(__dirname, '..', '..'),
  '.test-data',
  'indinis-analytical-queries',
);
/**
 * Shape of one sales document used by the analytical query tests.
 *
 * The fields mirror the columns registered in the suite's column schema.
 */
interface SalesRecord {
  /** Document identifier; the fixtures never set it (presumably store-assigned — confirm). */
  id?: string;
  /** Sales region; used as the GROUP BY key in the tests. */
  region: 'NA' | 'EU' | 'APAC';
  /** Product category; used as the filter field in the tests. */
  product_category: 'electronics' | 'books' | 'apparel';
  /** Number of units in the sale; summed by the aggregation tests. */
  units_sold: number;
  /** Revenue of the sale; summed by the aggregation tests. */
  total_price: number;
  /**
   * Product identifier. Present in the column schema and in every fixture row;
   * optional here so existing `SalesRecord` values without it remain valid.
   */
  product_id?: number;
}
describe('Indinis Analytical Queries (groupBy, aggregate)', () => {
// Database handle under test; assigned in `beforeEach` and closed in `afterEach`.
let db: Indinis;
// Data directory of the current test, located under TEST_DATA_DIR_BASE.
let testDataDir: string;
// Store path shared by the column schema and by every query in this suite.
const salesPath = 'sales';
// Column schema (version 1) registered for the sales store before any data is
// written. Each column pairs a field name with its column type and a numeric id.
const salesSchema: ColumnSchemaDefinition = ((): ColumnSchemaDefinition => {
  // Builds one column entry; key order matches name / type / column_id.
  const column = (column_id: number, name: string, type: ColumnType) => ({ name, type, column_id });

  return {
    storePath: salesPath,
    schemaVersion: 1,
    columns: [
      column(1, 'region', ColumnType.STRING),
      column(2, 'product_category', ColumnType.STRING),
      column(3, 'units_sold', ColumnType.INT64),
      column(4, 'total_price', ColumnType.DOUBLE),
      column(5, 'product_id', ColumnType.INT64),
    ],
  };
})();
// Fixture rows chosen so the expected aggregates are easy to verify by hand:
// units_sold totals 380; revenue per region is APAC 2200, EU 4290, NA 3651.
const testData: (Omit<SalesRecord, 'id'> & { product_id: number })[] = (() => {
  // Builds one fixture row; key order matches the record's field order.
  const sale = (
    region: SalesRecord['region'],
    product_category: SalesRecord['product_category'],
    units_sold: number,
    total_price: number,
    product_id: number,
  ) => ({ region, product_category, units_sold, total_price, product_id });

  return [
    sale('NA', 'electronics', 10, 1200.50, 101),
    sale('NA', 'books', 100, 1500.00, 401),
    sale('EU', 'electronics', 5, 800.00, 201),
    sale('EU', 'apparel', 50, 2500.00, 501),
    sale('NA', 'electronics', 8, 950.50, 102),
    sale('APAC', 'books', 200, 2200.00, 402),
    sale('EU', 'electronics', 7, 990.00, 202),
  ];
})();
// Raise the Jest timeout to 45 seconds; the per-test setup closes and reopens
// the database and then waits before each test runs.
jest.setTimeout(45 * 1000);

// Make sure the shared parent directory for per-test data directories exists.
beforeAll(async () => {
  const mkdirOptions = { recursive: true };
  await fs.promises.mkdir(TEST_DATA_DIR_BASE, mkdirOptions);
});
// Per-test setup: creates a fresh data directory, registers the column schema,
// writes the fixture rows, then closes and reopens the database so each test
// starts from a reopened instance.
beforeEach(async () => {
  // mkdtemp creates a uniquely named directory in one step, so no hand-built
  // timestamp/random suffix is needed. The parent is (re)created first so this
  // hook does not depend on the order of other hooks.
  await fs.promises.mkdir(TEST_DATA_DIR_BASE, { recursive: true });
  testDataDir = await fs.promises.mkdtemp(path.join(TEST_DATA_DIR_BASE, 'test-'));
  console.log(`\n[ANALYTICAL TEST START] Using data directory: ${testDataDir}`);

  // 1. Create the DB instance with automatic checkpoints disabled, so flushing
  //    is controlled by this setup.
  db = new Indinis(testDataDir, { checkpointIntervalSeconds: 0 });

  // 2. Register the schema BEFORE writing data, so no backfill of existing
  //    documents is involved in this test.
  await db.registerStoreSchema(salesSchema);

  // 3. Write the fixtures exactly as declared in `testData` (including their
  //    own product_id values), under the keys sale1..saleN.
  const salesStore = db.store<SalesRecord>(salesPath);
  await db.transaction(async () => {
    for (const [index, record] of testData.entries()) {
      // Copy the row so the store never receives the shared fixture object.
      const docData = { ...record };
      await salesStore.item(`sale${index + 1}`).make(docData);
    }
  });

  // 4. Closing the database is used here as the way to force the columnar
  //    write buffer to be flushed to disk.
  console.log("Setup: Closing DB to force flush of columnar write buffer...");
  await db.close();

  // 5. Re-open the database for the actual test.
  //    NOTE(review): the reopened instance uses default options (auto
  //    checkpoints are not disabled here) — confirm this is intended.
  console.log("Setup: Re-opening DB for test execution...");
  db = new Indinis(testDataDir);
  await delay(1000); // Allow time for reopening and background threads to settle.
  console.log("Test setup complete: Data written to both LSM and Columnar stores.");
});
// Per-test teardown: closes the database and removes the test's data
// directory. The directory is removed even when closing fails, so a failed
// close cannot leave stale data behind.
afterEach(async () => {
  try {
    if (db) await db.close();
  } finally {
    // testDataDir is unset if setup failed before the directory was created.
    if (testDataDir && fs.existsSync(testDataDir)) await rimraf(testDataDir);
  }
});
// Suite teardown: removes the shared parent data directory if it still exists.
afterAll(async () => {
  const baseDirExists = fs.existsSync(TEST_DATA_DIR_BASE);
  if (!baseDirExists) {
    return;
  }
  await rimraf(TEST_DATA_DIR_BASE);
});
// Aggregates without groupBy: expects a single result row holding the sum of
// units_sold over every fixture row.
it('should perform a simple aggregation (SUM) over the whole collection', async () => {
  console.log("--- TEST: Total SUM of units_sold ---");

  const salesStore = db.store<SalesRecord>(salesPath);
  const totalsQuery = salesStore.query().aggregate({
    total_units: sum('units_sold'),
  });
  const results = await totalsQuery.take();

  // 10 + 100 + 5 + 50 + 8 + 200 + 7 = 380
  const expectedTotalUnits = 380;
  expect(results).toHaveLength(1);
  expect(results[0].total_units).toBe(expectedTotalUnits);

  console.log(" Verified: Correct total sum calculated.");
});
// Groups by region and checks both the summed revenue and the row count of
// each of the three regions present in the fixtures.
it('should perform a GROUP BY with SUM and COUNT aggregations', async () => {
  console.log("--- TEST: GROUP BY region with SUM and COUNT ---");

  const groupedQuery = db
    .store<SalesRecord>(salesPath)
    .query()
    .groupBy('region')
    .aggregate({
      total_revenue: sum('total_price'),
      // Count on any non-null field
      num_transactions: count('product_id'),
    });
  const results = await groupedQuery.take();

  // Result order is not asserted by the query itself, so order the rows by
  // region name (in place) before comparing.
  results.sort((left, right) => left.region.localeCompare(right.region));
  expect(results).toHaveLength(3);

  const apacRow = results[0];
  const euRow = results[1];
  const naRow = results[2];

  // APAC: one sale of 2200.00
  expect(apacRow).toEqual({ region: 'APAC', total_revenue: 2200.00, num_transactions: 1 });
  // EU: 800.00 + 2500.00 + 990.00 = 4290.00 across three sales
  expect(euRow).toEqual({ region: 'EU', total_revenue: 4290.00, num_transactions: 3 });
  // NA: 1200.50 + 1500.00 + 950.50 = 3651.00 across three sales
  expect(naRow).toEqual({ region: 'NA', total_revenue: 3651.00, num_transactions: 3 });

  console.log(" Verified: Correct GROUP BY results for all regions.");
});
// Restricts the input to the 'electronics' category and then sums revenue per
// region; only regions with electronics sales should appear in the result.
it('should apply a filter BEFORE performing the aggregation', async () => {
  console.log("--- TEST: Filter before GROUP BY ---");

  const electronicsQuery = db
    .store<SalesRecord>(salesPath)
    .query()
    .filter('product_category')
    .equals('electronics') // the filter is expected to run ahead of the grouping
    .groupBy('region')
    .aggregate({
      electronics_revenue: sum('total_price'),
    });
  const results = await electronicsQuery.take();

  // Order rows by region name (in place) so the positional checks are stable.
  results.sort((left, right) => left.region.localeCompare(right.region));

  // APAC has no electronics sales, leaving only EU and NA.
  expect(results).toHaveLength(2);

  const euRow = results[0];
  const naRow = results[1];

  // EU electronics: 800.00 + 990.00 = 1790.00
  expect(euRow).toEqual({ region: 'EU', electronics_revenue: 1790.00 });
  // NA electronics: 1200.50 + 950.50 = 2151.00
  expect(naRow).toEqual({ region: 'NA', electronics_revenue: 2151.00 });

  console.log(" Verified: Filter was correctly applied before aggregation.");
});
// Calling .one() on a query that carries an aggregation must reject with a
// message pointing the caller to .take().
it('should throw an error if trying to use .one() with an aggregation', async () => {
  const aggregationQuery = db
    .store<SalesRecord>(salesPath)
    .query()
    .groupBy('region')
    .aggregate({ total: sum('units_sold') });

  const expectedMessage =
    ".one() cannot be used with an aggregation query. Use .take() to get aggregation results.";

  await expect(aggregationQuery.one()).rejects.toThrow(expectedMessage);

  console.log(" Verified: Correctly threw error for .one() on aggregation.");
});
});