@soulcraft/brainy
Universal Knowledge Protocol™ - World's first Triple Intelligence database unifying vector, graph, and document search in one API. 31 nouns × 40 verbs for infinite expressiveness.
/**
* Download and bundle models for offline usage
*/
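//
// Usage (assuming this script lives at scripts/download-models.cjs; adjust the
// path to wherever it is saved in your checkout):
//   node scripts/download-models.cjs        # download both FP32 and Q8 variants
//   node scripts/download-models.cjs fp32   # full-precision model only
//   node scripts/download-models.cjs q8     # quantized model only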
const fs = require('fs').promises
const path = require('path')
const MODEL_NAME = 'Xenova/all-MiniLM-L6-v2'
const OUTPUT_DIR = './models'
// Parse command line arguments for model type selection
const args = process.argv.slice(2)
const downloadType = args.includes('fp32') ? 'fp32' :
args.includes('q8') ? 'q8' : 'both'
async function downloadModels() {
// Use dynamic import for ES modules in CommonJS
const { pipeline, env } = await import('@huggingface/transformers')
// Configure transformers.js to use local cache
env.cacheDir = './models-cache'
env.allowRemoteModels = true
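// Note: allowRemoteModels stays true here so transformers.js can fetch the model
// from the Hugging Face Hub on this (online) machine. A rough sketch of how the
// bundled copy could be consumed fully offline later, assuming the ./models
// directory is shipped alongside the app:
//   env.allowRemoteModels = false
//   env.localModelPath = './models'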
try {
console.log('🧠 Brainy Model Downloader v2.8.0')
console.log('===================================')
console.log(` Model: ${MODEL_NAME}`)
console.log(` Type: ${downloadType} (fp32, q8, or both)`)
console.log(` Cache: ${env.cacheDir}`)
console.log('')
// Create output directory
await fs.mkdir(OUTPUT_DIR, { recursive: true })
// Download models based on type
if (downloadType === 'both' || downloadType === 'fp32') {
console.log('📥 Downloading FP32 model (full precision, 90MB)...')
await downloadModelVariant('fp32')
}
if (downloadType === 'both' || downloadType === 'q8') {
console.log('📥 Downloading Q8 model (quantized, 23MB)...')
await downloadModelVariant('q8')
}
// Copy ALL model files from cache to our models directory
console.log('📁 Copying model files to bundle directory...')
const cacheDir = path.resolve(env.cacheDir)
const outputDir = path.resolve(OUTPUT_DIR)
console.log(` From: ${cacheDir}`)
console.log(` To: ${outputDir}`)
// Copy the entire cache directory structure to ensure we get ALL files
// including tokenizer.json, config.json, and all ONNX model files
const modelCacheDir = path.join(cacheDir, 'Xenova', 'all-MiniLM-L6-v2')
if (await dirExists(modelCacheDir)) {
const targetModelDir = path.join(outputDir, 'Xenova', 'all-MiniLM-L6-v2')
console.log(` Copying complete model: Xenova/all-MiniLM-L6-v2`)
await copyDirectory(modelCacheDir, targetModelDir)
} else {
throw new Error(`Model cache directory not found: ${modelCacheDir}`)
}
console.log('✅ Model bundling complete!')
console.log(` Total size: ${await calculateDirectorySize(outputDir)} MB`)
console.log(` Location: ${outputDir}`)
// Create a marker file with downloaded model info
const markerData = {
model: MODEL_NAME,
bundledAt: new Date().toISOString(),
version: '2.8.0',
downloadType: downloadType,
models: {}
}
// Check which models were downloaded
const fp32Path = path.join(outputDir, 'Xenova/all-MiniLM-L6-v2/onnx/model.onnx')
const q8Path = path.join(outputDir, 'Xenova/all-MiniLM-L6-v2/onnx/model_quantized.onnx')
if (await fileExists(fp32Path)) {
const stats = await fs.stat(fp32Path)
markerData.models.fp32 = {
file: 'onnx/model.onnx',
size: stats.size,
sizeFormatted: `${Math.round(stats.size / (1024 * 1024))}MB`
}
}
if (await fileExists(q8Path)) {
const stats = await fs.stat(q8Path)
markerData.models.q8 = {
file: 'onnx/model_quantized.onnx',
size: stats.size,
sizeFormatted: `${Math.round(stats.size / (1024 * 1024))}MB`
}
}
await fs.writeFile(
path.join(outputDir, '.brainy-models-bundled'),
JSON.stringify(markerData, null, 2)
)
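// Sketch of how downstream code might read this marker at startup to verify the
// bundle (hypothetical consumer code, not part of this script):
//   const marker = JSON.parse(
//     await fs.readFile(path.join(outputDir, '.brainy-models-bundled'), 'utf8')
//   )
//   if (!marker.models.fp32 && !marker.models.q8) throw new Error('No bundled models found')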
console.log('')
console.log('✅ Download complete! Available models:')
if (markerData.models.fp32) {
console.log(` β’ FP32: ${markerData.models.fp32.sizeFormatted} (full precision)`)
}
if (markerData.models.q8) {
console.log(` β’ Q8: ${markerData.models.q8.sizeFormatted} (quantized, 75% smaller)`)
}
console.log('')
console.log('Air-gap deployment ready! 🚀')
} catch (error) {
console.error('❌ Error downloading models:', error)
process.exit(1)
}
}
// Download a specific model variant
async function downloadModelVariant(dtype) {
const { pipeline } = await import('@huggingface/transformers')
try {
// Load the model to force download
const extractor = await pipeline('feature-extraction', MODEL_NAME, {
dtype: dtype,
cache_dir: './models-cache'
})
// Test the model
const testResult = await extractor(['Hello world!'], {
pooling: 'mean',
normalize: true
})
console.log(` ✅ ${dtype.toUpperCase()} model downloaded and tested (${testResult.data.length} dimensions)`)
// Dispose to free memory
if (extractor.dispose) {
await extractor.dispose()
}
} catch (error) {
console.error(` ❌ Failed to download ${dtype} model:`, error)
throw error
}
}
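// The helpers below (findModelDirectories / containsModelFiles) are not called by
// the main flow above; they are generic utilities for locating model files in
// alternative cache layouts (e.g. directories named like 'Xenova--all-MiniLM-L6-v2').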
async function findModelDirectories(baseDir, modelName) {
const dirs = []
try {
// Convert model name to expected directory structure
const modelPath = modelName.replace('/', '--')
async function searchDirectory(currentDir) {
try {
const entries = await fs.readdir(currentDir, { withFileTypes: true })
for (const entry of entries) {
if (entry.isDirectory()) {
const fullPath = path.join(currentDir, entry.name)
// Check if this directory contains model files
if (entry.name.includes(modelPath) || entry.name === 'onnx') {
const hasModelFiles = await containsModelFiles(fullPath)
if (hasModelFiles) {
dirs.push(fullPath)
}
}
// Recursively search subdirectories
await searchDirectory(fullPath)
}
}
} catch (error) {
// Ignore access errors
}
}
await searchDirectory(baseDir)
} catch (error) {
console.warn('Warning: Error searching for model directories:', error)
}
return dirs
}
async function containsModelFiles(dir) {
try {
const files = await fs.readdir(dir)
return files.some(file =>
file.endsWith('.onnx') ||
file.endsWith('.json') ||
file === 'config.json' ||
file === 'tokenizer.json'
)
} catch (error) {
return false
}
}
async function dirExists(dir) {
try {
const stats = await fs.stat(dir)
return stats.isDirectory()
} catch (error) {
return false
}
}
async function fileExists(file) {
try {
const stats = await fs.stat(file)
return stats.isFile()
} catch (error) {
return false
}
}
async function copyDirectory(src, dest) {
await fs.mkdir(dest, { recursive: true })
const entries = await fs.readdir(src, { withFileTypes: true })
for (const entry of entries) {
const srcPath = path.join(src, entry.name)
const destPath = path.join(dest, entry.name)
if (entry.isDirectory()) {
await copyDirectory(srcPath, destPath)
} else {
await fs.copyFile(srcPath, destPath)
}
}
}
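// On Node.js >= 16.7 the manual walk above could be replaced with the built-in
// recursive copy; kept here only as a sketch, since the explicit version above
// avoids relying on a then-experimental API:
//   await fs.cp(src, dest, { recursive: true })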
async function calculateDirectorySize(dir) {
let size = 0
async function calculateSize(currentDir) {
try {
const entries = await fs.readdir(currentDir, { withFileTypes: true })
for (const entry of entries) {
const fullPath = path.join(currentDir, entry.name)
if (entry.isDirectory()) {
await calculateSize(fullPath)
} else {
const stats = await fs.stat(fullPath)
size += stats.size
}
}
} catch (error) {
// Ignore access errors
}
}
await calculateSize(dir)
return Math.round(size / (1024 * 1024))
}
// Run the download
downloadModels().catch(error => {
console.error('Fatal error:', error)
process.exit(1)
})
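// Expected on-disk layout after a successful run with the default 'both' mode
// (file names follow the Xenova/all-MiniLM-L6-v2 repository; exact contents may
// vary with the transformers.js version used):
//
//   models/
//     .brainy-models-bundled
//     Xenova/
//       all-MiniLM-L6-v2/
//         config.json
//         tokenizer.json
//         tokenizer_config.json
//         onnx/
//           model.onnx            (FP32, ~90 MB)
//           model_quantized.onnx  (Q8, ~23 MB)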