lande
Version:
A tiny neural network for natural language detection.
41 lines (29 loc) • 1.23 kB
text/typescript
/* IMPORT */
import path from 'node:path';
import type {Config} from './types';
/* MAIN */
// Absolute location of 'resources/dataset.csv', resolved against the directory the process was started from.
const DATASET_PATH = path.resolve ( 'resources', 'dataset.csv' );

// Benchmark split settings (going by the names: a minimum sample length and a cap on samples — confirm against the consumers).
const DATASET_BENCHMARK_LENGTH_MIN = 35;
const DATASET_BENCHMARK_LIMIT = 1_000;

// Training split settings (same naming scheme as above, plus the fraction presumably reserved for training — confirm against the consumers).
const DATASET_TRAIN_LENGTH_MIN = 40;
const DATASET_TRAIN_LIMIT = 9_000;
const DATASET_TRAIN_PERC = 0.8;

// Test split settings (same naming scheme as above — confirm against the consumers).
const DATASET_TEST_LENGTH_MIN = 35;
const DATASET_TEST_LIMIT = 30_000;
// The 't50' configuration: fifty language codes plus the network settings used with them.
const CONFIG_T50: Config = {
  id: 't50',
  // Codes are listed in their original order and sorted in place (default string ordering) when the module loads.
  langs: [
    'eng', 'deu', 'fra', 'ita', 'rus', 'tur', 'fin', 'heb', 'hun', 'jpn',
    'nld', 'pol', 'por', 'spa', 'ukr', 'ces', 'cmn', 'dan', 'lit', 'mar',
    'mkd', 'swe', 'ara', 'ell', 'pes', 'ron', 'srp', 'bel', 'bul', 'ckb',
    'hau', 'hin', 'ind', 'isl', 'kor', 'nob', 'slk', 'tgl', 'vie', 'aze',
    'ben', 'cat', 'eus', 'hrv', 'hye', 'kat', 'run', 'afr', 'est', 'kaz'
  ].sort (),
  network: {
    batchSize: 100,
    epochs: 4,
    unigrams: 120,
    bigrams: 175,
    trigrams: 175,
    quadgrams: 150,
    hidden: 120
  }
};

// Every available configuration; currently only 't50'.
const CONFIGS: Config[] = [CONFIG_T50];
/* EXPORT */
// Public surface of this module: the dataset settings and the list of configurations.
export {
  DATASET_PATH,
  DATASET_BENCHMARK_LENGTH_MIN,
  DATASET_BENCHMARK_LIMIT,
  DATASET_TRAIN_LENGTH_MIN,
  DATASET_TRAIN_LIMIT,
  DATASET_TEST_LENGTH_MIN,
  DATASET_TEST_LIMIT,
  DATASET_TRAIN_PERC,
  CONFIGS
};