UNPKG

lande

Version:

A tiny neural network for natural language detection.

41 lines (29 loc) 1.23 kB

/* IMPORT */

import path from 'node:path';
import type {Config} from './types';

/* MAIN */

// Location of the dataset CSV, resolved against the directory the process was started from
const DATASET_PATH = path.join ( process.cwd (), 'resources', 'dataset.csv' );

// Per-split thresholds and caps
// NOTE(review): presumably *_LENGTH_MIN is a minimum sample length and *_LIMIT a maximum sample count — confirm against the code consuming them
const DATASET_BENCHMARK_LENGTH_MIN = 35;
const DATASET_BENCHMARK_LIMIT = 1000;

const DATASET_TRAIN_LENGTH_MIN = 40;
const DATASET_TRAIN_LIMIT = 9000;

const DATASET_TEST_LENGTH_MIN = 35;
const DATASET_TEST_LIMIT = 30000;

// NOTE(review): looks like the fraction of the dataset reserved for training — confirm
const DATASET_TRAIN_PERC = 0.8;

// The 50 three-letter language codes used by the "t50" configuration, stored in sorted order
// NOTE(review): the codes look like ISO 639-3 identifiers — confirm
const T50_LANGS = [
  'eng', 'deu', 'fra', 'ita', 'rus', 'tur', 'fin', 'heb', 'hun', 'jpn',
  'nld', 'pol', 'por', 'spa', 'ukr', 'ces', 'cmn', 'dan', 'lit', 'mar',
  'mkd', 'swe', 'ara', 'ell', 'pes', 'ron', 'srp', 'bel', 'bul', 'ckb',
  'hau', 'hin', 'ind', 'isl', 'kor', 'nob', 'slk', 'tgl', 'vie', 'aze',
  'ben', 'cat', 'eus', 'hrv', 'hye', 'kat', 'run', 'afr', 'est', 'kaz'
].sort ();

// Every available configuration; currently only "t50"
const CONFIGS: Config[] = [
  {
    id: 't50',
    langs: T50_LANGS,
    // NOTE(review): presumably training settings plus n-gram/hidden-layer sizes — confirm against the Config type
    network: {
      batchSize: 100,
      epochs: 4,
      unigrams: 120,
      bigrams: 175,
      trigrams: 175,
      quadgrams: 150,
      hidden: 120
    }
  }
];

/* EXPORT */

export {
  DATASET_PATH,
  DATASET_BENCHMARK_LENGTH_MIN,
  DATASET_BENCHMARK_LIMIT,
  DATASET_TRAIN_LENGTH_MIN,
  DATASET_TRAIN_LIMIT,
  DATASET_TEST_LENGTH_MIN,
  DATASET_TEST_LIMIT,
  DATASET_TRAIN_PERC,
  CONFIGS
};