// compromise
// Version:
// natural language processing in the browser
// 290 lines (282 loc) • 5.42 kB
// JavaScript
;
const fns = require('../fns');
//uncompressed country names
// Lowercase country names kept as whole strings. Entries are laid out several
// per line, one line per initial letter; the element order is significant only
// in that it is left exactly as it was.
let countries = [
  'bahamas', 'bangladesh', 'belgium', 'brazil', 'burkina faso', 'burundi',
  'cape verde', 'chile', 'comoros', 'congo-brazzaville', 'cuba', 'cote d\'ivoire',
  'denmark', 'djibouti',
  'ecuador', 'egypt', 'el salvador',
  'fiji', 'france',
  'germany', 'greece', 'guinea-bissau',
  'haiti', 'honduras', 'hungary',
  'iraq', 'israel', 'italy',
  'jamaica',
  'kenya', 'kuwait',
  'laos', 'lesotho', 'libya', 'luxembourg',
  'malawi', 'mali', 'malta', 'mexico', 'moldova', 'morocco', 'mozambique',
  'netherlands', 'nicaragua', 'niger',
  'panama', 'peru',
  'solomon islands', 'sri lanka', 'suriname', 'sweden',
  'timor-leste', 'turkey',
  'u.s.a.', 'united kingdom', 'usa', 'ussr',
  'vietnam',
  'yemen',
  'zimbabwe'
];
// Country names stored in compressed form: every key is a word ending shared by
// several names, and its value is a comma-separated list of the stems that go in
// front of it. Presumably 'pak' + 'istan' becomes 'pakistan' when expanded by
// fns.uncompress_suffixes -- the helper is not visible here, so confirm there.
// A stem may contain spaces, or end in one ('costa ').
let compressed_countries = {
  // five-letter endings
  istan: 'pak,uzbek,afghan,tajik,turkmen',
  ublic: 'czech rep,dominican rep,central african rep',
  uinea: 'g,papua new g,equatorial g',

  // four-letter endings
  land: 'thai,po,switzer,fin,republic of ire,new zea,swazi,ice',
  ania: 'tanz,rom,maurit,lithu,alb',
  rica: 'ame,united states of ame,south af,costa ',
  mbia: 'colo,za,ga',
  eria: 'nig,alg,lib',

  // three-letter endings
  nia: 'arme,macedo,slove,esto',
  sia: 'indone,rus,malay,tuni',
  ina: 'ch,argent,bosnia and herzegov',
  tan: 'kazakhs,kyrgyzs,bhu',
  ana: 'gh,botsw,guy',
  bia: 'saudi ara,ser,nami',
  lia: 'austra,soma,mongo',
  rea: 'south ko,north ko,erit',
  dan: 'su,south su,jor',
  ria: 'sy,aust,bulga',

  // two-letter endings
  ia: 'ind,ethiop,cambod,boliv,slovak,georg,croat,latv',
  an: 'jap,ir,taiw,azerbaij,om',
  da: 'ugan,cana,rwan',
  us: 'belar,mauriti,cypr',
  al: 'nep,seneg,portug',
  in: 'spa,ben,bahra',
  go: 'dr con,to,trinidad-toba',
  la: 'venezue,ango,guatema',
  es: 'united stat,philippin,united arab emirat',
  on: 'camero,leban,gab',
  ar: 'myanm,madagasc,qat',
  ay: 'paragu,norw,urugu',
  ne: 'ukrai,sierra leo,palesti'
};
//replace the list with whatever the helper returns for (list, compressed map);
//presumably the original names plus every stem+suffix expansion - confirm in ../fns
countries = fns.uncompress_suffixes(countries, compressed_countries);
//uncompressed cities
// Lowercase city names kept as whole strings, laid out one line per initial
// letter. Diacritics are kept as written ('klaipėda', 'medellín').
let cities = [
  'aalborg', 'abu dhabi', 'ahmedabad', 'almaty', 'antwerp', 'aqaba', 'ashdod',
  'ashgabat', 'athens', 'auckland',
  'bogota', 'brno', 'brussels',
  'calgary', 'cape town', 'cebu', 'cluj-napoca', 'curitiba',
  'doha', 'dushanbe',
  'espoo',
  'frankfurt',
  'genoa', 'ghent', 'giza', 'graz', 'guangzhou',
  'haifa', 'hanoi', 'helsinki', 'ho chi minh', 'homs',
  'iasi', 'innsbruck',
  // The original entry appears to be 'i' + a combining dot above, i.e. what
  // lowercasing 'İzmir' produces. It is kept as-is, and the plain-ASCII
  // spelling is added so that 'Izmir' / 'izmir' is matched too.
  'i̇zmir',
  'izmir',
  'jakarta',
  'kiev', 'kingston', 'klaipėda', 'kobe', 'kosice', 'krakow',
  'la plata', 'luxembourg',
  'medellín', 'mexico', 'miskolc', 'montevideo', 'montreal', 'moscow',
  'nagoya', 'nis',
  'odessa', 'oslo', 'ottawa',
  'palermo', 'paris', 'perth', 'phnom penh', 'phoenix', 'port elizabeth',
  'poznan', 'prague',
  'reykjavik', 'riga', 'rome', 'rosario',
  'seville', 'skopje', 'stockholm', 'stuttgart', 'sydney',
  'tbilisi', 'tegucigalpa', 'the hague', 'thessaloniki', 'tokyo', 'toulouse',
  'trondheim', 'tunis', 'turku',
  'utrecht',
  'vantaa', 'vasteras',
  'warsaw', 'winnipeg', 'wroclaw',
  'zagreb', 'zaragoza',
];
// City names stored by shared ending: each key is the ending and each value is
// a comma-separated list of stems. Presumably 'ham' + 'burg' becomes 'hamburg'
// once passed through fns.uncompress_suffixes (helper not shown; confirm).
let suffix_compressed_cities = {
  // four-letter ending
  burg: 'saint peters,yekaterin,ham,til,gothen,salz',

  // three-letter endings
  ton: 'hous,edmon,welling,hamil',
  ion: 'hauts-bassins reg,nord reg,herakl',
  ana: 'hav,tir,ljublj',
  ara: 'guadalaj,ank,timișo',

  // two-letter endings
  an: 'tehr,mil,durb,bus,tain,abidj,amm,yerev',
  ia: 'philadelph,brasíl,alexandr,pretor,valenc,sof,nicos',
  on: 'ly,lond,yang,inche,daeje,lisb',
  en: 'shenzh,eindhov,pils,copenhag,berg',
  ng: 'beiji,chittago,pyongya,kaohsiu,taichu',
  in: 'tianj,berl,tur,dubl,duned',
  es: 'los angel,nant,napl,buenos air,f',
  la: 'pueb,mani,barranquil,kampa,guatema',
  or: 'salvad,san salvad,ulan bat,marib',
  us: 'damasc,pirae,aarh,vilni',
  as: 'carac,patr,burg,kaun',
  va: 'craio,petah tik,gene,bratisla',
  ai: 'shangh,mumb,chenn,chiang m',
  ne: 'colog,melbour,brisba,lausan',
  er: 'manchest,vancouv,tangi',
  ka: 'dha,osa,banja lu',
  ro: 'rio de janei,sappo,cai',
  am: 'birmingh,amsterd,rotterd',
  ur: 'kuala lump,winterth,kopavog',
  ch: 'muni,zuri,christchur',
  na: 'barcelo,vien,var',
  ma: 'yokoha,li',
  ul: 'istanb,seo,kab',
  to: 'toron,qui,por',
  iv: 'khark,lv,tel av',
  sk: 'dnipropetrov,gdan,min'
};
//replace the list with whatever the helper returns for (list, suffix map);
//presumably the names so far plus every stem+suffix expansion - confirm in ../fns
cities = fns.uncompress_suffixes(cities, suffix_compressed_cities);
// City names stored by shared beginning: each key is the beginning and each
// value is a comma-separated list of remainders. Presumably 'new ' + 'york'
// becomes 'new york' once passed through fns.uncompress_prefixes (helper not
// shown; confirm). Whitespace is part of the data: the key 'new ' ends in a
// space and the remainder ' josé' starts with one.
let prefix_compressed_cities = {
  'new ': 'delhi,york,taipei',
  'san': 'a\'a,tiago, josé',
  'ta': 'ipei,mpere,llinn,rtu',
  'ba': 'ngalore,ngkok,ku,sel',
  'li': 'verpool,ège,nz,massol',
  'ma': 'rseille,ndalay,drid,lmo',
  'be': 'rn,lgrade,irut',
  'ka': 'rachi,raj,ndy',
  'da': 'egu,kar,ugavpils',
  'ch': 'icago,arleroi,ișinau',
  'co': 'lombo,nstanta,rk',
  'bu': 'rsa,charest,dapest'
};
//replace the list with whatever the helper returns for (list, prefix map);
//presumably the names so far plus every prefix+remainder expansion - confirm in ../fns
cities = fns.uncompress_prefixes(cities, prefix_compressed_cities);
//some of the busiest airports in the world from
//https://www.world-airport-codes.com/world-top-30-airports.html
// Lowercased three-letter airport codes, in the order of the source ranking.
const airports = [
  'atl', 'pek', 'lhr', 'hnd', 'ord', 'lax', 'cdg', 'dfw', 'cgk', 'dxb',
  'fra', 'hkg', 'den', 'bkk', 'ams', 'jfk', 'ist', 'sfo', 'clt', 'las',
  'phx',
  'iah', // Houston George Bush Intercontinental; previously misspelled 'iax'
  'kul', 'mia', 'icn', 'muc', 'syd', 'fco', 'mco', 'bcn', 'yyz', 'lgw',
  'phl',
];
// Expose the three place lists under their own names.
module.exports = { countries, cities, airports };