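// NOTE(review): the next five lines look like package-page header residue rather
// than source code; they are kept verbatim but commented out so the file parses.
// compromise
// Version:
// natural language processing in the browser
// 310 lines (308 loc) • 3.99 kB
// JavaScript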
// A copy-and-paste collection of surnames gathered from around the internet
// (we don't mean to step on any toes).
// Some countries have a stronger last-name signal than others.
// This list is further augmented by some regexps, over in ./data/punct_rules.js
// https://en.wikipedia.org/wiki/List_of_most_common_surnames_in_Europe
module.exports = [
'lee',
'li',
'zhang',
'wang',
'nguyen',
'garcia',
'gonzalez',
'hernandez',
'smirnov',
'muller',
'wong',
'cheung',
'liu',
'lau',
'chen',
'chan',
'yang',
'yeung',
'huang',
'zhao',
'chiu',
'wu',
'zhou',
'chow',
'xu',
'tsui',
'zhu',
'hu',
'guo',
'gao',
'kwok',
'luo',
'devi',
'singh',
'kumar',
'das',
'kaur',
'sato',
'suzuki',
'takahashi',
'tanaka',
'watanabe',
'ito',
'yamamoto',
'nakamura',
'kobayashi',
'kato',
'yoshida',
'yamada',
'sasaki',
'yamaguchi',
'saito',
'matsumoto',
'inoue',
'kimura',
'hayashi',
'shimizu',
'yamazaki',
'ikeda',
'hashimoto',
'yamashita',
'ishikawa',
'nakajima',
'maeda',
'fujita',
'ogawa',
'harris',
'thompson',
'martinez',
'robinson',
'rodriguez',
'walker',
'wright',
'lopez',
'carter',
'perez',
'roberts',
'turner',
'phillips',
'parker',
'evans',
'edwards',
'collins',
'sanchez',
'morris',
'rogers',
'bailey',
'rivera',
'cooper',
'richardson',
'cox',
'torres',
'peterson',
'ramirez',
'brooks',
'sanders',
'bennett',
'barnes',
'henderson',
'coleman',
'jenkins',
'perry',
'powell',
'patterson',
'hughes',
'flores',
'simmons',
'foster',
'bryant',
'hayes',
'smith',
'jones',
'williams',
'miller',
'taylor',
'wilson',
'davis',
'clark',
'hall',
'thomas',
'moore',
'anderson',
'allen',
'lewis',
'jackson',
'adams',
'tryniski',
'campbell',
'gruber',
'huber',
'bauer',
'wagner',
'pichler',
'steiner',
'mammadov',
'aliyev',
'hasanov',
'ivanou',
'ivanov',
'kazlov',
'peeters',
'janssens',
'dimitrov',
'horvat',
'neilson',
'jensen',
'hansen',
'pedersen',
'andersen',
'christensen',
'larsen',
'vassiljev',
'petrov',
'kuznetsov',
'mihhailov',
'pavlov',
'semjonov',
'andrejev',
'aleksejev',
'johansson',
'nyman',
'lindholm',
'karlsson',
'andersson',
'dubois',
'durand',
'leroy',
'moreau',
'lefebvre',
'lefevre',
'roux',
'fournier',
'mercier',
'schmidt',
'schneider',
'fischer',
'meyer',
'weber',
'schulz',
'becker',
'hoffmann',
'kovacs',
'szabo',
'toth',
'nagy',
'byrne',
'murray',
'sullivan',
'rossi',
'russo',
'esposito',
'ricci',
'marino',
'klein',
'nowak',
'silva',
'santos',
'fernandez',
'ruiz',
'jimenez',
'alvarez',
'moreno',
'muñoz',
'alonso',
'gutierrez',
'romero',
'navarro',
'dominguez',
'gil',
'vazquez',
'serrano',
'ramos',
'blanco',
'sanz',
'castro',
'suarez',
'ortega',
'rubio',
'molina',
'delgado',
'morales',
'ortiz',
'marin',
'iglesias',
'boyko',
'davies',
'clarke',
'johnson',
'oliveira',
'sosa',
'rojas',
'munoz',
'diaz',
'gomez',
'xiao',
'tian',
'bahk',
'pahk',
'chung',
'jung',
'joung',
'chong',
'cheong',
'choung',
'choi',
'che',
'choy',
'chwe',
'yeun',
'yun',
'jhang',
'chang',
'cheon',
'kwon',
'soung',
'bhang',
'bahng',
'pahng',
'phang',
'kahn',
'tran',
'pham',
'huynh',
'hoang',
'phan',
'patel',
//these are famous ones
'mozart',
'bach',
'beethoven',
'nixon',
'vivaldi',
'obama',
'reagan',
'lenin',
'stalin',
'hitler',
'mussolini',
'kennedy',
'lincoln',
'gandhi',
'thatcher',
'orwell',
'darwin',
'einstein',
'picasso',
'edison',
'roosevelt',
'tolstoy',
'hemingway',
'hitchcock',
'messi',
'beckham',
'cohen',
]
// Debugging aid: uncomment to log any entry that appears more than once.
// let obj = {}
// module.exports.forEach((str) => {
//   if (obj[str]) {
//     console.log(str)
//   }
//   obj[str] = true
// })