job-recognition
Version:
Library for finding all job titles in an arbitrary piece of text.
90 lines (84 loc) • 2.26 kB
JavaScript
var all = require( '../lib/allJobTitles' );
var _ = require( 'lodash' );
var pos = require( 'pos' );
var primary = [];
var modifier = [];
var single = [];
var stop = [];
var addToArray = function ( arr, itemUpper )
{
item = itemUpper.toLowerCase();
if ( item == itemUpper )
{
if ( ! stop.includes( item ) && item.search( /^[A-z]+$/ ) > -1 )
{
stop.push( item );
// console.log( '.', item );
}
return;
} else if ( itemUpper == itemUpper.toUpperCase() ) {
// this is likely an abbreviation for a title
if ( item.length > 1 && ! primary.includes( item ) )
{
primary.push( item );
// console.log( '*', item );
}
return;
}
if ( item.length < 2 ) return;
if ( ! arr.includes( item ) )
{
arr.push( item );
if ( arr == primary )
{
// console.log( '*', item );
} else {
// console.log( '-', item );
}
}
};
var processTitle = function ( title )
{
var titleWords = _.words( title );
if ( titleWords.length == 1 && ! single.includes( title.toLowerCase() ) )
{
single.push( title.toLowerCase() );
}
var words = new pos.Lexer().lex( titleWords.join( ' ' ) );
var tagger = new pos.Tagger();
var taggedWords = tagger.tag( words );
_.each( taggedWords, w =>
{
if ( w[ 1 ] == 'NNP' )
{
addToArray( primary, w[ 0 ] );
} else {
addToArray( modifier, w[ 0 ] );
}
});
};
_.each( all, title =>
{
var split = title.split( '(' );
if ( split.length > 1 )
{
var shortTitle = split[ 1 ].replace( ')', '' );
var abbrevAndSecondary = shortTitle.split( ',' );
processTitle( abbrevAndSecondary[ 0 ] );
title = title.replace( ' (' + shortTitle + ')', '' );
}
var titleAndSecondary = title.split( ',' );
processTitle( titleAndSecondary[ 0 ] );
});
primary.sort();
modifier.sort();
single.sort();
stop.sort();
console.log( '-- primary -------------------------------------' );
console.log( JSON.stringify( primary, null, '\t' ) );
console.log( '-- modifier ------------------------------------' );
console.log( JSON.stringify( modifier, null, '\t' ) );
console.log( '-- single --------------------------------------' );
console.log( JSON.stringify( single, null, '\t' ) );
console.log( '-- stop ----------------------------------------' );
console.log( JSON.stringify( stop, null, '\t' ) );