/*
 * node-vntokenizer
 * Version:
 * Tokenizer for Vietnamese in Node.js and JavaScript
 * 185 lines (162 loc) • 3.32 kB
 * JavaScript
 */
var LexerRule = require('./lexerRule');
/**
 * A lexer token: a piece of text, the rule that matched it, and the location
 * of the text in a file.
 *
 * Two call shapes are supported (the second mirrors the text-only constructor
 * of the Java class this file was ported from):
 *
 *   new TaggedWord(rule, text, line, column)
 *   new TaggedWord(text)  // no rule, location (-1, -1)
 *
 * @param rule
 * the rule that matched the text (an object exposing getName(); presumably
 * a LexerRule), or the text itself when it is the only argument
 * @param text
 * the text; defaults to ''
 * @param line
 * the line location of the text in a file; defaults to -1
 * @param column
 * the column location of the text in a file; defaults to -1
 */
var TaggedWord = (function () {
    function TaggedWord(rule, text, line, column) {
        // A lone string argument is the text-only form: the token has no rule.
        var isTextOnly = typeof rule === 'string' && text === undefined;

        /** The lexer rule that matched this token, or null. */
        this.rule = isTextOnly ? null : (rule || null);
        /** The raw (untrimmed) text. */
        this.text = isTextOnly ? rule : (text == null ? '' : String(text));
        // Compare against null/undefined explicitly so that a location of 0
        // is kept instead of being replaced by the -1 "unknown" marker.
        /** The line location of the text in the file, -1 when unknown. */
        this.line = line == null ? -1 : line;
        /** The column location of the text in the file, -1 when unknown. */
        this.column = column == null ? -1 : column;
    }

    /**
     * Test whether the name of the token's rule starts with a prefix.
     * A token without a rule matches no prefix.
     */
    function ruleNameStartsWith(token, prefix) {
        if (token.rule == null) {
            return false;
        }
        return token.rule.getName().slice(0, prefix.length) === prefix;
    }

    /**
     * Return the rule that matched this token
     *
     * @return the rule that matched this token, or null
     */
    TaggedWord.prototype.getRule = function () {
        return this.rule;
    };

    /**
     * Return the text matched by this token
     *
     * @return the text with leading and trailing whitespace removed
     */
    TaggedWord.prototype.getText = function () {
        return this.text.trim();
    };

    /**
     * Test if the rule of this token is a phrase rule. A phrase is processed
     * by a lexical segmenter.
     *
     * @return true if the rule is named exactly "phrase"; false otherwise,
     * including when the token has no rule
     */
    TaggedWord.prototype.isPhrase = function () {
        return this.rule != null && this.rule.getName() === 'phrase';
    };

    /**
     * Test if the rule of this token is a named entity rule.
     *
     * @return true if the rule name starts with "name"
     */
    TaggedWord.prototype.isNamedEntity = function () {
        return ruleNameStartsWith(this, 'name');
    };

    /**
     * @return true if the rule name starts with "date"
     */
    TaggedWord.prototype.isDate = function () {
        return ruleNameStartsWith(this, 'date');
    };

    /**
     * @return true if the rule name starts with "day"
     */
    TaggedWord.prototype.isDateDay = function () {
        return ruleNameStartsWith(this, 'day');
    };

    /**
     * @return true if the rule name starts with "month"
     */
    TaggedWord.prototype.isDateMonth = function () {
        return ruleNameStartsWith(this, 'month');
    };

    /**
     * @return true if the rule name starts with "year"
     */
    TaggedWord.prototype.isDateYear = function () {
        return ruleNameStartsWith(this, 'year');
    };

    /**
     * @return true if the rule name starts with "number"
     */
    TaggedWord.prototype.isNumber = function () {
        return ruleNameStartsWith(this, 'number');
    };

    /**
     * @return Returns the column.
     */
    TaggedWord.prototype.getColumn = function () {
        return this.column;
    };

    /**
     * @param column
     * The column to set.
     */
    TaggedWord.prototype.setColumn = function (column) {
        this.column = column;
    };

    /**
     * @return Returns the line.
     */
    TaggedWord.prototype.getLine = function () {
        return this.line;
    };

    /**
     * @param line
     * The line to set.
     */
    TaggedWord.prototype.setLine = function (line) {
        this.line = line;
    };

    /**
     * Return a string representation of the token
     *
     * @return the trimmed text
     */
    TaggedWord.prototype.toString = function () {
        return this.text.trim();
    };

    /**
     * Hash of the trimmed text, computed the way java.lang.String#hashCode
     * does: h = 31 * h + code unit, wrapped to a signed 32-bit integer.
     *
     * @return a signed 32-bit integer; 0 for empty text
     */
    TaggedWord.prototype.hashCode = function () {
        var text = this.getText();
        var hash = 0;
        var i;
        for (i = 0; i < text.length; i += 1) {
            // (hash << 5) - hash is 31 * hash; "| 0" wraps to 32 bits.
            hash = ((hash << 5) - hash + text.charCodeAt(i)) | 0;
        }
        return hash;
    };

    /**
     * Two tokens are considered equal if their trimmed texts are equal.
     *
     * @param obj
     * the value to compare with
     * @return true if obj is a TaggedWord with the same trimmed text
     */
    TaggedWord.prototype.equals = function (obj) {
        if (!(obj instanceof TaggedWord)) {
            return false;
        }
        return obj.getText() === this.getText();
    };

    /**
     * Order two tokens by their trimmed texts, compared by UTF-16 code
     * units, so the result can be used as a sort comparator.
     *
     * @param o
     * the token to compare with
     * @return -1, 0 or 1 when this text sorts before, equal to or after
     * the text of o
     */
    TaggedWord.prototype.compareTo = function (o) {
        var mine = this.getText();
        var theirs = o.getText();
        if (mine < theirs) {
            return -1;
        }
        return mine > theirs ? 1 : 0;
    };

    return TaggedWord;
}());
// Expose the TaggedWord constructor as the sole export of this module.
module.exports = TaggedWord;