node-wordnet
Version:
Node.js interface for Wordnet
86 lines (70 loc) • 2.97 kB
text/coffeescript
## Copyright (c) 2011, Chris Umbel
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to deal
## in the Software without restriction, including without limitation the rights
## to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
## copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
## OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
## THE SOFTWARE.
WordNetFile = require('./wordnet_file')
fs = require('fs')
util = require('util')
## Reader for a WordNet `data.<name>` file: fetches the record stored at a
## given offset and parses it into a plain object.
module.exports = class DataFile extends WordNetFile
  ## dataDir - passed through unchanged to the WordNetFile constructor.
  ## name    - appended to 'data.' to form the file name handed to WordNetFile.
  constructor: (dataDir, name) ->
    super(dataDir, 'data.' + name)

  ## Read the record at `location` and hand the parsed result to `callback`.
  ##
  ## location - offset of the record; it must be strictly equal to the decimal
  ##            number parsed from the record's first token, otherwise the
  ##            callback receives "Invalid synsetOffset: <location>".
  ## callback - called with `this` set to this DataFile, as (err, record).
  ##            On success `err` is null and `record` has the keys:
  ##              synsetOffset, lexFilenum, pos, wCnt, lemma, synonyms, lexId,
  ##              ptrs (array of {pointerSymbol, synsetOffset, pos, sourceTarget}),
  ##              gloss (text after the first '| '), def (gloss up to the first
  ##              '; '), exp (remaining '; '-separated pieces, with double
  ##              quotes and runs of 2+ whitespace characters removed).
  ##            On failure `record` is null.
  get: (location, callback) ->
    self = @
    ## `new Buffer(size)` is deprecated and returns uninitialised memory;
    ## prefer the zero-filled Buffer.alloc, falling back only on runtimes
    ## that predate it.
    buff = if Buffer.alloc? then Buffer.alloc(4096) else new Buffer(4096)

    @open (err, fd) ->
      return callback.call self, err, null if err?

      ## Use the captured instance rather than `@` so this does not depend on
      ## how `open` binds `this` when invoking its callback.
      self.appendLineChar fd, location, 0, buff, (err, line) ->
        return callback.call self, err, null if err?

        ## Everything before the first '| ' is whitespace-separated fields;
        ## everything after it is the gloss.
        parts = line.split('| ')
        tokens = parts[0].split(/\s+/)

        ## Validate first so a mismatched record is rejected before any
        ## further parsing work is done on it.
        synsetOffset = parseInt(tokens[0], 10)
        if synsetOffset != location
          return callback.call self, "Invalid synsetOffset: " + location, null

        ## The word count field is read as hexadecimal; each word occupies
        ## two tokens (word, lex id) starting at index 4.
        wCnt = parseInt(tokens[3], 16)
        synonyms = (tokens[4 + i * 2] for i in [0...wCnt] by 1)

        ## The pointer count sits right after the last word pair and is read
        ## as decimal; each pointer occupies the four tokens that follow.
        ptrOffset = (wCnt - 1) * 2 + 6
        ptrCnt = parseInt(tokens[ptrOffset], 10)
        ptrs = []
        for i in [0...ptrCnt] by 1
          base = i * 4 + ptrOffset
          ptrs.push {
            pointerSymbol: tokens[base + 1]
            synsetOffset: parseInt(tokens[base + 2], 10)
            pos: tokens[base + 3]
            sourceTarget: tokens[base + 4]
          }

        ## Rejoin so a gloss that itself contains '| ' is not truncated, and a
        ## record with no gloss yields '' instead of throwing on undefined.
        gloss = parts.slice(1).join('| ')

        ## break "gloss" into definition vs. examples
        glossArray = gloss.split("; ")
        definition = glossArray[0]
        examples = (example.replace(/\"/g,'').replace(/\s\s+/g,'') for example in glossArray.slice(1))

        callback.call self, null, {
          synsetOffset: synsetOffset
          lexFilenum: parseInt(tokens[1], 10)
          pos: tokens[2]
          wCnt: wCnt
          lemma: tokens[4]
          synonyms: synonyms
          lexId: tokens[5]
          ptrs: ptrs
          gloss: gloss
          def: definition
          exp: examples
        }