// spacy-js
// JavaScript API for spaCy with Python REST API
import { getSimilarity } from './util'
/**
 * A processed text: an ordered sequence of tokens plus document-level
 * annotations (entities, sentences, noun chunks, categories).
 *
 * Tokens are reachable by numeric index (`doc[0]`), through `doc.tokens`,
 * and by iterating the document itself.
 */
export class Doc {
    /**
     * @param {string[]} [words] - Token texts in document order.
     * @param {Array} [spaces] - `spaces[i]` is truthy when token `i` is
     *     followed by a single space.
     * @param {Object} [attrs] - Optional annotation payload. Keys read here:
     *     `doc` (document flags and `cats`), `tokens` (per-token attributes,
     *     aligned with `words`), `ents` (`{ start, end, label }` records),
     *     `sents` and `noun_chunks` (`{ start, end }` records), and
     *     `model` / `api`, which are forwarded to `getSimilarity`.
     */
    constructor(words = [], spaces = [], attrs = {}) {
        this._doc = attrs.doc || {}
        this._tokens = attrs.tokens || []
        this._ents = attrs.ents || []
        this._sents = attrs.sents || []
        this._chunks = attrs.noun_chunks || []
        this._model = attrs.model
        this._api = attrs.api
        this.tokens = words.map((word, i) => new Token(this, word, spaces[i], this._tokens[i]))
        // Mirror the tokens onto numeric keys so `doc[i]` works like an array.
        this.tokens.forEach((token, i) => {
            this[i] = token
        })
        // Document-level flags are copied verbatim; they stay `undefined`
        // when the payload did not provide them.
        this.cats = this._doc.cats
        this.isTagged = this._doc.is_tagged
        this.isParsed = this._doc.is_parsed
        this.isNered = this._doc.is_nered
        this.isSentenced = this._doc.is_sentenced
    }

    /**
     * Legacy Node.js custom-inspection hook, kept for existing callers.
     * @returns {string} The document text.
     */
    inspect() {
        return this.text
    }

    /**
     * Custom-inspection hook honoured by current Node.js versions, so that
     * `console.log(doc)` prints the text. The symbol is looked up with
     * `Symbol.for` so no Node-only module has to be imported.
     * @returns {string} The document text.
     */
    [Symbol.for('nodejs.util.inspect.custom')]() {
        return this.text
    }

    /** @returns {string} All tokens' `textWithWs` concatenated in order. */
    get text() {
        return this.tokens.reduce((text, token) => text + token.textWithWs, '')
    }

    /** @returns {number} Number of tokens. */
    get length() {
        return this.tokens.length
    }

    /** @returns {Span[]} One labelled span per entity record; rebuilt on every access. */
    get ents() {
        return this._ents.map(({ start, end, label }) => new Span(this, start, end, label))
    }

    /** @returns {Span[]} One span per sentence record; rebuilt on every access. */
    get sents() {
        return this._sents.map(({ start, end }) => new Span(this, start, end))
    }

    /** @returns {Span[]} One span per noun-chunk record; rebuilt on every access. */
    get nounChunks() {
        return this._chunks.map(({ start, end }) => new Span(this, start, end))
    }

    /** Iterates over the tokens in document order. */
    *[Symbol.iterator]() {
        yield* this.tokens
    }

    /** @returns {string} The document text. */
    toString() {
        return this.text
    }

    /**
     * Apply `func` to every token.
     * @param {Function} func - Called with a single argument, the token.
     * @returns {Array} The collected return values, in token order.
     */
    map(func) {
        return this.tokens.map(token => func(token))
    }

    /**
     * @param {number} [start] - First token index (array-slice semantics).
     * @param {number} [end] - Index one past the last token.
     * @returns {Span} A span over `tokens.slice(start, end)`.
     */
    slice(start, end) {
        return new Span(this, start, end)
    }

    /**
     * Compare this document's text with another object's `text` by calling
     * `getSimilarity` with the stored api and model.
     * @param {{text: string}} obj - Anything exposing a `text` property.
     * @returns {Promise<*>} Whatever `getSimilarity` resolves to.
     */
    async similarity(obj) {
        return await getSimilarity(this._api, this._model, this.text, obj.text)
    }
}
/**
 * Resolve a slice bound the way `Array.prototype.slice` does.
 * @param {number|undefined} index - Requested bound; negative counts from the end.
 * @param {number} length - Length of the sequence being sliced.
 * @param {number} fallback - Value to use when `index` is `undefined`.
 * @returns {number} A bound clamped to `[0, length]`.
 */
function resolveSliceIndex(index, length, fallback) {
    if (index === undefined) {
        return fallback
    }
    const n = Math.trunc(index) || 0
    if (n < 0) {
        return Math.max(length + n, 0)
    }
    return Math.min(n, length)
}

/**
 * A contiguous run of tokens taken from a parent sequence (normally a Doc).
 *
 * Tokens are reachable by numeric index (`span[0]`), through `span.tokens`,
 * and by iterating the span itself.
 */
export class Span {
    /**
     * @param {Iterable} doc - Parent to take tokens from; it is iterated once.
     * @param {number} [start] - First token index (array-slice semantics).
     * @param {number} [end] - Index one past the last token.
     * @param {*} [label] - Optional label, e.g. for entity spans.
     */
    constructor(doc, start, end, label) {
        this.doc = doc
        this.start = start
        this.end = end
        this._label = label
        this.tokens = [...this.doc].slice(this.start, this.end)
        // Mirror the tokens onto numeric keys so `span[i]` is the i-th token.
        this.tokens.forEach((token, i) => {
            this[i] = token
        })
    }

    /** @returns {string} Concatenated `textWithWs` of the tokens, trimmed at both ends. */
    get text() {
        return this.tokens.reduce((text, token) => text + token.textWithWs, '').trim()
    }

    /** @returns {number} Number of tokens. */
    get length() {
        return this.tokens.length
    }

    /**
     * The label given at construction time. Without one, the label of the
     * parent's entity with exactly the same `start` and `end` is used.
     * @returns {*} The label, or `undefined` when there is none.
     */
    get label() {
        if (this._label) {
            return this._label
        }
        // Manually check if span is an entity
        const ents = this.doc.ents || []
        const match = ents.find(ent => ent.start === this.start && ent.end === this.end)
        if (match === undefined) {
            return undefined
        }
        // Read a Span's stored label directly: going through its `label`
        // getter would repeat this same lookup and never terminate when the
        // matching entity carries no label of its own.
        return match instanceof Span ? match._label : match.label
    }

    /** Iterates over the tokens in order. */
    *[Symbol.iterator]() {
        yield* this.tokens
    }

    /**
     * Take a sub-span. `start` and `end` are relative to this span and follow
     * array-slice semantics. The result is anchored on this span's parent
     * with offsets translated to the parent's indexing, so `label` and
     * `similarity` keep working on it.
     * @param {number} [start] - First token index within this span.
     * @param {number} [end] - Index one past the last token within this span.
     * @returns {Span}
     */
    slice(start, end) {
        const size = this.tokens.length
        // `this.start` is stored as given and may be undefined or negative.
        const offset = resolveSliceIndex(this.start, this.doc.length, 0)
        const from = offset + resolveSliceIndex(start, size, 0)
        const to = offset + resolveSliceIndex(end, size, size)
        return new Span(this.doc, from, to)
    }

    /** @returns {string} The span text. */
    toString() {
        return this.text
    }

    /**
     * Legacy Node.js custom-inspection hook, kept for existing callers.
     * @returns {string} The span text.
     */
    inspect() {
        return this.text
    }

    /**
     * Custom-inspection hook honoured by current Node.js versions, so that
     * `console.log(span)` prints the text.
     * @returns {string} The span text.
     */
    [Symbol.for('nodejs.util.inspect.custom')]() {
        return this.text
    }

    /**
     * Compare this span's text with another object's `text` by calling
     * `getSimilarity` with the parent's `_api` and `_model`.
     * @param {{text: string}} obj - Anything exposing a `text` property.
     * @returns {Promise<*>} Whatever `getSimilarity` resolves to.
     */
    async similarity(obj) {
        return await getSimilarity(this.doc._api, this.doc._model, this.text, obj.text)
    }
}
/**
 * A single token of a document: its text, trailing whitespace and the
 * attributes supplied for it.
 */
export class Token {
    /**
     * @param {Object} doc - Parent document; `head` is looked up on it by
     *     index, and `similarity` reads its `_api` and `_model`.
     * @param {string} word - The token text.
     * @param {*} space - Truthy when the token is followed by a single space.
     * @param {Object} [attrs] - Attribute record with snake_case keys
     *     (`ent_type`, `is_alpha`, `head`, ...). Each is copied onto a
     *     camelCase property; missing keys leave the property `undefined`.
     */
    constructor(doc, word, space, attrs = {}) {
        // A default parameter only covers `undefined`; also tolerate `null`.
        const data = attrs || {}
        this.doc = doc
        this.whitespace = space ? ' ' : ''
        this.text = word
        this.textWithWs = this.text + this.whitespace
        this.orth = data.orth
        this.i = data.i
        this.entType = data.ent_type
        this.entIob = data.ent_iob
        this.lemma = data.lemma
        this.norm = data.norm
        this.lower = data.lower
        this.shape = data.shape
        this.prefix = data.prefix
        this.suffix = data.suffix
        this.pos = data.pos
        this.tag = data.tag
        this.dep = data.dep
        this.isAlpha = data.is_alpha
        this.isAscii = data.is_ascii
        this.isDigit = data.is_digit
        this.isLower = data.is_lower
        this.isUpper = data.is_upper
        this.isTitle = data.is_title
        this.isPunct = data.is_punct
        this.isLeftPunct = data.is_left_punct
        this.isRightPunct = data.is_right_punct
        this.isSpace = data.is_space
        this.isBracket = data.is_bracket
        this.isCurrency = data.is_currency
        this.likeUrl = data.like_url
        this.likeNum = data.like_num
        this.likeEmail = data.like_email
        this.isOov = data.is_oov
        this.isStop = data.is_stop
        this.isSentStart = data.is_sent_start
        // Index of the head token within `doc`; resolved lazily by `head`.
        this._head = data.head
    }

    /** @returns {number} Length of the token text. */
    get length() {
        return this.text.length
    }

    /** @returns {*} The parent document's entry at the stored head index. */
    get head() {
        return this.doc[this._head]
    }

    /** @returns {string} The token text. */
    toString() {
        return this.text
    }

    /**
     * Legacy Node.js custom-inspection hook, kept for existing callers.
     * @returns {string} The token text.
     */
    inspect() {
        return this.text
    }

    /**
     * Custom-inspection hook honoured by current Node.js versions, so that
     * `console.log(token)` prints the text.
     * @returns {string} The token text.
     */
    [Symbol.for('nodejs.util.inspect.custom')]() {
        return this.text
    }

    /**
     * Compare this token's text with another object's `text` by calling
     * `getSimilarity` with the parent document's `_api` and `_model`.
     * @param {{text: string}} obj - Anything exposing a `text` property.
     * @returns {Promise<*>} Whatever `getSimilarity` resolves to.
     */
    async similarity(obj) {
        return await getSimilarity(this.doc._api, this.doc._model, this.text, obj.text)
    }
}