UNPKG

spacy-js

Version:

JavaScript API for spaCy with Python REST API

256 lines (218 loc) 6.71 kB
'use strict'; function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? ex['default'] : ex; } var fetch = _interopDefault(require('node-fetch')); var url = _interopDefault(require('url')); async function makeRequest(api, endpoint, opts, method = 'POST') { const headers = { 'Accept': 'application/json', 'Content-Type': 'application/json' }; const credentials = 'same-origin'; const body = JSON.stringify(opts); const apiUrl = url.resolve(api, endpoint); try { const res = await fetch(apiUrl, { method, headers, credentials, body }); return await res.json(); } catch(err) { console.log(`Error fetching data from API: ${api}`); } } async function getSimilarity(api, model, text1, text2) { const json = await makeRequest(api, '/similarity', { model, text1, text2 }); return json.similarity; } class Doc { constructor(words, spaces, attrs = {}) { this._doc = attrs.doc || {}; this._tokens = attrs.tokens || []; this._ents = attrs.ents || []; this._sents = attrs.sents || []; this._chunks = attrs.noun_chunks || []; this._model = attrs.model; this._api = attrs.api; this.tokens = words.map((word, i) => new Token(this, word, spaces[i], this._tokens[i])); for (let i = 0; i < this.tokens.length; i++) { this[i] = this.tokens[i]; } this.cats = this._doc.cats; this.isTagged = this._doc.is_tagged; this.isParsed = this._doc.is_parsed; this.isNered = this._doc.is_nered; this.isSentenced = this._doc.is_sentenced; } inspect() { return this.text } get text() { let text = ''; for (let token of this.tokens) { text += token.textWithWs; } return text } get length() { return this.tokens.length } get ents() { return this._ents.map(({ start, end, label }) => new Span(this, start, end, label)) } get sents() { return this._sents.map(({ start, end }) => new Span(this, start, end)) } get nounChunks() { return this._chunks.map(({ start, end }) => new Span(this, start, end)) } *[Symbol.iterator]() { let i = 0; while (this.tokens[i] !== undefined) { yield this.tokens[i]; ++i; } } toString() { return this.text } map(func) { let tokens = []; for (let token of this) { tokens.push(func(token)); } return tokens } slice(start, end) { return new Span(this, start, end) } async similarity(obj) { return await getSimilarity(this._api, this._model, this.text, obj.text) } } class Span { constructor(doc, start, end, label) { this.doc = doc; this.start = start; this.end = end; this._label = label; this.tokens = [...this.doc].slice(this.start, this.end); for (let i = 0; i < this.tokens.length; i++) { this[i] = this.tokens[0]; } } get text() { let text = ''; for (let token of this.tokens) { text += token.textWithWs; } return text.trim() } get length() { return this.tokens.length } get label() { if (this._label) { return this._label } // Manually check if span is an entity for (let ent of this.doc.ents) { if (ent.start === this.start && ent.end == this.end) { return ent.label } } } *[Symbol.iterator]() { let i = 0; while (this.tokens[i] !== undefined) { yield this.tokens[i]; ++i; } } slice(start, end) { return new Span(this, start, end) } toString() { return this.text } inspect() { return this.text } async similarity(obj) { return await getSimilarity(this.doc._api, this.doc._model, this.text, obj.text) } } class Token { constructor(doc, word, space, attrs = {}) { this.doc = doc; this.whitespace = space ? ' ' : ''; this.text = word; this.textWithWs = this.text + this.whitespace; this.orth = attrs.orth; this.i = attrs.i; this.entType = attrs.ent_type; this.entIob = attrs.ent_iob; this.lemma = attrs.lemma; this.norm = attrs.norm; this.lower = attrs.lower ;(this.shape = attrs.shape), (this.prefix = attrs.prefix); this.suffix = attrs.suffix; this.pos = attrs.pos; this.tag = attrs.tag; this.dep = attrs.dep; this.isAlpha = attrs.is_alpha; this.isAscii = attrs.is_ascii; this.isDigit = attrs.is_digit; this.isLower = attrs.is_lower; this.isUpper = attrs.is_upper; this.isTitle = attrs.is_title; this.isPunct = attrs.is_punct; this.isLeftPunct = attrs.is_left_punct; this.isRightPunct = attrs.is_right_punct; this.isSpace = attrs.is_space; this.isBracket = attrs.is_bracket; this.isCurrency = attrs.is_currency; this.likeUrl = attrs.like_url; this.likeNum = attrs.like_num; this.likeEmail = attrs.like_email; this.isOov = attrs.is_oov; this.isStop = attrs.is_stop; this.isSentStart = attrs.is_sent_start; this._head = attrs.head; } get length() { return this.text.length } get head() { return this.doc[this._head] } toString() { return this.text } inspect() { return this.text } async similarity(obj) { return await getSimilarity(this.doc._api, this.doc._model, this.text, obj.text) } } class Language { constructor(model, api = 'http://localhost:8080') { const self = this; return async function(text) { const { words, spaces, attrs } = await self.makeDoc(model, text, api); return new Doc(words, spaces, attrs); } } async makeDoc(model, text, api) { const json = await makeRequest(api, 'parse', { model, text }); const words = json.tokens.map(({ text }) => text); const spaces = json.tokens.map(({ whitespace }) => Boolean(whitespace)); return { words, spaces, attrs: Object.assign({}, json, { api }) } } } // Root of the module let Spacy = { load: function(model, api) { return new Language(model, api); } }; // Named exports Spacy.Doc = Doc; Spacy.Token = Token; Spacy.Span = Span; module.exports = Spacy;