spacy-js
Version:
JavaScript API for spaCy with Python REST API
429 lines (428 loc) • 11.2 kB
JavaScript
/**
 * Raw sample input used by the fixtures in this file: two short sentences
 * separated by a single space.
 */
export const text = ['Hello world!', 'This is a sentence about Facebook.'].join(' ')
/**
 * The sample sentence broken into its ten tokens, with punctuation marks as
 * separate entries. Written as one space-delimited string and split so the
 * token sequence reads left to right.
 */
export const words = 'Hello world ! This is a sentence about Facebook .'.split(' ')
/**
 * One flag per token of the sample sentence: `true` when that token is
 * followed by a space in `text`, `false` otherwise.
 */
export const spaces = [
  true, // 'Hello'
  false, // 'world'
  true, // '!'
  true, // 'This'
  true, // 'is'
  true, // 'a'
  true, // 'sentence'
  true, // 'about'
  false, // 'Facebook'
  false // '.'
]
// Field values shared by most of the sample tokens. Every token key is listed
// here, in the order it must appear on a token, so spreading a spec over this
// object yields the same key order for every token.
const tokenDefaults = {
  text: '',
  text_with_ws: '',
  whitespace: '',
  orth: 0,
  i: 0,
  ent_type: '',
  ent_iob: 'O',
  lemma: '',
  norm: '',
  lower: '',
  shape: '',
  prefix: '',
  suffix: '',
  pos: '',
  tag: '',
  dep: '',
  is_alpha: true,
  is_ascii: true,
  is_digit: false,
  is_lower: false,
  is_upper: false,
  is_title: false,
  is_punct: false,
  is_left_punct: false,
  is_right_punct: false,
  is_space: false,
  is_bracket: false,
  is_currency: false,
  like_url: false,
  like_num: false,
  like_email: false,
  is_oov: true,
  is_stop: false,
  is_sent_start: null,
  head: 0
}

// Per-token values that differ from `tokenDefaults`, in document order.
// `text_with_ws` and `i` are derived when the final token list is built.
// NOTE(review): every `orth` literal is larger than Number.MAX_SAFE_INTEGER,
// so JavaScript may round it to the nearest representable double; the stored
// number is not guaranteed to match the digits written here.
const tokenSpecs = [
  {
    text: 'Hello',
    whitespace: ' ',
    orth: 15777305708150031551,
    lemma: 'hello',
    norm: 'hello',
    lower: 'hello',
    shape: 'Xxxxx',
    prefix: 'H',
    suffix: 'llo',
    pos: 'INTJ',
    tag: 'UH',
    dep: 'intj',
    is_title: true,
    head: 1
  },
  {
    text: 'world',
    whitespace: '',
    orth: 1703489418272052182,
    lemma: 'world',
    norm: 'world',
    lower: 'world',
    shape: 'xxxx',
    prefix: 'w',
    suffix: 'rld',
    pos: 'NOUN',
    tag: 'NN',
    dep: 'ROOT',
    is_lower: true,
    head: 1
  },
  {
    text: '!',
    whitespace: ' ',
    orth: 17494803046312582752,
    lemma: '!',
    norm: '!',
    lower: '!',
    shape: '!',
    prefix: '!',
    suffix: '!',
    pos: 'PUNCT',
    tag: '.',
    dep: 'punct',
    is_alpha: false,
    is_punct: true,
    head: 1
  },
  {
    text: 'This',
    whitespace: ' ',
    orth: 12943039165150086467,
    lemma: 'this',
    norm: 'this',
    lower: 'this',
    shape: 'Xxxx',
    prefix: 'T',
    suffix: 'his',
    pos: 'DET',
    tag: 'DT',
    dep: 'nsubj',
    is_title: true,
    is_sent_start: true,
    head: 4
  },
  {
    text: 'is',
    whitespace: ' ',
    orth: 3411606890003347522,
    lemma: 'be',
    norm: 'is',
    lower: 'is',
    shape: 'xx',
    prefix: 'i',
    suffix: 'is',
    pos: 'VERB',
    tag: 'VBZ',
    dep: 'ROOT',
    is_lower: true,
    is_stop: true,
    head: 4
  },
  {
    text: 'a',
    whitespace: ' ',
    orth: 11901859001352538922,
    lemma: 'a',
    norm: 'gonna',
    lower: 'a',
    shape: 'x',
    prefix: 'a',
    suffix: 'a',
    pos: 'DET',
    tag: 'DT',
    dep: 'det',
    is_lower: true,
    is_stop: true,
    head: 6
  },
  {
    text: 'sentence',
    whitespace: ' ',
    orth: 18108853898452662235,
    lemma: 'sentence',
    norm: 'sentence',
    lower: 'sentence',
    shape: 'xxxx',
    prefix: 's',
    suffix: 'nce',
    pos: 'NOUN',
    tag: 'NN',
    dep: 'attr',
    is_lower: true,
    head: 4
  },
  {
    text: 'about',
    whitespace: ' ',
    orth: 942632335873952620,
    lemma: 'about',
    norm: 'about',
    lower: 'about',
    shape: 'xxxx',
    prefix: 'a',
    suffix: 'out',
    pos: 'ADP',
    tag: 'IN',
    dep: 'prep',
    is_lower: true,
    is_stop: true,
    head: 6
  },
  {
    text: 'Facebook',
    whitespace: '',
    orth: 8081970590932371665,
    ent_type: 'ORG',
    ent_iob: 'B',
    lemma: 'facebook',
    norm: 'facebook',
    lower: 'facebook',
    shape: 'Xxxxx',
    prefix: 'F',
    suffix: 'ook',
    pos: 'PROPN',
    tag: 'NNP',
    dep: 'pobj',
    is_title: true,
    head: 7
  },
  {
    text: '.',
    whitespace: '',
    orth: 12646065887601541794,
    lemma: '.',
    norm: '.',
    lower: '.',
    shape: '.',
    prefix: '.',
    suffix: '.',
    pos: 'PUNCT',
    tag: '.',
    dep: 'punct',
    is_alpha: false,
    is_punct: true,
    head: 4
  }
]

// Full text of the sample document, reused for both doc-level text fields.
const docText = 'Hello world! This is a sentence about Facebook.'

/**
 * Fixture describing the parsed sample document: the model name, doc-level
 * flags, entity / sentence / noun-chunk spans (token offsets, `end`
 * exclusive) and one attribute record per token.
 * Presumably mirrors the payload the Python REST API returns for `text`;
 * confirm against the server before relying on that.
 */
export const attrs = {
  model: 'en_core_web_sm',
  doc: {
    text: docText,
    text_with_ws: docText,
    cats: {},
    is_tagged: true,
    is_parsed: true,
    is_sentenced: true
  },
  ents: [{ start: 8, end: 9, label: 'ORG' }],
  sents: [
    { start: 0, end: 3 },
    { start: 3, end: 10 }
  ],
  noun_chunks: [
    { start: 0, end: 2 },
    { start: 5, end: 7 },
    { start: 8, end: 9 }
  ],
  // Overrides replace values in place, so key order follows `tokenDefaults`.
  tokens: tokenSpecs.map((spec, index) => ({
    ...tokenDefaults,
    ...spec,
    text_with_ws: spec.text + spec.whitespace,
    i: index
  }))
}