Reputation: 9194
I am really confused about why Scorer.score is returning ents_p, ents_r, and ents_f as None for the example below. I am seeing something very similar with my own custom model and want to understand why it is returning None.
Example Scorer Code - Returning None for ents_p, ents_r, ents_f
import spacy
from spacy.scorer import Scorer
from spacy.tokens import Doc
from spacy.training.example import Example

# Gold annotations supplied as bare sets of (start_char, end_char, label)
# tuples -- NOTE(review): this is not the format Example.from_dict expects.
examples = [
    ('Who is Talha Tayyab?',
     {(7, 19, 'PERSON')}),
    ('I like London and Berlin.',
     {(7, 13, 'LOC'), (18, 24, 'LOC')}),
    ('Agra is famous for Tajmahal, The CEO of Facebook will visit India shortly to meet Murari Mahaseth and to visit Tajmahal.',
     {(0, 4, 'LOC'), (40, 48, 'ORG'), (60, 65, 'GPE'), (82, 97, 'PERSON'), (111, 119, 'GPE')})
]

def my_evaluate(ner_model, examples):
    """Score the model's predictions on `examples` and return the metrics dict.

    `examples` is an iterable of (text, gold_annotations) pairs; each text is
    run through `ner_model` to build the predicted side of an Example.
    """
    scorer = Scorer()
    example = []
    for input_, annotations in examples:
        pred = ner_model(input_)
        print(pred,annotations)
        # BUG (the subject of this question): dict.fromkeys(annotations) maps
        # each (start, end, label) tuple to None instead of building
        # {"entities": [...]}, so no gold entities reach the reference doc and
        # ents_p / ents_r / ents_f come back as None.
        temp = Example.from_dict(pred, dict.fromkeys(annotations))
        example.append(temp)
    scores = scorer.score(example)
    return scores

ner_model = spacy.load('en_core_web_sm') # for spaCy's pretrained use 'en_core_web_sm'
results = my_evaluate(ner_model, examples)
print(results)
Scorer Results
{'token_acc': 1.0, 'token_p': 1.0, 'token_r': 1.0, 'token_f': 1.0, 'sents_p': None, 'sents_r': None, 'sents_f': None, 'tag_acc': None, 'pos_acc': None, 'morph_acc': None, 'morph_micro_p': None, 'morph_micro_r': None, 'morph_micro_f': None, 'morph_per_feat': None, 'dep_uas': None, 'dep_las': None, 'dep_las_per_type': None, 'ents_p': None, 'ents_r': None, 'ents_f': None, 'ents_per_type': None, 'cats_score': 0.0, 'cats_score_desc': 'macro F', 'cats_micro_p': 0.0, 'cats_micro_r': 0.0, 'cats_micro_f': 0.0, 'cats_macro_p': 0.0, 'cats_macro_r': 0.0, 'cats_macro_f': 0.0, 'cats_macro_auc': 0.0, 'cats_f_per_type': {}, 'cats_auc_per_type': {}}
It is clearly picking out entities from the text
# Sanity check: the pretrained pipeline does predict entities for this text,
# so the None scores above are not caused by missing predictions.
doc = ner_model('Agra is famous for Tajmahal, The CEO of Facebook will visit India shortly to meet Murari Mahaseth and to visit Tajmahal.')
for ent in doc.ents:
    print(ent.text, ent.label_)
Output
Agra PERSON
Tajmahal ORG
Facebook ORG
India GPE
Murari Mahaseth PERSON
Tajmahal ORG
Upvotes: 0
Views: 432
Reputation: 9194
For those that are interested, here is the complete example that matches the input from @aab.
import spacy
from spacy.scorer import Scorer
from spacy.tokens import Doc
from spacy.training.example import Example

# Gold annotations in the format Example.from_dict expects:
# {"entities": [(start_char, end_char, label), ...]}
examples = [
    ('Who is Talha Tayyab?',
     {"entities":[(7, 19, 'PERSON')]}),
    ('I like London and Berlin.',
     {"entities":[(7, 13, 'LOC'), (18, 24, 'LOC')]}),
    ('Agra is famous for Tajmahal, The CEO of Facebook will visit India shortly to meet Murari Mahaseth and to visit Tajmahal.',
     {"entities":[(0, 4, 'LOC'), (40, 48, 'ORG'), (60, 65, 'GPE'), (82, 97, 'PERSON'), (111, 119, 'GPE')]})
]

def my_evaluate(ner_model, examples):
    """Score the model's predictions on `examples` and return the metrics dict.

    `examples` is an iterable of (text, annotations) pairs where annotations
    is a dict with an "entities" key, as required by Example.from_dict.
    """
    scorer = Scorer()
    example = []
    for input_, annotations in examples:
        pred = ner_model(input_)
        print(pred,annotations)
        # annotations is already {"entities": [...]}, so the gold spans are
        # attached to the reference doc and entity scores get computed.
        temp = Example.from_dict(pred, annotations)
        example.append(temp)
    scores = scorer.score(example)
    return scores

ner_model = spacy.load('en_core_web_sm') # for spaCy's pretrained use 'en_core_web_sm'
results = my_evaluate(ner_model, examples)
print(results)
Output
Who is Talha Tayyab? {'entities': [(7, 19, 'PERSON')]}
I like London and Berlin. {'entities': [(7, 13, 'LOC'), (18, 24, 'LOC')]}
Agra is famous for Tajmahal, The CEO of Facebook will visit India shortly to meet Murari Mahaseth and to visit Tajmahal. {'entities': [(0, 4, 'LOC'), (40, 48, 'ORG'), (60, 65, 'GPE'), (82, 97, 'PERSON'), (111, 119, 'GPE')]}
{'token_acc': 1.0, 'token_p': 1.0, 'token_r': 1.0, 'token_f': 1.0, 'sents_p': None, 'sents_r': None, 'sents_f': None, 'tag_acc': None, 'pos_acc': None, 'morph_acc': None, 'morph_micro_p': None, 'morph_micro_r': None, 'morph_micro_f': None, 'morph_per_feat': None, 'dep_uas': None, 'dep_las': None, 'dep_las_per_type': None, 'ents_p': 0.4444444444444444, 'ents_r': 0.5, 'ents_f': 0.47058823529411764, 'ents_per_type': {'PERSON': {'p': 0.6666666666666666, 'r': 1.0, 'f': 0.8}, 'GPE': {'p': 0.3333333333333333, 'r': 0.5, 'f': 0.4}, 'LOC': {'p': 0.0, 'r': 0.0, 'f': 0.0}, 'ORG': {'p': 0.3333333333333333, 'r': 1.0, 'f': 0.5}}, 'cats_score': 0.0, 'cats_score_desc': 'macro F', 'cats_micro_p': 0.0, 'cats_micro_r': 0.0, 'cats_micro_f': 0.0, 'cats_macro_p': 0.0, 'cats_macro_r': 0.0, 'cats_macro_f': 0.0, 'cats_macro_auc': 0.0, 'cats_f_per_type': {}, 'cats_auc_per_type': {}}
Upvotes: 0
Reputation: 11474
This line is the issue, the annotations are not added to the reference docs because they're not in the right format:
Example.from_dict(pred, dict.fromkeys(annotations))
The expected format is:
Example.from_dict(pred, {"entities": [(start, end, label), (start, end, label), ...]})
You can also use the built-in Language.evaluate if you create examples where Example.predicted is unannotated. This also creates the scorer based on your pipeline, so you don't end up with a lot of irrelevant None scores:
Example.from_dict(nlp.make_doc(text), {"entities": [(start, end, label), (start, end, label), ...]})
Once you have these kinds of examples, run:
```python
scores = ner_model.evaluate(examples)
```
Upvotes: 2