Reputation: 41
I am using Spacy for a model of recognition of named entities, Update of the Recognition of the Named Entity the documentation provided me with this code to update an existing model, this code only specifies the model that will be used as a base, the place where it would be stored and the Number of iterations
from __future__ import unicode_literals, print_function
import plac
import random
from pathlib import Path
import spacy
from spacy.util import minibatch, compounding
from pathlib import Path
# training data
TRAIN_DATA = [
("', 137 S Wilmington Street Raleigh, NC 27601 919.239.4070\t, • Server: Brian 20/1 Guests: 8\t10/01/2018 1:11 PM 20014, L Chicken Arti Pizza\t10.99, Subtotal Tax\t10.99 0.91, Total\t11.90, Balance Due\t11.90, Gratuity Suggestions To Help:, 20% = 2.20 18% = 1.2L,, 115% = 1.65 |f ,9., '?", {"entities": [(3, 19, "ORG")]}),
("Carolina Ale House, G1enwood, 0516 Table 23 #Party 1 JORDYN M SvrCk: 27 7:42p 09/30/18, Separate checks: 3-of-7\t, 2 Carolina Hurrlca\t15.50, 1 Smoked Cheddar Burger\t9.79, Sub Total:\t25.29, Tax:\t2.08, Sub Total:\t27.37, 20X GRATUIT\t5.06, 09/30 10:36pTO TAI : 32\t, D i d you enjoy Every delicious Bite’? Come back to See us and bring your friends*, You are always Welcome at our, House>", {"entities": [(8, 18, "ORG")]}),
(", P~ l-LMl NG *, PRIME STEAKHOUSE 8, WINE BAR, Kalelyh, nr 27612 919-571-6200, Sgj*1® IABIE 51\t6, UlER1 H SvrCk: 5 8:04p 10/02/18, 1\tBlueheny Lemon Drop, ^ Corona, 2\tCraft Beer 2 2 120 Tomahawk 1 Pork Chop 1 Scottish Salmon 4 Prime Dessert, 13.00, 35.00 14.50, 240.00, 40.00, 44.00 0.00, Sub Total: 386.50 „\tTax:\t31.89, 10/02 9:59pTOTAL :\t418.39, www.F1emingsSteakhouse.com ) rials'., Dine Rewards account not attached, Not a Dine Rewards member?, Join now at DINE-REWARDS.COM, ", {"entities": [(17, 35, "ORG")]}),
("Flying Saucer Draught Emporium, 328 Morgan Raleigh, NC, Server: Hope 10/30/7 Guests: 0, 10/04/2018 8:26 PM 20068, L10- Cocktail, 8.00, L10- Classic Daiquiri 1/2 Nacho Libre-r L10- Liqueur, L10- Baily’s Irish Cream L10- Rocks, Subtotal, Tax, Total, 5.50, 8.00, 21.50, 0.45, 21.95, Balance Due\t21., T»p: 3,zT., If you pay with debit card, your bank may hold additional funds temporarily. This is not a charge from Flying Saucer, www. beerknurd .com Taxi Taxi - 919.333.3333", {"entities": [(0, 30, "ORG")]}),
]
@plac.annotations(
model=("Model name. Defaults to blank 'en' model.", "option", "m", str),
output_dir=("Optional output directory", "option", "o", Path),
n_iter=("Number of training iterations", "option", "n", int),
)
def main(model='en_core_web_sm', output_dir=None, n_iter=100):
"""Load the model, set up the pipeline and train the entity recognizer."""
nlp = spacy.load(model) # load existing spaCy model
print("Loaded model '%s'" % model)
# create the built-in pipeline components and add them to the pipeline
# nlp.create_pipe works for built-ins that are registered with spaCy
if "ner" not in nlp.pipe_names:
ner = nlp.create_pipe("ner")
nlp.add_pipe(ner, last=True)
# otherwise, get it so we can add labels
else:
ner = nlp.get_pipe("ner")
# add labels
for _, annotations in TRAIN_DATA:
for ent in annotations.get("entities"):
ner.add_label(ent[2])
# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
with nlp.disable_pipes(*other_pipes): # only train NER
# reset and initialize the weights randomly – but only if we're
# training a new model
if model is None:
nlp.begin_training()
for itn in range(n_iter):
random.shuffle(TRAIN_DATA)
losses = {}
# batch up the examples using spaCy's minibatch
batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
for batch in batches:
texts, annotations = zip(*batch)
nlp.update(
texts, # batch of texts
annotations, # batch of annotations
drop=0.5, # dropout - make it harder to memorise data
losses=losses,
)
print("Losses", losses)
# test the trained model
for text, _ in TRAIN_DATA:
doc = nlp(text)
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])
# save model to output directory
if output_dir is not None:
output_dir = Path(output_dir)
if not output_dir.exists():
output_dir.mkdir()
nlp.to_disk(output_dir)
print("Saved model to", output_dir)
# test the saved model
print("Loading from", output_dir)
nlp2 = spacy.load(output_dir)
for text, _ in TRAIN_DATA:
doc = nlp2(text)
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])
print("Tokens", [(t.text, t.ent_type_, t.ent_iob) for t in doc])
if __name__ == "__main__":
plac.call(main('en_core_web_sm', Path.cwd(), 100))
After executing the code he showed me this error which I could not find any reference, even though the new model was generated, but when I tried it I only recognized the entities that were used as training (TRAIN_DATA) and I should also have recognized the entities of the base model 'en_core_web_sm' by Spacy.
Traceback (most recent call last): File "train.py", line 105, in <module> plac.call(main('en_core_web_sm', Path.cwd(), 100)) File "C:\ProgramData\Anaconda3\lib\site-packages\plac_core.py", line 324, in call parser = parser_from(obj) File "C:\ProgramData\Anaconda3\lib\site-packages\plac_core.py", line 133, in parser_from parser.populate_from(obj) File "C:\ProgramData\Anaconda3\lib\site-packages\plac_core.py", line 248, in populate_from self._set_func_argspec(func) File "C:\ProgramData\Anaconda3\lib\site-packages\plac_core.py", line 240, in _set_func_argspec self.argspec = getargspec(obj) File "C:\ProgramData\Anaconda3\lib\site-packages\plac_core.py", line 38, in getargspec str(callableobj)) TypeError: Could not determine the signature of None
Upvotes: 2
Views: 783
Reputation: 41
I solve this removing plac.
if __name__ == "__main__":
call(main('en_core_web_sm', Path.cwd(), 100))
Upvotes: 2