|
import spacy |
|
from negspacy.negation import Negex |
|
from spacy.matcher import PhraseMatcher |
|
from spacy.tokens import Span |
|
|
|
def negation(model: "spacy.language.Language", entities: list) -> "spacy.language.Language":
    """Add the negspacy Negex negation component to a spaCy pipeline.

    The parser is removed (negspacy only needs sentence boundaries, which the
    lightweight sentencizer provides) and Negex is configured to consider the
    given entity types.

    Parameters:
        model: loaded spaCy Language pipeline
        entities: list of entity type labels (e.g. ["PROBLEM", "TREATMENT"])
                  that Negex should evaluate for negation

    Returns:
        model: the same pipeline with 'sentencizer' and 'negex' added
    """
    # Drop the dependency parser: sentence segmentation is handled by the
    # cheaper sentencizer below, and negex does not need parse trees.
    if 'parser' in model.pipe_names:
        model.remove_pipe('parser')

    if 'sentencizer' not in model.pipe_names:
        model.add_pipe('sentencizer')

    if 'negex' not in model.pipe_names:
        # BUG FIX: `config` must be a dict. Passing the raw list raised a
        # TypeError; negspacy expects the entity labels under "ent_types".
        model.add_pipe('negex', config={"ent_types": entities})

    return model
|
|
|
def infer_negation(neg_model: "spacy.language.Language",
                   model: "spacy.language.Language",
                   text: str,
                   pred_doc: "spacy.tokens.Doc") -> "spacy.tokens.Doc":
    """Mark negated entities in a prediction doc with the "NEG" label.

    Runs the negation pipeline over `text`, collects the entities Negex flags
    as negated, locates the same surface strings in `pred_doc` (matched by
    token start offset), and relabels those spans as "NEG".

    Parameters:
        neg_model: spaCy pipeline with negex added (see `negation`)
        model: spaCy pipeline used to tokenize the match patterns; its vocab
               must be shared with `pred_doc`
        text: the raw text sample
        pred_doc: prediction Doc for `text` produced by `model`

    Returns:
        pred_doc: the same Doc with negated entities replaced by "NEG" spans
    """
    doc = neg_model(text)

    # Collect the surface text and token offsets of every negated entity.
    results = {'ent': [], 'start': [], 'end': []}
    for e in doc.ents:
        if str(e._.negex) == "True":
            results['ent'].append(e.text)
            results['start'].append(e.start)
            results['end'].append(e.end)
    print('Negation: ', results)

    # Build phrase patterns for the negated surface strings.
    # NOTE: the loop variable no longer shadows the `text` parameter.
    patterns = [model.make_doc(ent_text) for ent_text in results['ent']]
    matcher = PhraseMatcher(model.vocab)
    # BUG FIX: spaCy v3 API — patterns are passed as a list; the old
    # v2 signature `matcher.add(key, None, *patterns)` raises under v3.
    matcher.add('NEG', patterns)

    matches = matcher(pred_doc)
    negated_starts = set(results['start'])
    seen_tokens = set()
    new_entities = []
    entities = list(pred_doc.ents)

    # BUG FIX: the original mutated `matches` (pop with a stale index) while
    # iterating it, skipping matches. Iterate once instead, accepting each
    # match whose start offset was flagged as negated, and use `seen_tokens`
    # to reject overlapping matches so `pred_doc.ents` never receives
    # overlapping spans (which would raise).
    for match_id, start, end in matches:
        if start in negated_starts and not seen_tokens.intersection(range(start, end)):
            # `match_id` is the vocab hash of 'NEG', so the span label is "NEG".
            new_entities.append(Span(pred_doc, start, end, label=match_id))
            # Drop any existing entity that overlaps the negated span.
            entities = [
                e for e in entities if not (e.start < end and e.end > start)
            ]
            seen_tokens.update(range(start, end))

    pred_doc.ents = tuple(entities) + tuple(new_entities)

    return pred_doc