Sample / Cuad_others.py
Shredder's picture
Upload Cuad_others.py
6b12e0b
raw
history blame
1.97 kB
from predict import run_prediction
from io import StringIO
import json
import spacy
from spacy import displacy
from transformers import pipeline
import torch
import nltk
# Fetch the NLTK "punkt" resource as an import-time side effect.
# NOTE(review): nothing in the code visible here calls nltk directly;
# presumably another module (e.g. `predict`) relies on it - confirm before removing.
nltk.download('punkt')
## Summarization
# Module-level Hugging Face `summarization` pipeline using the
# "knkarthick/MEETING_SUMMARY" model. It is built once when this module is
# imported and shared by every call to summarize_text().
summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
def summarize_text(text):
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    Args:
        text: Input handed unchanged to ``summarizer``.

    Returns:
        The ``'summary_text'`` value of the first result the pipeline returns.
    """
    first_result = summarizer(text)[0]
    return first_result['summary_text']
## Company Extraction
# Module-level Hugging Face `ner` pipeline, built once at import time and used
# by fin_ner(). With aggregation_strategy="simple" each result carries an
# `entity_group` key, which fin_ner() renames to `entity`.
# NOTE(review): the model name suggests a CamemBERT (French) checkpoint -
# confirm it is the intended model for the documents processed here.
ner=pipeline('ner',model='Jean-Baptiste/camembert-ner-with-dates',tokenizer='Jean-Baptiste/camembert-ner-with-dates', aggregation_strategy="simple")
def fin_ner(text):
    """Run the module-level ``ner`` pipeline on *text* and relabel its spans.

    Each span dict returned by ``ner`` is modified in place: its
    ``'entity_group'`` key is removed and the value is stored under
    ``'entity'`` instead.

    Args:
        text: Input handed unchanged to ``ner``.

    Returns:
        A dict ``{"text": text, "entities": [...]}`` where ``entities`` holds
        the relabelled span dicts in the order the pipeline produced them.

    Raises:
        KeyError: If a span has no ``'entity_group'`` key.
    """
    entities = []
    for span in ner(text):
        # pop-then-assign is the same rename as "copy the value, then delete".
        span['entity'] = span.pop('entity_group')
        entities.append(span)
    return {"text": text, "entities": entities}
#CUAD STARTS
def load_questions():
    """Read ``questions.txt`` from the current working directory.

    Returns:
        A list with one string per line of the file; line terminators are
        kept, exactly as ``readlines()`` would return them.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    with open('questions.txt') as handle:
        # Iterating a text file yields the same lines as readlines().
        return list(handle)
def load_questions_short():
    """Read ``questionshort.txt`` from the current working directory.

    Returns:
        A list with one string per line of the file; line terminators are
        kept, exactly as ``readlines()`` would return them.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    with open('questionshort.txt') as handle:
        # Iterating a text file yields the same lines as readlines().
        return list(handle)
def quad(query, file):
    """Answer one question about a document using the CUAD prediction helper.

    The document at *file* is read in full and, together with *query*, passed
    to ``run_prediction`` with the 'marshmellow77/roberta-base-cuad' model.

    Args:
        query: The question to ask about the document.
        file: Path of the text file holding the document.

    Returns:
        A tuple ``(answer, answer_p)``:

        * When the document or the query is empty, or the prediction for the
          query is the empty string:
          ``('No answer found in document', '')``.
        * Otherwise ``answer`` is the top candidate's text followed by a
          newline, and ``answer_p`` is ``answer`` followed by a
          ``"Probability: <pct>%"`` line (percentage rounded to one decimal)
          and a blank line.

    Raises:
        OSError: If *file* or ``nbest.json`` cannot be opened.
    """
    no_answer = 'No answer found in document'

    with open(file) as f:
        paragraph = f.read()

    # Nothing to predict on: skip the model call entirely instead of running
    # it against an empty document or an empty question.
    if not paragraph or not query:
        return no_answer, ""

    print('getting predictions')
    predictions = run_prediction(
        [query], paragraph, 'marshmellow77/roberta-base-cuad', n_best_size=5
    )

    # Predictions are keyed by the question's position as a string; only one
    # question is submitted, so its key is '0'.
    if predictions['0'] == "":
        return no_answer, ""

    # NOTE(review): nbest.json is not written anywhere in this function;
    # presumably run_prediction produces it as a side effect - confirm.
    with open("nbest.json") as jf:
        data = json.load(jf)

    # Only the first (top-ranked) candidate for the question is reported.
    best = data['0'][0]
    answer = f"{best['text']}\n"
    answer_p = answer + f"Probability: {round(best['probability']*100,1)}%\n\n"
    return answer, answer_p