Spaces:

Shredder
/

CONBERT-2

Runtime error

App Files Community

Shredder committed on Sep 6, 2022

Commit

4b31fb8

1 Parent(s): 10f176f

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -45

app.py CHANGED Viewed

@@ -13,8 +13,9 @@ import pandas as pd
 import en_core_web_sm
 from fincat_utils import extract_context_words
 from fincat_utils import bert_embedding_extract
 import pickle
-lr_clf = pickle.load(open("lr_clf_FiNCAT.pickle",'rb'))
 nlp = en_core_web_sm.load()
 nltk.download('punkt')
@@ -41,35 +42,7 @@ def get_sustainability(text):
 #SUSTAINABILITY ENDS
 #CLAIM STARTS
-def score_fincat(txt):
-  li = []
-  highlight = []
-  txt = " " + txt + " "
-  k = ''
-  for word in txt.split():
-    if any(char.isdigit() for char in word):
-      if word[-1] in ['.', ',', ';', ":", "-", "!", "?", ")", '"', "'"]:
-        k = word[-1]
-        word = word[:-1]
-      st = txt.find(" " + word + k + " ")+1
-      k = ''
-      ed = st + len(word)
-      x = {'paragraph' : txt, 'offset_start':st, 'offset_end':ed}
-      context_text = extract_context_words(x)
-      features = bert_embedding_extract(context_text, word)
-      if(features[0]=='None'):
-          highlight.append(('None', '    '))
-          return highlight
-      prediction = lr_clf.predict(features.reshape(1, 768))
-      prediction_probability = '{:.4f}'.format(round(lr_clf.predict_proba(features.reshape(1, 768))[:,1][0], 4))
-      highlight.append((word, '    In-claim' if prediction==1 else 'Out-of-Claim'))
-     # li.append([word,'    In-claim' if prediction==1 else 'Out-of-Claim', prediction_probability])
-    else:
-      highlight.append((word, '    '))
-  #headers = ['numeral', 'prediction', 'probability']
-  #dff = pd.DataFrame(li)
- # dff.columns = headers
-  return highlight
 ##Summarization
@@ -123,6 +96,9 @@ def load_questions_short():
     return questions_short
 questions = load_questions()
 questions_short = load_questions_short()
 def quad(query,file):
     with open(file.name) as f:
         paragraph = f.read()
@@ -132,6 +108,7 @@ def quad(query,file):
         print('getting predictions')
     predictions = run_prediction([query], paragraph, 'marshmellow77/roberta-base-cuad',n_best_size=5)
     answer = ""
     if predictions['0'] == "":
         answer = 'No answer found in document'
     else:
@@ -140,23 +117,11 @@ def quad(query,file):
             for i in range(1):
                 raw_answer=data['0'][i]['text']
                 answer += f"{data['0'][i]['text']} -- \n"
-                answer += f"Probability: {round(data['0'][i]['probability']*100,1)}%\n\n"
-    #summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
-    #resp = summarizer(answer)
-    #stext = resp[0]['summary_text']
-   # highlight,dff=score_fincat(answer)
-    return answer,summarize_text(answer),fin_ner(answer),score_fincat(answer),get_sustainability(answer),fls(answer)
-# b6 = gr.Button("Get Sustainability")
-              #b6.click(get_sustainability, inputs = text, outputs = gr.HighlightedText())
-#iface = gr.Interface(fn=get_sustainability, inputs="textbox", title="CONBERT",description="SUSTAINABILITY TOOL", outputs=gr.HighlightedText(), allow_flagging="never")
-#iface.launch()
-iface = gr.Interface(fn=quad, inputs=[gr.Dropdown(choices=questions,label='SEARCH QUERY'),gr.inputs.File(label='TXT FILE')], title="CONBERT",description="SUSTAINABILITY TOOL",article='Article', outputs=[gr.outputs.Textbox(label='Answer'),gr.outputs.Textbox(label='Summary'),gr.HighlightedText(label='NER'),gr.HighlightedText(label='CLAIM'),gr.HighlightedText(label='SUSTAINABILITY'),gr.HighlightedText(label='FLS')], allow_flagging="never")
 iface.launch()

 import en_core_web_sm
 from fincat_utils import extract_context_words
 from fincat_utils import bert_embedding_extract
+from score_fincat import score_fincat
 import pickle
+#lr_clf = pickle.load(open("lr_clf_FiNCAT.pickle",'rb'))
 nlp = en_core_web_sm.load()
 nltk.download('punkt')
 #SUSTAINABILITY ENDS
 #CLAIM STARTS
 ##Summarization
     return questions_short
 questions = load_questions()
 questions_short = load_questions_short()
 def quad(query,file):
     with open(file.name) as f:
         paragraph = f.read()
         print('getting predictions')
     predictions = run_prediction([query], paragraph, 'marshmellow77/roberta-base-cuad',n_best_size=5)
     answer = ""
+    answer_p=""
     if predictions['0'] == "":
         answer = 'No answer found in document'
     else:
             for i in range(1):
                 raw_answer=data['0'][i]['text']
                 answer += f"{data['0'][i]['text']} -- \n"
+                answer_p =answer+ f"Probability: {round(data['0'][i]['probability']*100,1)}%\n\n"
+    return answer_p,summarize_text(answer),fin_ner(answer),score_fincat(answer),get_sustainability(answer),fls(answer)
+iface = gr.Interface(fn=quad, inputs=[gr.Dropdown(choices=questions_short,label='SEARCH QUERY'),gr.inputs.File(label='TXT FILE')], title="CONBERT",description="CONTRACT REVIEW TOOL",article='Article', outputs=[gr.outputs.Textbox(label='Answer'),gr.outputs.Textbox(label='Summary'),gr.HighlightedText(label='NER'),gr.HighlightedText(label='CLAIM'),gr.HighlightedText(label='SUSTAINABILITY'),gr.HighlightedText(label='FLS')], allow_flagging="never")
 iface.launch()