Shredder committed on
Commit
b6cb8b5
1 Parent(s): ef622a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -58
app.py CHANGED
@@ -4,41 +4,15 @@ import json
4
  import gradio as gr
5
  import spacy
6
  from spacy import displacy
7
- from transformers import RobertaTokenizer,pipeline
8
  import torch
9
  import nltk
10
- from nltk.tokenize import sent_tokenize
11
- from fin_readability_sustainability import BERTClass, do_predict
12
- import pandas as pd
13
- import en_core_web_sm
14
  from score_fincat import score_fincat
15
  from sus_fls import get_sustainability,fls
 
16
  nlp = en_core_web_sm.load()
17
  nltk.download('punkt')
18
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
19
 
20
-
21
-
22
-
23
- ##Summarization
24
- summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
25
- def summarize_text(text):
26
- resp = summarizer(text)
27
- stext = resp[0]['summary_text']
28
- return stext
29
-
30
-
31
- ##Company Extraction
32
- ner=pipeline('ner',model='Jean-Baptiste/camembert-ner-with-dates',tokenizer='Jean-Baptiste/camembert-ner-with-dates', aggregation_strategy="simple")
33
- def fin_ner(text):
34
- replaced_spans = ner(text)
35
- new_spans=[]
36
- for item in replaced_spans:
37
- item['entity']=item['entity_group']
38
- del item['entity_group']
39
- new_spans.append(item)
40
- return {"text": text, "entities": new_spans}
41
-
42
 
43
  #CUAD STARTS
44
  def load_questions():
@@ -46,41 +20,14 @@ def load_questions():
46
  with open('questions.txt') as f:
47
  questions = f.readlines()
48
  return questions
49
-
50
-
51
- def load_questions_short():
52
- questions_short = []
53
- with open('questionshort.txt') as f:
54
- questions_short = f.readlines()
55
- return questions_short
56
  questions = load_questions()
57
- questions_short = load_questions_short()
58
-
59
-
60
 
61
- def quad(query,file):
62
- with open(file.name) as f:
63
- paragraph = f.read()
64
- questions = load_questions()
65
- questions_short = load_questions_short()
66
- if (not len(paragraph)==0) and not (len(query)==0):
67
- print('getting predictions')
68
- predictions = run_prediction([query], paragraph, 'marshmellow77/roberta-base-cuad',n_best_size=5)
69
- answer = ""
70
- answer_p=""
71
- if predictions['0'] == "":
72
- answer = 'No answer found in document'
73
- else:
74
- with open("nbest.json") as jf:
75
- data = json.load(jf)
76
- for i in range(1):
77
- raw_answer=data['0'][i]['text']
78
- answer += f"{data['0'][i]['text']} -- \n"
79
- answer_p =answer+ f"Probability: {round(data['0'][i]['probability']*100,1)}%\n\n"
80
- return answer_p,summarize_text(answer),fin_ner(answer),score_fincat(answer),get_sustainability(answer),fls(answer)
81
 
82
 
83
- iface = gr.Interface(fn=quad, inputs=[gr.Dropdown(choices=questions_short,label='SEARCH QUERY'),gr.inputs.File(label='TXT FILE')], title="CONBERT",description="CONTRACT REVIEW TOOL",article='Article', outputs=[gr.outputs.Textbox(label='Answer'),gr.outputs.Textbox(label='Summary'),gr.HighlightedText(label='NER'),gr.HighlightedText(label='CLAIM'),gr.HighlightedText(label='SUSTAINABILITY'),gr.HighlightedText(label='FLS')], allow_flagging="never")
84
 
85
 
86
  iface.launch()
 
4
  import gradio as gr
5
  import spacy
6
  from spacy import displacy
 
7
  import torch
8
  import nltk
 
 
 
 
9
  from score_fincat import score_fincat
10
  from sus_fls import get_sustainability,fls
11
+ from Cuad_others import quad,summarize_text,fin_ner
12
  nlp = en_core_web_sm.load()
13
  nltk.download('punkt')
14
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  #CUAD STARTS
18
  def load_questions():
 
20
  with open('questions.txt') as f:
21
  questions = f.readlines()
22
  return questions
 
 
 
 
 
 
 
23
  questions = load_questions()
 
 
 
24
 
25
+ def mainFun(query,file):
26
+ answer,answer_p=quad(query,file.name)
27
+ return answer_p,summarize_text(answer),fin_ner(answer),score_fincat(answer),get_sustainability(answer),fls(answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
 
30
+ iface = gr.Interface(fn=mainFun, inputs=[gr.Dropdown(choices=questions_short,label='SEARCH QUERY'),gr.inputs.File(label='TXT FILE')], title="CONBERT",description="CONTRACT REVIEW TOOL",article='Article', outputs=[gr.outputs.Textbox(label='Answer'),gr.outputs.Textbox(label='Summary'),gr.HighlightedText(label='NER'),gr.HighlightedText(label='CLAIM'),gr.HighlightedText(label='SUSTAINABILITY'),gr.HighlightedText(label='FLS')], allow_flagging="never")
31
 
32
 
33
  iface.launch()