miracFence committed on
Commit
c3cb17c
·
1 Parent(s): fbe6e07

Upload initial test

Browse files
Files changed (1) hide show
  1. app.py +174 -0
app.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """ABSTRACTGEN_ES FINAL.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1XdfeMcdDbRuRmOGGiOmkiCP9Yih5JXyF
8
+
9
+ # installs
10
+ """
11
+
12
# NOTE: The commands below are notebook shell escapes carried over from the
# Colab export. A leading "!" is not Python syntax, so leaving them active
# makes this file fail to parse when it is executed as a plain script.
# They are kept, commented out, as a record of the environment setup; run
# them in a shell (or declare the packages in the deployment's dependency
# file) before starting the app.
#
# !pip install gpt_2_simple
# !pip install tensorflow-estimator==1.15.1
# !pip install gradio
# !pip install huggingface_hub
# !pip install easynmt
# !pip install -U sentence-transformers
#
# !curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
# !sudo apt-get install git-lfs
#
# !git lfs install
# !git clone https://huggingface.co/franz96521/AbstractGeneratorES
24
+
25
+ # Commented out IPython magic to ensure Python compatibility.
26
+ # %cd '/content/AbstractGeneratorES'
27
+
28
+ """# Init"""
29
+
30
+ import gpt_2_simple as gpt2
31
+ import os
32
+ import tensorflow as tf
33
+ import pandas as pd
34
+ import re
35
+
36
# Size tag of the base GPT-2 model that the fine-tuned checkpoint builds on.
model_name = "124M"

# Download the base model only when its folder under ./models is missing.
_base_model_dir = os.path.join("models", model_name)
if not os.path.isdir(_base_model_dir):
    print(f"Downloading {model_name} model...")
    gpt2.download_gpt2(model_name=model_name)
40
+
41
# Root folder of the model assets, relative to the working directory.
path = 'AbstractGenerator/'
# Folder passed to gpt_2_simple as the checkpoint directory.
checkpoint_dir = f'{path}weights/'
# NOTE(review): the 'TrainigData' spelling is reproduced as-is because it is
# a runtime path; presumably it matches the folder name on disk - confirm
# before renaming.
data_path = f'{path}TrainigData/'

# Per-language data file names and their full paths.
file_name_en = 'en'
file_path_en = f'{data_path}{file_name_en}'

file_name_es = 'es'
file_path_es = f'{data_path}{file_name_es}'

# Start/end markers; `sufix` is used as the truncation marker at generation.
prefix = '<|startoftext|>'
sufix = '<|endoftext|>'
56
+
57
+ import gradio as gr
58
+ import random
59
+ from easynmt import EasyNMT
60
+
61
+ from sentence_transformers import SentenceTransformer, util
62
+
63
def generateAbstract(text):
    """Generate an abstract for ``text`` with the fine-tuned GPT-2 checkpoint.

    Every call resets the default TensorFlow graph, opens a new session,
    loads the ``run1`` checkpoint from ``checkpoint_dir`` and asks for a
    single sample. The prompt is ``text`` followed by a line reading
    ``ABSTRACT``.

    Args:
        text: Source text; it is converted with ``str()`` before the prompt
            is built.

    Returns:
        The first (and only) sample returned by ``gpt2.generate``, which is
        called with ``truncate=sufix``.
    """
    tf.compat.v1.reset_default_graph()
    sess = gpt2.start_tf_sess()
    try:
        gpt2.load_gpt2(sess, checkpoint_dir=checkpoint_dir, run_name='run1')
        samples = gpt2.generate(
            sess,
            prefix=str(text) + "\nABSTRACT",
            return_as_list=True,
            truncate=sufix,
            checkpoint_dir=checkpoint_dir,
            nsamples=1,
        )
    finally:
        # A new session is opened per request; close it so sessions (and the
        # model weights they hold) do not pile up over the app's lifetime.
        sess.close()
    return samples[0]
69
def removeAbstract(text):
    """Split ``text`` at its introduction heading.

    The heading is the earliest occurrence of either ``"Introducción"`` or
    ``"INTRODUCCIÓN"``. Everything before it is treated as the abstract and
    everything from the heading onward as the body.

    Args:
        text: Full document text.

    Returns:
        A ``(abstract, body)`` tuple. When neither heading is present the
        result is ``("", text)``, so callers can always unpack two values
        instead of receiving ``None``.
    """
    headings = ("Introducción", "INTRODUCCIÓN")
    found = [pos for pos in (text.find(h) for h in headings) if pos != -1]
    if not found:
        # No heading: there is no abstract to separate from the body.
        return ("", text)

    # Cut at whichever heading comes first in the document.
    cut = min(found)
    return (text[:cut], text[cut:])
77
+
78
def _similarity_report(body, abstract_original, generated_text, score):
    """Format the report shown in the UI for the similarity tab."""
    sections = [
        "TEXTO SIN ABSTRACT",
        f"{body}",
        "ABSTRACT ORIGINAL",
        f"{abstract_original}",
        "ABSTRACT GENERADO",
        f"{generated_text}",
        f"SIMILARIDAD DE ABSTRACT: {score}%",
    ]
    # Sections are separated by one blank line; the report ends with a newline.
    return "\n\n".join(sections) + "\n"


def generated_similarity(type_of_input, cn_text):
    """Generate an abstract and compare it with the document's own abstract.

    English input is first translated to Spanish. The text is then split
    into original abstract and body with ``removeAbstract``, a new abstract
    is generated from the body, and both abstracts are embedded with a
    sentence-transformer model to compute their cosine similarity.

    Args:
        type_of_input: ``"English"`` or ``"Spanish"``.
        cn_text: Full document text, including its abstract.

    Returns:
        A report string with the body, both abstracts and the similarity as
        a percentage, or ``None`` when ``type_of_input`` is neither of the
        two supported values.
    """
    if type_of_input not in ("English", "Spanish"):
        return None

    tf.compat.v1.reset_default_graph()

    if type_of_input == "English":
        translator = EasyNMT('opus-mt')
        cn_text = translator.translate(cn_text, target_lang='es')

    split = removeAbstract(cn_text)
    if split is None:
        # No introduction heading was found: treat the whole text as body
        # rather than failing on tuple unpacking.
        split = ("", cn_text)
    abstract_original, body = split

    generated_text = generateAbstract(body)

    encoder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    embedding_original = encoder.encode(abstract_original, convert_to_tensor=True)
    embedding_generated = encoder.encode(generated_text, convert_to_tensor=True)

    # Named `similarity` so the local no longer shadows this function's name.
    similarity = util.pytorch_cos_sim(embedding_original, embedding_generated)
    score = float(round(similarity.item() * 100, 3))

    return _similarity_report(body, abstract_original, generated_text, score)
132
def generated_abstract(type_of_input, cn_text):
    """Generate an abstract for a text that has no abstract of its own.

    English input is translated to Spanish before generation; Spanish input
    is used as given.

    Args:
        type_of_input: ``"English"`` or ``"Spanish"``.
        cn_text: Document text without an abstract.

    Returns:
        A report string with the (possibly translated) text and the
        generated abstract, or ``None`` for any other ``type_of_input``.
    """
    if type_of_input not in ("English", "Spanish"):
        return None

    tf.compat.v1.reset_default_graph()

    if type_of_input == "English":
        translator = EasyNMT('opus-mt')
        cn_text = translator.translate(cn_text, target_lang='es')

    summary = generateAbstract(cn_text)

    # Each section is followed by a blank line, including the last one.
    pieces = ["TEXTO SIN ABSTRACT", f"{cn_text}", "ABSTRACT GENERADO", f"{summary}"]
    return "".join(piece + "\n\n" for piece in pieces)
151
+
152
# Two-tab Gradio UI: one tab compares a generated abstract with the
# document's own abstract, the other only generates an abstract.
block = gr.Blocks(theme="dark")

with block:
    # Components are created with the top-level gr.Radio / gr.Textbox classes;
    # the gr.inputs.* / gr.outputs.* namespaces are the deprecated
    # Interface-era API.
    with gr.Tab("Full text and text similarity"):
        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
        with gr.Row():
            cn_text = gr.Textbox(placeholder="Full text", lines=7)
        with gr.Row():
            cn_results1 = gr.Textbox(label="Abstract generado")
        cn_run = gr.Button("Run")
        cn_run.click(generated_similarity, inputs=[type_of_input, cn_text], outputs=[cn_results1])

    with gr.Tab("Only text with no abstract"):
        # The previous prompt text was a leftover from an unrelated app.
        gr.Markdown("Paste the text (without its abstract) to summarize:")
        type_of_input = gr.Radio(["English", "Spanish"], label="Input Language")
        with gr.Row():
            cn_text = gr.Textbox(placeholder="Text without abstract", lines=7)
        with gr.Row():
            cn_results1 = gr.Textbox(label="Abstract generado")
        cn_run = gr.Button("Run")
        cn_run.click(generated_abstract, inputs=[type_of_input, cn_text], outputs=cn_results1)

block.launch(debug=True)