Spaces: Runtime error
Runtime error
File size: 7,499 Bytes
0d3b770 097f973 09b648b 674de7c 6ca1b39 a14002e 674de7c d92143f e729578 d92143f 33aed6f 1f0641d d92143f 674de7c f4b6651 0b47caf f4b6651 547d8fd 37f1af6 0133ff1 04beb1a b221e2d 179044f 293a651 daf2603 ecd64c9 04beb1a b9e4a6a 6d812d3 daf2603 37f1af6 0b47caf f4b6651 b4b2d88 9414639 b4b2d88 386e89d fa28fed 386e89d b4b2d88 3efa602 b4b2d88 386e89d a0fa291 674de7c d92143f 5464361 fe19668 674de7c 31ac05c b4b2d88 386e89d 74cb3e0 2880c10 17bfcb1 a2d6739 c0c1816 33565d1 0973fd8 46a3d5c 2880c10 fa28fed 2880c10 0cb3636 17564aa c0c1816 093cfc0 0cb3636 fbed6bd 093cfc0 fbed6bd 093cfc0 80ad7b9 2880c10 c0c1816 2880c10 da91567 d43334a d92143f 9306c89 17c8342 d1dc4ab cbff511 094936a cbff511 d1dc4ab 16b77d9 0973fd8 d92143f 80dae17 efb637d d43334a 2880c10 d92143f 2880c10 f4b6651 a9a3f95 674de7c 22f968f 212803e d099849 6998231 212803e 8eb51bf 013a409 2e26803 013a409 2e26803 d099849 3dc7ae1 d099849 0d3b770 40da0db 329008a c47b184 40da0db 0d3b770 6457f6e 22f968f 40da0db 22f968f 33350ec 0d3b770 618bce3 0d3b770 a14002e d099849 40da0db 2dc833e 0d3b770 d9b19b2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 |
import math
import os
import subprocess
import sys

import gradio as gr
from textblob import TextBlob
# Download the corpora TextBlob relies on before any text is processed.
# The module is run with the interpreter executing this script (rather than
# whichever "python" happens to be on PATH) and without going through a shell.
# check=False keeps the step best-effort: a failed download does not abort
# start-up, matching the previous os.system call whose exit status was ignored.
subprocess.run([sys.executable, "-m", "textblob.download_corpora"], check=False)
# Full 62-symbol alphabet (digits, lowercase, uppercase); sort_doc reads
# control_json['control'] to build the sentence keys.
control_json = {
    'control': '0123456789' 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
    'char': '',
    'leng': 62,
}

# The same alphabet split in two: the first 50 symbols under 'control' and the
# remaining 12 uppercase letters under 'char'.
string_json = {
    'control': '0123456789' 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMN',
    'char': 'OPQRSTUVWXYZ',
    'leng': 50,
}

# Individual symbols of the 50-character alphabet, in order.
cont_list = [symbol for symbol in string_json['control']]
def get_sen_list(text):
    """Split *text* into sentences with TextBlob and return them as strings.

    Args:
        text: The document text to segment.

    Returns:
        A list with one ``str`` per sentence, in document order.
    """
    return [str(sentence) for sentence in TextBlob(text).sentences]
def proc_sen(sen_list, cnt):
    """Extract the nouns and noun phrases of one sentence.

    Args:
        sen_list: Sequence of sentence strings.
        cnt: Index into *sen_list* of the sentence to analyse.

    Returns:
        A dict with the keys ``sen_num`` (the index *cnt*), ``sentence`` (the
        sentence text), ``noun_phrase`` (the noun phrases reported by
        TextBlob, as plain strings) and ``nouns`` (the distinct tokens tagged
        ``NN``, in order of first appearance).
    """
    blob_n = TextBlob(sen_list[cnt])

    noun_box1 = []
    # parse() yields whitespace-separated "word/TAG/..." tokens. Splitting on
    # any whitespace and checking the field count keeps empty or untagged
    # tokens from raising IndexError.
    for token in blob_n.parse().split():
        fields = token.split("/")
        if len(fields) > 1 and fields[1] == "NN" and fields[0] not in noun_box1:
            noun_box1.append(fields[0])

    return {
        'sen_num': cnt,
        'sentence': str(sen_list[cnt]),
        # Plain str copies so the result only contains built-in types.
        'noun_phrase': [str(phrase) for phrase in blob_n.noun_phrases],
        'nouns': noun_box1,
    }
def proc_nouns(sen_list):
    """Build an inverted index from nouns / noun phrases to sentence keys.

    Args:
        sen_list: Mapping of sentence key -> sentence record, where each
            record provides the iterables ``'nouns'`` and ``'noun_phrase'``.

    Returns:
        A dict mapping each term (as ``str``) to the list of sentence keys in
        which it occurs, in iteration order of *sen_list*. A key is listed at
        most once per term even when the term appears both as a noun and as a
        noun phrase of the same sentence.

    Records that are missing a field or are not subscriptable are reported on
    stdout and skipped; terms collected from them before the failure are kept.
    """
    print("get nouns")
    noun_list = {}
    for sen_key, record in sen_list.items():
        try:
            for field in ('nouns', 'noun_phrase'):
                for term in record[field]:
                    # list.append returns None, so the list must be mutated in
                    # place rather than re-assigned from the append() result.
                    hits = noun_list.setdefault(str(term), [])
                    # Each sentence key is handled exactly once, so comparing
                    # with the last entry is enough to avoid duplicates.
                    if not hits or hits[-1] != sen_key:
                        hits.append(sen_key)
        except (KeyError, TypeError) as e:
            print(e)
    print("done nouns")
    return noun_list
def _encode_key(index, width, alphabet):
    """Return *index* in base ``len(alphabet)``, left-padded to *width* symbols."""
    base = len(alphabet)
    symbols = []
    while index > 0:
        index, remainder = divmod(index, base)
        symbols.append(alphabet[remainder])
    encoded = "".join(reversed(symbols))
    # Pad with the zero symbol; always emit at least one symbol.
    return encoded.rjust(max(width, 1), alphabet[0])


def sort_doc(text, steps_in=0, control=None):
    """Split a document into sentences and index them by a fixed-width key.

    Every sentence is analysed with ``proc_sen`` and stored under a key that
    is its position in the document written in base 62, using the alphabet in
    ``control_json['control']`` and zero-padded to a common width. The
    ``'sentence'`` field of each record is replaced by a context window: up to
    three sentences before the sentence, the sentence itself and up to two
    sentences after it.

    Args:
        text: Document text; converted with ``str()``.
        steps_in: Key width in symbols. When falsy, the smallest width whose
            key space exceeds the number of sentences is used. Keys grow
            beyond this width rather than collide if it is too small.
        control: Accepted for backward compatibility; not used.

    Returns:
        A tuple ``(json_out, [noun_list])``: the key -> sentence-record
        mapping, and the noun index produced by ``proc_nouns`` wrapped in a
        single-element list.
    """
    text = str(text)
    sen_list = get_sen_list(text)
    sen_obj_box = [proc_sen(sen_list, idx) for idx in range(len(sen_list))]
    key_cnt = len(sen_obj_box)
    print(key_cnt)

    # Bound unconditionally so an explicit steps_in no longer hits a NameError.
    control_char = control_json['control']
    char_len = len(control_char)

    if steps_in:
        steps = int(steps_in)
    else:
        # Smallest width with char_len ** steps > key_cnt (0 for no sentences).
        steps = 0
        capacity = 1
        while capacity <= key_cnt:
            steps += 1
            capacity *= char_len

    json_out = {}
    for i, sen_obj_out in enumerate(sen_obj_box):
        window_start = max(i - 3, 0)
        sen_obj_out['sentence'] = sen_list[window_start:i + 3]
        json_out[_encode_key(i, steps, control_char)] = sen_obj_out
    if key_cnt:
        print("DONE")

    noun_list = proc_nouns(json_out)
    return json_out, [noun_list]
def _query_nouns(query):
    """Return the tokens of *query* that TextBlob tags as ``NN``."""
    found = []
    # split() copes with newlines between parsed sentences, and the length
    # check keeps empty or untagged tokens from raising IndexError.
    for token in TextBlob(query or "").parse().split():
        fields = token.split("/")
        if len(fields) > 1 and fields[1] == "NN":
            found.append(fields[0])
    return found


def _unwrap_noun_index(nouns):
    """Return the noun index dict from *nouns*, which may be wrapped in a list."""
    if isinstance(nouns, dict):
        return nouns
    if isinstance(nouns, (list, tuple)):
        for item in nouns:
            if isinstance(item, dict):
                return item
    return {}


def _match_query_nouns(query, nouns):
    """Map each noun of *query* to the sentence keys of the index terms containing it."""
    index = _unwrap_noun_index(nouns)
    query_nouns = _query_nouns(query)
    noun_box = {}
    for term, sentence_keys in index.items():
        for noun in query_nouns:
            if noun in term:
                # A term may carry None instead of a list; treat it as empty.
                noun_box.setdefault(str(noun), []).extend(sentence_keys or [])
    return noun_box


def find_query(query, sen, nouns):
    """Look up the sentences whose indexed terms contain a noun of *query*.

    Args:
        query: Free-text search query.
        sen: Mapping of sentence key -> record with ``'sen_num'`` and
            ``'sentence'``.
        nouns: Mapping of term -> list of sentence keys, either as a dict or
            as a list containing that dict.

    Returns:
        A tuple ``(noun_box, sen_box)``: the query noun -> sentence keys
        mapping, and a list of ``{'sen_num', 'sentence'}`` dicts for every
        matched key. Keys that are absent from *sen* are skipped.
    """
    noun_box = _match_query_nouns(query, nouns)
    sen = sen or {}
    sen_box = []
    for sentence_keys in noun_box.values():
        for vals in sentence_keys:
            record = sen.get(vals)
            if record is None:
                continue
            sen_box.append({'sen_num': record['sen_num'], 'sentence': record['sentence']})
    return noun_box, sen_box


def find_query_sen(query, sen, nouns):
    """Return the matched sentences for *query* as newline-terminated text.

    Takes the same arguments as ``find_query``; the ``'sentence'`` value of
    every matched record is formatted on its own line. Keys that are absent
    from *sen* are skipped.
    """
    noun_box = _match_query_nouns(query, nouns)
    sen = sen or {}
    lines = []
    for sentence_keys in noun_box.values():
        for vals in sentence_keys:
            print (f'SENETENCE VALS ::: {vals}')
            record = sen.get(vals)
            if record is None:
                continue
            lines.append(f"{record['sentence']}\n")
    return "".join(lines)
# Gradio UI: paste a document, index it, then search the index by noun.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# without indentation — confirm which widgets belong to each Row/Column.
with gr.Blocks() as app:
    inp = gr.Textbox(label="Paste Text",lines=10)
    btn = gr.Button("Load Document")
    with gr.Row():
        query=gr.Textbox(label="Search query")
        search_btn=gr.Button("Search")
        search_btn2=gr.Button("Search2")
    out_box=gr.Textbox(label="Results")
    sen_box=gr.Textbox(label="Sentences")
    with gr.Row():
        with gr.Column(scale=2):
            sen=gr.JSON(label="Sentences")
        with gr.Column(scale=1):
            nouns=gr.JSON(label="Nouns")
    # "Search" writes find_query's two return values to the two text boxes.
    search_btn.click(find_query,[query,sen,nouns],[out_box,sen_box])
    # "Search2" writes find_query_sen's single string to the results box.
    search_btn2.click(find_query_sen,[query,sen,nouns],[out_box])
    # "Load Document" runs sort_doc on the pasted text and fills both JSON panes.
    btn.click(sort_doc,[inp],[sen,nouns])
app.launch()
|