Spaces:
Sleeping
Sleeping
File size: 6,000 Bytes
0d3b770 097f973 09b648b 674de7c 6ca1b39 a14002e 674de7c d92143f e729578 d92143f 33aed6f 1f0641d d92143f 674de7c f4b6651 0b47caf f4b6651 547d8fd 37f1af6 293a651 daf2603 ecd64c9 daf2603 37f1af6 0b47caf f4b6651 b4b2d88 9414639 b4b2d88 386e89d b4b2d88 3efa602 b4b2d88 386e89d a0fa291 674de7c d92143f 5464361 fe19668 674de7c 31ac05c b4b2d88 386e89d 74cb3e0 2880c10 17bfcb1 a2d6739 c0c1816 33565d1 0973fd8 46a3d5c 2880c10 d92143f 2880c10 0cb3636 17564aa c0c1816 093cfc0 0cb3636 093cfc0 80ad7b9 2880c10 c0c1816 2880c10 da91567 d43334a d92143f 9306c89 17c8342 0973fd8 d92143f 8cd7d30 675b1b5 8cd7d30 d43334a 2880c10 d92143f 2880c10 f4b6651 674de7c 22f968f 212803e d099849 6998231 212803e 8eb51bf 013a409 2e26803 013a409 2e26803 d099849 3dc7ae1 d099849 0d3b770 6457f6e 22f968f 3dc7ae1 0d3b770 618bce3 0d3b770 a14002e d099849 0973fd8 0d3b770 d9b19b2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
import math
import os
import subprocess
import sys

import gradio as gr
from textblob import TextBlob
# Fetch the corpora TextBlob needs at start-up. The download module is run
# with the interpreter that is executing this file (rather than whatever
# "python" resolves to on PATH) and without going through a shell.
# check=False keeps this best-effort: a failed download does not stop the app.
subprocess.run([sys.executable, "-m", "textblob.download_corpora"], check=False)

# Alphabet used to build sentence keys: 'control' holds the symbols that are
# used, 'char' the symbols held back, and 'leng' the length of 'control'.
control_json = {
    'control': '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
    'char': '',
    'leng': 62,
}
# Same alphabet with the last twelve upper-case letters moved to 'char'.
string_json = {
    'control': '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN',
    'char': 'OPQRSTUVWXYZ',
    'leng': 50,
}
# The reduced alphabet as a list of single characters.
cont_list = list(string_json['control'])
def get_sen_list(text):
    """Return the sentences TextBlob finds in *text* as a list of plain strings."""
    return [str(sentence) for sentence in TextBlob(text).sentences]
def proc_sen(sen_list, cnt):
    """Build the record describing sentence number *cnt* of *sen_list*.

    Args:
        sen_list: Sequence of sentence strings.
        cnt: Index of the sentence to analyse.

    Returns:
        A dict with the keys ``'sen_num'`` (the index), ``'sentence'`` (the
        sentence text), ``'noun_phrase'`` (a copy of TextBlob's noun phrases)
        and ``'nouns'`` (the distinct words tagged exactly ``"NN"``, in
        first-seen order).
    """
    sentence = sen_list[cnt]
    blob_n = TextBlob(sentence)
    noun_p = blob_n.noun_phrases

    noun_box1 = []
    # Each parsed token is expected to look like "word/TAG/...". Splitting on
    # any whitespace (not only a single space) drops empty tokens and keeps
    # tokens on either side of a line break apart.
    for token in blob_n.parse().split():
        fields = token.split("/")
        if len(fields) < 2:
            # No tag present; nothing to classify.
            continue
        word, tag = fields[0], fields[1]
        if tag == "NN" and word not in noun_box1:
            noun_box1.append(word)

    return {
        'sen_num': cnt,
        'sentence': str(sentence),
        'noun_phrase': noun_p.copy(),
        'nouns': noun_box1,
    }
def proc_nouns(sen_list):
    """Build a reverse index from noun / noun phrase to sentence keys.

    Args:
        sen_list: Mapping of sentence key -> sentence record. Each record is
            read through its ``'nouns'`` and ``'noun_phrase'`` entries, both
            iterables of terms.

    Returns:
        A dict mapping ``str(term)`` to the list of sentence keys whose record
        contains that term, in the iteration order of *sen_list*. A sentence
        key is listed at most once per term, even when the term appears both
        as a noun and as a noun phrase of the same sentence.

    A record that lacks one of the two entries (or is not subscriptable /
    iterable) is reported on stdout and processing moves on to the next
    record; terms indexed before the failure are kept.
    """
    print("get nouns")
    noun_list = {}
    for sen_key, sen_obj in sen_list.items():
        try:
            for field in ('nouns', 'noun_phrase'):
                for term in sen_obj[field]:
                    hits = noun_list.setdefault(str(term), [])
                    # Keys are visited one sentence at a time, so a repeat of
                    # the current sentence can only be the last element.
                    if not hits or hits[-1] != sen_key:
                        hits.append(sen_key)
        except (KeyError, TypeError) as e:
            print(e)
    print("done nouns")
    return noun_list
def _index_to_key(index, width, alphabet):
    """Write *index* in base ``len(alphabet)`` using exactly *width* symbols."""
    base = len(alphabet)
    symbols = []
    for _ in range(width):
        index, digit = divmod(index, base)
        symbols.append(alphabet[digit])
    # Digits were produced least-significant first.
    return "".join(reversed(symbols))


def sort_doc(text, steps_in=0, control=None):
    """Split *text* into sentences and index them under fixed-width keys.

    Every sentence is analysed with ``proc_sen`` and stored under a key that
    is its position written with the symbols of ``control_json['control']``
    (position 0 -> ``"0"``, 61 -> ``"Z"``, 62 -> ``"10"`` ...), left-padded so
    that all keys of one document have the same length.

    Args:
        text: Document text; converted with ``str()`` before processing.
        steps_in: Optional minimum key width. When falsy the width is derived
            from the number of sentences. A value too small to give every
            sentence a distinct key is raised to the required width.
        control: Accepted for interface compatibility; it does not influence
            the result.

    Returns:
        A tuple ``(json_out, noun_list)``: the mapping of key -> sentence
        record, and the noun index built from it by ``proc_nouns``. Both are
        empty dicts when no sentence is found.
    """
    text = str(text)
    sen_list = get_sen_list(text)
    sen_obj_box = [proc_sen(sen_list, idx) for idx in range(len(sen_list))]
    key_cnt = len(sen_obj_box)
    print(key_cnt)

    alphabet = control_json['control']
    base = len(alphabet)

    # Width = number of symbols needed so that base**width > key_cnt.
    # The comparison is deliberately on the count, not on the highest index:
    # a document of exactly `base` sentences already gets two-symbol keys.
    steps = 0
    capacity = 1
    while capacity <= key_cnt:
        steps += 1
        capacity *= base
    if steps_in:
        # Never go below the width that keeps the keys unique.
        steps = max(steps, int(steps_in))

    json_out = {
        _index_to_key(idx, steps, alphabet): sen_obj
        for idx, sen_obj in enumerate(sen_obj_box)
    }

    noun_list = {}
    if json_out:
        print("DONE")
        noun_list = proc_nouns(json_out)
    return json_out, noun_list
def find_query(query, sen, nouns):
    """Look up the sentences that mention the nouns of *query*.

    Args:
        query: Free-text search string.
        sen: Mapping of sentence key -> sentence record; the records are read
            through their ``'sen_num'`` and ``'sentence'`` entries.
        nouns: Mapping of indexed noun / noun phrase -> list of sentence keys.

    Returns:
        A tuple ``(noun_box, sen_box)``. ``noun_box`` maps each query word
        tagged ``"NN"`` that occurs as a substring of at least one indexed
        term to the sentence keys of those terms. ``sen_box`` is a list of
        ``{'sen_num': ..., 'sentence': ...}`` dicts for the matched sentences.
        Each sentence key appears once per query noun and each sentence once
        in ``sen_box``. ``({}, [])`` is returned when the query is empty or no
        document has been loaded yet.
    """
    if not query or not sen or not nouns:
        return {}, []

    # Collect the distinct query words tagged "NN". Each parsed token is
    # expected to look like "word/TAG/..."; tokens without a tag are ignored.
    noun_list = []
    for token in TextBlob(query).parse().split():
        fields = token.split("/")
        if len(fields) > 1 and fields[1] == "NN" and fields[0] not in noun_list:
            noun_list.append(fields[0])

    # A query noun matches every indexed term that contains it as a substring.
    noun_box = {}
    for indexed_term, sen_keys in nouns.items():
        for nl in noun_list:
            if nl not in indexed_term:
                continue
            hits = noun_box.setdefault(str(nl), [])
            for sen_key in sen_keys:
                if sen_key not in hits:
                    hits.append(sen_key)

    # Resolve keys to sentences, listing every sentence only once.
    sen_box = []
    seen = set()
    for hits in noun_box.values():
        for sen_key in hits:
            if sen_key in seen:
                continue
            seen.add(sen_key)
            record = sen[sen_key]
            sen_box.append({
                'sen_num': record['sen_num'],
                'sentence': record['sentence'],
            })
    return noun_box, sen_box
# Gradio front end: paste a document, load it into the sentence / noun
# indexes, then search those indexes with a free-text query.
# NOTE(review): the nesting of the layout containers below is reconstructed;
# confirm which components belong inside each Row.
with gr.Blocks() as app:
    # Document input and the button that triggers indexing.
    inp = gr.Textbox(label="Paste Text",lines=10)
    btn = gr.Button("Load Document")
    with gr.Row():
        query=gr.Textbox(label="Search query")
        search_btn=gr.Button("Search")
    # Outputs of find_query: matched nouns and matched sentences.
    out_box=gr.Textbox(label="Results")
    sen_box=gr.JSON(label="Sentences")
    with gr.Row():
        with gr.Column(scale=2):
            # Output of sort_doc: sentence records by key.
            sen=gr.JSON(label="Sentences")
        with gr.Column(scale=1):
            # Output of sort_doc: noun -> sentence keys index.
            nouns=gr.JSON(label="Nouns")
    # Search reads the query plus both indexes currently shown in the JSON views.
    search_btn.click(find_query,[query,sen,nouns],[out_box,sen_box])
    # Loading a document fills the two JSON views used by the search.
    btn.click(sort_doc,[inp],[sen,nouns])
app.launch()
|