Spaces:
Sleeping
Sleeping
File size: 5,240 Bytes
0d3b770 097f973 09b648b 674de7c 6ca1b39 a14002e 674de7c d92143f e729578 d92143f 1f0641d d92143f 674de7c f4b6651 0b47caf f4b6651 547d8fd 37f1af6 293a651 37f1af6 0b47caf f4b6651 6457f6e 9414639 674de7c bab69d9 5beb9ef 674de7c 0c71419 d92143f a0fa291 674de7c d92143f 07d605f 674de7c ab5426b d92143f 5464361 fe19668 674de7c 31ac05c 2da7365 d92143f 2da7365 d08dc84 2da7365 610f120 b24c3fb d5ef006 74cb3e0 2880c10 17bfcb1 a2d6739 c0c1816 33565d1 46a3d5c 2880c10 d92143f 2880c10 0cb3636 17564aa c0c1816 093cfc0 0cb3636 093cfc0 80ad7b9 2880c10 c0c1816 2880c10 da91567 d43334a d92143f 9306c89 17c8342 d92143f 8cd7d30 675b1b5 8cd7d30 d43334a 2880c10 d92143f 2880c10 f4b6651 674de7c 22f968f 212803e d099849 6998231 212803e 8eb51bf 013a409 2e26803 013a409 2e26803 d099849 3dc7ae1 d099849 0d3b770 6457f6e 22f968f 3dc7ae1 0d3b770 618bce3 0d3b770 a14002e d099849 bab69d9 0d3b770 d9b19b2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
from textblob import TextBlob
import gradio as gr
import math
import os
# Run TextBlob's corpus downloader in a subshell; this executes every time
# the module is loaded.
os.system("python -m textblob.download_corpora")

# Alphabet used to build sentence keys: 10 digits + 26 lowercase + 26
# uppercase letters. 'leng' is the length of 'control'.
control_json = {
    'control': '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
    'char': '',
    'leng': 62,
}

# Shorter 50-symbol alphabet; the 12 remaining uppercase letters sit in
# 'char'. Not referenced by the functions visible in this file.
string_json = {
    'control': '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN',
    'char': 'OPQRSTUVWXYZ',
    'leng': 50,
}

# The 50-symbol alphabet as a list of single characters.
cont_list = [*string_json['control']]
def get_sen_list(text):
    """Split ``text`` into sentences with TextBlob.

    Returns a list holding the ``str()`` form of each sentence, in the order
    TextBlob yields them.
    """
    return [str(piece) for piece in TextBlob(text).sentences]
def proc_sen(sen_list, cnt):
    """Analyse one sentence and return its record.

    Args:
        sen_list: Sequence of sentence strings.
        cnt: Index of the sentence to analyse; also stored as ``sen_num``.

    Returns:
        A dict with keys ``'sen_num'`` (the index), ``'sentence'`` (the text),
        ``'noun_phrase'`` (a copy of TextBlob's noun phrases) and ``'nouns'``
        (the words whose tag is exactly ``"NN"``).
    """
    sentence = sen_list[cnt]
    blob_n = TextBlob(sentence)
    noun_box1 = []
    # Each parsed token is read as "word/TAG/...". Split on any whitespace
    # (not just a single space) so a line break in the parser output cannot
    # fuse two tokens together, and so an empty result yields no tokens.
    for token in blob_n.parse().split():
        fields = token.split("/")
        # Skip tokens that carry no tag instead of raising IndexError.
        if len(fields) > 1 and fields[1] == "NN":
            noun_box1.append(fields[0])
    return {
        'sen_num': cnt,
        'sentence': str(sentence),
        'noun_phrase': blob_n.noun_phrases.copy(),
        'nouns': noun_box1,
    }
def proc_nouns(sen_list):
    """Build an inverted index from noun to the sentence keys containing it.

    Args:
        sen_list: Mapping of sentence key -> sentence record; each record is
            expected to expose an iterable under ``'nouns'``.

    Returns:
        A dict mapping ``str(noun)`` to the list of sentence keys in which it
        occurs (a key is repeated if the noun occurs more than once in that
        sentence). Records without a usable ``'nouns'`` entry are reported on
        stdout and skipped.
    """
    print("get nouns")
    noun_list = {}
    for sen_key, sen_obj in sen_list.items():
        try:
            for noun in sen_obj['nouns']:
                # Look up and store under the same (stringified) key so
                # non-string nouns accumulate instead of overwriting.
                noun_list.setdefault(str(noun), []).append(sen_key)
        except (KeyError, TypeError) as e:
            # Best effort: a malformed record must not abort the whole index.
            print(e)
    print("done nouns")
    return noun_list
def _sentence_key(index, width, alphabet):
    """Return ``index`` in base ``len(alphabet)``, zero-padded to ``width`` symbols."""
    base = len(alphabet)
    symbols = []
    for _ in range(width):
        index, remainder = divmod(index, base)
        symbols.append(alphabet[remainder])
    # Digits were produced least-significant first.
    return "".join(reversed(symbols))


def get_nouns(text, steps=1):
    """Split ``text`` into sentences, key each one, and index their nouns.

    Every sentence gets a fixed-width key: its position written with the
    symbols of ``control_json['control']`` (e.g. with a width of 2, sentence
    0 is ``"00"`` and sentence 63 is ``"11"``). The key is computed directly
    from the sentence index, so keys stay unique for any number of sentences.

    Args:
        text: Document text; converted with ``str()`` before processing.
        steps: Accepted for backward compatibility only. The key width is
            always derived from the number of sentences.

    Returns:
        A tuple ``(json_out, noun_list)``: ``json_out`` maps each key to the
        record produced by ``proc_sen``; ``noun_list`` is the noun index
        produced by ``proc_nouns`` over ``json_out``.
    """
    text = str(text)
    alphabet = control_json['control']
    base = len(alphabet)

    sen_list = get_sen_list(text)
    key_cnt = len(sen_list)

    # Key width: how many times the sentence count can be divided by the
    # alphabet size while staying >= 1 (0 for an empty document).
    width = 0
    remaining = key_cnt
    while remaining >= 1:
        width += 1
        remaining //= base

    json_out = {}
    for i in range(key_cnt):
        json_out[_sentence_key(i, width, alphabet)] = proc_sen(sen_list, i)

    noun_list = proc_nouns(json_out)
    return json_out, noun_list
def find_query(query, sen, nouns):
    """Find the sentences whose nouns match the nouns of a search query.

    Args:
        query: Search text; its words tagged ``"NN"`` are used as search terms.
        sen: Mapping of sentence key -> record with ``'sen_num'`` and
            ``'sentence'`` entries.
        nouns: Mapping of noun -> list of sentence keys containing it.

    Returns:
        A tuple ``(noun_box, sen_box)``: ``noun_box`` maps each query noun to
        the sentence keys of every indexed noun that contains it as a
        substring; ``sen_box`` lists ``{'sen_num', 'sentence'}`` dicts for
        those keys, in the same order. Both are empty when the query is empty
        or no document data is supplied.
    """
    # Nothing to search with / in: return empty results instead of raising.
    if not query or not sen or not nouns:
        return {}, []

    query_nouns = []
    # Tokens are read as "word/TAG/..."; untagged tokens are skipped.
    for token in TextBlob(query).parse().split():
        fields = token.split("/")
        if len(fields) > 1 and fields[1] == "NN":
            query_nouns.append(fields[0])

    noun_box = {}
    for doc_noun, sentence_keys in nouns.items():
        for q_noun in query_nouns:
            # Substring match: the query noun may be part of a longer noun.
            if q_noun in doc_noun:
                noun_box.setdefault(str(q_noun), []).extend(sentence_keys)

    sen_box = []
    for sentence_keys in noun_box.values():
        for key in sentence_keys:
            record = sen[key]
            sen_box.append({'sen_num': record['sen_num'], 'sentence': record['sentence']})
    return noun_box, sen_box
# Gradio UI: paste a document, load it into sentence/noun tables, then search.
# NOTE(review): the nesting of components under Row/Column below follows the
# conventional layout for this statement order -- confirm against the
# intended page layout.
with gr.Blocks() as app:
    # Document input and the button that triggers get_nouns.
    inp = gr.Textbox(label="Paste Text",lines=10)
    btn = gr.Button("Load Document")
    with gr.Row():
        query=gr.Textbox(label="Search query")
        search_btn=gr.Button("Search")
    # Search outputs: find_query's first return value goes to out_box, its
    # second to sen_box.
    out_box=gr.Textbox(label="Results")
    sen_box=gr.JSON(label="Sentences")
    with gr.Row():
        with gr.Column(scale=2):
            # Receives get_nouns' first return value; also fed back into
            # find_query as its `sen` argument.
            sen=gr.JSON(label="Sentences")
        with gr.Column(scale=1):
            # Receives get_nouns' second return value; also fed back into
            # find_query as its `nouns` argument.
            nouns=gr.JSON(label="Nouns")
    search_btn.click(find_query,[query,sen,nouns],[out_box,sen_box])
    btn.click(get_nouns,[inp],[sen,nouns])
app.launch()
|