sort_document / app.py
Omnibus's picture
Update app.py
04beb1a verified
raw
history blame
7.5 kB
from textblob import TextBlob
import gradio as gr
import math
import os
# Run TextBlob's `download_corpora` module through the shell every time this
# file is executed/imported. The exit status of the command is not checked.
os.system("python -m textblob.download_corpora")
# 62-symbol alphabet (digits, lowercase, uppercase). sort_doc reads 'control'
# as the digit set for sentence keys and 'leng' as its length.
control_json={'control':'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ','char':'','leng':62}
# The same alphabet split in two: the first 50 symbols in 'control' and the
# remaining 12 in 'char'. In the visible source only cont_list reads it.
string_json={'control':'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN','char':'OPQRSTUVWXYZ','leng':50}
# List form of string_json['control']; not referenced elsewhere in the
# visible source.
cont_list=list(string_json['control'])
def get_sen_list(text):
    """Split ``text`` into sentences and return them as a list of ``str``.

    Sentence boundaries are whatever ``TextBlob(text).sentences`` yields;
    each sentence object is converted to a plain string.
    """
    return [str(sentence) for sentence in TextBlob(text).sentences]
def proc_sen(sen_list,cnt):
    """Build the noun record for the sentence at index ``cnt`` of ``sen_list``.

    Args:
        sen_list: sequence of sentence strings.
        cnt: index into ``sen_list`` of the sentence to analyse.

    Returns:
        A dict with the keys
          'sen_num'     -- ``cnt`` unchanged,
          'sentence'    -- the sentence as ``str``,
          'noun_phrase' -- a copy of TextBlob's ``noun_phrases`` for it,
          'nouns'       -- unique tokens whose tag field is exactly "NN",
                           in first-seen order.

    Raises:
        IndexError: if ``cnt`` is out of range for ``sen_list``.
    """
    sentence = sen_list[cnt]
    blob_n = TextBlob(sentence)
    noun_p = blob_n.noun_phrases
    noun_box1 = []
    # Each parsed token is treated as slash-delimited fields with the tag in
    # the second field. Split on any whitespace (not only " ") so a token
    # that follows a line break in the parser output is not glued to the
    # previous one, and skip tokens that carry no tag field at all instead
    # of raising IndexError on them.
    for token in blob_n.parse().split():
        fields = token.split("/")
        if len(fields) < 2 or fields[1] != "NN":
            continue
        if fields[0] not in noun_box1:
            noun_box1.append(fields[0])
    return {
        'sen_num': cnt,
        'sentence': str(sentence),
        'noun_phrase': noun_p.copy(),
        'nouns': noun_box1,
    }
def proc_nouns(sen_list):
    """Invert per-sentence noun records into a noun -> sentence-keys index.

    Args:
        sen_list: mapping of sentence key -> record, where each record has a
            'nouns' iterable and a 'noun_phrase' iterable.

    Returns:
        A dict mapping every noun and noun phrase (as ``str``) to the list of
        sentence keys it occurs in, in iteration order of ``sen_list``. A
        sentence key is listed at most once per term, even when the term
        appears both as a noun and as a noun phrase of the same sentence.

    Records that lack either field (or are not subscriptable) are reported
    on stdout and skipped; processing continues with the next record.
    """
    print("get nouns")
    noun_list = {}
    for sen_key, record in sen_list.items():
        try:
            nouns = record['nouns']
            phrases = record['noun_phrase']
        except (KeyError, TypeError) as e:
            # Best effort: a malformed record must not abort the whole index.
            print(e)
            continue
        print(nouns)
        for group in (nouns, phrases):
            for term in group:
                # list.append() returns None, so the bucket must be mutated
                # in place rather than re-assigned from append()'s result.
                hits = noun_list.setdefault(str(term), [])
                if sen_key not in hits:
                    hits.append(sen_key)
    print("done nouns")
    return noun_list
def _key_width(count, base):
    """Return the number of base-``base`` digits in ``count`` (0 for count <= 0)."""
    width = 0
    while count > 0:
        width += 1
        count //= base
    return width


def _encode_key(index, width, alphabet):
    """Encode ``index`` as a ``width``-symbol, zero-padded string over ``alphabet``."""
    base = len(alphabet)
    symbols = []
    for _ in range(width):
        index, remainder = divmod(index, base)
        symbols.append(alphabet[remainder])
    # Digits were produced least-significant first.
    return "".join(reversed(symbols))


def sort_doc(text,steps_in=0,control=None):
    """Split ``text`` into sentences and index them under fixed-width keys.

    Every sentence gets a record from ``proc_sen`` and a key that is its
    position written in base ``len(control_json['control'])`` using that
    alphabet, most significant symbol first and zero-padded, so keys sort in
    document order. The record's 'sentence' field is replaced by a window of
    surrounding sentences: up to three before the sentence, the sentence
    itself, and up to two after it.

    Args:
        text: document text; converted with ``str()``.
        steps_in: optional key width. When falsy the width is the number of
            digits needed to write the sentence count. A value smaller than
            that is raised to the needed width so keys can never collide.
        control: accepted for backward compatibility; it has no effect on
            the result.

    Returns:
        A tuple ``(json_out, [noun_list])`` where ``json_out`` maps key ->
        sentence record and ``noun_list`` is the ``proc_nouns`` index built
        from ``json_out`` (an empty dict when there are no sentences). The
        noun index is wrapped in a one-element list.
    """
    text = str(text)
    sen_list = get_sen_list(text)
    sen_obj_box = [proc_sen(sen_list, idx) for idx in range(len(sen_list))]
    key_cnt = len(sen_obj_box)
    print(key_cnt)

    # The alphabet is resolved unconditionally: it is needed to build keys
    # whether or not the caller supplied an explicit width.
    alphabet = control_json['control']
    needed = _key_width(key_cnt, len(alphabet))
    steps = max(int(steps_in), needed) if steps_in else needed

    json_out = {}
    for i, sen_obj in enumerate(sen_obj_box):
        # Context window around sentence i, clamped at the document start.
        sen_obj['sentence'] = sen_list[max(i - 3, 0):i + 3]
        json_out[_encode_key(i, steps, alphabet)] = sen_obj

    noun_list = {}
    if json_out:
        print("DONE")
        noun_list = proc_nouns(json_out)
    return json_out, [noun_list]
def find_query(query,sen,nouns):
    """Look up the sentences that mention the nouns found in ``query``.

    Args:
        query: search text; tokens whose tag field is exactly "NN" are used
            as search terms.
        sen: mapping of sentence key -> record with 'sen_num' and 'sentence'.
            ``None`` is treated as empty.
        nouns: mapping of indexed noun -> list of sentence keys. A list
            wrapping that mapping (the shape ``sort_doc`` returns) and
            ``None`` are accepted as well.

    Returns:
        A tuple ``(noun_box, sen_box)``: ``noun_box`` maps each query noun
        that is a substring of at least one indexed noun to the collected
        sentence keys; ``sen_box`` is a list of
        ``{'sen_num': ..., 'sentence': ...}`` dicts for those keys, in the
        same order. Keys missing from ``sen`` are skipped.
    """
    # sort_doc hands the noun index over as [index]; unwrap it so the lookup
    # below works on the mapping itself.
    if isinstance(nouns, list):
        nouns = nouns[0] if nouns else {}
    nouns = nouns or {}
    sen = sen or {}

    query_nouns = []
    for token in TextBlob(query).parse().split():
        fields = token.split("/")
        # Tokens without a tag field are ignored instead of raising.
        if len(fields) > 1 and fields[1] == "NN":
            query_nouns.append(fields[0])

    noun_box = {}
    for doc_noun, sen_keys in nouns.items():
        for q_noun in query_nouns:
            if q_noun in doc_noun:
                # `or []` tolerates an index entry that holds None.
                noun_box.setdefault(str(q_noun), []).extend(sen_keys or [])

    sen_box = []
    for sen_keys in noun_box.values():
        for key in sen_keys:
            record = sen.get(key)
            if record is None:
                continue
            sen_box.append({'sen_num': record['sen_num'], 'sentence': record['sentence']})
    return noun_box, sen_box
def find_query_sen(query,sen,nouns):
    """Return the text of the sentences that mention the nouns in ``query``.

    Args:
        query: search text; tokens whose tag field is exactly "NN" are used
            as search terms.
        sen: mapping of sentence key -> record with a 'sentence' field.
            ``None`` is treated as empty.
        nouns: mapping of indexed noun -> list of sentence keys. A list
            wrapping that mapping (the shape ``sort_doc`` returns) and
            ``None`` are accepted as well.

    Returns:
        One string holding the formatted 'sentence' field of every matching
        record, each followed by a newline; empty when nothing matches.
        Keys missing from ``sen`` are skipped.
    """
    # sort_doc hands the noun index over as [index]; unwrap it so the lookup
    # below works on the mapping itself.
    if isinstance(nouns, list):
        nouns = nouns[0] if nouns else {}
    nouns = nouns or {}
    sen = sen or {}

    query_nouns = []
    for token in TextBlob(query).parse().split():
        fields = token.split("/")
        # Tokens without a tag field are ignored instead of raising.
        if len(fields) > 1 and fields[1] == "NN":
            query_nouns.append(fields[0])

    noun_box = {}
    for doc_noun, sen_keys in nouns.items():
        for q_noun in query_nouns:
            if q_noun in doc_noun:
                # `or []` tolerates an index entry that holds None.
                noun_box.setdefault(str(q_noun), []).extend(sen_keys or [])

    pieces = []
    for sen_keys in noun_box.values():
        for vals in sen_keys:
            print (f'SENETENCE VALS ::: {vals}')
            record = sen.get(vals)
            if record is None:
                continue
            pieces.append(f"{record['sentence']}\n")
    return "".join(pieces)
# Gradio UI: paste a document, load it into the sentence/noun JSON views,
# then search those views with a free-text query.
# NOTE(review): the nesting below is reconstructed from statement order —
# confirm the intended layout against the running app.
with gr.Blocks() as app:
    inp = gr.Textbox(label="Paste Text",lines=10)
    btn = gr.Button("Load Document")
    with gr.Row():
        query=gr.Textbox(label="Search query")
        search_btn=gr.Button("Search")
        search_btn2=gr.Button("Search2")
    out_box=gr.Textbox(label="Results")
    sen_box=gr.Textbox(label="Sentences")
    with gr.Row():
        with gr.Column(scale=2):
            sen=gr.JSON(label="Sentences")
        with gr.Column(scale=1):
            nouns=gr.JSON(label="Nouns")
    # "Search" shows find_query's two return values in the two text boxes.
    search_btn.click(find_query,[query,sen,nouns],[out_box,sen_box])
    # "Search2" shows find_query_sen's single string in the results box.
    search_btn2.click(find_query_sen,[query,sen,nouns],[out_box])
    # "Load Document" runs sort_doc on the pasted text; its two return values
    # fill the two JSON views that the search handlers read back as inputs.
    btn.click(sort_doc,[inp],[sen,nouns])
# Start the Gradio server as soon as the module is executed.
app.launch()