# TextBlob + Gradio document indexer (Hugging Face Space)
# Third-party dependencies: TextBlob for NLP, Gradio for the web UI.
from textblob import TextBlob
import gradio as gr
import math
import os

# Download the NLTK corpora TextBlob needs (tokenizers, taggers) at startup.
# NOTE(review): the shell string is a fixed literal, so os.system is safe
# here; prefer subprocess.run([...]) if this command ever takes user input.
os.system("python -m textblob.download_corpora")

# Base-62 alphabet (digits + lower + upper) used to build the fixed-length
# positional keys that sort_doc assigns to sentences; 'leng' is its length.
control_json={'control':'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ','char':'','leng':62}
# 50/12 split of the same alphabet; only its 'control' part feeds cont_list.
string_json={'control':'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN','char':'OPQRSTUVWXYZ','leng':50}
cont_list=list(string_json['control'])
def get_sen_list(text):
    """Split *text* into sentences and return them as a list of plain strings."""
    parsed = TextBlob(text)
    return [str(sentence) for sentence in parsed.sentences]
def proc_sen(sen_list,cnt):
    """Analyze sentence *cnt* of *sen_list*.

    Returns a dict with the sentence's index ('sen_num'), its text
    ('sentence'), its TextBlob noun phrases ('noun_phrase'), and the unique
    words tagged exactly "NN" in parse order ('nouns').
    """
    blob = TextBlob(sen_list[cnt])
    nouns = []
    # parse() yields space-separated "word/POS/chunk/..." tokens; collect
    # each word whose POS tag is "NN", keeping first-seen order, no dupes.
    for token in blob.parse().split(" "):
        parts = token.split("/")
        word, tag = parts[0], parts[1]
        if tag == "NN" and word not in nouns:
            nouns.append(word)
    return {
        'sen_num': cnt,
        'sentence': str(sen_list[cnt]),
        'noun_phrase': blob.noun_phrases.copy(),
        'nouns': nouns,
    }
def proc_nouns(sen_list):
    """Invert the sentence index: map each noun/noun-phrase to the sentence
    keys it occurs in.

    Parameters
    ----------
    sen_list : dict
        Sentence key -> sentence object carrying 'nouns' and 'noun_phrase'
        lists (the shape produced by proc_sen via sort_doc).

    Returns
    -------
    dict[str, list]
        Stringified term -> list of sentence keys, in encounter order.
    """
    print("get nouns")
    noun_list = {}
    for sen_key, sen_obj in sen_list.items():
        try:
            # Plain nouns and noun phrases are indexed identically, so handle
            # them in one pass.  Coerce every term to str so the membership
            # test and the stored key agree (the original tested the raw term
            # but stored str(term), which could split entries for non-str
            # terms).
            for term in list(sen_obj['nouns']) + list(sen_obj['noun_phrase']):
                noun_list.setdefault(str(term), []).append(sen_key)
        except Exception as e:
            # Best-effort: skip malformed sentence objects but report them.
            print(e)
    print("done nouns")
    return noun_list
def sort_doc(text,steps_in=0,control=None):
    """Index the sentences of *text* under fixed-length base-62 keys.

    Splits *text* into sentences, runs proc_sen on each, then assigns every
    sentence a key of `steps` characters drawn from the base-62 alphabet in
    control_json, advanced odometer-style per sentence.

    Parameters
    ----------
    text : str
        Document to index (coerced to str).
    steps_in : int
        If non-zero, use this key length instead of deriving it from the
        sentence count.
    control : optional
        If truthy, also computes a control/value split of the alphabet
        (currently computed but not used further below).

    Returns
    -------
    (dict, dict)
        json_out: base-62 key -> sentence object; noun_list: the inverted
        noun index built by proc_nouns over json_out.
    """
    text=str(text)
    ########################################
    sen_list=get_sen_list(text)
    key_cnt=len(sen_list)
    sen_obj_box=[]
    for ii,ee in enumerate(sen_list):
        sen_obj=proc_sen(sen_list,ii)
        sen_obj_box.append(sen_obj)
    sen_list=sen_obj_box
    ######################################
    key_cnt=len(sen_obj_box)
    print(key_cnt)
    # FIX: control_char/char_len were previously defined only inside the
    # `if not steps_in:` branch, so calling sort_doc with steps_in>0 raised
    # NameError when they were used in the key loop below.  Hoisting them
    # here is value-identical for the steps_in==0 path.
    control_char=list(control_json['control'])
    char_len=len(control_char)
    if not steps_in:
        # Derive the key length: how many base-62 digits are needed to
        # enumerate key_cnt sentences (repeated division until < 1).
        n_cnt=0
        nx=key_cnt
        while True:
            if nx >= 1:
                n_cnt+=1
                nx = nx/char_len
            else:
                print("#######")
                print(n_cnt)
                print(nx)
                print("#######")
                steps=n_cnt
                break
    if steps_in:
        steps=steps_in
    if control:
        # Split the alphabet into a (leng - steps) prefix and a steps-long
        # suffix.  NOTE(review): these values are never read afterwards.
        control_len=control_json['leng']-steps
        control_char_val=list(control_json['control'][:control_len])
        control_val=list(control_json['control'][control_len:])
        val_len=len(control_val)
    json_out={}
    noun_list={}
    step_list=[]
    big_cnt=0
    cnt=0
    go=True
    # step_cont_box is the odometer: one digit (index into control_char)
    # per key position, all starting at 0.
    step_cont_box=[]
    for ii in range(steps):
        print(ii)
        step_cont_box.append(0)
    #print (step_cont_box)
    mod=0
    pos=len(step_cont_box)-1
    if go:
        for i, ea in enumerate(sen_list):
            if go:
                if cnt > char_len-1:
                    #print(step_cont_box)
                    # Carry: zero each maxed digit and bump the digit to its
                    # left once, then restart the low digit counter at 1.
                    go1=True
                    for ii,ev in enumerate(step_cont_box):
                        if go:
                            if ev >= char_len-1:
                                step_cont_box[ii]=0
                                if go1==True:
                                    step_cont_box[ii-1]=step_cont_box[ii-1]+1
                                    go1=False
                                cnt=1
                else:
                    step_cont_box[pos]=cnt
                    cnt+=1
                print(step_cont_box)
                # Render the odometer digits as a base-62 key string.
                out_js=""
                for iii,j in enumerate(step_cont_box):
                    print(j)
                    out_js = out_js+control_char[j]
                sen_obj=sen_obj_box[i]
                #sen_obj=proc_sen(sen_list,i)
                #json_out[out_js]={'nouns':ea}
                json_out[out_js]=sen_obj
                print ("#################")
                print (out_js)
                print (sen_obj)
                print ("#################")
                big_cnt+=1
                if big_cnt==key_cnt:
                    print("DONE")
                    go=False
    noun_list=proc_nouns(json_out)
    return json_out, noun_list
def find_query(query,sen,nouns):
    """Search the inverted noun index for the nouns of *query*.

    Returns (noun_box, sen_box): noun_box maps each query noun to the
    sentence keys of every indexed noun containing it as a substring;
    sen_box lists those sentences as {'sen_num', 'sentence'} dicts.
    """
    parsed = TextBlob(query)
    # Words tagged "NN" in the query, in order of appearance.
    query_nouns = []
    for token in parsed.parse().split(" "):
        parts = token.split("/")
        if parts[1] == "NN":
            query_nouns.append(parts[0])
    noun_box = {}
    for indexed_noun in nouns.keys():
        for qn in query_nouns:
            # Substring match: a query noun hits any indexed noun that
            # contains it.
            if qn in indexed_noun:
                noun_box.setdefault(str(qn), []).extend(nouns[indexed_noun])
    sen_box = [
        {'sen_num': sen[key]['sen_num'], 'sentence': sen[key]['sentence']}
        for hits in noun_box.values()
        for key in hits
    ]
    return noun_box, sen_box
# Gradio UI: load a document, index it with sort_doc, then search it with
# find_query.
with gr.Blocks() as app:
    # Document input and the button that triggers indexing.
    inp = gr.Textbox(label="Paste Text",lines=10)
    btn = gr.Button("Load Document")
    with gr.Row():
        query=gr.Textbox(label="Search query")
        search_btn=gr.Button("Search")
    # Search results: matched nouns (out_box) and matched sentences (sen_box).
    out_box=gr.Textbox(label="Results")
    # NOTE(review): label "Sentences" is reused by `sen` below — consider
    # distinct labels to avoid confusion in the UI.
    sen_box=gr.JSON(label="Sentences")
    with gr.Row():
        with gr.Column(scale=2):
            # Indexed sentences keyed by base-62 key (sort_doc output 1).
            sen=gr.JSON(label="Sentences")
        with gr.Column(scale=1):
            # Inverted noun index (sort_doc output 2).
            nouns=gr.JSON(label="Nouns")
    # Search reads the query plus both JSON stores produced by indexing.
    search_btn.click(find_query,[query,sen,nouns],[out_box,sen_box])
    # Indexing populates the two JSON stores from the pasted text.
    btn.click(sort_doc,[inp],[sen,nouns])
app.launch()