File size: 7,499 Bytes
0d3b770
 
097f973
09b648b
 
674de7c
6ca1b39
a14002e
674de7c
d92143f
 
 
 
 
 
 
 
 
 
e729578
d92143f
 
 
 
33aed6f
 
1f0641d
d92143f
674de7c
f4b6651
0b47caf
f4b6651
547d8fd
37f1af6
0133ff1
 
04beb1a
b221e2d
179044f
293a651
daf2603
ecd64c9
04beb1a
b9e4a6a
6d812d3
daf2603
 
37f1af6
 
 
0b47caf
f4b6651
 
b4b2d88
9414639
b4b2d88
386e89d
 
 
 
 
 
 
 
 
fa28fed
386e89d
 
 
 
 
 
 
 
b4b2d88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3efa602
b4b2d88
 
 
 
 
386e89d
 
a0fa291
674de7c
d92143f
5464361
fe19668
674de7c
 
31ac05c
b4b2d88
386e89d
74cb3e0
2880c10
17bfcb1
a2d6739
c0c1816
33565d1
0973fd8
 
46a3d5c
2880c10
fa28fed
2880c10
0cb3636
17564aa
c0c1816
 
093cfc0
0cb3636
fbed6bd
093cfc0
fbed6bd
093cfc0
80ad7b9
2880c10
c0c1816
2880c10
da91567
d43334a
d92143f
9306c89
17c8342
d1dc4ab
cbff511
 
 
 
094936a
cbff511
 
d1dc4ab
 
16b77d9
0973fd8
 
d92143f
80dae17
efb637d
 
 
 
d43334a
2880c10
d92143f
2880c10
 
f4b6651
a9a3f95
674de7c
 
22f968f
 
212803e
 
d099849
6998231
212803e
 
 
8eb51bf
 
013a409
 
 
2e26803
 
013a409
2e26803
 
 
d099849
 
3dc7ae1
d099849
0d3b770
40da0db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329008a
c47b184
40da0db
 
 
 
 
0d3b770
6457f6e
22f968f
 
 
 
40da0db
22f968f
33350ec
0d3b770
618bce3
 
0d3b770
a14002e
d099849
40da0db
2dc833e
0d3b770
d9b19b2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
from textblob import TextBlob
import gradio as gr
import math
import os 
# Run TextBlob's corpora downloader through the shell every time this module
# is loaded; the command's exit status is discarded.
os.system("python -m textblob.download_corpora")
# 62-symbol alphabet (digits, lowercase, uppercase); 'leng' records its length.
# sort_doc reads this table to build sentence keys.
control_json={'control':'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ','char':'','leng':62}
# The same alphabet split after its first 50 symbols: 'control' keeps those 50
# and 'char' holds the remaining 12 uppercase letters.
string_json={'control':'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN','char':'OPQRSTUVWXYZ','leng':50}
# Individual characters of string_json['control'] as a list.
cont_list=list(string_json['control'])

def get_sen_list(text):
    """Split *text* into sentences with TextBlob and return them as plain strings."""
    return [str(sentence) for sentence in TextBlob(text).sentences]
    
def proc_sen(sen_list,cnt):
    """Build the record for sentence number *cnt* of *sen_list*.

    Args:
        sen_list: Sequence of sentence strings.
        cnt: Index of the sentence to analyse.

    Returns:
        dict with keys:
            'sen_num'     -- *cnt*
            'sentence'    -- the sentence text as ``str``
            'noun_phrase' -- TextBlob's noun phrases for the sentence (list)
            'nouns'       -- unique words tagged exactly "NN", in order of
                             first appearance
    """
    sentence = str(sen_list[cnt])
    blob_n = TextBlob(sentence)
    noun_p = blob_n.noun_phrases
    noun_box1 = []
    # parse() yields "word/TAG/..." tokens. Splitting on any whitespace (not
    # just a single space) keeps tokens intact across the newlines that
    # separate parsed sentences and yields nothing for an empty parse.
    for token in blob_n.parse().split():
        parts = token.split("/")
        # Tokens without a tag field are skipped instead of raising IndexError.
        if len(parts) > 1 and parts[1] == "NN" and parts[0] not in noun_box1:
            noun_box1.append(parts[0])
    return {
        'sen_num': cnt,
        'sentence': sentence,
        'noun_phrase': list(noun_p),
        'nouns': noun_box1,
    }

def proc_nouns(sen_list):
    """Build an inverted index from nouns / noun phrases to sentence keys.

    Args:
        sen_list: Mapping of sentence key -> sentence record. Each record is
            read through its 'nouns' and 'noun_phrase' entries.

    Returns:
        dict mapping every noun and noun phrase (as ``str``) to the list of
        sentence keys it occurs in, in first-seen order and without
        duplicates.
    """
    print("get nouns")
    noun_list = {}
    for key in sen_list:
        try:
            for field in ('nouns', 'noun_phrase'):
                for term in sen_list[key][field]:
                    # list.append returns None, so the list must be mutated in
                    # place rather than re-assigned from append's result.
                    refs = noun_list.setdefault(str(term), [])
                    # A term can be both a noun and a noun phrase of the same
                    # sentence; record the sentence only once.
                    if key not in refs:
                        refs.append(key)
        except (KeyError, TypeError) as e:
            # Best effort: report a malformed record and carry on with the rest.
            print(e)
    print("done nouns")
    return noun_list

def _index_to_key(index, width, alphabet):
    """Return *index* written in base ``len(alphabet)``, zero-padded to *width* symbols."""
    base = len(alphabet)
    symbols = []
    for _ in range(width):
        index, remainder = divmod(index, base)
        symbols.append(alphabet[remainder])
    return "".join(reversed(symbols))


def sort_doc(text,steps_in=0,control=None):
    """Split *text* into sentences, key each one, and index its nouns.

    Every sentence gets a fixed-width key: its position written with the
    symbols of ``control_json['control']`` (base 62), most significant
    symbol first.

    Args:
        text: Document text; coerced with ``str``.
        steps_in: Optional minimum key width. When falsy, the width is the
            number of base-62 digits needed to write the sentence count.
            A width too small for the document is widened so keys never
            collide.
        control: Accepted for backward compatibility; it has no effect on
            the result.

    Returns:
        A 2-tuple ``(json_out, [noun_list])``:
            json_out  -- dict of key -> sentence record from ``proc_sen``,
                         whose 'sentence' entry is replaced by a context
                         window: up to three preceding sentences, the
                         sentence itself and up to two following ones.
            noun_list -- the noun index from ``proc_nouns(json_out)``,
                         wrapped in a one-element list.
    """
    text = str(text)

    sen_list = get_sen_list(text)
    sen_obj_box = [proc_sen(sen_list, idx) for idx in range(len(sen_list))]
    key_cnt = len(sen_obj_box)
    print(key_cnt)

    # Defined unconditionally: key generation needs the alphabet whether or
    # not the caller supplied steps_in.
    control_char = control_json['control']
    char_len = len(control_char)

    # Number of base-`char_len` digits in the sentence count itself.
    needed = 0
    remaining = key_cnt
    while remaining >= 1:
        needed += 1
        remaining //= char_len

    steps = max(int(steps_in), needed) if steps_in else needed

    json_out = {}
    for i, sen_obj_out in enumerate(sen_obj_box):
        # Context window around sentence i, clamped at the document start.
        sen_obj_out['sentence'] = sen_list[max(0, i - 3):i + 3]
        json_out[_index_to_key(i, steps, control_char)] = sen_obj_out
    if sen_obj_box:
        print("DONE")

    noun_list = proc_nouns(json_out)
    return json_out, [noun_list]


def _query_noun_hits(query, nouns):
    """Map each "NN"-tagged word of *query* to the sentence keys of matching index entries."""
    # sort_doc returns the noun index wrapped in a one-element list, and the
    # value is None before any document is loaded; normalise all shapes to
    # one plain dict.
    if isinstance(nouns, dict):
        index = nouns
    elif isinstance(nouns, (list, tuple)):
        index = {}
        for part in nouns:
            if isinstance(part, dict):
                index.update(part)
    else:
        index = {}

    query_nouns = []
    # Whitespace split + length guard: an empty query or a token without a
    # tag field must not raise IndexError.
    for token in TextBlob(str(query or "")).parse().split():
        parts = token.split("/")
        if len(parts) > 1 and parts[1] == "NN" and parts[0] not in query_nouns:
            query_nouns.append(parts[0])

    noun_box = {}
    for indexed_term, keys in index.items():
        if not keys:
            # Entries may be None/empty; nothing to collect from them.
            continue
        for q_noun in query_nouns:
            # Substring match so a query noun also hits noun phrases containing it.
            if q_noun in indexed_term:
                noun_box.setdefault(str(q_noun), []).extend(keys)
    return noun_box


def find_query(query,sen,nouns):
    """Look up the nouns of *query* in the noun index.

    Args:
        query: Search text.
        sen: Mapping of sentence key -> sentence record (as produced by
            ``sort_doc``).
        nouns: Noun index: a dict of term -> sentence keys, or a list
            wrapping such a dict.

    Returns:
        ``(noun_box, sen_box)`` where ``noun_box`` maps each matched query
        noun to the sentence keys found, and ``sen_box`` lists
        ``{'sen_num', 'sentence'}`` dicts for those keys.
    """
    sen = sen or {}
    noun_box = _query_noun_hits(query, nouns)
    sen_box = []
    for keys in noun_box.values():
        for key in keys:
            if key not in sen:
                continue
            record = sen[key]
            sen_box.append({'sen_num':record['sen_num'],'sentence':record['sentence']})
    return noun_box,sen_box


def find_query_sen(query,sen,nouns):
    """Return the 'sentence' entries matching the nouns of *query* as text.

    Takes the same arguments as ``find_query``. Each matched record's
    'sentence' value is formatted on its own line.

    Returns:
        str: one line per matched sentence key; empty when nothing matches.
    """
    sen = sen or {}
    noun_box = _query_noun_hits(query, nouns)
    lines = []
    for keys in noun_box.values():
        for vals in keys:
            print (f'SENETENCE VALS ::: {vals}')
            if vals not in sen:
                continue
            lines.append(f"{sen[vals]['sentence']}\n")
    return "".join(lines)


# Gradio UI: paste a document, load it, then search it by noun.
with gr.Blocks() as app:
    # Document input and the button that triggers sort_doc on it.
    inp = gr.Textbox(label="Paste Text",lines=10)
    btn = gr.Button("Load Document")
    # Query box with two search buttons, one per lookup function.
    with gr.Row():
        query=gr.Textbox(label="Search query")
        search_btn=gr.Button("Search")
        search_btn2=gr.Button("Search2")
    # Text outputs for the search handlers.
    out_box=gr.Textbox(label="Results")
    sen_box=gr.Textbox(label="Sentences")
    # JSON components holding the loaded document; they are outputs of
    # sort_doc and inputs to both search handlers.
    with gr.Row():
        with gr.Column(scale=2):
            sen=gr.JSON(label="Sentences")
        with gr.Column(scale=1):
            nouns=gr.JSON(label="Nouns")
    # "Search" fills both text boxes from find_query's two return values.
    search_btn.click(find_query,[query,sen,nouns],[out_box,sen_box])
    # "Search2" writes find_query_sen's single string into the results box.
    search_btn2.click(find_query_sen,[query,sen,nouns],[out_box])
    # sort_doc returns its noun index wrapped in a one-element list, so that
    # list is the value stored in `nouns`.
    btn.click(sort_doc,[inp],[sen,nouns])
# Start the web server as soon as this module is executed.
app.launch()