File size: 5,488 Bytes
0d3b770
 
097f973
09b648b
 
674de7c
6ca1b39
a14002e
674de7c
d92143f
 
 
 
 
 
 
 
 
 
e729578
d92143f
 
 
 
 
1f0641d
d92143f
674de7c
f4b6651
0b47caf
f4b6651
547d8fd
37f1af6
 
293a651
 
 
daf2603
ecd64c9
daf2603
 
 
 
37f1af6
 
 
0b47caf
 
f4b6651
 
6457f6e
9414639
674de7c
bab69d9
 
5beb9ef
674de7c
 
 
0c71419
d92143f
a0fa291
674de7c
d92143f
 
 
 
07d605f
674de7c
ab5426b
d92143f
 
5464361
fe19668
674de7c
 
31ac05c
2da7365
d92143f
2da7365
d08dc84
2da7365
 
 
 
 
 
 
610f120
b24c3fb
d5ef006
74cb3e0
2880c10
17bfcb1
a2d6739
c0c1816
33565d1
46a3d5c
2880c10
d92143f
2880c10
0cb3636
17564aa
c0c1816
 
093cfc0
0cb3636
093cfc0
 
 
 
80ad7b9
2880c10
c0c1816
2880c10
da91567
d43334a
d92143f
9306c89
17c8342
d92143f
 
 
8cd7d30
675b1b5
8cd7d30
 
d43334a
2880c10
d92143f
2880c10
 
f4b6651
 
674de7c
 
22f968f
 
212803e
 
d099849
6998231
212803e
 
 
8eb51bf
 
013a409
 
 
2e26803
 
013a409
2e26803
 
 
d099849
 
3dc7ae1
d099849
0d3b770
 
6457f6e
22f968f
 
 
 
 
3dc7ae1
0d3b770
618bce3
 
0d3b770
a14002e
d099849
bab69d9
0d3b770
d9b19b2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
from textblob import TextBlob
import gradio as gr
import math
import os 
# Fetch the TextBlob corpora every time this module is executed; the call
# blocks until the download command finishes and its exit status is ignored.
# NOTE(review): this goes through a shell with whatever "python" is on PATH —
# confirm that is the interpreter actually running this app.
os.system("python -m textblob.download_corpora")
# 62-symbol alphabet (digits, lowercase, uppercase); 'leng' is its length.
control_json={'control':'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ','char':'','leng':62}
# Same alphabet split into a 50-symbol 'control' part and the remaining
# 12 symbols under 'char'.
string_json={'control':'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN','char':'OPQRSTUVWXYZ','leng':50}
# Individual symbols of string_json['control'].
# NOTE(review): not referenced by any function in the visible part of the file.
cont_list=list(string_json['control'])

def get_sen_list(text):
    """Split *text* into sentences and return them as plain strings.

    Args:
        text: The raw document text handed to ``TextBlob``.

    Returns:
        A list with ``str(sentence)`` for every entry of the blob's
        ``sentences``, in the order TextBlob yields them.
    """
    return [str(piece) for piece in TextBlob(text).sentences]
    
def proc_sen(sen_list,cnt):
    """Build the index record for a single sentence.

    Args:
        sen_list: Sequence of sentence strings.
        cnt: Position in ``sen_list`` of the sentence to process; it is also
            stored in the record as ``'sen_num'``.

    Returns:
        A dict with the keys ``'sen_num'``, ``'sentence'`` (the sentence as a
        string), ``'noun_phrase'`` (a copy of the blob's ``noun_phrases``) and
        ``'nouns'`` (every token whose tag field is exactly ``"NN"``).
    """
    blob_n = TextBlob(sen_list[cnt])
    noun_p = blob_n.noun_phrases
    nouns = []
    # The parse result is consumed as whitespace-separated tokens whose
    # fields are joined by "/", the second field being the tag.
    # Splitting on any whitespace (not just " ") keeps tokens separated by a
    # newline apart, and the length guard skips tokens without a tag field
    # instead of raising IndexError (e.g. when the parse result is empty).
    for token in blob_n.parse().split():
        fields = token.split("/")
        if len(fields) > 1 and fields[1] == "NN":
            nouns.append(fields[0])
    return {
        'sen_num': cnt,
        'sentence': str(sen_list[cnt]),
        'noun_phrase': noun_p.copy(),
        'nouns': nouns,
    }

def proc_nouns(sen_list):
    """Build an inverted index from noun / noun phrase to sentence keys.

    Args:
        sen_list: Mapping of sentence key -> sentence record. Each record is
            expected to provide the iterables ``'nouns'`` and
            ``'noun_phrase'``.

    Returns:
        A dict mapping ``str(term)`` to the list of sentence keys in which
        the term occurs. Keys appear in encounter order, and a sentence key
        is listed once per occurrence of the term (nouns first, then noun
        phrases).
    """
    print("get nouns")
    noun_list = {}
    for sen_key, record in sen_list.items():
        try:
            for field in ('nouns', 'noun_phrase'):
                for term in record[field]:
                    # Look up and store under the same str() key so repeated
                    # terms accumulate instead of overwriting each other, and
                    # without rebuilding a key list for every term.
                    noun_list.setdefault(str(term), []).append(sen_key)
        except (KeyError, TypeError) as err:
            # Best effort: a malformed record keeps whatever was indexed
            # before the failure and the remaining records are still handled.
            print(f"skipping rest of sentence {sen_key!r}: {err!r}")
    print("done nouns")

    return noun_list

def _key_width(count, base):
    """Return how many base-*base* digits are needed to write *count*."""
    width = 0
    while count >= 1:
        width += 1
        count //= base
    return width


def _sentence_key(index, width, alphabet):
    """Encode *index* as a zero-padded, *width*-symbol numeral over *alphabet*."""
    base = len(alphabet)
    symbols = []
    for _ in range(width):
        index, remainder = divmod(index, base)
        symbols.append(alphabet[remainder])
    # Digits were produced least-significant first.
    return "".join(reversed(symbols))


def get_nouns(text,steps=1):
    """Index a document sentence by sentence.

    The text is split with ``get_sen_list``; each sentence is turned into a
    record by ``proc_sen`` and stored under a fixed-width key written with
    the symbols of ``control_json['control']``. The key is simply the
    sentence's position encoded in that alphabet, so keys are unique and
    sort in document order. The key width is the number of digits needed to
    write the total sentence count.

    Keys are computed directly from the position rather than by an
    incrementing counter with manual carries; the manual carry wrapped around
    to the last digit once the leading digit reached its maximum, which
    produced colliding keys for very large documents.

    Args:
        text: The document text; it is converted with ``str()`` first.
        steps: Accepted for backward compatibility only. The key width is
            always derived from the number of sentences.

    Returns:
        A tuple ``(json_out, noun_list)``: the mapping of key -> sentence
        record, and the inverted noun index built from it by ``proc_nouns``.
    """
    text = str(text)
    alphabet = control_json['control']

    sen_list = get_sen_list(text)
    width = _key_width(len(sen_list), len(alphabet))

    json_out = {}
    for position in range(len(sen_list)):
        json_out[_sentence_key(position, width, alphabet)] = proc_sen(sen_list, position)

    noun_list = proc_nouns(json_out)
    return json_out, noun_list


def find_query(query,sen,nouns):
    """Look up the sentences that mention the nouns of a search query.

    Args:
        query: The search text. Tokens of its parse result whose tag field is
            exactly ``"NN"`` are used as search terms.
        sen: Mapping of sentence key -> record containing ``'sen_num'`` and
            ``'sentence'``.
        nouns: Mapping of indexed noun / noun phrase -> list of sentence keys.

    Returns:
        A tuple ``(noun_box, sen_box)``. ``noun_box`` maps each query noun
        that occurs as a substring of at least one indexed term to the
        concatenated sentence keys of all such terms. ``sen_box`` lists a
        ``{'sen_num', 'sentence'}`` dict for every key in ``noun_box``, in
        the same order. Both are empty when the query is blank or when no
        document has been indexed yet.
    """
    # Nothing to search: blank query, or the index components are still
    # empty / None because no document was loaded.
    if not sen or not nouns or not str(query or "").strip():
        return {}, []

    blob_f = TextBlob(query)
    query_nouns = []
    for token in blob_f.parse().split():
        fields = token.split("/")
        # Tokens without a tag field are skipped rather than raising.
        if len(fields) > 1 and fields[1] == "NN":
            query_nouns.append(fields[0])

    noun_box = {}
    for indexed_term, sen_keys in nouns.items():
        for wanted in query_nouns:
            # Substring match, so a query noun also hits the noun phrases
            # that contain it.
            if wanted in indexed_term:
                noun_box.setdefault(str(wanted), []).extend(sen_keys)

    sen_box = []
    for sen_keys in noun_box.values():
        for key in sen_keys:
            record = sen[key]
            sen_box.append({'sen_num': record['sen_num'], 'sentence': record['sentence']})
    return noun_box, sen_box

# Gradio UI: paste a document, index it, then search the index by noun.
with gr.Blocks() as app:
    # Document input and the button that triggers indexing.
    inp = gr.Textbox(label="Paste Text",lines=10)
    btn = gr.Button("Load Document")
    # Search controls, laid out side by side.
    with gr.Row():
        query=gr.Textbox(label="Search query")
        search_btn=gr.Button("Search")
    # Search results: first and second return value of find_query.
    out_box=gr.Textbox(label="Results")
    sen_box=gr.JSON(label="Sentences")
    # Index produced by get_nouns; these two components are also fed back
    # into find_query as inputs, so they act as the stored index.
    with gr.Row():
        with gr.Column(scale=2):
            sen=gr.JSON(label="Sentences")
        with gr.Column(scale=1):
            nouns=gr.JSON(label="Nouns")
    # Search: (query, sen, nouns) -> (out_box, sen_box).
    search_btn.click(find_query,[query,sen,nouns],[out_box,sen_box])
    # Load Document: inp -> (sen, nouns).
    btn.click(get_nouns,[inp],[sen,nouns])
# Starts the app as soon as this module is executed.
app.launch()