Madhana committed on
Commit
deaec20
·
1 Parent(s): 0adcfd3

Upload disaster_help_ner_app.py

Browse files
Files changed (1) hide show
  1. disaster_help_ner_app.py +225 -0
disaster_help_ner_app.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """disaster_help_ner_production.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1_wqnv01NeDdOLp2X1KT6WfvNgVwwTu5a
8
+
9
+ Note: This is a continuation of [this](https://colab.research.google.com/drive/1HlZLIVcAtWVeitZjWV3LclrH3gBwuymp?usp=sharing) notebook:
10
+
11
+ # Imports
12
+ """
13
+
14
+ '''
15
+ !pip install transformers
16
+
17
+ !pip install https://huggingface.co/Madhana/en_pipeline/resolve/main/en_pipeline-any-py3-none-any.whl
18
+
19
+ !pip install spacy
20
+ !pip install spacy-transformers
21
+
22
+ !pip install gradio
23
+ '''
24
+
25
+ import json
26
+ import requests
27
+ import configparser
28
+
29
+ import spacy
30
+ import spacy_transformers
31
+ from spacy import displacy
32
+ from spacy.tokens import Span
33
+ from transformers import pipeline
34
+ from spacy.matcher import PhraseMatcher
35
+
36
+ import csv
37
+ import numpy as np
38
+ import pandas as pd
39
+
40
+ import geopy
41
+ import gradio as gr
42
+ from geopy.geocoders import Nominatim
43
+
44
+ """# Telegram """
45
+
46
+ # bot_token = 'get from user using gradio'
47
+
48
+ offset = None
49
+
50
def get_data(bot_token):
    """Fetch new channel-post texts from the Telegram Bot API.

    Calls ``getUpdates`` for the given bot and remembers, in the module-level
    ``offset``, the id of the next update to request so that later calls do
    not return the same updates again.

    Args:
        bot_token: Telegram Bot API token of a bot that can read the channel.

    Returns:
        A list with the text of every channel post in the fetched updates.
        On any failure (network error, rejected token, no updates, no text
        posts) a one-element list holding the error message is returned;
        callers compare against that exact list.
    """
    global offset
    error_list = ['An error occurred. Possibly empty request result or your Telegram Bot Token is incorrect.']

    url = "https://api.telegram.org/bot{}/getUpdates".format(bot_token)
    # The first call asks for everything pending; later calls resume after
    # the last update that was already consumed.
    params = {} if offset is None else {'offset': offset}

    try:
        response = requests.get(url, params=params, timeout=30)
        updates = json.loads(response.text)['result']
        if not updates:
            print(error_list[0])
            return error_list

        # Updates that are not text channel posts (private messages, photos,
        # edited posts, ...) are skipped instead of failing the whole batch.
        text_list = [
            update['channel_post']['text']
            for update in updates
            if 'text' in update.get('channel_post', {})
        ]

        # without 'last_update_id + 1' there will be duplicate results
        offset = int(updates[-1]['update_id']) + 1
    except Exception as exc:
        # Only the exception type is printed: the message of a requests error
        # contains the request URL, which embeds the bot token.
        print(error_list[0], type(exc).__name__)
        return error_list

    if not text_list:
        print(error_list[0])
        return error_list
    return text_list
75
+
76
+ """# Classifier"""
77
+
78
_disaster_classifier = None


def _get_disaster_classifier():
    """Return the disaster text classifier, loading it on first use only."""
    global _disaster_classifier
    if _disaster_classifier is None:
        _disaster_classifier = pipeline("sentiment-analysis", model="Madhana/disaster_msges_classifier_v1")
    return _disaster_classifier


def classify_message(bot_token):
    """Fetch new Telegram messages and keep those classified as disasters.

    Args:
        bot_token: Telegram Bot API token, forwarded to ``get_data``.

    Returns:
        The messages whose top label is ``'DISASTER'`` (possibly an empty
        list), or the one-element error list when ``get_data`` failed.
    """
    error_list = ['An error occurred. Possibly empty request result or your Telegram Bot Token is incorrect.']

    # Fetch exactly once: every get_data() call advances the update offset,
    # so a second call would return a different batch than the one checked.
    messages = get_data(bot_token)
    if messages == error_list:
        return error_list

    classifier = _get_disaster_classifier()
    disaster_docs = []
    for message in messages:
        label = classifier(message)[0]['label']
        if label == 'DISASTER':
            disaster_docs.append(message)
    return disaster_docs
93
+
94
+ """# NER Pipeline"""
95
+
96
# Phrases that the custom component tags with the "YO!" label.
Tamil_words = ['மதனா பாலா']  # umm, that's my name in Tamil; consider this an Easter egg in this app lol.


@spacy.Language.component("disaster_ner")
def disaster_ner(doc):
    """Tag every occurrence of a phrase from ``Tamil_words`` as a "YO!" entity.

    The component is registered under the name "disaster_ner" and is added
    to the pipeline before the 'ner' component (see ``nlp.add_pipe`` below).

    Args:
        doc: The spaCy ``Doc`` being processed.

    Returns:
        The same ``Doc`` with ``doc.ents`` set to the matched spans.
    """
    matcher = PhraseMatcher(doc.vocab)
    patterns = list(nlp.tokenizer.pipe(Tamil_words))
    # spaCy v3 call form: the patterns are passed as a single list.
    matcher.add("Tamil_words", patterns)
    doc.ents = [
        Span(doc, start, end, label="YO!")
        for _match_id, start, end in matcher(doc)
    ]
    return doc


nlp = spacy.load("en_pipeline")
nlp.add_pipe("disaster_ner", name="disaster_ner", before='ner')

# Entity labels that become columns of the results table, after "Text".
entity_types = ["NAME", "STREET", "NEIGHBORHOOD", "CITY", "PHONE NUMBER", "YO!"]
# Module-level results table; get_ner / get_classifier_ner append rows to it.
df = pd.DataFrame(columns=["Text"] + entity_types)
113
+
114
def create_address(row):
    """Build a single-line address from a row's STREET, NEIGHBORHOOD and CITY.

    Components that are missing or blank are left out, so a row with only a
    city yields ``"City"`` rather than ``", , City"``, and a row with no
    location entities yields an empty string.

    Args:
        row: Mapping (e.g. a DataFrame row) with the keys 'STREET',
            'NEIGHBORHOOD' and 'CITY'.

    Returns:
        The non-empty components joined by ", ".
    """
    components = (row['STREET'], row['NEIGHBORHOOD'], row['CITY'])
    cleaned = (str(part).strip() for part in components if part is not None)
    return ", ".join(part for part in cleaned if part)
116
+
117
+ geolocator = Nominatim(user_agent="disaster-ner-app")
118
+
119
def geocode_address(address):
    """Look up the coordinates of an address with the module-level geocoder.

    Args:
        address: Free-form address string.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the address is
        blank, nothing was found, or the lookup failed.
    """
    # An address made only of separators (e.g. ", , ") cannot be resolved;
    # skip the network round trip.
    if not address or not str(address).strip(" ,"):
        return None

    try:
        location = geolocator.geocode(address)
    except Exception:
        # Best effort: a failed lookup must not abort processing of the
        # remaining rows. Unlike a bare ``except``, this still lets
        # KeyboardInterrupt / SystemExit propagate.
        return None

    if location is None:
        return None
    return (location.latitude, location.longitude)
125
+
126
+ """# With Classifier"""
127
+
128
def _append_entity_rows(texts):
    """Run NER over ``texts`` and append one row per text to the global ``df``.

    Each row holds the text followed by the entity text found for every label
    in ``entity_types`` ("" when absent). The 'Address' column is rebuilt for
    the whole table; only rows added by this call are geocoded, coordinates
    of earlier rows are reused.

    Args:
        texts: Iterable of message strings (may be empty).

    Returns:
        The module-level ``df``, updated in place.
    """
    rows_before = len(df)

    for text in texts:
        doc = nlp(text)
        found = {ent.label_: ent.text for ent in doc.ents}
        row = [text] + [found.get(label, "") for label in entity_types]
        # After an earlier call df also has 'Address' and 'Coordinates';
        # pad the row so its length matches the table width.
        row.extend([""] * (len(df.columns) - len(row)))
        df.loc[len(df)] = row

    # Built with plain lists rather than DataFrame.apply so that an empty
    # table (no input texts on the first call) is handled too.
    addresses = [create_address(record) for _, record in df.iterrows()]
    df['Address'] = pd.Series(addresses, index=df.index, dtype=object)

    if 'Coordinates' in df.columns:
        coordinates = list(df['Coordinates'].iloc[:rows_before])
    else:
        coordinates = [geocode_address(address) for address in addresses[:rows_before]]
    coordinates.extend(geocode_address(address) for address in addresses[rows_before:])
    df['Coordinates'] = pd.Series(coordinates, index=df.index, dtype=object)

    return df


def get_classifier_ner(bot_token):
    """Extract entities from the new messages classified as disasters.

    Args:
        bot_token: Telegram Bot API token, forwarded to ``classify_message``.

    Returns:
        The accumulated results table (module-level ``df``).
    """
    # NOTE(review): when classify_message returns its error list, the error
    # text itself is processed and stored as a row (unchanged behavior).
    return _append_entity_rows(classify_message(bot_token))


"""## Without Classifier"""


def get_ner(bot_token):
    """Extract entities from all new messages, without classification.

    Args:
        bot_token: Telegram Bot API token, forwarded to ``get_data``.

    Returns:
        The accumulated results table (module-level ``df``).
    """
    # NOTE(review): when get_data returns its error list, the error text
    # itself is processed and stored as a row (unchanged behavior).
    return _append_entity_rows(get_data(bot_token))
175
+
176
+ """# Gradio"""
177
+
178
def process_ner_data(your_bot_token):
    """Gradio callback: return the NER table for the given bot token."""
    ner_table = get_ner(your_bot_token)
    return ner_table
180
+
181
def process_classifier_ner_data(your_bot_token):
    """Gradio callback: return the classifier-filtered NER table for the token."""
    classified_table = get_classifier_ner(your_bot_token)
    return classified_table
183
+
184
+
185
# NOTE(review): the original indentation was lost in this copy of the file;
# the nesting of the layout below is reconstructed — confirm against the
# running app.
demo = gr.Blocks()

with demo:
    gr.Markdown("Telegram Disaster Recovery Assistant")
    with gr.Tabs():
        with gr.TabItem("Structured Telegram Messages"):
            with gr.Row():
                your_bot_token = gr.Textbox(type='password', label="Enter your Bot Token")
                # Headers list every column of the table the callbacks return.
                ner_df = gr.Dataframe(headers=["Text"] + entity_types + ["Address", "Coordinates"])

            classifier_ner_button = gr.Button("Get Classifier-NER Output")
            ner_button = gr.Button("Get NER Output")
            clear = gr.Button("Clear")

        with gr.TabItem("User Guide"):
            with gr.Row():
                # Step 6 is on its own line: the original "\6" was an octal
                # escape that put a control character into the text.
                gr.Markdown("""This is an Telegram based Disaster Recovery Assist app that uses Named Entity Recognition to extract important entities from the unstructured text and stores it in an dataframe.
You need to provide your personal Telegram Bot API token (API token of the bot that is added to the channel as an administrator) to use this app.
Steps to create a Telegram Bot:
1. Download the Telegram app on your device or use the web version.
2. Open the app and search for the "BotFather" bot.
3. Start a chat with the BotFather bot by clicking on the "START" button.
4. Type "/newbot" and follow the on-screen instructions to create a new bot.
5. Choose a name and username for your bot.
6. Once your bot is created, the BotFather will give you a unique API token.

Steps to add your telegram bot to your channel as an administrator:
1. Create a new channel or choose an existing one that you want to use the bot in.
2. Add your bot to the channel as an administrator. To do this, go to the channel settings, click on "Administrators", and then click on "Add Administrator". Search for your bot and add it to the channel.
3. Now you can send commands to the bot in the channel by mentioning the bot using the "@" symbol followed by the bot's username. For example, "@my_bot help" will send the "help" command to the bot.

*If you get any errors or dependency issues, feel free to reach out to me!*""")

    # Event wiring must happen inside the Blocks context.
    ner_button.click(process_ner_data, inputs=your_bot_token, outputs=ner_df)
    classifier_ner_button.click(process_classifier_ner_data, inputs=your_bot_token, outputs=ner_df)
    # Clears only the displayed table; rows accumulated in the module-level
    # DataFrame are kept and reappear on the next request.
    clear.click(lambda: None, None, ner_df, queue=True)

demo.queue(concurrency_count=3)
demo.launch(share=True, debug=True)