ReliefNer / app.py
Madhana's picture
Update app.py
efa9e01
raw
history blame
8.45 kB
# -*- coding: utf-8 -*-
"""disaster_help_ner_production.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1_wqnv01NeDdOLp2X1KT6WfvNgVwwTu5a
Note: This is a continuation of [this notebook](https://colab.research.google.com/drive/1HlZLIVcAtWVeitZjWV3LclrH3gBwuymp?usp=sharing).
# Imports
"""
import json
import requests
import configparser
import spacy
import spacy_transformers
from spacy import displacy
from spacy.tokens import Span
from transformers import pipeline
from spacy.matcher import PhraseMatcher
import csv
import numpy as np
import pandas as pd
import geopy
import gradio as gr
from geopy.geocoders import Nominatim
"""# Telegram """
# The bot token is never stored here; the user supplies it through the Gradio UI.

# Offset stored by the most recent successful get_data() call. Kept under its
# original module-level name; the per-token table below drives the requests.
offset = None

# Next getUpdates offset for each bot token. Every bot has its own update
# queue, so the position reached for one bot must not be sent for another.
_offsets_by_token = {}

# Upper bound, in seconds, on how long a single Telegram request may block.
_REQUEST_TIMEOUT_SECONDS = 30


def get_data(bot_token):
    """Fetch the text of new channel posts for a Telegram bot.

    Calls the Bot API ``getUpdates`` method, passing the offset remembered
    for ``bot_token`` (if any) so updates returned by an earlier call are not
    returned again. Updates that are not channel posts, or that carry no
    ``text`` field, are skipped instead of failing the whole batch.

    Args:
        bot_token: Telegram Bot API token of a bot that receives the
            channel's posts.

    Returns:
        A non-empty list of strings: the post texts, or a single-element
        list holding the error message when the request failed, the response
        was not the expected shape, or no text posts were found. This
        function never raises.
    """
    global offset
    error_list = ['An error occurred. Possibly empty request result or your Telegram Bot Token is incorrect.']
    try:
        query = {}
        next_expected = _offsets_by_token.get(bot_token)
        if next_expected is not None:
            query['offset'] = next_expected
        response = requests.get(
            "https://api.telegram.org/bot{}/getUpdates".format(bot_token),
            params=query,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        updates = json.loads(response.text)['result']
        # without 'last_update_id + 1' there will be duplicate results
        new_offset = int(updates[-1]['update_id']) + 1
        text_list = []
        for update in updates:
            post = update.get('channel_post') or {}
            if 'text' in post:
                text_list.append(post['text'])
    except Exception:
        # Deliberately broad: this feeds a UI callback, and callers recognise
        # failure by the returned message rather than by an exception.
        return error_list
    # Only remember the new position once the batch was read successfully.
    _offsets_by_token[bot_token] = new_offset
    offset = new_offset
    # Callers rely on a non-empty list, so "nothing usable" is reported the
    # same way as an empty request result.
    return text_list or error_list
# NOTE(review): this runs at import time with a bot token hard-coded in source.
# It issues a getUpdates request for that bot and discards the returned texts;
# as a side effect get_data() advances its stored polling offset.
# A credential committed to a public file should be revoked and supplied from
# outside the source (e.g. an environment variable) - confirm whether this
# call is still needed at all.
get_data('6261742702:AAGkil1tWLckpBbC088rTO3Lb-lH2hJVmBg')
"""# Classifier"""
# Lazily created Hugging Face pipeline, shared by every classify_message() call
# so the model is loaded once instead of on each button click.
_disaster_classifier = None


def _get_disaster_classifier():
    """Return the shared disaster-message classifier, loading it on first use."""
    global _disaster_classifier
    if _disaster_classifier is None:
        _disaster_classifier = pipeline("sentiment-analysis", model="Madhana/disaster_msges_classifier_v1")
    return _disaster_classifier


def classify_message(bot_token):
    """Fetch new channel posts and keep only those classified as disasters.

    Args:
        bot_token: Telegram Bot API token, passed through to ``get_data``.

    Returns:
        A list with the texts whose top classifier label is ``'DISASTER'``
        (possibly empty). If ``get_data`` reported a failure, the
        single-element error-message list is returned unchanged.
    """
    error_msg = ['An error occurred. Possibly empty request result or your Telegram Bot Token is incorrect.']
    messages = get_data(bot_token)
    # get_data signals failure by returning the error text as a message;
    # check before touching the model so a failed fetch never loads it.
    if error_msg[0] in messages:
        return error_msg
    classifier = _get_disaster_classifier()
    return [
        data
        for data in messages
        if classifier(data)[0]['label'] == 'DISASTER'
    ]
"""# NER Pipeline"""
@spacy.Language.component("disaster_ner")
def disaster_ner(doc):
    """Tag every occurrence of a ``Tamil_words`` phrase in ``doc`` as a "YO!" entity.

    Registered as the spaCy pipeline component "disaster_ner". The phrases
    are tokenized with the module-level ``nlp`` tokenizer, matched against
    ``doc``, and ``doc.ents`` is replaced by the matched spans.

    Args:
        doc: the spaCy ``Doc`` being processed.

    Returns:
        The same ``doc``, with ``doc.ents`` set to the matched spans.
    """
    phrase_matcher = PhraseMatcher(doc.vocab)
    phrase_docs = [phrase for phrase in nlp.tokenizer.pipe(Tamil_words)]
    phrase_matcher.add("Tamil_words", None, *phrase_docs)
    tagged = []
    # Each match is (match_id, start_token, end_token); the id is not needed.
    for _match_id, first, stop in phrase_matcher(doc):
        tagged.append(Span(doc, first, stop, label="YO!"))
    doc.ents = tagged
    return doc
# Phrases that the "disaster_ner" component tags with the "YO!" label.
# The single entry is the author's name in Tamil, left in as an easter egg.
Tamil_words = ['மதனா பாலா']
# Load the spaCy pipeline package named "en_pipeline".
nlp = spacy.load("en_pipeline")
# Insert the phrase-matching component ahead of the pipeline's "ner" component.
nlp.add_pipe("disaster_ner", name="disaster_ner", before='ner')
def create_address(row):
    """Join a row's STREET, NEIGHBORHOOD and CITY values into one address string.

    Args:
        row: any mapping-like object indexable by those three keys.

    Returns:
        The three values separated by ", ", in that order. Empty values are
        kept, so a row with no entities yields ", , ".
    """
    address_fields = ("STREET", "NEIGHBORHOOD", "CITY")
    return ", ".join(format(row[field]) for field in address_fields)
# Shared geopy Nominatim geocoder used by geocode_address(); the user_agent
# string identifies this application to the geocoding service.
geolocator = Nominatim(user_agent="disaster-ner-app")
def geocode_address(address):
    """Look up the coordinates of an address with the shared geocoder.

    Args:
        address: the query passed to ``geolocator.geocode``; normally the
            string built by ``create_address``.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the address is
        blank, the geocoder finds no match, or the lookup fails.
    """
    # An address made only of separators (e.g. ", , ") has nothing to look
    # up, so skip the network round-trip.
    if isinstance(address, str) and not address.strip(" ,\t\n"):
        return None
    try:
        location = geolocator.geocode(address)
    except Exception:
        # Best-effort enrichment: a failed lookup must not abort the whole
        # table, but KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
    if location is None:
        # The geocoder reports "no match" by returning None.
        return None
    return (location.latitude, location.longitude)
"""# With Classifier"""
def _build_entity_frame(texts):
    """Run the NER pipeline over ``texts`` and tabulate the entities found.

    Args:
        texts: iterable of message strings.

    Returns:
        A DataFrame with one row per text and the columns ``Text``, one
        column per entity type, ``Address`` (from ``create_address``) and
        ``Coordinates`` (from ``geocode_address``). An empty input yields an
        empty frame with the same columns.
    """
    entity_types = ["NAME", "STREET", "NEIGHBORHOOD", "CITY", "PHONE NUMBER","YO!"]
    records = []
    for text in texts:
        doc = nlp(text)
        # When a label occurs more than once, the last occurrence wins.
        found = {ent.label_: ent.text for ent in doc.ents}
        record = {"Text": text}
        for entity_type in entity_types:
            record[entity_type] = found.get(entity_type, "")
        record["Address"] = create_address(record)
        records.append(record)
    # Build the frame in one step rather than appending row by row.
    df = pd.DataFrame(records, columns=["Text"] + entity_types + ["Address"])
    df['Coordinates'] = df['Address'].apply(geocode_address)
    return df


def get_classifier_ner(bot_token):
    """Extract entities from the new channel posts classified as disasters.

    Args:
        bot_token: Telegram Bot API token, passed to ``classify_message``.

    Returns:
        The DataFrame described in ``_build_entity_frame``. When
        ``classify_message`` returns its error-message list, that message is
        processed like any other text and shows up as the only row.
    """
    return _build_entity_frame(classify_message(bot_token))


"""## Without Classifier"""


def get_ner(bot_token):
    """Extract entities from every new channel post, without classification.

    Args:
        bot_token: Telegram Bot API token, passed to ``get_data``.

    Returns:
        The DataFrame described in ``_build_entity_frame``. When ``get_data``
        returns its error-message list, that message is processed like any
        other text and shows up as the only row.
    """
    return _build_entity_frame(get_data(bot_token))
"""# Gradio"""
def process_ner_data(your_bot_token):
    """Gradio callback for "Get NER Output": return the table from ``get_ner``."""
    ner_table = get_ner(your_bot_token)
    return ner_table
def process_classifier_ner_data(your_bot_token):
    """Gradio callback for "Get Classifier-NER Output": return the table from ``get_classifier_ner``."""
    classified_table = get_classifier_ner(your_bot_token)
    return classified_table
# Gradio UI: one tab with the token box, result table and action buttons,
# and one tab with the user guide.
demo = gr.Blocks()
with demo:
    gr.Markdown("Telegram Disaster Recovery Assistant")
    with gr.Tabs():
        with gr.TabItem("Structured Telegram Messages"):
            with gr.Row():
                your_bot_token = gr.Textbox(type='password', label="Enter your Bot Token")
                # NOTE(review): the callbacks return frames that also carry
                # Text, Address and Coordinates columns - confirm the headers
                # listed here are the ones that should be displayed.
                ner_df = gr.Dataframe(headers=["NAME", "STREET", "NEIGHBORHOOD", "CITY", "PHONE NUMBER","YO!"])
            classifier_ner_button = gr.Button("Get Classifier-NER Output")
            ner_button = gr.Button("Get NER Output")
            clear = gr.Button("Clear")
        with gr.TabItem("User Guide"):
            with gr.Row():
                # Step 6 used to be written as "\6.", which Python reads as an
                # octal escape (a control character); it is now a normal
                # list item on its own line.
                gr.Markdown("""This is an Telegram based Disaster Recovery Assist app that uses Named Entity Recognition to extract important entities from the unstructured text and stores it in an dataframe.
You need to provide your personal Telegram Bot API token (API token of the bot that is added to the channel as an administrator) to use this app.
**Steps to create a Telegram Bot**:
1. Download the Telegram app on your device or use the web version.
2. Open the app and search for the "BotFather" bot.
3. Start a chat with the BotFather bot by clicking on the "START" button.
4. Type "/newbot" and follow the on-screen instructions to create a new bot.
5. Choose a name and username for your bot.
6. Once your bot is created, the BotFather will give you a unique API token.
**Steps to add your telegram bot to your channel as an administrator**:
1. Create a new channel or choose an existing one that you want to use the bot in.
2. Add your bot to the channel as an administrator. To do this, go to the channel settings, click on "Administrators", and then click on "Add Administrator". Search for your bot and add it to the channel.
3. Now you can send commands to the bot in the channel by mentioning the bot using the "@" symbol followed by the bot's username. For example, "@my_bot help" will send the "help" command to the bot.
**Get Classifier-NER Output VS Get NER Output**:
The 'Get Classifier Ner Output' function first classifies the message as either a disaster message or a random message, and then applies the NER pipeline to the classified output. In contrast, the 'Get NER Output' function applies the NER pipeline directly to the message.
*If you get any errors or dependency issues, feel free to reach out to me!*""")
    # Event handlers are attached inside the Blocks context.
    ner_button.click(process_ner_data,inputs=your_bot_token, outputs=ner_df)
    classifier_ner_button.click(process_classifier_ner_data,inputs=your_bot_token, outputs=ner_df)
    # Returning None empties the result table.
    clear.click(lambda: None, None, ner_df, queue=True)
demo.queue(concurrency_count=3)
demo.launch() # share=True, debug=True