Update app.py
Browse files
app.py
CHANGED
@@ -37,7 +37,7 @@ from geopy.geocoders import Nominatim
|
|
37 |
|
38 |
offset = None
|
39 |
|
40 |
-
def get_data(bot_token):
|
41 |
global offset
|
42 |
try:
|
43 |
if offset == None:
|
@@ -66,7 +66,7 @@ def get_data(bot_token):
|
|
66 |
|
67 |
"""# Classifier"""
|
68 |
|
69 |
-
def classify_message(bot_token):
|
70 |
error_msg = ['An error occurred. Possibly empty request result or your Telegram Bot Token is incorrect.']
|
71 |
disaster_docs = []
|
72 |
classifier = pipeline("sentiment-analysis", model="Madhana/disaster_msges_classifier_v1")
|
@@ -84,7 +84,7 @@ def classify_message(bot_token):
|
|
84 |
"""# NER Pipeline"""
|
85 |
|
86 |
@spacy.Language.component("disaster_ner")
|
87 |
-
def disaster_ner(doc):
|
88 |
matcher = PhraseMatcher(doc.vocab)
|
89 |
patterns = list(nlp.tokenizer.pipe(Tamil_words))
|
90 |
matcher.add("Tamil_words", None, *patterns)
|
@@ -98,12 +98,12 @@ Tamil_words = ['மதனா பாலா'] # umm, that's my name in Tamil, cons
|
|
98 |
nlp = spacy.load("en_pipeline")
|
99 |
nlp.add_pipe("disaster_ner", name="disaster_ner", before='ner')
|
100 |
|
101 |
-
def create_address(row):
|
102 |
return f"{row['STREET']}, {row['NEIGHBORHOOD']}, {row['CITY']}"
|
103 |
|
104 |
geolocator = Nominatim(user_agent="disaster-ner-app")
|
105 |
|
106 |
-
def geocode_address(address):
|
107 |
try:
|
108 |
location = geolocator.geocode(address)
|
109 |
return (location.latitude, location.longitude)
|
@@ -112,7 +112,7 @@ def geocode_address(address):
|
|
112 |
|
113 |
"""# With Classifier"""
|
114 |
|
115 |
-
def get_classifier_ner(bot_token):
|
116 |
data = classify_message(bot_token)
|
117 |
entity_types = ["NAME", "STREET", "NEIGHBORHOOD", "CITY", "PHONE NUMBER","YO!"]
|
118 |
df = pd.DataFrame(columns=["Text"] + entity_types)
|
@@ -139,7 +139,7 @@ def get_classifier_ner(bot_token):
|
|
139 |
|
140 |
"""## Without Classifier"""
|
141 |
|
142 |
-
def get_ner(bot_token):
|
143 |
data = get_data(bot_token)
|
144 |
entity_types = ["NAME", "STREET", "NEIGHBORHOOD", "CITY", "PHONE NUMBER","YO!"]
|
145 |
df = pd.DataFrame(columns=["Text"] + entity_types)
|
@@ -167,10 +167,10 @@ def get_ner(bot_token):
|
|
167 |
|
168 |
"""# Gradio"""
|
169 |
|
170 |
-
def process_ner_data(your_bot_token):
|
171 |
return get_ner(your_bot_token)
|
172 |
|
173 |
-
def process_classifier_ner_data(your_bot_token):
|
174 |
return get_classifier_ner(your_bot_token)
|
175 |
|
176 |
demo = gr.Blocks()
|
|
|
37 |
|
38 |
offset = None
|
39 |
|
40 |
+
def get_data(bot_token: str) -> List[str]:
|
41 |
global offset
|
42 |
try:
|
43 |
if offset == None:
|
|
|
66 |
|
67 |
"""# Classifier"""
|
68 |
|
69 |
+
def classify_message(bot_token: str) -> Union[List[str], List[str]]:
|
70 |
error_msg = ['An error occurred. Possibly empty request result or your Telegram Bot Token is incorrect.']
|
71 |
disaster_docs = []
|
72 |
classifier = pipeline("sentiment-analysis", model="Madhana/disaster_msges_classifier_v1")
|
|
|
84 |
"""# NER Pipeline"""
|
85 |
|
86 |
@spacy.Language.component("disaster_ner")
|
87 |
+
def disaster_ner(doc: spacy.tokens.Doc) -> spacy.tokens.Doc:
|
88 |
matcher = PhraseMatcher(doc.vocab)
|
89 |
patterns = list(nlp.tokenizer.pipe(Tamil_words))
|
90 |
matcher.add("Tamil_words", None, *patterns)
|
|
|
98 |
nlp = spacy.load("en_pipeline")
|
99 |
nlp.add_pipe("disaster_ner", name="disaster_ner", before='ner')
|
100 |
|
101 |
+
def create_address(row: pd.Series) -> str:
|
102 |
return f"{row['STREET']}, {row['NEIGHBORHOOD']}, {row['CITY']}"
|
103 |
|
104 |
geolocator = Nominatim(user_agent="disaster-ner-app")
|
105 |
|
106 |
+
def geocode_address(address: str) -> tuple:
|
107 |
try:
|
108 |
location = geolocator.geocode(address)
|
109 |
return (location.latitude, location.longitude)
|
|
|
112 |
|
113 |
"""# With Classifier"""
|
114 |
|
115 |
+
def get_classifier_ner(bot_token: str) -> pd.DataFrame:
|
116 |
data = classify_message(bot_token)
|
117 |
entity_types = ["NAME", "STREET", "NEIGHBORHOOD", "CITY", "PHONE NUMBER","YO!"]
|
118 |
df = pd.DataFrame(columns=["Text"] + entity_types)
|
|
|
139 |
|
140 |
"""## Without Classifier"""
|
141 |
|
142 |
+
def get_ner(bot_token: str) -> pd.DataFrame:
|
143 |
data = get_data(bot_token)
|
144 |
entity_types = ["NAME", "STREET", "NEIGHBORHOOD", "CITY", "PHONE NUMBER","YO!"]
|
145 |
df = pd.DataFrame(columns=["Text"] + entity_types)
|
|
|
167 |
|
168 |
"""# Gradio"""
|
169 |
|
170 |
+
def process_ner_data(your_bot_token) -> pd.DataFrame:
|
171 |
return get_ner(your_bot_token)
|
172 |
|
173 |
+
def process_classifier_ner_data(your_bot_token) -> pd.DataFrame:
|
174 |
return get_classifier_ner(your_bot_token)
|
175 |
|
176 |
demo = gr.Blocks()
|