azamat's picture
Add some beauty
3f23b29
raw
history blame
2.98 kB
import re
import requests
import gradio as gr
import pandas as pd
from transformers import pipeline
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
def process_tweet(tweet):
    """Normalize a raw tweet before it is passed to the models.

    The following steps are applied in order:

    1. links (``www.*`` and ``http(s)://*``) are removed,
    2. ``@username`` mentions are removed,
    3. every run of whitespace is collapsed into a single space,
    4. hashtags are replaced by their bare word (``#flood`` -> ``flood``),
    5. surrounding single/double quote characters are stripped.

    Args:
        tweet: The raw tweet text.

    Returns:
        The cleaned text. Only quote characters are trimmed in the last
        step, so a leading/trailing space left by a removed link or mention
        is kept as-is.
    """
    # remove links; "www." must be followed by non-space characters to count
    # as a link (the class used to be [\s]+, which never matched real URLs)
    tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', '', tweet)
    # remove usernames
    tweet = re.sub(r'@[^\s]+', '', tweet)
    # remove additional white spaces
    tweet = re.sub(r'[\s]+', ' ', tweet)
    # replace hashtags with words
    tweet = re.sub(r'#([^\s]+)', r'\1', tweet)
    # trim surrounding quote characters
    tweet = tweet.strip('\'"')
    return tweet
# Hugging Face Hub repository IDs used by the two stages of the pipeline.
_COORDINATES_MODEL_ID = "azamat/geocoder_coordinates_model"
_RELEVANCY_MODEL_ID = "azamat/geocoder_relevancy_model"

# Tokenizer loaded from the same repository as the coordinates model.
tokenizer = AutoTokenizer.from_pretrained(_COORDINATES_MODEL_ID)

# Text-classification pipeline whose label/score output is used to decide
# whether a tweet is relevant.
relevancy_pipeline = pipeline("sentiment-analysis", model=_RELEVANCY_MODEL_ID)

# Sequence-classification model whose first two outputs are read as a
# coordinate pair.
coordinates_model = AutoModelForSequenceClassification.from_pretrained(
    _COORDINATES_MODEL_ID
)
def predict_relevancy(text):
    """Run the relevancy pipeline on *text*.

    Args:
        text: The (already cleaned) tweet text.

    Returns:
        A ``(label, score)`` tuple taken from the first result the pipeline
        returns.
    """
    # The pipeline returns a list of results; only the first one is used.
    top_result = relevancy_pipeline(text)[0]
    return top_result['label'], top_result['score']
def predict_coordinates(text):
    """Predict a coordinate pair for *text* with the coordinates model.

    The text is tokenized to a fixed length of 128 tokens (padded or
    truncated) and passed through ``coordinates_model``. The first two
    values of the first output row are returned, rounded to three decimals.

    Args:
        text: The (already cleaned) tweet text.

    Returns:
        A tuple of two floats; the caller unpacks them as ``(lat, lon)``.
    """
    # Imported here so the function does not rely on a file-level import
    # that the module does not currently have.
    import torch

    encoding = tokenizer(
        text,
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors='pt',
    )
    # Inference only: disable gradient tracking so no autograd graph is
    # recorded for the forward pass.
    with torch.no_grad():
        outputs = coordinates_model(**encoding)
    # outputs[0] is presumably the logits tensor with one row per input
    # text -- TODO confirm against the model's output class.
    first_row = outputs[0][0]
    return round(first_row[0].item(), 3), round(first_row[1].item(), 3)
def reverse_geocode(lat, lon, timeout=10):
    """Look up a human-readable place name for a latitude/longitude pair.

    Sends a GET request to ``https://geocode.maps.co/reverse`` and returns
    the ``display_name`` field of the JSON response.

    Args:
        lat: Latitude sent as the ``lat`` query parameter.
        lon: Longitude sent as the ``lon`` query parameter.
        timeout: Seconds to wait for the service before giving up, so a
            stalled server cannot block the caller forever.

    Returns:
        The ``display_name`` string, or ``"No data"`` when the request
        fails, the response is not valid JSON, or the field is missing.
    """
    payload = {
        'lat': lat,
        'lon': lon,
        'zoom': 12,
        'format': 'jsonv2',
        'accept-language': 'en',
    }
    try:
        response = requests.get(
            'https://geocode.maps.co/reverse',
            params=payload,
            timeout=timeout,
        )
        # Treat HTTP error statuses as "no data" instead of trying to parse
        # an error page.
        response.raise_for_status()
        return response.json()['display_name']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Best-effort lookup: network/HTTP failures (RequestException),
        # undecodable JSON (ValueError) and unexpected payload shapes
        # (KeyError/TypeError) all degrade to a placeholder.
        return "No data"
def predict(text):
    """Run the full geocoding pipeline on one tweet.

    The tweet is cleaned with ``process_tweet`` and scored with
    ``predict_relevancy``. Only when the label is ``'relevant'`` are the
    coordinates predicted and reverse geocoded; otherwise the row keeps
    its zero/empty defaults.

    Args:
        text: The raw tweet text entered by the user.

    Returns:
        A one-row ``pandas.DataFrame`` with the columns
        ``relevancy_score``, ``lat``, ``lon`` and ``reversed lat/lon``.
    """
    cleaned = process_tweet(text)
    label, score = predict_relevancy(cleaned)

    # Guard clause: tweets that are not relevant get the default row.
    if label != 'relevant':
        return pd.DataFrame([{
            "relevancy_score": 0,
            "lat": 0,
            "lon": 0,
            "reversed lat/lon": "",
        }])

    # Score is reported as a percentage with two decimals.
    score_percent = round(score * 100, 2)
    lat, lon = predict_coordinates(cleaned)
    place_name = reverse_geocode(lat, lon)

    row = {
        "relevancy_score": score_percent,
        "lat": lat,
        "lon": lon,
        "reversed lat/lon": place_name,
    }
    return pd.DataFrame([row])
# Header lines rendered, in this order, at the top of the page.
_HEADER_MARKDOWN = (
    "# **<p align='center'>Twitter geocoding with 🤗 Transformers</p>**",
    "# **<div align='center'>Pipeline consists of:</div>**",
    "# **<div align='center'>1) Relevancy scoring model</div>**",
    "# **<div align='center'>2) Coordinate predicting model</div>**",
    "# **<div align='center'>3) Nominatim API for reverse geocoding lat/lon</div>**",
)

# Build the UI: a header, one text box for the tweet and one table for the
# result. Submitting the text box runs `predict` and fills the table.
with gr.Blocks() as demo:
    for _header_line in _HEADER_MARKDOWN:
        gr.Markdown(_header_line)

    inputs = gr.Textbox(placeholder="Enter the tweet")
    outputs = [gr.Dataframe(label="Geocoded data")]
    inputs.submit(predict, inputs=inputs, outputs=outputs)

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()