Spaces:
Runtime error
Runtime error
File size: 3,111 Bytes
3e22f77 31ae1e9 3e22f77 7a321ee 3e22f77 beb01b9 3e22f77 7a321ee 3e22f77 7a321ee 3e22f77 7a321ee 3e22f77 e0e99ed 3e22f77 31ae1e9 910a9eb 31ae1e9 910a9eb 31ae1e9 3e22f77 7a321ee 3e22f77 910a9eb 7a321ee 3e22f77 7a321ee 31ae1e9 7a321ee 3e22f77 7a321ee da3ad5a 3f23b29 e804a95 6534a76 7a321ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import re
import requests
import gradio as gr
import pandas as pd
from transformers import pipeline
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
def process_tweet(tweet):
    """Normalize raw tweet text before it is handed to the models.

    The steps run in this order: links are removed, ``@username`` mentions
    are removed, runs of whitespace are collapsed to a single space,
    ``#hashtag`` is replaced by ``hashtag``, and leading/trailing single or
    double quote characters are stripped.

    Args:
        tweet: Raw tweet text.

    Returns:
        The cleaned tweet text.
    """
    # remove links; the "www." branch must consume non-whitespace characters
    # (it previously required whitespace after the dot and never matched a URL)
    tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', '', tweet)
    # remove usernames
    tweet = re.sub(r'@[^\s]+', '', tweet)
    # remove additional white spaces
    tweet = re.sub(r'[\s]+', ' ', tweet)
    # replace hashtags with words
    tweet = re.sub(r'#([^\s]+)', r'\1', tweet)
    # trim surrounding quote characters
    tweet = tweet.strip('\'"')
    return tweet
# Model identifiers passed to the transformers loaders below.
_COORDINATES_CHECKPOINT = "azamat/geocoder_coordinates_model"
_RELEVANCY_CHECKPOINT = "azamat/geocoder_relevancy_model"

# Tokenizer and sequence-classification model used by predict_coordinates;
# both are loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(_COORDINATES_CHECKPOINT)
# Pipeline used by predict_relevancy.
relevancy_pipeline = pipeline("sentiment-analysis", model=_RELEVANCY_CHECKPOINT)
coordinates_model = AutoModelForSequenceClassification.from_pretrained(
    _COORDINATES_CHECKPOINT
)
def predict_relevancy(text):
    """Run the relevancy pipeline on ``text``.

    Returns:
        A ``(label, score)`` tuple taken from the first result the pipeline
        returns.
    """
    top_result = relevancy_pipeline(text)[0]
    return top_result['label'], top_result['score']
def predict_coordinates(text):
    """Predict a pair of coordinates for ``text`` with the coordinates model.

    The text is tokenized to a fixed length of 128 tokens (padded and
    truncated) and passed through ``coordinates_model``.

    Args:
        text: Tweet text to run the model on.

    Returns:
        A tuple of two floats rounded to 3 decimals: the first two values of
        the first row of the model's first output. The caller in this file
        treats them as ``(lat, lon)``.
    """
    # Imported locally because the file does not import torch at module level.
    import torch

    encoding = tokenizer(
        text,
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors='pt',
    )
    # Inference only: disable gradient tracking so no autograd graph is built
    # and kept alive for every request.
    with torch.no_grad():
        outputs = coordinates_model(**encoding)
    first_row = outputs[0][0]
    return round(first_row[0].item(), 3), round(first_row[1].item(), 3)
def reverse_geocode(lat, lon, timeout=10):
    """Look up a human-readable place name for a latitude/longitude pair.

    Sends a GET request to ``https://geocode.maps.co/reverse`` and returns the
    ``display_name`` field of the JSON response. The lookup is best-effort:
    any network, HTTP, or response-format problem yields ``"No data"``
    instead of raising.

    Args:
        lat: Latitude to look up.
        lon: Longitude to look up.
        timeout: Seconds to wait for the service before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The ``display_name`` string from the response, or ``"No data"`` when
        the lookup fails.
    """
    payload = {
        'lat': lat,
        'lon': lon,
        'zoom': 12,
        'format': 'jsonv2',
        'accept-language': 'en',
    }
    try:
        response = requests.get(
            'https://geocode.maps.co/reverse',
            params=payload,
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()['display_name']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # RequestException: connection failure, timeout, or HTTP error status.
        # ValueError: response body is not valid JSON.
        # KeyError/TypeError: JSON lacks 'display_name' or is not a mapping.
        return "No data"
def predict(text):
    """Run the full pipeline on one tweet and return a one-row DataFrame.

    The tweet is cleaned with ``process_tweet`` and scored with
    ``predict_relevancy``. Only when the label is ``'relevant'`` are the
    coordinates predicted and reverse geocoded; otherwise the row keeps its
    placeholder values (zeros and an empty string).

    Returns:
        A ``pandas.DataFrame`` with one row and the columns
        ``relevancy_score``, ``lat``, ``lon`` and ``reversed lat/lon``.
    """
    cleaned = process_tweet(text)
    row = {"relevancy_score": 0, "lat": 0, "lon": 0, "reversed lat/lon": ""}

    label, score = predict_relevancy(cleaned)
    if label != 'relevant':
        # Not relevant: report the placeholder row unchanged.
        return pd.DataFrame([row])

    latitude, longitude = predict_coordinates(cleaned)
    row['relevancy_score'] = round(score * 100, 2)
    row['lat'] = latitude
    row['lon'] = longitude
    row['reversed lat/lon'] = reverse_geocode(latitude, longitude)
    return pd.DataFrame([row])
with gr.Blocks() as demo:
    # Page title.
    gr.Markdown("# **<p align='center'>Twitter geocoding with 🤗 Transformers</p>**")
    # Short description of the pipeline stages, one Markdown element per line.
    for _description_line in (
        "### <div align='left'>Pipeline consists of:</div>",
        "### <div align='left'>1) Relevancy scoring model - predicts whether a tweet has geocoding related information</div>",
        "### <div align='left'>2) Coordinate predicting model - predicts exact latitude and longitude of user by tweet</div>",
        "### <div align='left'>3) Nominatim API for reverse geocoding lat/lon - uses open street map to reverse geocode lat and lon</div>",
    ):
        gr.Markdown(_description_line)

    # Submitting the textbox runs predict and shows its DataFrame result.
    inputs = gr.Textbox(placeholder="Enter the tweet")
    outputs = [gr.Dataframe(label="Geocoded data")]
    inputs.submit(predict, inputs=inputs, outputs=outputs)
# Launch the Gradio app only when this file is executed as a script.
# (A stray trailing "|" after the call made this statement a syntax error.)
if __name__ == "__main__":
    demo.launch()