Spaces:
Sleeping
Sleeping
File size: 3,314 Bytes
0d1ee8d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import json
import pandas as pd
import plotly.express as px
# Two-letter codes of the languages the language-detection model can predict.
LANG_CODES = [
    'ar', 'bg', 'de', 'el', 'en',
    'es', 'fr', 'hi', 'it', 'ja',
    'nl', 'pl', 'pt', 'ru', 'sw',
    'th', 'tr', 'ur', 'vi', 'zh',
]
# Maps a country name to the code of the language used to colour that country
# on the map. Keys are compared against the country names read from the
# GeoJSON file, so they must match its spelling exactly.
COUNTRY_TO_LANG_CODE = {
    # Arabic
    'Algeria': 'ar',
    'Chad': 'ar',
    'Djibouti': 'ar',
    'Egypt': 'ar',
    'Iraq': 'ar',
    'Jordan': 'ar',
    'Kuwait': 'ar',
    'Lebanon': 'ar',
    'Libya': 'ar',
    'Mali': 'ar',
    'Mauritania': 'ar',
    'Morocco': 'ar',
    'Oman': 'ar',
    'Palestine': 'ar',
    'Qatar': 'ar',
    'Saudi Arabia': 'ar',
    'Somalia': 'ar',
    'Sudan': 'ar',
    'Syria': 'ar',
    'Tunisia': 'ar',
    'United Arab Emirates': 'ar',
    'Yemen': 'ar',
    # Bulgarian
    'Bulgaria': 'bg',
    # German
    'Germany': 'de',
    # Greek
    'Greece': 'el',
    'Cyprus': 'el',
    # English
    'United States of America': 'en',
    'Ireland': 'en',
    'United Kingdom': 'en',
    'Canada': 'en',
    'Australia': 'en',
    # Spanish
    'Mexico': 'es',
    'Colombia': 'es',
    'Spain': 'es',
    'Argentina': 'es',
    'Peru': 'es',
    'Venezuela': 'es',
    'Chile': 'es',
    'Guatemala': 'es',
    'Ecuador': 'es',
    'Bolivia': 'es',
    'Cuba': 'es',
    'Dominican Rep.': 'es',
    'Honduras': 'es',
    'Paraguay': 'es',
    'El Salvador': 'es',
    'Nicaragua': 'es',
    'Costa Rica': 'es',
    'Panama': 'es',
    'Uruguay': 'es',
    # Spanish is an official language of Equatorial Guinea, not of Guinea
    # (which is French-speaking), so the entry is keyed on the former.
    # NOTE(review): confirm this spelling matches the name in the GeoJSON file.
    'Equatorial Guinea': 'es',
    # French
    'France': 'fr',
    # Hindi
    'India': 'hi',
    # Italian
    'Italy': 'it',
    # Japanese
    'Japan': 'ja',
    # Dutch
    'Netherlands': 'nl',
    'Belgium': 'nl',
    # Polish
    'Poland': 'pl',
    # Portuguese
    'Portugal': 'pt',
    # Russian
    'Russia': 'ru',
    # Swahili
    'Uganda': 'sw',
    'Kenya': 'sw',
    'Tanzania': 'sw',
    # Thai
    'Thailand': 'th',
    # Turkish
    'Turkey': 'tr',
    # Urdu
    'Pakistan': 'ur',
    # Vietnamese
    'Vietnam': 'vi',
    # Chinese
    'China': 'zh',
}
def lang_map(df: pd.DataFrame, geojson_path: str = 'data/countries.geo.json'):
    """Build a world choropleth coloured by predicted-language frequency.

    Every country in the GeoJSON file that has an entry in
    ``COUNTRY_TO_LANG_CODE`` and whose language code occurs in
    ``df['predicted_language']`` is shaded by the number of rows of ``df``
    predicted as that language. Countries without a mapping, or whose
    language never occurs in ``df``, are left out of the plotted data.

    Args:
        df: DataFrame with a ``predicted_language`` column of language codes.
        geojson_path: Path to a GeoJSON feature collection whose features
            store the country name under ``properties.name``. Defaults to
            the file bundled in ``data/``.

    Returns:
        The Plotly choropleth figure.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default
    # encoding, which would garble non-ASCII country names on some systems.
    with open(geojson_path, encoding='utf-8') as f:
        countries = json.load(f)

    # Number of rows per predicted language, indexed by language code.
    lang_counts = df.value_counts('predicted_language')

    # (country, language code) pairs for countries we can colour, kept in
    # the order in which the countries appear in the GeoJSON file.
    matched = []
    for feature in countries['features']:
        country = feature['properties']['name']
        lang_code = COUNTRY_TO_LANG_CODE.get(country)
        if lang_code is not None and lang_code in lang_counts.index:
            matched.append((country, lang_code))

    lang_df = pd.DataFrame({
        'country': [country for country, _ in matched],
        'count': [lang_counts.loc[code] for _, code in matched],
        'lang_code': [code for _, code in matched],
    })

    fig = px.choropleth(
        lang_df,
        geojson=countries,
        locations='country',
        locationmode='country names',
        color='count',
        color_continuous_scale=[
            [0, "rgb(45,45,48)"],
            [0.33, "rgb(116,173,209)"],
            [0.66, "rgb(255,255,0)"],
            [1, "rgb(255,94,5)"],
        ],
        scope='world',
        hover_data=['lang_code'],
        labels={'count': "Language Count"},
        template='plotly_dark',
    )
    fig.update_geos(showcountries=True)
    fig.update_layout(
        title_text="Language Map",
        margin={"r": 0, "t": 20, "l": 0, "b": 0},
    )
    return fig
|