# Spaces: molokhovdmitry/social-stat (status: Sleeping)
# File size: 3,314 Bytes
# Revision: 0d1ee8d

import json
import pandas as pd
import plotly.express as px

# The 20 language codes that the language detection model can predict.
LANG_CODES = [
    'ar', 'bg', 'de', 'el', 'en',
    'es', 'fr', 'hi', 'it', 'ja',
    'nl', 'pl', 'pt', 'ru', 'sw',
    'th', 'tr', 'ur', 'vi', 'zh',
]

# Maps a country name to the single language code used to color that country
# on the language map. Each country appears exactly once.
COUNTRY_TO_LANG_CODE = {
    # Arabic
    'Algeria': 'ar',
    'Chad': 'ar',
    'Djibouti': 'ar',
    'Egypt': 'ar',
    'Iraq': 'ar',
    'Jordan': 'ar',
    'Kuwait': 'ar',
    'Lebanon': 'ar',
    'Libya': 'ar',
    'Mali': 'ar',
    'Mauritania': 'ar',
    'Morocco': 'ar',
    'Oman': 'ar',
    'Palestine': 'ar',
    'Qatar': 'ar',
    'Saudi Arabia': 'ar',
    'Somalia': 'ar',
    'Sudan': 'ar',
    'Syria': 'ar',
    'Tunisia': 'ar',
    'United Arab Emirates': 'ar',
    'Yemen': 'ar',
    # Bulgarian
    'Bulgaria': 'bg',
    # German
    'Germany': 'de',
    # Greek
    'Greece': 'el',
    'Cyprus': 'el',
    # English
    'United States of America': 'en',
    'Ireland': 'en',
    'United Kingdom': 'en',
    'Canada': 'en',
    'Australia': 'en',
    # Spanish
    'Mexico': 'es',
    'Colombia': 'es',
    'Spain': 'es',
    'Argentina': 'es',
    'Peru': 'es',
    'Venezuela': 'es',
    'Chile': 'es',
    'Guatemala': 'es',
    'Ecuador': 'es',
    'Bolivia': 'es',
    'Cuba': 'es',
    'Dominican Rep.': 'es',
    'Honduras': 'es',
    'Paraguay': 'es',
    'El Salvador': 'es',
    'Nicaragua': 'es',
    'Costa Rica': 'es',
    'Panama': 'es',
    'Uruguay': 'es',
    # NOTE(review): this key may have been meant to be Equatorial Guinea
    # rather than Guinea -- confirm against the GeoJSON country names.
    'Guinea': 'es',
    # French
    'France': 'fr',
    # Hindi
    'India': 'hi',
    # Italian
    'Italy': 'it',
    # Japanese
    'Japan': 'ja',
    # Dutch
    'Netherlands': 'nl',
    'Belgium': 'nl',
    # Polish
    'Poland': 'pl',
    # Portuguese
    'Portugal': 'pt',
    # Russian
    'Russia': 'ru',
    # Swahili
    'Uganda': 'sw',
    'Kenya': 'sw',
    'Tanzania': 'sw',
    # Thai
    'Thailand': 'th',
    # Turkish
    'Turkey': 'tr',
    # Urdu
    'Pakistan': 'ur',
    # Vietnamese
    'Vietnam': 'vi',
    # Chinese
    'China': 'zh',
}


def lang_map(df, geojson_path='data/countries.geo.json'):
    """Build a world choropleth of predicted-language counts per country.

    Every country in the GeoJSON file that has an entry in
    ``COUNTRY_TO_LANG_CODE`` is colored by the number of rows in ``df``
    whose ``predicted_language`` equals that country's language code.
    Countries whose language code does not occur in ``df`` are omitted.

    Args:
        df: DataFrame containing a ``predicted_language`` column.
        geojson_path: Path to a GeoJSON file whose features each carry a
            ``properties.name`` country name. Defaults to the bundled
            ``data/countries.geo.json``.

    Returns:
        The figure created by ``px.choropleth`` with the layout applied.
    """
    # Explicit encoding so loading does not depend on the platform's
    # default locale encoding.
    with open(geojson_path, encoding='utf-8') as f:
        countries = json.load(f)

    # Series indexed by language code, holding the number of rows per code.
    lang_counts = df.value_counts('predicted_language')

    countries_data = []
    lang_count_data = []
    lang_code_data = []
    for feature in countries['features']:
        country = feature['properties']['name']
        country_lang = COUNTRY_TO_LANG_CODE.get(country)
        # Skip countries without a mapped language, and mapped languages
        # that never occur in the predictions.
        if country_lang is None or country_lang not in lang_counts.index:
            continue
        countries_data.append(country)
        lang_count_data.append(lang_counts.loc[country_lang])
        lang_code_data.append(country_lang)

    lang_df = pd.DataFrame({
        'country': countries_data,
        'count': lang_count_data,
        'lang_code': lang_code_data,
    })

    fig = px.choropleth(
        lang_df,
        geojson=countries,
        locations='country',
        locationmode='country names',
        color='count',
        color_continuous_scale=[
            [0, "rgb(45,45,48)"],
            [0.33, "rgb(116,173,209)"],
            [0.66, "rgb(255,255,0)"],
            [1, "rgb(255,94,5)"],
        ],
        scope='world',
        hover_data=['lang_code'],
        labels={'count': "Language Count"},
        template='plotly_dark',
    )
    fig.update_geos(showcountries=True)
    fig.update_layout(
        title_text="Language Map",
        margin={"r": 0, "t": 20, "l": 0, "b": 0},
    )

    return fig