File size: 4,758 Bytes
cab3f4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53c86f7
cab3f4d
 
53c86f7
 
 
 
cab3f4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53c86f7
 
b6e02e5
 
 
 
 
 
 
 
 
 
 
53c86f7
b6e02e5
53c86f7
b6e02e5
 
 
 
 
 
 
 
 
 
 
53c86f7
b6e02e5
 
cab3f4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53c86f7
cab3f4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8143d78
cab3f4d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import streamlit as st
from datasets import load_dataset
import pandas as pd
import plotly.graph_objects as go
from transformers import pipeline


@st.cache_data
def fetch_counts():
    """Tally row counts for the ``atlasia/darija-translation`` train split.

    The split is loaded with ``load_dataset`` and wrapped in a pandas
    DataFrame. The function is decorated with ``st.cache_data``, so
    Streamlit caches the returned dictionary.

    Returns:
        A dict with three integer entries:
        ``"n_eng"`` - rows whose ``en`` column is not missing,
        ``"n_fr"`` - rows whose ``fr`` column is not missing,
        ``"n"`` - total number of rows.
    """
    frame = pd.DataFrame(
        load_dataset("atlasia/darija-translation", split="train")
    )
    # Count the non-missing cells of each language column.
    filled = {
        column: int(frame[column].notna().sum()) for column in ("en", "fr")
    }
    return {"n_eng": filled["en"], "n_fr": filled["fr"], "n": len(frame)}


@st.cache_resource
def _load_terjman_pipeline():
    """Build the ``atlasia/Terjman-Large`` text2text pipeline.

    Decorated with ``st.cache_resource`` so the pipeline object is created
    once and reused, instead of being re-instantiated for every translation
    request.
    """
    return pipeline("text2text-generation", model="atlasia/Terjman-Large")


def terjman(input_text: str) -> str:
    """Run *input_text* through the ``atlasia/Terjman-Large`` pipeline.

    Args:
        input_text: Text handed to the text2text-generation pipeline.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    pipe = _load_terjman_pipeline()
    # max_length is the generation-length limit passed to the pipeline call.
    result = pipe(input_text, max_length=512)
    return result[0]["generated_text"]

@st.cache_resource
def _load_transliteration_pipeline():
    """Build the ``atlasia/Transliteration-Moroccan-Darija`` pipeline.

    Decorated with ``st.cache_resource`` so the pipeline object is created
    once and reused, instead of being re-instantiated on every call.
    """
    return pipeline(
        "text2text-generation",
        model="atlasia/Transliteration-Moroccan-Darija",
    )


def transliterate(input_text: str) -> str:
    """Run *input_text* through the Moroccan-Darija transliteration pipeline.

    Args:
        input_text: Text handed to the text2text-generation pipeline.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    pipe = _load_transliteration_pipeline()
    # max_length is the generation-length limit passed to the pipeline call.
    result = pipe(input_text, max_length=50)
    return result[0]["generated_text"]

if __name__ == "__main__":
    # Page layout, top to bottom: logo, call-to-action headline, translation
    # widget, then dataset statistics (gauge + pie chart).
    st.image("atlasia_white_wtext_nobg.png")

    counts = fetch_counts()
    n_goal = 100000  # upper bound of the gauge axis below
    total_submissions = counts["n"]

    st.text("")
    # center text
    st.markdown(
        """
    <h1 style='text-align: center; font-size: 20px;'>
    Contribute now to help build a better Darija dataset for all Moroccans
    Contribute here: <a href="https://atlasia.ma" target="_blank">https://atlasia.ma</a>
    </h1>
    """,
        unsafe_allow_html=True,
    )

    st.divider()

    # NOTE(review): the transliteration widget below is commented out, so
    # `transliterate` is not called from this script at the moment.
    # with st.container() as c:
    #     # add a block where users can input text and get a translation
    #     st.markdown(
    #         """
    #         <div style='text-align: center;'>
    #             <h3>🔠Keyboard: Transliterate Letters from Latin to Arabic</h3
    #             >
    #         </div>
    #         """,
    #         unsafe_allow_html=True,
    #     )

    #     col1, col2 = st.columns(2)

    #     with col1:
    #         input_text = st.text_area(":grey[Enter a word/letter in English ⬇]", "")
    #         button = st.button("Transliterate")
    #     with col2:
    #         if button:
    #             with st.spinner('Transliterating...'):
    #                 translation = transliterate(input_text)
    #             st.text_area(
    #                 ":grey[Transliteration]",
    #                 translation,
    #             )

    # st.divider()

    with st.container():
        # add a block where users can input text and get a translation
        st.markdown(
            """
            <div style='text-align: center;'>
                <h3>💬Terjman: Translate to Darija</h3
                >
            </div>
            """,
            unsafe_allow_html=True,
        )
        st.caption(
            """
            <div style='text-align: center;'>
                <h3>This model has been developed thanks to your contributions. 
                While it's not perfect yet, your continued input is key for making it better.</h3>
            </div>
            """,
            unsafe_allow_html=True,
        )
        # Left column: input and trigger button; right column: model output.
        col1, col2 = st.columns(2)

        with col1:
            input_text = st.text_area(":grey[Enter a sentence in English ⬇]", "")
            button = st.button("Translate")
        with col2:
            if button:
                if input_text.strip():
                    with st.spinner('Translating...'):
                        translation = terjman(input_text)
                    st.text_area(
                        ":grey[Translation in Darija]",
                        translation,
                    )
                else:
                    # Do not run the model on blank input; tell the user
                    # what is missing instead.
                    st.warning("Please enter a sentence to translate.")

    # add a separator
    st.divider()
    st.markdown(
        """
        <div style='text-align: center;'>
            <h3>📊 Data statistics</h3
            >
        </div>
        """,
        unsafe_allow_html=True,
    )

    # make progress chart: a gauge of the total row count on a 0..n_goal axis
    fig = go.Figure(
        go.Indicator(
            domain={"x": [0, 1], "y": [0, 1]},
            value=total_submissions,
            mode="gauge+number+delta",
            title={"text": "Number of translations"},
            # NOTE(review): the delta is computed against a fixed reference
            # of 42000; what that baseline stands for is not visible here.
            delta={"reference": 42000},
            gauge={
                "axis": {"range": [0, n_goal]},
                "steps": [
                    # Shade the portion of the axis already covered.
                    {"range": [0, total_submissions], "color": "gray"},
                ],
                "threshold": {
                    "line": {"color": "green", "width": 4},
                    "thickness": 0.75,
                    # Marker line at half of the goal.
                    "value": n_goal / 2,
                },
            },
        )
    )

    st.plotly_chart(fig, use_container_width=True)

    # Pie chart comparing the non-missing English and French row counts.
    labels = ["English", "French"]
    values = [counts["n_eng"], counts["n_fr"]]
    # change color to blue and white
    fig = go.Figure(data=[go.Pie(labels=labels, values=values, pull=[0.2, 0])])
    fig.update_traces(marker=dict(colors=["#46607b", "#FFFFFF"]))
    st.plotly_chart(fig, use_container_width=True)