File size: 4,211 Bytes
d36d50b
 
 
75c487d
d36d50b
 
ebc546a
d36d50b
 
f81acf7
d36d50b
 
bb42b73
d36d50b
75c487d
 
d36d50b
 
 
bb42b73
d36d50b
 
 
 
 
 
 
bb42b73
 
 
 
 
 
 
 
 
 
 
 
 
ebc546a
d36d50b
bb42b73
d36d50b
 
ebc546a
bb42b73
 
d36d50b
ebc546a
bb42b73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d36d50b
 
 
 
ebc546a
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import os
import yaml
import gdown 
import time
import gradio as gr
from predict import PredictTri
from gradio import blocks 

# Google Drive share-link template; `{}` is filled with a file id.
gdrive_templ = "https://drive.google.com/file/d/{}/view?usp=sharing"

# Model file: downloaded only when nothing exists at that path yet.
output_path = "tashkeela-d2.pt"
if not os.path.exists(output_path):
    model_gdrive_id = "1FGelqImFkESbTyRsx_elkKIOZ9VbhRuo"
    model_url = gdrive_templ.format(model_gdrive_id)
    gdown.download(model_url, output=output_path, quiet=False, fuzzy=True)

# Fixed one-second pause between the two download steps.
time.sleep(1)

# Vocab file: same download-if-missing rule as above.
output_path = "vocab.vec"
if not os.path.exists(output_path):
    vocab_gdrive_id = "1-0muGvcSYEf8RAVRcwXay4MRex6kmCii"
    vocab_url = gdrive_templ.format(vocab_gdrive_id)
    gdown.download(vocab_url, output=output_path, quiet=False, fuzzy=True)

# Load the YAML configuration that the predictor is built from.
with open("config.yaml", mode="r", encoding="utf-8") as file:
    config = yaml.load(file, Loader=yaml.FullLoader)

# Mirror the predictor window into the train section: the maximum sentence
# length equals the window, and the token budget is three times the window.
window = config["predictor"]["window"]
config["train"]["max-sent-len"] = window
config["train"]["max-token-count"] = window * 3

# Shared predictor instance used by both diacritization callbacks.
predictor = PredictTri(config)

def diacritze_full(text):
    """Diacritize *text* with partial mode switched off.

    Prepares the shared ``predictor`` dataloader for *text* with the partial
    flag set to False and ``None`` for both remaining positional arguments
    (the slots that ``diacritze_partial`` fills with its hard-mask flag and
    threshold), then returns whatever ``predictor.predict_partial`` yields
    for the newline-split input.
    """
    predictor.create_dataloader(text, False, None, None)
    return predictor.predict_partial(do_partial=False, lines=text.split('\n'))

def diacritze_partial(text, mask_mode, threshold):
    """Diacritize *text* with partial mode switched on.

    *mask_mode* is compared against the string ``"Hard"`` and the resulting
    boolean is forwarded, together with *threshold*, to
    ``predictor.create_dataloader``. Returns whatever
    ``predictor.predict_partial`` yields for the newline-split input.
    """
    use_hard_mask = mask_mode == "Hard"
    predictor.create_dataloader(text, True, use_hard_mask, threshold)
    return predictor.predict_partial(do_partial=True, lines=text.split('\n'))

# Gradio UI: one tab per diacritization mode. Each tab has an input box, an
# output box, a trigger button and one cached example.
#
# NOTE: the Arabic literals below must be stored as real UTF-8 text. When the
# file is mis-decoded through a single-byte code page they turn into
# unreadable mojibake, which then shows up in the UI and is fed to the model
# through the cached examples.
with gr.Blocks(theme=gr.themes.Default(text_size="lg")) as demo:
    gr.Markdown(
    """
    # Partial Diacritization: A Context-Contrastive Inference Approach
    ### Authors: Muhammad ElNokrashy, Badr AlKhamissi
    ### Paper Link: TBD
    """)

    with gr.Tab(label="Full Diacritization"):

        # Right-to-left input box; the placeholder is Arabic for "write here".
        full_input_txt = gr.Textbox(
            placeholder="اكتب هنا",
            lines=5,
            label="Input",
            type='text',
            rtl=True,
            text_align='right',
        )

        # Right-to-left output box with a copy button.
        full_output_txt = gr.Textbox(
            lines=5,
            label="Output",
            type='text',
            rtl=True,
            text_align='right',
            show_copy_button=True,
        )

        # Clicking the button runs diacritze_full on the input box contents.
        full_btn = gr.Button(value="Shakkel")
        full_btn.click(diacritze_full, inputs=[full_input_txt], outputs=[full_output_txt])

        # Example sentence wired to the same callback, with caching enabled.
        gr.Examples(
            examples=[
                "ولو حمل من مجلس الخيار ، ولم يمنع من الكلام"
            ],
            inputs=full_input_txt,
            outputs=full_output_txt,
            fn=diacritze_full,
            cache_examples=True,
        )

    with gr.Tab(label="Partial Diacritization") as partial_settings:
        # Controls forwarded to diacritze_partial as mask_mode and threshold.
        with gr.Row():
            masking_mode = gr.Radio(choices=["Hard", "Soft"], value="Hard", label="Masking Mode")
            threshold_slider = gr.Slider(label="Soft Masking Threshold", minimum=0, maximum=1, value=0.1)

        # Right-to-left input box; the placeholder is Arabic for "write here".
        partial_input_txt = gr.Textbox(
            placeholder="اكتب هنا",
            lines=5,
            label="Input",
            type='text',
            rtl=True,
            text_align='right',
        )

        # Right-to-left output box with a copy button.
        partial_output_txt = gr.Textbox(
            lines=5,
            label="Output",
            type='text',
            rtl=True,
            text_align='right',
            show_copy_button=True,
        )

        # Clicking the button runs diacritze_partial on (text, mode, threshold).
        partial_btn = gr.Button(value="Shakkel")
        partial_btn.click(diacritze_partial, inputs=[partial_input_txt, masking_mode, threshold_slider], outputs=[partial_output_txt])

        # Example row: sentence, masking mode, threshold.
        gr.Examples(
            examples=[
                ["ولو حمل من مجلس الخيار ، ولم يمنع من الكلام", "Hard", 0],
            ],
            inputs=[partial_input_txt, masking_mode, threshold_slider],
            outputs=partial_output_txt,
            fn=diacritze_partial,
            cache_examples=True,
        )



if __name__ == "__main__":
    # Turn on the request queue, then launch with Gradio's default settings.
    # Launch options left disabled in this script: share=False, debug=False,
    # server_port=7860, server_name="0.0.0.0", ssl_verify=False,
    # ssl_certfile="cert.pem", ssl_keyfile="key.pem".
    queued_demo = demo.queue()
    queued_demo.launch()