File size: 4,211 Bytes
d36d50b 75c487d d36d50b ebc546a d36d50b f81acf7 d36d50b bb42b73 d36d50b 75c487d d36d50b bb42b73 d36d50b bb42b73 ebc546a d36d50b bb42b73 d36d50b ebc546a bb42b73 d36d50b ebc546a bb42b73 d36d50b ebc546a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import os
import yaml
import gdown
import time
import gradio as gr
from predict import PredictTri
from gradio import blocks
# --- Start-up: fetch missing assets, load the config, build the predictor ---

# Google Drive share-link template; the placeholder receives a file id.
gdrive_templ = "https://drive.google.com/file/d/{}/view?usp=sharing"

# First asset: downloaded only when it is not already present on disk.
output_path = "tashkeela-d2.pt"
if not os.path.exists(output_path):
    model_gdrive_id = "1FGelqImFkESbTyRsx_elkKIOZ9VbhRuo"
    model_url = gdrive_templ.format(model_gdrive_id)
    gdown.download(model_url, output=output_path, quiet=False, fuzzy=True)
    # NOTE(review): the pause is assumed to belong to the download branch;
    # the original nesting is not recoverable from the flattened source.
    time.sleep(1)

# Second asset: same pattern; `output_path` is deliberately rebound.
output_path = "vocab.vec"
if not os.path.exists(output_path):
    vocab_gdrive_id = "1-0muGvcSYEf8RAVRcwXay4MRex6kmCii"
    vocab_url = gdrive_templ.format(vocab_gdrive_id)
    gdown.download(vocab_url, output=output_path, quiet=False, fuzzy=True)

# NOTE(review): yaml.FullLoader is fine for a trusted local file; do not
# point this at user-supplied YAML.
with open("config.yaml", 'r', encoding="utf-8") as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Copy the predictor window into the two "train" limits before the
# predictor is constructed from the config.
window = config["predictor"]["window"]
config["train"]["max-sent-len"] = window
config["train"]["max-token-count"] = window * 3

predictor = PredictTri(config)
def diacritze_full(text):
    """Diacritize ``text`` with partial mode turned off.

    Args:
        text: Input string; it is split on ``'\\n'`` to form the ``lines``
            handed to the predictor.

    Returns:
        Whatever ``predictor.predict_partial`` returns for those lines.
    """
    # Positional arguments: text, do_partial flag, hard-mask flag, threshold.
    # The last two are not set for the full mode, so ``None`` is passed.
    predictor.create_dataloader(text, False, None, None)
    return predictor.predict_partial(do_partial=False, lines=text.split('\n'))
def diacritze_partial(text, mask_mode, threshold):
    """Diacritize ``text`` with partial mode turned on.

    Args:
        text: Input string; it is split on ``'\\n'`` to form the ``lines``
            handed to the predictor.
        mask_mode: Masking mode label; only the exact value ``"Hard"``
            enables the hard-mask flag.
        threshold: Value forwarded unchanged to the dataloader setup.

    Returns:
        Whatever ``predictor.predict_partial`` returns for those lines.
    """
    use_hard_mask = mask_mode == "Hard"
    predictor.create_dataloader(text, True, use_hard_mask, threshold)
    return predictor.predict_partial(do_partial=True, lines=text.split('\n'))
# Arabic UI strings, restored from mis-encoded (mojibake) literals that had
# also been split across physical lines, which made them invalid Python.
# Placeholder for the empty input boxes (Arabic for "write here").
_INPUT_PLACEHOLDER = "اكتب هنا"
# Undiacritized sample sentence offered as a clickable example in both tabs.
_EXAMPLE_TEXT = "ولو حمل من مجلس الخيار ، ولم يمنع من الكلام"

# Two-tab Gradio UI: one tab calls diacritze_full, the other calls
# diacritze_partial with a masking mode and a threshold.
with gr.Blocks(theme=gr.themes.Default(text_size="lg")) as demo:
    # The Markdown text is kept flush-left so heading markers start the line.
    gr.Markdown(
"""
# Partial Diacritization: A Context-Contrastive Inference Approach
### Authors: Muhammad ElNokrashy, Badr AlKhamissi
### Paper Link: TBD
""")

    with gr.Tab(label="Full Diacritization"):
        # Right-to-left input and output boxes for Arabic text.
        full_input_txt = gr.Textbox(
            placeholder=_INPUT_PLACEHOLDER,
            lines=5,
            label="Input",
            type='text',
            rtl=True,
            text_align='right',
        )
        full_output_txt = gr.Textbox(
            lines=5,
            label="Output",
            type='text',
            rtl=True,
            text_align='right',
            show_copy_button=True,
        )
        full_btn = gr.Button(value="Shakkel")
        full_btn.click(diacritze_full, inputs=[full_input_txt], outputs=[full_output_txt])

        # cache_examples=True asks Gradio to precompute the example output
        # with `fn` rather than on click (see the Gradio Examples docs).
        gr.Examples(
            examples=[
                _EXAMPLE_TEXT,
            ],
            inputs=full_input_txt,
            outputs=full_output_txt,
            fn=diacritze_full,
            cache_examples=True,
        )

    with gr.Tab(label="Partial Diacritization") as partial_settings:
        # NOTE(review): the row is assumed to hold only the two settings
        # controls; the original nesting is not recoverable from the
        # flattened source.
        with gr.Row():
            masking_mode = gr.Radio(choices=["Hard", "Soft"], value="Hard", label="Masking Mode")
            threshold_slider = gr.Slider(label="Soft Masking Threshold", minimum=0, maximum=1, value=0.1)

        partial_input_txt = gr.Textbox(
            placeholder=_INPUT_PLACEHOLDER,
            lines=5,
            label="Input",
            type='text',
            rtl=True,
            text_align='right',
        )
        partial_output_txt = gr.Textbox(
            lines=5,
            label="Output",
            type='text',
            rtl=True,
            text_align='right',
            show_copy_button=True,
        )
        partial_btn = gr.Button(value="Shakkel")
        partial_btn.click(diacritze_partial, inputs=[partial_input_txt, masking_mode, threshold_slider], outputs=[partial_output_txt])

        # Each example row supplies one value per input component, in order:
        # text, masking mode, threshold.
        gr.Examples(
            examples=[
                [_EXAMPLE_TEXT, "Hard", 0],
            ],
            inputs=[partial_input_txt, masking_mode, threshold_slider],
            outputs=partial_output_txt,
            fn=diacritze_partial,
            cache_examples=True,
        )
if __name__ == "__main__":
    # Enable request queuing, then start the server with default settings.
    queued_demo = demo.queue()
    # Launch options left disabled in the original, kept for reference:
    #   share=False, debug=False, server_port=7860, server_name="0.0.0.0",
    #   ssl_verify=False, ssl_certfile="cert.pem", ssl_keyfile="key.pem"
    queued_demo.launch()
|