File size: 5,558 Bytes
2df040b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e54f46d
42b2dcb
2df040b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42b2dcb
2df040b
 
 
 
 
 
42b2dcb
 
2df040b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import gradio as gr
from gr_nlp_toolkit import Pipeline

# Author: Lefteris Loukas
# Date: August 2024
# Description: A Gradio interface for the Grεεk NLP Toolkit, which includes Greeklish to Greek conversion, dependency parsing, part-of-speech tagging, and named entity recognition.
# Point-of-Contact: http://nlp.cs.aueb.gr/

# Initialize Pipelines
# One shared pipeline, created once at import time with the processor string
# "pos,ner,dp,g2g", is reused by every handler in this file.
nlp_pos_ner_dp_with_g2g = Pipeline("pos,ner,dp,g2g")

# Placeholder texts shown inside the input boxes. Each handler also falls back
# to its placeholder (with the leading "e.g." marker sliced off) when the user
# submits an empty box.
G2G_PLACEHOLDER = "e.g., H thessaloniki einai mia poli sti boreia ellada"
NER_PLACEHOLDER = "e.g., Η Αργεντινή κέρδισε το Παγκόσμιο Κύπελλο το 2022"
POS_PLACEHOLDER = "e.g., Μου αρέσει να διαβάζω τα post του Andrew Ng στο Twitter."
DP_PLACEHOLDER = "e.g., Προτιμώ την πρωινή πτήση από την Αθήνα στη Θεσσαλονίκη."


def greeklish_to_greek(text):
    """Run the shared pipeline on Greeklish input and return the resulting text.

    Args:
        text: Text from the input box. When it is empty, ``None`` or contains
            only whitespace, the example sentence stored in
            ``G2G_PLACEHOLDER`` is used instead.

    Returns:
        The texts of the tokens produced by the pipeline, joined by single
        spaces.
    """
    if not text or not text.strip():
        # Drop the whole "e.g., " marker (6 characters). Slicing at 5 kept the
        # space after the comma, so the fallback started with a stray blank.
        text = G2G_PLACEHOLDER[len("e.g., "):]

    doc = nlp_pos_ner_dp_with_g2g(text)
    return " ".join(token.text for token in doc.tokens)


def process_text(text, task):
    """Annotate ``text`` with the shared pipeline and render one line per token.

    Args:
        text: The sentence to analyse.
        task: Which annotation to render: ``"dp"``, ``"pos"`` or ``"ner"``.

    Returns:
        A newline-separated string with one formatted line per token.

    Raises:
        KeyError: If ``task`` is not one of the supported keys (raised when the
            first token is formatted).
    """
    # One formatter per task; only the selected one is ever evaluated.
    formatters = {
        "dp": lambda tok: f"Text: {tok.text}, Head: {tok.head}, Deprel: {tok.deprel}",
        "pos": lambda tok: f"Text: {tok.text}, UPOS: {tok.upos}, Feats: {tok.feats}",
        "ner": lambda tok: f"Text: {tok.text}, NER: {tok.ner}",
    }

    annotated = nlp_pos_ner_dp_with_g2g(text)

    lines = []
    for tok in annotated.tokens:
        lines.append(formatters[task](tok))
    return "\n".join(lines)


def dependency_parsing(text):
    """Return dependency-parsing annotations for ``text``.

    Args:
        text: Text from the input box. When it is empty, ``None`` or contains
            only whitespace, the example sentence stored in ``DP_PLACEHOLDER``
            is used instead.

    Returns:
        The string produced by ``process_text(text, "dp")``: one line per
        token with its text, head and dependency relation.
    """
    if not text or not text.strip():
        # Drop the whole "e.g., " marker (6 characters). Slicing at 5 kept the
        # space after the comma, so the fallback started with a stray blank.
        text = DP_PLACEHOLDER[len("e.g., "):]
    return process_text(text, "dp")


def pos_tagging(text):
    """Return part-of-speech annotations for ``text``.

    Args:
        text: Text from the input box. When it is empty, ``None`` or contains
            only whitespace, the example sentence stored in
            ``POS_PLACEHOLDER`` is used instead.

    Returns:
        The string produced by ``process_text(text, "pos")``: one line per
        token with its text, UPOS tag and features.
    """
    if not text or not text.strip():
        # Drop the whole "e.g., " marker (6 characters). Slicing at 5 kept the
        # space after the comma, so the fallback started with a stray blank.
        text = POS_PLACEHOLDER[len("e.g., "):]
    return process_text(text, "pos")


def named_entity_recognition(text):
    """Return named-entity annotations for ``text``.

    Args:
        text: Text from the input box. When it is empty, ``None`` or contains
            only whitespace, the example sentence stored in
            ``NER_PLACEHOLDER`` is used instead.

    Returns:
        The string produced by ``process_text(text, "ner")``: one line per
        token with its text and NER label.
    """
    if not text or not text.strip():
        # Drop the whole "e.g., " marker (6 characters). Slicing at 5 kept the
        # space after the comma, so the fallback started with a stray blank.
        text = NER_PLACEHOLDER[len("e.g., "):]

    return process_text(text, "ner")


# Define the Gradio interface
def create_demo():
    """Build the Gradio UI for the toolkit demo.

    The layout is an introductory Markdown section, one tab per task
    (Greeklish to Greek, dependency parsing, POS tagging, NER), each with an
    input box, an output box and a submit button wired to its handler, and a
    closing Markdown section with installation and project information.

    Returns:
        The assembled ``gr.Blocks`` object. It is not launched here.
    """
    theme = gr.themes.Soft()
    with gr.Blocks(theme=theme) as demo:
        gr.Markdown(
            """
        # The Grεεk NLP Toolkit 🇬🇷
        This is a demonstration space for our open-source Python toolkit (`gr-nlp-toolkit`), which supports state-of-the-art natural language processing capabilities in Greek. 
        
        ## Key Features:
        - Greeklish to Greek Conversion (G2G)
        - Dependency Parsing (DP)
        - Part-of-Speech (POS) Tagging
        - Named Entity Recognition (NER)
        """
        )

        with gr.Tab("Greeklish to Greek"):
            g2g_input = gr.Textbox(
                label="Enter Greeklish text",
                placeholder=G2G_PLACEHOLDER,
            )
            g2g_output = gr.Textbox(label="Greek text")
            g2g_button = gr.Button("Submit")
            g2g_button.click(greeklish_to_greek, inputs=g2g_input, outputs=g2g_output)

        with gr.Tab("Dependency Parsing"):
            dp_input = gr.Textbox(
                label="Enter text",
                placeholder=DP_PLACEHOLDER,
            )
            dp_output = gr.Textbox(label="Dependency Parsing annotations")
            dp_button = gr.Button("Submit")
            dp_button.click(dependency_parsing, inputs=dp_input, outputs=dp_output)

        with gr.Tab("Part-of-Speech Tagging"):
            pos_input = gr.Textbox(
                label="Enter text",
                placeholder=POS_PLACEHOLDER,
            )
            pos_output = gr.Textbox(label="POS Tagging annotations")
            pos_button = gr.Button("Submit")
            pos_button.click(pos_tagging, inputs=pos_input, outputs=pos_output)

        with gr.Tab("Named Entity Recognition"):
            ner_input = gr.Textbox(
                label="Enter text",
                placeholder=NER_PLACEHOLDER,
            )
            ner_output = gr.Textbox(label="NER annotations")
            ner_button = gr.Button("Submit")
            ner_button.click(
                named_entity_recognition, inputs=ner_input, outputs=ner_output
            )

        # NOTE: the Markdown link target below must not be wrapped in quotes;
        # quotes inside the parentheses become part of the link destination and
        # the link no longer points at the repository.
        gr.Markdown(
            """

        ## Installation

        The Grεεk NLP toolkit is available on PyPI for Python 3.9+:
        
        ```sh
        pip install gr-nlp-toolkit
        ```

        ## Github Repository

        Visit the [GitHub repository](https://github.com/nlpaueb/gr-nlp-toolkit) for more information, such as documentation and full usage examples.

        ## About the Project
        
        The Greek NLP Toolkit is the state-of-the-art natural language processing toolkit for modern Greek, [developed by the Natural Language Processing Group at the Athens University of Economics and Business](http://nlp.cs.aueb.gr/).
        It supports named entity recognition, part-of-speech tagging, morphological tagging, dependency parsing, 
        and Greeklish to Greek conversion. This project is part of ongoing research aimed at advancing Greek language processing capabilities.
        <br>
        <br>    

        <div style="text-align: center;">
            <a href="https://github.com/nlpaueb/gr-nlp-toolkit">
                <img src="https://img.shields.io/badge/GitHub-Repository-181717?logo=github" alt="GitHub" style="display: block; margin: auto;">
            </a>
            <a href="https://github.com/nlpaueb/gr-nlp-toolkit">https://github.com/nlpaueb/gr-nlp-toolkit</a>
        </div>

        

        © 2024 The Greek NLP Toolkit. All rights reserved.
        """
        )

    return demo


# Launch the Gradio interface
if __name__ == "__main__":
    # Passed to launch() as ``share``; kept False so that running the script
    # does not request a public share link.
    DEPLOY_TO_THE_PUBLIC_FLAG = False

    # Build the UI, then start serving it.
    demo = create_demo()
    demo.launch(share=DEPLOY_TO_THE_PUBLIC_FLAG)