import gradio as gr
from gr_nlp_toolkit import Pipeline
# Author: Lefteris Loukas
# Date: August 2024
# Description: A Gradio interface for the Grεεk NLP Toolkit, which includes Greeklish to Greek conversion, dependency parsing, part-of-speech tagging, and named entity recognition.
# Point-of-Contact: http://nlp.cs.aueb.gr/
# Initialize Pipelines
# A single pipeline instance, built once at import time from the processor
# string "pos,ner,dp,g2g"; every handler in this file calls this same object.
nlp_pos_ner_dp_with_g2g = Pipeline("pos,ner,dp,g2g")
# Example sentences, one per tab. Each is shown as the textbox placeholder in
# the UI and starts with the literal label "e.g., ". When a handler receives
# no text it falls back to its example, slicing that label off first.
G2G_PLACEHOLDER = "e.g., H thessaloniki einai mia poli sti boreia ellada"
NER_PLACEHOLDER = "e.g., Η Αργεντινή κέρδισε το Παγκόσμιο Κύπελλο το 2022"
POS_PLACEHOLDER = "e.g., Μου αρέσει να διαβάζω τα post του Andrew Ng στο Twitter."
DP_PLACEHOLDER = "e.g., Προτιμώ την πρωινή πτήση από την Αθήνα στη Θεσσαλονίκη."
def greeklish_to_greek(text):
    """Run the Greeklish-to-Greek tab: return the pipeline's token texts.

    Args:
        text: Text typed into the "Enter Greeklish text" box. When it is
            empty, ``None`` or whitespace-only, the example sentence from
            ``G2G_PLACEHOLDER`` is processed instead.

    Returns:
        The ``text`` attribute of every token in the pipeline output,
        joined with single spaces.
    """
    if not text or not text.strip():
        # The label "e.g., " is six characters long (including the trailing
        # space); slicing by its length avoids feeding the pipeline a
        # sentence that starts with a stray blank.
        text = G2G_PLACEHOLDER[len("e.g., "):]
    doc = nlp_pos_ner_dp_with_g2g(text)
    return " ".join(token.text for token in doc.tokens)
def process_text(text, task):
    """Annotate ``text`` with the shared pipeline and render one line per token.

    Args:
        text: The sentence passed to the pipeline.
        task: Which annotation to report: ``"dp"``, ``"pos"`` or ``"ner"``.

    Returns:
        A newline-separated string with one formatted line per token.

    Raises:
        KeyError: If ``task`` is not a supported key and the pipeline
            produced at least one token (the lookup happens per token).
    """

    def _dp_line(tok):
        return f"Text: {tok.text}, Head: {tok.head}, Deprel: {tok.deprel}"

    def _pos_line(tok):
        return f"Text: {tok.text}, UPOS: {tok.upos}, Feats: {tok.feats}"

    def _ner_line(tok):
        return f"Text: {tok.text}, NER: {tok.ner}"

    analysed = nlp_pos_ner_dp_with_g2g(text)
    formatters = {"dp": _dp_line, "pos": _pos_line, "ner": _ner_line}

    rendered = []
    for tok in analysed.tokens:
        # Resolve the formatter inside the loop so that an unsupported task
        # only fails when there is a token to format.
        rendered.append(formatters[task](tok))
    return "\n".join(rendered)
def dependency_parsing(text):
    """Run the Dependency Parsing tab: return per-token head/deprel lines.

    Args:
        text: Text typed into the tab's input box. When it is empty,
            ``None`` or whitespace-only, the example sentence from
            ``DP_PLACEHOLDER`` is processed instead.

    Returns:
        The string produced by ``process_text(text, "dp")``.
    """
    if not text or not text.strip():
        # Slice off the full six-character "e.g., " label so the fallback
        # sentence does not begin with a leftover space.
        text = DP_PLACEHOLDER[len("e.g., "):]
    return process_text(text, "dp")
def pos_tagging(text):
    """Run the Part-of-Speech Tagging tab: return per-token UPOS/feats lines.

    Args:
        text: Text typed into the tab's input box. When it is empty,
            ``None`` or whitespace-only, the example sentence from
            ``POS_PLACEHOLDER`` is processed instead.

    Returns:
        The string produced by ``process_text(text, "pos")``.
    """
    if not text or not text.strip():
        # Slice off the full six-character "e.g., " label so the fallback
        # sentence does not begin with a leftover space.
        text = POS_PLACEHOLDER[len("e.g., "):]
    return process_text(text, "pos")
def named_entity_recognition(text):
    """Run the Named Entity Recognition tab: return per-token NER lines.

    Args:
        text: Text typed into the tab's input box. When it is empty,
            ``None`` or whitespace-only, the example sentence from
            ``NER_PLACEHOLDER`` is processed instead.

    Returns:
        The string produced by ``process_text(text, "ner")``.
    """
    if not text or not text.strip():
        # Slice off the full six-character "e.g., " label so the fallback
        # sentence does not begin with a leftover space.
        text = NER_PLACEHOLDER[len("e.g., "):]
    return process_text(text, "ner")
# Define the Gradio interface
def create_demo():
theme = gr.themes.Soft()
with gr.Blocks(theme=theme) as demo:
gr.Markdown(
"""
# The Grεεk NLP Toolkit 🇬🇷
This is a demonstration space for our open-source Python toolkit (`gr-nlp-toolkit`), which supports state-of-the-art natural language processing capabilities in Greek.
## Key Features:
- Greeklish to Greek Conversion (G2G)
- Dependency Parsing (DP)
- Part-of-Speech (POS) Tagging
- Named Entity Recognition (NER)
"""
)
with gr.Tab("Greeklish to Greek"):
g2g_input = gr.Textbox(
label="Enter Greeklish text",
placeholder=G2G_PLACEHOLDER,
)
g2g_output = gr.Textbox(label="Greek text")
g2g_button = gr.Button("Submit")
g2g_button.click(greeklish_to_greek, inputs=g2g_input, outputs=g2g_output)
with gr.Tab("Dependency Parsing"):
dp_input = gr.Textbox(
label="Enter text",
placeholder=DP_PLACEHOLDER,
)
dp_output = gr.Textbox(label="Dependency Parsing annotations")
dp_button = gr.Button("Submit")
dp_button.click(dependency_parsing, inputs=dp_input, outputs=dp_output)
with gr.Tab("Part-of-Speech Tagging"):
pos_input = gr.Textbox(
label="Enter text",
placeholder=POS_PLACEHOLDER,
)
pos_output = gr.Textbox(label="POS Tagging annotations")
pos_button = gr.Button("Submit")
pos_button.click(pos_tagging, inputs=pos_input, outputs=pos_output)
with gr.Tab("Named Entity Recognition"):
ner_input = gr.Textbox(
label="Enter text",
placeholder=NER_PLACEHOLDER,
)
ner_output = gr.Textbox(label="NER annotations")
ner_button = gr.Button("Submit")
ner_button.click(
named_entity_recognition, inputs=ner_input, outputs=ner_output
)
gr.Markdown(
"""
## Installation
The Grεεk NLP toolkit is available on PyPI for Python 3.9+:
```sh
pip install gr-nlp-toolkit
```
## Github Repository
Visit the [GitHub repository](https://github.com/nlpaueb/gr-nlp-toolkit) for more information, such as documentation and full usage examples.
## About the Project
The Greek NLP Toolkit is the state-of-the-art natural language processing toolkit for modern Greek, [developed by the Natural Language Processing Group at the Athens University of Economics and Business](http://nlp.cs.aueb.gr/).
It supports named entity recognition, part-of-speech tagging, morphological tagging, dependency parsing,
and Greeklish to Greek conversion. This project is part of ongoing research aimed at advancing Greek language processing capabilities.