File size: 7,638 Bytes
ceb43b7
2df040b
 
 
 
ceb43b7
 
2df040b
 
ceb43b7
2df040b
ceb43b7
 
 
 
 
2df040b
 
 
 
 
 
 
ceb43b7
2df040b
 
 
 
ceb43b7
 
 
2df040b
 
 
ceb43b7
2df040b
ceb43b7
 
 
2df040b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ceb43b7
 
 
 
 
 
 
 
 
2df040b
 
 
36f15ba
 
909c3a4
 
 
2df040b
 
 
909c3a4
 
2df040b
909c3a4
 
 
 
 
 
2df040b
 
36f15ba
2df040b
 
 
 
909c3a4
2df040b
 
 
909c3a4
 
2df040b
909c3a4
2df040b
909c3a4
 
 
 
 
 
 
 
2df040b
909c3a4
 
 
2df040b
 
 
 
 
42b2dcb
909c3a4
2df040b
 
 
 
 
42b2dcb
 
ceb43b7
 
 
b7b04eb
ceb43b7
 
 
2932744
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ceb43b7
 
2df040b
 
 
ceb43b7
 
2df040b
 
 
 
 
 
 
 
 
 
 
 
58a6b56
2df040b
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import spaces  # isort:skip
import gradio as gr
from gr_nlp_toolkit import Pipeline

# Author: Lefteris Loukas
# Date: January 2025
# Description: A Gradio interface for the Greek NLP Toolkit (gr-nlp-toolkit), which includes Greeklish to Greek conversion, dependency parsing, part-of-speech tagging, and named entity recognition.
# Point-of-Contact: http://nlp.cs.aueb.gr/


# Initialize Pipelines
@spaces.GPU
def allocate_pipeline():
    """Build and return a gr-nlp-toolkit ``Pipeline`` with all four processors.

    The processor string enables POS tagging, NER, dependency parsing and
    Greeklish-to-Greek conversion together. A new ``Pipeline`` object is
    constructed on every call; nothing is cached here.

    NOTE(review): ``spaces.GPU`` presumably requests GPU hardware on Hugging
    Face Spaces for the duration of the call -- confirm against the ``spaces``
    package documentation.
    """
    return Pipeline("pos,ner,dp,g2g")


# Example sentences, one per tab of the demo. Each is shown as the textbox
# placeholder in the UI and starts with an "e.g., " lead-in; the handler
# functions drop that lead-in and fall back to the remaining sentence when the
# user submits an empty textbox.
G2G_PLACEHOLDER = "e.g., H thessaloniki einai mia poli sti boreia ellada"
NER_PLACEHOLDER = "e.g., Η Αργεντινή κέρδισε το Παγκόσμιο Κύπελλο το 2022"
POS_PLACEHOLDER = "e.g., Μου αρέσει να διαβάζω τα post του Andrew Ng στο Twitter."
DP_PLACEHOLDER = "e.g., Προτιμώ την πρωινή πτήση από την Αθήνα στη Θεσσαλονίκη."


@spaces.GPU
def greeklish_to_greek(text):
    """Run the pipeline on Greeklish text and return the resulting token texts.

    Args:
        text: The Greeklish input from the UI. When it is empty (or ``None``),
            the example sentence in ``G2G_PLACEHOLDER`` is used instead, with
            its "e.g., " lead-in removed.

    Returns:
        The ``text`` of every token in the processed document, joined by
        single spaces.
    """
    if not text:
        # The lead-in "e.g., " is six characters long. Removing it by value
        # instead of slicing a fixed count avoids feeding the model a
        # sentence that starts with a stray space.
        text = G2G_PLACEHOLDER.removeprefix("e.g., ")

    nlp_pipeline = allocate_pipeline()
    doc = nlp_pipeline(text)
    return " ".join(token.text for token in doc.tokens)


@spaces.GPU
def process_text(text, task):
    """Run the pipeline on ``text`` and render one line per token for ``task``.

    Args:
        text: The sentence to analyse; passed to the pipeline unchanged.
        task: Which annotations to render. One of ``"dp"`` (head and
            dependency relation), ``"pos"`` (UPOS tag and morphological
            features) or ``"ner"`` (named-entity label).

    Returns:
        A newline-separated string with one formatted line per token.

    Raises:
        KeyError: If ``task`` is not one of the supported keys. This is raised
            before the pipeline is allocated, so a bad task name does not pay
            for model loading and inference first.
    """
    task_mapping = {
        "dp": lambda token: f"Text: {token.text}, Head: {token.head}, Deprel: {token.deprel}",
        "pos": lambda token: f"Text: {token.text}, UPOS: {token.upos}, Feats: {token.feats}",
        "ner": lambda token: f"Text: {token.text}, NER: {token.ner}",
    }
    # Resolve the formatter up front so an unsupported task fails fast.
    format_token = task_mapping[task]

    nlp_pipeline = allocate_pipeline()
    doc = nlp_pipeline(text)
    return "\n".join(format_token(token) for token in doc.tokens)


def dependency_parsing(text):
    """Return dependency-parsing annotations for ``text``, one line per token.

    Args:
        text: The sentence to parse. When it is empty (or ``None``), the
            example sentence in ``DP_PLACEHOLDER`` is used instead, with its
            "e.g., " lead-in removed.

    Returns:
        The string produced by ``process_text`` for the ``"dp"`` task.
    """
    if not text:
        # "e.g., " is six characters; remove it by value so that no leading
        # space is left on the default sentence.
        text = DP_PLACEHOLDER.removeprefix("e.g., ")
    return process_text(text, "dp")


def pos_tagging(text):
    """Return POS and morphological annotations for ``text``, one line per token.

    Args:
        text: The sentence to tag. When it is empty (or ``None``), the example
            sentence in ``POS_PLACEHOLDER`` is used instead, with its
            "e.g., " lead-in removed.

    Returns:
        The string produced by ``process_text`` for the ``"pos"`` task.
    """
    if not text:
        # "e.g., " is six characters; remove it by value so that no leading
        # space is left on the default sentence.
        text = POS_PLACEHOLDER.removeprefix("e.g., ")
    return process_text(text, "pos")


def named_entity_recognition(text):
    """Return named-entity annotations for ``text``, one line per token.

    Args:
        text: The sentence to annotate. When it is empty (or ``None``), the
            example sentence in ``NER_PLACEHOLDER`` is used instead, with its
            "e.g., " lead-in removed.

    Returns:
        The string produced by ``process_text`` for the ``"ner"`` task.
    """
    if not text:
        # "e.g., " is six characters; remove it by value so that no leading
        # space is left on the default sentence.
        text = NER_PLACEHOLDER.removeprefix("e.g., ")

    return process_text(text, "ner")


# Define the Gradio interface
def create_demo():
    """Build the Gradio ``Blocks`` UI for the toolkit playground.

    The layout is, top to bottom: an introductory Markdown header, four tabs
    (NER, POS/morphological tagging, dependency parsing, Greeklish-to-Greek),
    and a closing Markdown section with installation, citation and contact
    information. Each tab holds an input textbox, an output textbox and a
    "Submit" button wired to the matching handler function.

    Returns:
        The constructed ``gr.Blocks`` object; it is not launched here.
    """
    theme = gr.themes.Soft()
    with gr.Blocks(theme=theme) as demo:
        # Header: title, logo and feature list.
        gr.Markdown(
            """
        # GR-NLP-TOOLKIT Playground 🇬🇷

        <p align="left">
            <a href="https://github.com/nlpaueb/gr-nlp-toolkit">
                <img src="https://github.com/nlpaueb/gr-nlp-toolkit/blob/main/logo.png?raw=true" width="200">
            </a>
        </p>
        
        This is an interactive playground/demo for our open-source Python toolkit (`gr-nlp-toolkit`), which supports state-of-the-art natural language processing capabilities in Greek. 
        
        ## Key Features:
        - Named Entity Recognition (NER)
        - Part-of-Speech (POS) Tagging
        - Morphological Tagging
        - Dependency Parsing (DP)
        - Greeklish to Greek Conversion (G2G)
        
        """
        )

        # One tab per task. The placeholder shows an example sentence, which
        # the handler also uses as input when the textbox is left empty.
        with gr.Tab("Named Entity Recognition"):
            ner_input = gr.Textbox(
                label="Enter text",
                placeholder=NER_PLACEHOLDER,
            )
            ner_output = gr.Textbox(label="NER annotations")
            ner_button = gr.Button("Submit")
            ner_button.click(
                named_entity_recognition, inputs=ner_input, outputs=ner_output
            )

        with gr.Tab("POS and Morphological Tagging"):
            pos_input = gr.Textbox(
                label="Enter text",
                placeholder=POS_PLACEHOLDER,
            )
            pos_output = gr.Textbox(label="POS and Morphological Tagging annotations")
            pos_button = gr.Button("Submit")
            pos_button.click(pos_tagging, inputs=pos_input, outputs=pos_output)

        with gr.Tab("Dependency Parsing"):
            dp_input = gr.Textbox(
                label="Enter text",
                placeholder=DP_PLACEHOLDER,
            )
            dp_output = gr.Textbox(label="Dependency Parsing annotations")
            dp_button = gr.Button("Submit")
            dp_button.click(dependency_parsing, inputs=dp_input, outputs=dp_output)

        with gr.Tab("Greeklish to Greek"):
            g2g_input = gr.Textbox(
                label="Enter Greeklish text",
                placeholder=G2G_PLACEHOLDER,
            )
            g2g_output = gr.Textbox(label="Greek text")
            g2g_button = gr.Button("Submit")
            g2g_button.click(greeklish_to_greek, inputs=g2g_input, outputs=g2g_output)

        # Footer: installation instructions, repository link, citation and
        # project/contact information.
        gr.Markdown(
            """

        ## Installation

        The Greek NLP toolkit is available on PyPI for Python 3.9+:
        
        ```sh
        pip install gr-nlp-toolkit
        ```

        ## Github Repository

        Visit the <a href="https://github.com/nlpaueb/gr-nlp-toolkit" target="_blank">GitHub repository</a> for more information, such as documentation and full usage examples.

        ## Paper
        The software was presented at COLING 2025. Read the full technical report/paper here: https://aclanthology.org/2025.coling-demos.17/

        If you use our toolkit, please cite it:
        ```bibtex
        @inproceedings{loukas-etal-coling2025-greek-nlp-toolkit,
            title = "{GR}-{NLP}-{TOOLKIT}: An Open-Source {NLP} Toolkit for {M}odern {G}reek",
            author = "Loukas, Lefteris  and
              Smyrnioudis, Nikolaos  and
              Dikonomaki, Chrysa  and
              Barbakos, Spiros  and
              Toumazatos, Anastasios  and
              Koutsikakis, John  and
              Kyriakakis, Manolis  and
              Georgiou, Mary  and
              Vassos, Stavros  and
              Pavlopoulos, John  and
              Androutsopoulos, Ion",
            editor = "Rambow, Owen  and
              Wanner, Leo  and
              Apidianaki, Marianna  and
              Al-Khalifa, Hend  and
              Eugenio, Barbara Di  and
              Schockaert, Steven  and
              Mather, Brodie  and
              Dras, Mark",
            booktitle = "Proceedings of the 31st International Conference on Computational Linguistics: System Demonstrations",
            month = jan,
            year = "2025",
            address = "Abu Dhabi, UAE",
            publisher = "Association for Computational Linguistics",
            url = "https://aclanthology.org/2025.coling-demos.17/",
            pages = "174--182",
        }
        ```

        ## About the Project
        
        [The Greek NLP Toolkit](https://github.com/nlpaueb/gr-nlp-toolkit) is the state-of-the-art natural language processing toolkit for modern Greek, maintained by the <a href="http://nlp.cs.aueb.gr/" target="_blank">Natural Language Processing Group at the Athens University of Economics and Business</a>.
        For technical questions, contact us via Github issues. For licensing and commercial inquiries, please contact us via the Contact page in the website.
        <br>
        <br>    

        <div style="text-align: center;">
            <a href="https://github.com/nlpaueb/gr-nlp-toolkit">
                <img src="https://img.shields.io/badge/GitHub-Repository-181717?logo=github" alt="GitHub" style="display: block; margin: auto;">
            </a>
            <a href="https://github.com/nlpaueb/gr-nlp-toolkit">https://github.com/nlpaueb/gr-nlp-toolkit</a>
        </div>

        

        © 2025 The Greek NLP Toolkit. All rights reserved.
        """
        )

    return demo


# Launch the Gradio interface
if __name__ == "__main__":
    # Build the UI only when this file is executed as a script.
    demo = create_demo()

    # Forwarded to launch() as `share`. NOTE(review): per the Gradio docs,
    # share=True requests a temporary public link -- confirm before enabling.
    DEPLOY_TO_THE_PUBLIC_FLAG = False
    demo.launch(share=DEPLOY_TO_THE_PUBLIC_FLAG)