File size: 5,391 Bytes
ada9d7c
4a39fff
 
 
55f9b9d
 
 
 
 
 
 
 
be30936
05373fd
55f9b9d
 
 
4a39fff
 
982fd1b
4a39fff
87bbff8
 
 
 
4a39fff
 
 
 
 
 
 
 
 
8f83e8d
 
4a39fff
 
 
 
 
 
87bbff8
d74c2a3
4a39fff
 
 
87bbff8
4a39fff
 
ada9d7c
 
 
 
 
 
 
 
 
 
 
9d572f3
55f9b9d
b0cf51d
 
eb0f569
 
ba91ea6
be30936
2559909
 
be30936
ee95081
 
be30936
ee95081
2559909
ee95081
2559909
ee95081
55f9b9d
ba91ea6
bc4505b
144e6f4
 
 
 
93bb100
be30936
6c054fe
ba91ea6
 
7471ca8
ada9d7c
 
eb0f569
 
 
 
 
 
 
 
ada9d7c
6810129
 
55f9b9d
6810129
982fd1b
6810129
 
14600a6
55f9b9d
6810129
 
 
3293495
 
 
95b6f20
 
6810129
 
 
aeabbe6
6c054fe
95b6f20
6c054fe
ada9d7c
55f9b9d
cc397ec
55f9b9d
 
cc397ec
55f9b9d
 
3293495
55f9b9d
cc397ec
55f9b9d
 
 
242791e
ada9d7c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import gradio as gr
import matplotlib.pyplot as plt
import networkx as nx

from model import Parser


# Single parser instance, created once at import time and reused by every call to `parse`.
parser = Parser()

def parse(text):
    """Run the parser on *text* and build both views shown in the UI.

    Returns a ``(dependency_tree, table)`` pair: the figure produced by
    ``render_dependency_tree`` and the table dict produced by ``render_table``.
    """
    result = parser.parse(text)

    tree_figure = render_dependency_tree(result["forms"], result["heads"], result["deprel"])

    # The table takes these parser fields positionally, in exactly this order.
    table_fields = ("forms", "lemmas", "upos", "xpos", "feats", "ne")
    token_table = render_table(*[result[field] for field in table_fields])

    return tree_figure, token_table


def render_dependency_tree(words, parents, labels):
    """Draw the dependency tree of one sentence as a Matplotlib figure.

    Args:
        words: Token forms; token ``i`` becomes graph node ``i``.
        parents: Head of each token as a 1-based token index. ``0`` marks a
            token without a head, which gets no incoming edge.
        labels: Dependency relation of each token, drawn on the edge coming
            from its head.

    Returns:
        The figure containing the drawing. It has already been closed in
        pyplot (i.e. unregistered from pyplot's global figure list); the
        figure object itself is returned for the caller to render.
    """
    fig, ax = plt.subplots(figsize=(40, 16))

    try:
        # The canvas size is fixed, so fonts shrink as the sentence gets longer.
        n_words = len(words)
        if n_words < 10:
            main_font_size, minor_font_size = 40, 30
        elif n_words < 20:
            main_font_size, minor_font_size = 30, 22
        elif n_words < 40:
            main_font_size, minor_font_size = 24, 16
        else:
            main_font_size, minor_font_size = 16, 12
        pad = main_font_size // 2

        # Create a directed graph
        G = nx.DiGraph()

        # Adding nodes to the graph
        for i, word in enumerate(words):
            G.add_node(i, label=word)

        # Adding edges with labels (head -> dependent); heads are 1-based, nodes 0-based
        for i, (parent, label) in enumerate(zip(parents, labels)):
            if parent != 0:
                G.add_edge(parent - 1, i, label=label)

        # Position nodes using Graphviz
        pos = nx.nx_agraph.graphviz_layout(G, prog='dot')

        # Draw the graph
        nx.draw(G, pos, ax=ax, with_labels=True, labels=nx.get_node_attributes(G, 'label'),
                arrows=True, node_color='#ffffff', node_size=0, node_shape='s', font_size=main_font_size, bbox = dict(facecolor="white", pad=pad)
        )

        # Draw edge labels
        edge_labels = nx.get_edge_attributes(G, 'label')
        nx.draw_networkx_edge_labels(G, pos, ax=ax, edge_labels=edge_labels, rotate=False, alpha=1.0, font_size=minor_font_size)
    finally:
        # pyplot keeps a global reference to every figure it creates. Without
        # closing, a long-running server leaks one large figure per request
        # (also when layout or drawing fails).
        plt.close(fig)

    return fig


# HTML header for the page (title and a centered logo image); passed to gr.HTML when the UI is built.
description = """
<div style="text-align: center;">
    <h1>Norsk UD (Bokmål og Nynorsk)</h1>
    <p align="center">
        <img src="https://huggingface.co/ltg/norbert3-base/resolve/main/norbert.png" width=6.75%>
    </p><p></p>
</div>
"""


def render_table(forms, lemmas, upos, xpos, feats, named_entities):
    """Build the per-token annotation table as a headers/data dict.

    Each token is one column. The first cell of every row is a markdown row
    label (or empty for continuation rows).

    Args:
        forms: Token forms; used as the column headers.
        lemmas: One lemma per token.
        upos: One UPOS tag per token.
        xpos: One XPOS tag per token.
        feats: One feature string per token, made of ``Name=Value`` items
            joined by ``|``. Items without ``=`` (e.g. ``_``) are skipped.
        named_entities: One tag per token: ``"O"`` for no entity, otherwise a
            tag starting with ``B`` or ``I`` followed by ``-TYPE``.

    Returns:
        ``{"headers": [...], "data": [...]}``. ``data`` holds the LEMMAS, UPOS
        and XPOS rows, one or more UFEATS rows (as many as the longest
        feature list, at least one), the NE row and a final blank row.
        Empty input yields rows that contain only their label cell.
    """
    # One list of "*Name:* Value" cells per token.
    feature_cells = []
    for feat in feats:
        cells = []
        for item in feat.split("|"):
            if "=" in item:
                name, value = item.split("=")[:2]
                cells.append(f"*{name}:* {value}")
        feature_cells.append(cells)

    # Stack the features vertically: row k holds the k-th feature of every
    # token, padded with "" where a token has fewer features. Always at least
    # one row so the "*UFEATS:*" label has somewhere to go, even with no tokens.
    height = max([1, *(len(cells) for cells in feature_cells)])
    feature_rows = [
        [cells[row] if row < len(cells) else "" for cells in feature_cells]
        for row in range(height)
    ]

    # Turn the tags into a drawn span: "<<— TYPE —" opens, "————" continues,
    # "——>>" closes; a single-token entity is opened and closed in one cell.
    last_index = len(named_entities) - 1
    ne_cells = []
    for i, ne in enumerate(named_entities):
        # The span goes on only if there IS a next token and it carries an I tag.
        # A B tag on the very last token is therefore a closed single-token entity.
        continues = i < last_index and named_entities[i + 1].startswith("I")
        if ne == "O":
            cell = ""
        elif ne.startswith("B"):
            entity_type = ne.split("-")[1]
            cell = f"<<— {entity_type} —" if continues else f"<<— {entity_type} —>>"
        elif ne.startswith("I") and continues:
            cell = "————"
        else:
            cell = "——>>"
        ne_cells.append(cell)

    data = [
        ["*LEMMAS:*", *lemmas],
        ["*UPOS:*", *upos],
        ["*XPOS:*", *xpos],
        ["*UFEATS:*", *feature_rows[0]],
        *(["", *row] for row in feature_rows[1:]),
        ["*NE:*", *ne_cells],
        # Trailing blank row, one cell per column.
        [""] * (len(forms) + 1),
    ]

    return {"data": data, "headers": ["", *forms]}


# Extra stylesheet passed to gr.Blocks; its single rule hides elements with the `sort-button` class.
custom_css = """
/* Hide sort buttons at gr.DataFrame */
.sort-button {
    display: none !important;
}
"""
with gr.Blocks(theme='sudeepshouche/minimalist', css=custom_css) as demo:
    gr.HTML(description)

    with gr.Row():
        # Left panel: free-text input and the submit button.
        with gr.Column(scale=1, variant="panel"):
            source = gr.Textbox(
                label="Input sentence",
                placeholder="Write a sentence to parse",
                show_label=False,
                lines=1,
                max_lines=5,
                autofocus=True,
            )
            submit = gr.Button("Submit", variant="primary")

        # Right panel: clickable example sentences.
        with gr.Column(scale=1, variant="panel"):
            example_sentences = [
                "Thomassen er på vei til sin neste gjerning.",
                "På toppen av dette kom de metodiske utfordringer.",
                "Berntsen har påtatt seg en både viktig og vanskelig oppgave.",
                "Ikke bare har det vært et problem, som han selv skriver i forordet, å bli klok på Borten.",
                "Statsministeren i Norges første brede og varige borgerlige koalisjonsregjering etterlot seg timelange radiointervjuer med tidligere Dagsnytt-redaktør Per Bøhn og 70-80 stappfulle esker med usorterte papirer på loft og i kjeller hjemme på gården i Flå.",
            ]
            dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                label="Input examples",
                # Each sample is a one-element row matching the single hidden Textbox.
                samples=[[sentence] for sentence in example_sentences],
            )

    # Output area: the annotation table above the dependency plot.
    with gr.Column(scale=1, variant="panel"):
        # Blank grid shown before the first parse.
        placeholder_rows, placeholder_columns = 8, 42
        table = gr.DataFrame(
            [[""] * placeholder_columns for _ in range(placeholder_rows)],
            headers=[""] * placeholder_columns,
            interactive=False,
            datatype="markdown",
        )
        dependency_plot = gr.Plot(None, container=False)

    def parse_event():
        """Return fresh keyword arguments for an event that runs `parse` on the input box."""
        return dict(fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True)

    # Pressing Enter in the textbox and clicking the button both run the parser.
    source.submit(**parse_event())
    submit.click(**parse_event())

    # Clicking an example first copies its sentence into the textbox, then parses it.
    dataset.click(
        fn=lambda sample: sample[0], inputs=[dataset], outputs=[source]
    ).then(**parse_event())


# Enable request queuing with max_size=32, then start the app.
# NOTE(review): both calls run at import time (no `__main__` guard) — confirm that is intended.
demo.queue(max_size=32)
demo.launch()