File size: 5,391 Bytes
ada9d7c 4a39fff 55f9b9d be30936 05373fd 55f9b9d 4a39fff 982fd1b 4a39fff 87bbff8 4a39fff 8f83e8d 4a39fff 87bbff8 d74c2a3 4a39fff 87bbff8 4a39fff ada9d7c 9d572f3 55f9b9d b0cf51d eb0f569 ba91ea6 be30936 2559909 be30936 ee95081 be30936 ee95081 2559909 ee95081 2559909 ee95081 55f9b9d ba91ea6 bc4505b 144e6f4 93bb100 be30936 6c054fe ba91ea6 7471ca8 ada9d7c eb0f569 ada9d7c 6810129 55f9b9d 6810129 982fd1b 6810129 14600a6 55f9b9d 6810129 3293495 95b6f20 6810129 aeabbe6 6c054fe 95b6f20 6c054fe ada9d7c 55f9b9d cc397ec 55f9b9d cc397ec 55f9b9d 3293495 55f9b9d cc397ec 55f9b9d 242791e ada9d7c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import gradio as gr
import matplotlib.pyplot as plt
import networkx as nx
from model import Parser
# Single module-level parser instance; parse() below reuses it on every call.
parser = Parser()
def parse(text):
    """Run the parser on *text* and build both UI outputs.

    The parser result is indexed by the keys ``forms``, ``heads``,
    ``deprel``, ``lemmas``, ``upos``, ``xpos``, ``feats`` and ``ne``.

    Returns:
        A ``(dependency_tree, table)`` tuple: the result of
        ``render_dependency_tree`` followed by the result of
        ``render_table``.
    """
    analysis = parser.parse(text)

    tree_keys = ("forms", "heads", "deprel")
    table_keys = ("forms", "lemmas", "upos", "xpos", "feats", "ne")

    tree_figure = render_dependency_tree(*(analysis[key] for key in tree_keys))
    token_table = render_table(*(analysis[key] for key in table_keys))
    return tree_figure, token_table
def render_dependency_tree(words, parents, labels):
    """Draw the dependency tree of one sentence as a matplotlib figure.

    Args:
        words: Token strings; token ``i`` becomes graph node ``i`` and is
            used as that node's label.
        parents: One head index per token, 1-based. A value of ``0`` means
            the token gets no incoming edge.
        labels: One edge label per token, attached to the edge coming from
            its head.

    Returns:
        The matplotlib ``Figure`` containing the drawing. The figure is
        already released from pyplot's global registry, so callers do not
        need to close it.
    """
    # Shrink the fonts as the sentence gets longer so labels keep fitting.
    n_words = len(words)
    if n_words < 10:
        main_font_size, minor_font_size = 40, 30
    elif n_words < 20:
        main_font_size, minor_font_size = 30, 22
    elif n_words < 40:
        main_font_size, minor_font_size = 24, 16
    else:
        main_font_size, minor_font_size = 16, 12
    pad = main_font_size // 2

    # Create a directed graph: one node per token, one edge head -> dependent.
    G = nx.DiGraph()
    for i, word in enumerate(words):
        G.add_node(i, label=word)
    for i, (parent, label) in enumerate(zip(parents, labels)):
        if parent != 0:
            # Heads are 1-based while node ids are 0-based.
            G.add_edge(parent - 1, i, label=label)

    # Position nodes using Graphviz. Done before the figure is created so a
    # layout failure cannot leave an orphaned figure behind.
    pos = nx.nx_agraph.graphviz_layout(G, prog='dot')

    fig, ax = plt.subplots(figsize=(40, 16))
    try:
        # Draw the graph; nodes are invisible, only their boxed labels show.
        nx.draw(
            G, pos, ax=ax, with_labels=True, labels=nx.get_node_attributes(G, 'label'),
            arrows=True, node_color='#ffffff', node_size=0, node_shape='s',
            font_size=main_font_size, bbox=dict(facecolor="white", pad=pad),
        )

        # Draw edge labels
        edge_labels = nx.get_edge_attributes(G, 'label')
        nx.draw_networkx_edge_labels(
            G, pos, ax=ax, edge_labels=edge_labels, rotate=False, alpha=1.0,
            font_size=minor_font_size,
        )
    finally:
        # pyplot keeps every figure it creates alive until it is closed, so
        # without this each call would leak one figure. Closing only drops
        # pyplot's reference; the Figure object itself is still returned.
        plt.close(fig)

    return fig
# HTML snippet passed to gr.HTML: a centred page title followed by an image.
description = """
<div style="text-align: center;">
<h1>Norsk UD (Bokmål og Nynorsk)</h1>
<p align="center">
<img src="https://huggingface.co/ltg/norbert3-base/resolve/main/norbert.png" width=6.75%>
</p><p></p>
</div>
"""
def _format_feats(feats):
    """Convert per-token ``Name=Value|...`` strings into display rows."""
    per_token = []
    for feat in feats:
        cells = []
        for item in feat.split("|"):
            # partition keeps everything after the first "=" as the value.
            name, separator, value = item.partition("=")
            if separator:
                cells.append(f"*{name}:* {value}")
        per_token.append(cells)

    # Always produce at least one row so the "UFEATS" line exists even when
    # no token carries a feature (or when there are no tokens at all).
    n_rows = max(1, max((len(cells) for cells in per_token), default=0))
    return [
        [cells[row] if row < len(cells) else "" for cells in per_token]
        for row in range(n_rows)
    ]


def _format_entities(named_entities):
    """Convert ``O`` / ``B-x`` / ``I-x`` tags into span-marker cells."""
    last = len(named_entities) - 1
    cells = []
    for i, tag in enumerate(named_entities):
        # A span continues only if there IS a next token and it is an "I" tag.
        continues = i < last and named_entities[i + 1].startswith("I")
        if tag == "O":
            cells.append("")
        elif tag.startswith("B"):
            opening = f"<<— {tag.partition('-')[2]} —"
            # A span that starts and ends on this token is closed right away,
            # including when this token is the last one in the sentence.
            cells.append(opening if continues else opening + ">>")
        elif tag.startswith("I") and continues:
            cells.append("————")
        else:
            cells.append("——>>")
    return cells


def render_table(forms, lemmas, upos, xpos, feats, named_entities):
    """Build the token table shown under the input box.

    All arguments are parallel sequences with one entry per token.

    Args:
        forms: Token strings; used as the column headers.
        lemmas: Lemma of each token.
        upos: Value for the ``UPOS`` row of each token.
        xpos: Value for the ``XPOS`` row of each token.
        feats: Feature string of each token, ``Name=Value`` items joined by
            ``|``; items without ``=`` are ignored.
        named_entities: Entity tag of each token: ``"O"``, or a tag starting
            with ``B`` / ``I`` followed by ``-`` and the entity label.

    Returns:
        A dict with ``"headers"`` (an empty cell followed by the forms) and
        ``"data"`` (a list of rows: lemmas, UPOS, XPOS, one or more feature
        rows, the entity row and a trailing blank row). Every row has
        ``len(forms) + 1`` cells. Empty input yields a table that contains
        only the row-label column.
    """
    feature_rows = _format_feats(feats)

    headers = [""] + list(forms)
    data = [
        ["*LEMMAS:*"] + list(lemmas),
        ["*UPOS:*"] + list(upos),
        ["*XPOS:*"] + list(xpos),
        ["*UFEATS:*"] + feature_rows[0],
        # Further feature rows carry no label of their own.
        *([""] + row for row in feature_rows[1:]),
        ["*NE:*"] + _format_entities(named_entities),
        [""] * len(headers),
    ]
    return {"data": data, "headers": headers}
# Extra stylesheet passed to gr.Blocks(css=...); it hides every element with
# the "sort-button" class.
custom_css = \
"""
/* Hide sort buttons at gr.DataFrame */
.sort-button {
display: none !important;
}
"""
# Page layout and event wiring for the demo.
# NOTE(review): the nesting below was reconstructed; confirm that the results
# column is meant to sit below the input row rather than inside it.
with gr.Blocks(theme='sudeepshouche/minimalist', css=custom_css) as demo:
    gr.HTML(description)

    with gr.Row():
        # Left panel: free-text input plus the submit button.
        with gr.Column(scale=1, variant="panel"):
            source = gr.Textbox(
                label="Input sentence",
                placeholder="Write a sentence to parse",
                show_label=False,
                lines=1,
                max_lines=5,
                autofocus=True,
            )
            submit = gr.Button("Submit", variant="primary")

        # Right panel: clickable example sentences.
        with gr.Column(scale=1, variant="panel"):
            dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                label="Input examples",
                samples=[
                    ["Thomassen er på vei til sin neste gjerning."],
                    ["På toppen av dette kom de metodiske utfordringer."],
                    ["Berntsen har påtatt seg en både viktig og vanskelig oppgave."],
                    ["Ikke bare har det vært et problem, som han selv skriver i forordet, å bli klok på Borten."],
                    ["Statsministeren i Norges første brede og varige borgerlige koalisjonsregjering etterlot seg timelange radiointervjuer med tidligere Dagsnytt-redaktør Per Bøhn og 70-80 stappfulle esker med usorterte papirer på loft og i kjeller hjemme på gården i Flå."],
                ],
            )

    # Results: the token table followed by the dependency-tree plot.
    with gr.Column(scale=1, variant="panel"):
        #gr.Label("", show_label=False, container=False)
        table = gr.DataFrame(
            [[""] * 42 for _ in range(8)],
            headers=[""] * 42,
            interactive=False,
            datatype="markdown",
        )
        dependency_plot = gr.Plot(None, container=False)

    # Pressing Enter in the textbox and clicking the button both run the parser.
    for trigger in (source.submit, submit.click):
        trigger(fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True)

    # Clicking an example first copies it into the textbox, then parses it.
    example_picked = dataset.click(
        fn=lambda text: text[0], inputs=[dataset], outputs=[source]
    )
    example_picked.then(
        fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
    )
# Configure the request queue with max_size=32, then start the app.
demo.queue(max_size=32)
demo.launch()
|