File size: 7,230 Bytes
425b806
 
 
 
 
 
 
 
 
 
 
 
 
dc6fc47
425b806
 
 
 
 
 
 
 
 
d3d5ad6
425b806
dc6fc47
7aac388
 
dc6fc47
 
425b806
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
089ec5a
 
 
 
425b806
 
 
089ec5a
 
 
 
425b806
 
 
089ec5a
 
 
 
425b806
 
 
089ec5a
 
 
 
425b806
 
 
 
 
 
491b1ef
 
 
 
425b806
 
 
491b1ef
 
 
 
425b806
 
 
 
e6e0497
 
 
 
 
 
 
 
 
 
 
 
b893e70
 
 
 
7ab2a7e
7aac388
d3d5ad6
 
e6e0497
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import json
import pickle

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np


def plot_scalar_on_scale(scalar_value, distance_type):
    """Draw a scalar as a marker on a horizontal 'Close' to 'Distant' colour bar.

    Args:
        scalar_value: The value to display. It is clipped to the range
            [0.0, 1.0] before plotting.
        distance_type: Text used as the title of the plot.

    Returns:
        The matplotlib figure containing the rendered scale.
    """
    # Build the figure directly instead of going through ``plt.subplots``:
    # figures created via pyplot stay registered in pyplot's global figure
    # manager until they are explicitly closed, so a long-running app that
    # returns a fresh figure on every request would keep all of them alive.
    from matplotlib.figure import Figure

    # Ensure the scalar is within bounds
    scalar_value = np.clip(scalar_value, 0.0, 1.0)

    fig = Figure(figsize=(8, 2))
    ax = fig.subplots()

    # Horizontal gradient as background of the scale (left = close, right = distant)
    gradient = np.linspace(0, 1, 256).reshape(1, -1)
    ax.imshow(gradient, extent=[0, 1, 0, 1], aspect='auto', cmap='viridis_r')

    # Mark the value with a vertical line, a large dot, and the number printed on the dot
    ax.axvline(x=scalar_value, color='white', lw=5)
    ax.plot(scalar_value, 0.5, 'o', color='white', markersize=42)
    ax.text(scalar_value, 0.5, f'{scalar_value:.2f}', color='black', ha='center', va='center', fontsize=14)

    # Rotated labels just outside the left and right ends of the scale
    ax.text(-0.03, 0.5, 'Close', ha='center', va='center', fontsize=14, rotation=90)
    ax.text(1.03, 0.5, 'Distant', ha='center', va='center', fontsize=14, rotation=270)

    # No ticks, fixed unit range, caller-supplied title
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title(distance_type)

    # Hide the frame for a cleaner look
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)

    return fig


def load_json_from_path(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return the decoded object."""
    with open(path, "r", encoding="utf8") as handle:
        return json.load(handle)


class Measurer:
    """Looks up four pre-computed distance measures between pairs of languages.

    All tables are loaded once at construction time from files referenced by
    relative path. Each table is a nested mapping ``table[lang_a][lang_b]``
    and may store a pair under only one of the two possible key orders, so
    lookups always try both orders.

    Attributes set by ``__init__``:
        learned_dist_func: table shown in the UI as "Machine-Learned Distance".
        tree_dist_func: table shown in the UI as "Language Family Tree Distance".
        map_dist_func: table shown in the UI as "Distance on the Globe",
            rescaled so that its largest entry is 1.0.
        asp_dist_func: table shown in the UI as "Distance between Phoneme-Sets",
            derived from similarities as ``1 - similarity``.
    """

    def __init__(self):
        """Load every distance table from disk and bring it into its final form."""
        # learned dist
        self.learned_dist_func = load_json_from_path("lang_1_to_lang_2_to_learned_dist.json")

        # tree dist
        self.tree_dist_func = load_json_from_path("lang_1_to_lang_2_to_tree_dist.json")

        # map dist, divided by the largest stored value
        self.map_dist_func = load_json_from_path("lang_1_to_lang_2_to_map_dist.json")
        self._normalize_by_max(self.map_dist_func)

        # ASP
        # NOTE(review): pickle.load can execute arbitrary code while loading.
        # This presumably reads a file shipped with the app; never point it
        # at data from an untrusted source.
        with open("asp_dict.pkl", 'rb') as dictfile:
            asp_sim = pickle.load(dictfile)
        self.asp_dist_func = self._similarities_to_distances(asp_sim)

    @staticmethod
    def _normalize_by_max(table):
        """Divide every value of the nested dict ``table`` in place by its largest value.

        Nothing is changed when the table is empty or its largest value is
        not positive, which avoids a division by zero.
        """
        largest = max((value for row in table.values() for value in row.values()), default=0.0)
        if largest <= 0:
            return
        for row in table.values():
            for key in row:
                row[key] = row[key] / largest

    @staticmethod
    def _similarities_to_distances(asp_sim):
        """Turn a similarity lookup into a nested distance dict.

        ``asp_sim[lang]`` is indexed by the position of the other language in
        the key order of ``asp_sim``. Because the measure is treated as
        symmetric, each pair is stored only once, under the language that
        comes first in that key order; the last language therefore maps to
        an empty dict.
        """
        lang_list = list(asp_sim.keys())
        distances = dict()
        for position, lang_1 in enumerate(lang_list):
            row = asp_sim[lang_1]
            distances[lang_1] = {
                lang_list[other]: 1 - row[other]
                for other in range(position + 1, len(lang_list))
            }
        return distances

    @staticmethod
    def _symmetric_lookup(table, l1, l2):
        """Return ``table[l1][l2]``, falling back to ``table[l2][l1]``.

        Raises:
            KeyError: if the pair is stored under neither key order.
        """
        for first, second in ((l1, l2), (l2, l1)):
            try:
                return table[first][second]
            except KeyError:
                continue
        raise KeyError(f"no distance stored for the language pair ({l1!r}, {l2!r})")

    @staticmethod
    def _iso_code_from_label(label):
        """Extract the code from a label of the form ``'<name> (<code>)'``.

        The text inside the last pair of parentheses is returned. A label
        without an opening parenthesis is taken to be a bare code already.
        """
        _, opening, tail = label.rpartition("(")
        if not opening:
            return label.strip()
        return tail.partition(")")[0]

    def get_dists(self, l1, l2):
        """Return ``(tree_dist, map_dist, asp_dist, learned_dist)`` for two language codes.

        Args:
            l1: Key of the first language as used in the lookup tables.
            l2: Key of the second language as used in the lookup tables.

        Raises:
            KeyError: if any of the four tables has no entry for the pair.
        """
        tree_dist = self._symmetric_lookup(self.tree_dist_func, l1, l2)
        map_dist = self._symmetric_lookup(self.map_dist_func, l1, l2)
        asp_dist = self._symmetric_lookup(self.asp_dist_func, l1, l2)
        learned_dist = self._symmetric_lookup(self.learned_dist_func, l1, l2)
        return tree_dist, map_dist, asp_dist, learned_dist

    def measure(self, l1, l2):
        """Plot all four distances between two languages selected by label.

        Args:
            l1: Label of the first language, e.g. ``'English (eng)'``.
            l2: Label of the second language, in the same format.

        Returns:
            A tuple of four figures, in the order tree, map, phoneme-set,
            machine-learned distance.
        """
        if l1 == l2:
            # Identical selections are shown with distance zero and need no lookup.
            distances = (0.0, 0.0, 0.0, 0.0)
        else:
            distances = self.get_dists(self._iso_code_from_label(l1),
                                       self._iso_code_from_label(l2))

        titles = (f"Language Family Tree Distance between {l1} and {l2}",
                  f"Distance on the Globe between {l1} and {l2}",
                  f"Distance between Phoneme-Sets between {l1} and {l2}",
                  f"Machine-Learned Distance between {l1} and {l2}")

        f1, f2, f3, f4 = (plot_scalar_on_scale(distance, title)
                          for distance, title in zip(distances, titles))
        return f1, f2, f3, f4


# Load all lookup tables once at start-up; the interface below reuses this instance for every request.
m = Measurer()

# Dropdown entries have the form "<full name> (<iso code>)"; Measurer.measure reads the code back out of the parentheses.
iso_to_name = load_json_from_path("iso_to_fullname.json")
text_selection = [f"{iso_to_name[iso_code]} ({iso_code})" for iso_code in iso_to_name]

# Two language dropdowns in, four distance plots out.
iface = gr.Interface(fn=m.measure,
                     inputs=[gr.Dropdown(text_selection,
                                         type="value",
                                         value='English (eng)',
                                         label="Select the first Language (type on your keyboard to find it quickly)"),
                             gr.Dropdown(text_selection,
                                         type="value",
                                         value='German (deu)',
                                         label="Select the second Language (type on your keyboard to find it quickly)")],
                     outputs=[gr.Plot(label="", show_label=False, format="png", container=True),
                              gr.Plot(label="", show_label=False, format="png", container=True),
                              gr.Plot(label="", show_label=False, format="png", container=True),
                              gr.Plot(label="", show_label=False, format="png", container=True)],
                     description="<br><br> This demo allows you to view the distance between two languages from the ISO 639-3 list according to several distance measurement functions. "
                                 "For more information, check out our paper: https://arxiv.org/abs/2406.06403 and our text-to-speech tool, in which we make use of "
                                 "this technique: https://github.com/DigitalPhonetics/IMS-Toucan <br><br>",
                     fill_width=True)
iface.launch()