import json
import pickle

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np


def plot_scalar_on_scale(scalar_value, distance_type):
    """Render a scalar as a marker on a horizontal "close -> distant" bar.

    Args:
        scalar_value: The value to display. It is clipped into [0.0, 1.0]
            before plotting.
        distance_type: Text used as the title of the plot.

    Returns:
        The matplotlib figure containing the rendered scale.
    """
    # Ensure the scalar is within bounds
    scalar_value = np.clip(scalar_value, 0.0, 1.0)

    fig, ax = plt.subplots(figsize=(8, 2))

    # Horizontal gradient background (from close to distant)
    gradient = np.linspace(0, 1, 256).reshape(1, -1)
    ax.imshow(gradient, extent=[0, 1, 0, 1], aspect='auto', cmap='viridis_r')

    # Vertical line plus a large dot carrying the numeric value
    ax.axvline(x=scalar_value, color='white', lw=5)
    ax.plot(scalar_value, 0.5, 'o', color='white', markersize=42)
    ax.text(scalar_value, 0.5, f'{scalar_value:.2f}', color='black', ha='center', va='center', fontsize=14)

    # Labels rotated 90 degrees on the sides
    ax.text(-0.03, 0.5, 'Close', ha='center', va='center', fontsize=14, rotation=90)
    ax.text(1.03, 0.5, 'Distant', ha='center', va='center', fontsize=14, rotation=270)

    # No ticks, fixed unit range, title on top
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title(distance_type)

    # Remove spines for a cleaner look
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)

    # pyplot keeps a global reference to every figure it creates. Without
    # de-registering it here, each request would leak a figure for the
    # lifetime of the server process. The Figure object itself stays valid
    # and can still be saved/rendered by the caller.
    plt.close(fig)
    return fig


def load_json_from_path(path):
    """Read the UTF-8 encoded JSON file at ``path`` and return the parsed object."""
    with open(path, "r", encoding="utf8") as f:
        return json.load(f)


def _extract_iso_code(label):
    """Return the text inside the parentheses of the last space-separated token.

    For a dropdown label such as ``"English (eng)"`` this yields ``"eng"``.
    """
    return label.split(" ")[-1].split("(")[1].split(")")[0]


class Measurer:
    """Looks up four kinds of pairwise language distances and plots them."""

    def __init__(self):
        """Load all distance tables from files in the working directory."""
        # learned dist
        self.learned_dist_func = load_json_from_path("lang_1_to_lang_2_to_learned_dist.json")

        # tree dist
        self.tree_dist_func = load_json_from_path("lang_1_to_lang_2_to_tree_dist.json")

        # map dist, rescaled so that the largest entry becomes 1.0
        self.map_dist_func = load_json_from_path("lang_1_to_lang_2_to_map_dist.json")
        largest_value_map_dist = 0.0
        for row in self.map_dist_func.values():
            for value in row.values():
                largest_value_map_dist = max(largest_value_map_dist, value)
        # Skip the rescaling when there is nothing positive to scale by;
        # dividing by 0 would raise ZeroDivisionError.
        if largest_value_map_dist > 0:
            for row in self.map_dist_func.values():
                for key in row:
                    row[key] = row[key] / largest_value_map_dist

        # ASP
        # NOTE(security): pickle.load executes arbitrary code embedded in the
        # file. Only ship an asp_dict.pkl that comes from a trusted source.
        with open("asp_dict.pkl", 'rb') as dictfile:
            asp_sim = pickle.load(dictfile)
        lang_list = list(asp_sim.keys())
        # asp_sim[lang] is indexed by the position of the other language in
        # lang_list (presumably a similarity vector -- verify against the
        # code that produced the pickle). It's symmetric, so only pairs where
        # the second language comes later in lang_list are stored.
        self.asp_dist_func = dict()
        for position, lang_1 in enumerate(lang_list):
            similarities = asp_sim[lang_1]
            self.asp_dist_func[lang_1] = {
                lang_2: 1 - similarities[index]
                for index, lang_2 in enumerate(lang_list[position + 1:], start=position + 1)
            }

    @staticmethod
    def _symmetric_lookup(table, l1, l2):
        """Return ``table[l1][l2]`` if present, otherwise ``table[l2][l1]``.

        Raises:
            KeyError: If the pair is stored in neither orientation.
        """
        row = table.get(l1)
        if row is not None and l2 in row:
            return row[l2]
        return table[l2][l1]

    def get_dists(self, l1, l2):
        """Look up all four distances between two language codes.

        Args:
            l1: Code of the first language, as used for keys in the tables.
            l2: Code of the second language.

        Returns:
            A tuple ``(tree_dist, map_dist, asp_dist, learned_dist)``.

        Raises:
            KeyError: If the pair is missing from the tree, map or learned
                table in both orientations.
        """
        tree_dist = self._symmetric_lookup(self.tree_dist_func, l1, l2)
        map_dist = self._symmetric_lookup(self.map_dist_func, l1, l2)
        try:
            asp_dist = self._symmetric_lookup(self.asp_dist_func, l1, l2)
        except KeyError:
            asp_dist = tree_dist  # dirty hack, but like 4 codes are not part of phonepiece
        learned_dist = self._symmetric_lookup(self.learned_dist_func, l1, l2)
        return tree_dist, map_dist, asp_dist, learned_dist

    def measure(self, l1, l2):
        """Build the four distance plots for two dropdown selections.

        Args:
            l1: First selection, formatted like ``"English (eng)"``.
            l2: Second selection, in the same format.

        Returns:
            A tuple of four figures: family-tree, globe, phoneme-set and
            machine-learned distance. Identical selections are plotted as
            distance 0.0 without consulting the tables.
        """
        if l1 == l2:
            distances = (0.0, 0.0, 0.0, 0.0)
        else:
            distances = self.get_dists(_extract_iso_code(l1), _extract_iso_code(l2))
        headings = (
            "Language Family Tree Distance",
            "Distance on the Globe",
            "Distance between Phoneme-Sets",
            "Machine-Learned Distance",
        )
        f1, f2, f3, f4 = (
            plot_scalar_on_scale(distance, f"{heading} between {l1} and {l2}")
            for heading, distance in zip(headings, distances)
        )
        return f1, f2, f3, f4


m = Measurer()
iso_to_name = load_json_from_path("iso_to_fullname.json")
text_selection = [f"{name} ({iso_code})" for iso_code, name in iso_to_name.items()]

# NOTE(review): the blank lines at the start and end of the description are
# written as explicit "\n\n" escapes, since a quoted string cannot span
# physical lines -- confirm this is the intended markup.
iface = gr.Interface(fn=m.measure,
                     inputs=[gr.Dropdown(text_selection,
                                         type="value",
                                         value='English (eng)',
                                         label="Select the first Language (type on your keyboard to find it quickly)"),
                             gr.Dropdown(text_selection,
                                         type="value",
                                         value='German (deu)',
                                         label="Select the second Language (type on your keyboard to find it quickly)")],
                     outputs=[gr.Plot(label="", show_label=False, format="png", container=True)
                              for _ in range(4)],
                     description="\n\nThis demo allows you to view the distance between two languages from the ISO 639-3 list according to several distance measurement functions. "
                                 "For more information, check out our paper: https://arxiv.org/abs/2406.06403 and our text-to-speech tool, in which we make use of "
                                 "this technique: https://github.com/DigitalPhonetics/IMS-Toucan\n\n",
                     fill_width=True)
iface.launch()