mrfakename committed on
Commit
5a48f05
·
verified ·
1 Parent(s): a54e9da

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +186 -0
app.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Code taken from https://colab.research.google.com/drive/1s2eQlolcI1VGgDhqWIANfkfKvcKrMyNr
2
+ # Original code by @maximelabonne on Twitter (@mlabonne on HF)
3
+ # Apache 2.0 licensed (asked on X/Twitter)
4
+ #
5
+ # Changes:
6
+ #
7
+ # Jan 20, 2023: Ported to Gradio
8
+ import gradio as gr
9
+
10
+ from huggingface_hub import ModelCard, HfApi
11
+ import requests
12
+ import networkx as nx
13
+ from PIL import Image
14
+ import matplotlib.pyplot as plt
15
+ from matplotlib.patches import Patch
16
+ from collections import defaultdict
17
+ from networkx.drawing.nx_agraph import graphviz_layout
18
+ from IPython.display import clear_output
19
+ import io
20
+ from bokeh.io import show, output_notebook
21
+ from bokeh.plotting import figure, from_networkx
22
+ from bokeh.models import ColumnDataSource, LabelSet, HoverTool
23
+ from bokeh.transform import linear_cmap
24
+ from networkx.drawing.layout import spring_layout
25
+
26
def get_model_names_from_yaml(url):
    """Get a list of parent model names from a merge config YAML file.

    Args:
        url: URL of the YAML file on the Hugging Face Hub. A ``/blob/`` URL
            (which serves the HTML file viewer) is rewritten to the
            ``/resolve/`` endpoint so the file itself is fetched.

    Returns:
        A list of ``owner/name`` strings, de-duplicated in order of first
        appearance. Empty when the file does not exist (non-200 response)
        or contains no recognizable model reference.

    Raises:
        requests.RequestException: on network failure or timeout.

    NOTE(review): assumes a mergekit-style config in which parent models
    appear as the value of a ``model:``, ``base_model:`` or ``path:`` key on
    a single line -- confirm against the configs you expect to encounter.
    """
    import re  # local import: the module's import block does not provide `re`

    # Only the last "/blob/" is rewritten so an owner or repo literally named
    # "blob" is left untouched.
    raw_url = "/resolve/".join(url.rsplit("/blob/", 1))
    response = requests.get(raw_url, timeout=10)
    if response.status_code != 200:
        return []

    # Matches e.g. "  - model: owner/name" or "base_model: 'owner/name'".
    # The negative lookahead rejects longer paths such as "a/b/c", which are
    # local directories rather than Hub repository ids.
    reference = re.compile(
        r"""^\s*(?:-\s*)?(?:base_model|model|path)\s*:\s*["']?"""
        r"""([A-Za-z0-9][\w.\-]*/[\w.\-]+)(?![\w.\-/])"""
    )

    model_tags = []
    for line in response.text.splitlines():
        match = reference.match(line)
        if match and match.group(1) not in model_tags:
            model_tags.append(match.group(1))
    return model_tags
33
+
34
+
35
def get_license_color(model):
    """Pick the node color for a model from the license on its model card.

    Returns 'lightgreen' when the lower-cased license string contains one of
    the permissive markers, 'lightcoral' for any other license, and
    'lightgray' when the card or its license cannot be read (the error is
    printed, not raised).
    """
    # Substrings that mark a license as permissive; add more as needed
    permissive_markers = ('mit', 'bsd', 'apache-2.0', 'openrail')
    try:
        metadata = ModelCard.load(model).data.to_dict()
        declared = metadata['license'].lower()
        for marker in permissive_markers:
            if marker in declared:
                return 'lightgreen'  # Permissive licenses
        return 'lightcoral'  # Noncommercial or other licenses
    except Exception as e:
        print(f"Error retrieving license for {model}: {e}")
        return 'lightgray'
50
+
51
+
52
def get_model_names(model, genealogy, found_models=None, _visited=None):
    """Recursively collect the parent models of ``model``.

    Parents are looked up, in order, in the model card's ``base_model``
    field, in card tags that contain a ``/``, and finally in the repository's
    ``merge.yml`` / ``mergekit_config.yml`` files.

    Args:
        model: Model id on the Hugging Face Hub.
        genealogy: Mapping ``parent id -> list of child ids``; updated in
            place with one entry per parent/child relation discovered.
        found_models: Accumulator list of every parent id encountered
            (duplicates possible); created when omitted.
        _visited: Internal set of model ids already expanded. It prevents
            infinite recursion on circular references and repeated network
            lookups for ancestors shared by several branches.

    Returns:
        ``found_models``. Lookup failures are printed and swallowed so that a
        single unreadable model does not abort the whole traversal.
    """
    if found_models is None:
        found_models = []
    if _visited is None:
        _visited = set()
    _visited.add(model)

    try:
        card_dict = ModelCard.load(model).data.to_dict()

        # Check the base_model in metadata (may be a string, a list or missing)
        model_tags = card_dict.get('base_model')

        # Check the tags in metadata
        if not model_tags:
            model_tags = [
                tag for tag in (card_dict.get('tags') or [])
                if isinstance(tag, str) and '/' in tag
            ]

        # Check for merge.yml and mergekit_config.yml if nothing was found yet
        if not model_tags:
            model_tags = get_model_names_from_yaml(f"https://huggingface.co/{model}/blob/main/merge.yml")
        if not model_tags:
            model_tags = get_model_names_from_yaml(f"https://huggingface.co/{model}/blob/main/mergekit_config.yml")

        # Normalize a single value (e.g. a base_model string) to a list
        if not isinstance(model_tags, list):
            model_tags = [model_tags] if model_tags else []

        # Drop duplicates (order preserved) and self-references
        model_tags = [tag for tag in dict.fromkeys(model_tags) if tag != model]

        # Add found model names to the list
        found_models.extend(model_tags)

        # Record the genealogy
        for model_tag in model_tags:
            genealogy.setdefault(model_tag, []).append(model)

        # Recursively check for more models, expanding each one only once
        for model_tag in model_tags:
            if model_tag not in _visited:
                get_model_names(model_tag, genealogy, found_models, _visited)

    except Exception as e:
        print(f"Could not find model names for {model}: {e}")

    return found_models
98
+
99
+
100
def find_root_nodes(G):
    """Return the nodes of G that have no incoming edges (in-degree 0)."""
    roots = []
    for node, incoming in G.in_degree():
        if incoming == 0:
            roots.append(node)
    return roots
103
+
104
+
105
def max_width_of_tree(G):
    """Return the largest per-depth node count over all root nodes of G.

    Each root found by find_root_nodes is measured with
    calculate_width_at_depth; the result is 0 when G has no root at all.
    """
    widest_per_root = (
        max(calculate_width_at_depth(G, root).values())
        for root in find_root_nodes(G)
    )
    return max(widest_per_root, default=0)
113
+
114
+
115
def calculate_width_at_depth(G, root):
    """Count the nodes found at each depth below ``root``.

    The graph is walked breadth-first, one level at a time. Every node is
    counted exactly once, at the shallowest depth at which it is reachable,
    so ancestors shared by several branches do not inflate the counts and a
    cycle cannot cause an endless loop.

    Args:
        G: Graph object exposing ``successors(node)``.
        root: Node to start from (depth 0).

    Returns:
        A ``defaultdict(int)`` mapping depth -> number of nodes at that depth.
    """
    depth_count = defaultdict(int)
    seen = {root}
    frontier = [root]
    depth = 0
    while frontier:
        depth_count[depth] = len(frontier)
        next_frontier = []
        for node in frontier:
            for child in G.successors(node):
                if child not in seen:
                    seen.add(child)
                    next_frontier.append(child)
        frontier = next_frontier
        depth += 1
    return depth_count
125
+
126
+
127
def create_family_tree(start_model):
    """Render the merge family tree of ``start_model`` as an image.

    The parents of the model are collected with get_model_names, turned into
    a directed graph (edges point from parent to child), laid out with
    Graphviz ``dot`` and drawn with one node per model, colored by license.

    Args:
        start_model: Model id on the Hugging Face Hub.

    Returns:
        A ``PIL.Image.Image`` holding the rendered PNG.

    Raises:
        networkx.NetworkXUnfeasible: if the collected genealogy contains a
            cycle (raised by ``nx.dag_longest_path_length``).
    """
    genealogy = defaultdict(list)
    get_model_names(start_model, genealogy)  # populates genealogy in place

    # Create a directed graph
    G = nx.DiGraph()

    # Always include the requested model so a model with no detected parents
    # still renders as a single node instead of an empty plot.
    G.add_node(start_model)

    # Add nodes and edges to the graph
    for parent, children in genealogy.items():
        for child in children:
            G.add_edge(parent, child)

    # Get max depth and max width
    max_depth = nx.dag_longest_path_length(G) + 1
    max_width = max_width_of_tree(G) + 1

    # Estimate plot size
    height = max(8, 1.5 * max_depth)
    width = max(8, 3.5 * max_width)

    # Do the slow work (layout, license lookups over the network) before the
    # figure exists, so the figure is open for as short a time as possible.
    pos = graphviz_layout(G, prog="dot")

    # Determine node colors based on license
    node_colors = [get_license_color(node) for node in G.nodes()]

    # Create a label mapping with line breaks
    labels = {node: node.replace("/", "\n") for node in G.nodes()}

    fig = plt.figure(figsize=(width, height))
    try:
        # Draw the graph
        nx.draw(G, pos, labels=labels, with_labels=True, node_color=node_colors, font_size=12, node_size=8_000, edge_color='black')

        # Create a legend for the colors
        legend_elements = [
            Patch(facecolor='lightgreen', label='Permissive'),
            Patch(facecolor='lightcoral', label='Noncommercial'),
            Patch(facecolor='lightgray', label='Unknown'),
        ]
        plt.legend(handles=legend_elements, loc='upper left')

        plt.title(f"{start_model}'s Family Tree", fontsize=20)
        plt.figtext(0.5, 0.01, "Merge Family Tree. Created by Maxime Labonne, ported to Gradio by mrfakename. https://huggingface.co/spaces/mrfakename/merge-model-tree", ha="center", fontsize=10)

        buf = io.BytesIO()
        fig.savefig(buf, format="png")
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each request would leak one figure.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)
179
def create_graph(mid):
    """Gradio click handler: build the family-tree image for a model id.

    Args:
        mid: Text entered by the user; surrounding whitespace is ignored.

    Returns:
        The image produced by create_family_tree.

    Raises:
        gr.Error: if no model id was entered, so the UI shows a readable
            message instead of attempting a lookup with an empty id.
    """
    repo_id = (mid or "").strip()
    if not repo_id:
        raise gr.Error("Please enter a Hugging Face model ID, e.g. username/model.")
    return create_family_tree(repo_id)
181
# Gradio UI: one text input for the model id, a button, and the rendered graph.
with gr.Blocks() as demo:
    model_id = gr.Textbox(
        label="HF Model ID",
        info="The model ID on the Hugging Face Hub. Example: leveldevai/MarcDareBeagle-7B",
        placeholder="username/model",
    )
    go = gr.Button(value="Display")
    out = gr.Image(label="Graph", interactive=False)
    # Clicking the button renders the family tree of the entered model.
    go.click(fn=create_graph, inputs=[model_id], outputs=[out])

# Enable request queuing, then start the server.
demo.queue().launch()