Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
moar ram
Browse files- app.py +28 -25
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,30 +1,30 @@
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import hf_hub_download
|
3 |
-
import json
|
4 |
import gzip
|
5 |
import urllib
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
|
29 |
text = """\
|
30 |
![](https://huggingface.co/spaces/lvwerra/in-the-stack-gr/resolve/main/banner.png)
|
@@ -77,8 +77,11 @@ def issue_url(username, repos):
|
|
77 |
|
78 |
def check_username(username, version):
|
79 |
output_md = ""
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
82 |
repo_word = "repository" if len(repos)==1 else "repositories"
|
83 |
if version[:2] == "v2":
|
84 |
output_md += f"**Yes**, there is code from **{len(repos)} {repo_word}** in The Stack. Check the links to see when it was archived by Software Heritage:\n\n"
|
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import hf_hub_download
|
3 |
+
import json_stream as json
|
4 |
import gzip
|
5 |
import urllib
|
6 |
+
from collections import defaultdict
|
7 |
+
import gc
|
8 |
+
import sys
|
9 |
+
|
10 |
+
usernames = defaultdict(dict)
|
11 |
+
|
12 |
+
versions = ["v1.0", "v1.1", "v1.2", "v2.0", "v2.0.1"]
|
13 |
+
versions = [sys.intern(version) for version in versions]
|
14 |
+
|
15 |
+
for version in versions:
|
16 |
+
print(f"Loading {version}")
|
17 |
+
branch = version if version != "v1.0" else "main"
|
18 |
+
filepath = hf_hub_download(repo_id="bigcode/the-stack-username-to-repo", filename="username_to_repo.json.gz", repo_type="dataset", revision=branch)
|
19 |
+
with gzip.open(filepath, 'r') as f:
|
20 |
+
data = json.load(f)
|
21 |
+
for username, repos in data.items():
|
22 |
+
for repo in repos:
|
23 |
+
if repo not in usernames[username]:
|
24 |
+
usernames[username][repo] = []
|
25 |
+
usernames[username][repo].append(version)
|
26 |
+
del data
|
27 |
+
gc.collect()
|
28 |
|
29 |
text = """\
|
30 |
![](https://huggingface.co/spaces/lvwerra/in-the-stack-gr/resolve/main/banner.png)
|
|
|
77 |
|
78 |
def check_username(username, version):
|
79 |
output_md = ""
|
80 |
+
repos = []
|
81 |
+
if username in usernames:
|
82 |
+
repos = [repo for repo, versions in usernames[username].items() if version in versions]
|
83 |
+
|
84 |
+
if repos:
|
85 |
repo_word = "repository" if len(repos)==1 else "repositories"
|
86 |
if version[:2] == "v2":
|
87 |
output_md += f"**Yes**, there is code from **{len(repos)} {repo_word}** in The Stack. Check the links to see when it was archived by Software Heritage:\n\n"
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
json-stream
|