# NOTE: page-header residue from the hosting UI ("Spaces: Sleeping"); not part of the program.
import sqlite3
import sys
import time
from contextlib import closing

import huggingface_hub
from tqdm import tqdm
# Path handed to sqlite3.connect() by every function in this file; the
# database it names holds the ``repos`` table.
SQLITE3_DB = "data/repos.sqlite3"
def list_repos_from_hub():
    """Yield a prefixed id for every non-private repo listed by huggingface_hub.

    Datasets are yielded first (``"datasets/<id>"``), then models
    (``"models/<id>"``), then spaces (``"spaces/<id>"``). Each listing call
    is made only once the previous listing has been fully consumed.
    """
    yield from (
        "datasets/" + dataset.id
        for dataset in huggingface_hub.list_datasets()
        if not dataset.private
    )
    yield from (
        "models/" + model.id
        for model in huggingface_hub.list_models()
        if not model.private
    )
    yield from (
        "spaces/" + space.id
        for space in huggingface_hub.list_spaces()
        if not space.private
    )
def write_repos_to_db():
    """Create the ``repos`` table if needed and insert every id from the hub.

    Each id produced by ``list_repos_from_hub()`` is inserted with
    ``last_updated_datetime`` set to 0 and ``last_enumerated_datetime`` set
    to NULL. Ids already present are left untouched (``INSERT OR IGNORE``),
    so re-running the function is safe. Progress messages go to stderr.

    Raises:
        sqlite3.Error: if the database cannot be opened or written.
    """
    print("Opening database", SQLITE3_DB, file=sys.stderr)
    # closing() guarantees the connection is released even if the hub
    # enumeration or an INSERT raises part-way through.
    with closing(sqlite3.connect(SQLITE3_DB)) as con:
        cur = con.cursor()
        print("Creating repos table if not exists", file=sys.stderr)
        cur.execute("CREATE TABLE IF NOT EXISTS repos (id TEXT PRIMARY KEY, last_updated_datetime INTEGER, last_enumerated_datetime INTEGER NULLABLE)")
        con.commit()
        print("Inserting rows from huggingface_hub query", file=sys.stderr)
        try:
            for repo in tqdm(list_repos_from_hub()):
                # Bound parameters instead of string formatting: ids come
                # from an external service and must never be spliced into
                # the SQL text.
                cur.execute(
                    "INSERT OR IGNORE INTO repos VALUES (?, ?, NULL)",
                    (repo, 0),
                )
        finally:
            # Keep whatever was inserted even if the enumeration is
            # interrupted; INSERT OR IGNORE makes a later re-run idempotent.
            con.commit()
def list_repos(limit=None):
    """Return repo ids ordered by ``last_updated_datetime`` ascending.

    Args:
        limit: Maximum number of ids to return. ``None`` (the default)
            returns every row.

    Returns:
        A list with the ``id`` column of each selected row.

    Raises:
        sqlite3.Error: if the database cannot be opened or queried.
    """
    query = "SELECT id FROM repos ORDER BY last_updated_datetime ASC"
    params = ()
    if limit is not None:
        # LIMIT is passed as a bound parameter rather than formatted into
        # the SQL text.
        query += " LIMIT ?"
        params = (limit,)
    # closing() releases the connection once the rows have been fetched.
    with closing(sqlite3.connect(SQLITE3_DB)) as con:
        rows = con.execute(query, params).fetchall()
    return [row[0] for row in rows]
def set_updated_datetime(repo):
    """Set ``last_updated_datetime`` of *repo* to the current Unix time.

    Args:
        repo: Value matched against the ``id`` column of the ``repos``
            table. If no row matches, nothing is changed.

    Raises:
        sqlite3.Error: if the database cannot be opened or written.
    """
    now = int(time.time())
    # closing() releases the connection; the id is bound as a parameter so
    # quotes or other special characters in it cannot alter the statement.
    with closing(sqlite3.connect(SQLITE3_DB)) as con:
        con.execute(
            "UPDATE repos SET last_updated_datetime = ? WHERE id = ?",
            (now, repo),
        )
        con.commit()
if __name__ == "__main__":
    # Script entry point: refresh the repos table from the hub, then print
    # a handful of stored ids as a quick sanity check.
    write_repos_to_db()
    print("Done writing to DB. Sample of 5 rows:")
    sample_ids = list_repos(limit=5)
    for repo_id in sample_ids:
        print(repo_id)