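# NOTE(review): the original first three lines read "Spaces: / Runtime error /
# Runtime error" -- this looks like page-header residue from the capture, not
# part of the module; kept here only as a comment.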
import datetime
import operator
import pathlib

import pandas as pd
import tqdm.auto
import yaml
from huggingface_hub import HfApi

from constants import (OWNER_CHOICES, SLEEP_TIME_INT_TO_STR,
                       SLEEP_TIME_STR_TO_INT, WHOAMI)

# Directory that contains this file; `list.yaml` is opened relative to it.
repo_dir = pathlib.Path(__file__).parent
class DemoList: | |
COLUMN_INFO = [ | |
['status', 'markdown'], | |
['hardware', 'markdown'], | |
['title', 'markdown'], | |
['owner', 'markdown'], | |
['arxiv', 'markdown'], | |
['github', 'markdown'], | |
['likes', 'number'], | |
['tags', 'str'], | |
['last_modified', 'str'], | |
['created', 'str'], | |
['sdk', 'markdown'], | |
['sdk_version', 'str'], | |
['suggested_hardware', 'markdown'], | |
['sleep_time', 'markdown'], | |
['replicas', 'markdown'], | |
] | |
def __init__(self): | |
self.api = HfApi() | |
self._raw_data = self.load_data() | |
self.df_raw = pd.DataFrame(self._raw_data) | |
self.df = self.prettify_df() | |
def column_names(self): | |
return list(map(operator.itemgetter(0), self.COLUMN_INFO)) | |
def column_datatype(self): | |
return list(map(operator.itemgetter(1), self.COLUMN_INFO)) | |
def get_space_id(url: str) -> str: | |
return '/'.join(url.split('/')[-2:]) | |
def load_data(self) -> list[dict]: | |
with open(repo_dir / 'list.yaml') as f: | |
data = yaml.safe_load(f) | |
res = [] | |
for url in tqdm.auto.tqdm(list(data)): | |
space_id = self.get_space_id(url) | |
space_info = self.api.space_info(repo_id=space_id) | |
card = space_info.cardData | |
info: dict = data[url] | { | |
'url': url, | |
'title': card['title'] if 'title' in card else space_id, | |
'owner': space_id.split('/')[0], | |
'sdk': card['sdk'], | |
'sdk_version': card.get('sdk_version', ''), | |
'likes': space_info.likes, | |
'private': space_info.private, | |
'last_modified': space_info.lastModified, | |
'status': space_info.runtime['stage'], | |
'suggested_hardware': card.get('suggested_hardware', ''), | |
} | |
for tag in ['arxiv', 'github', 'tags']: | |
if tag not in info: | |
info[tag] = [] | |
# `current` of paused Spaces is `None`, but `requested` is not | |
info['hardware'] = space_info.runtime['hardware']['current'] | |
if info['hardware'] is None: | |
info['hardware'] = space_info.runtime['hardware']['requested'] | |
# `gcTimeout` is `None` for `cpu-basic` Spaces and Spaces | |
# with "Don't sleep" sleep time. | |
# We use `-1` to represent it. | |
info['sleep_time'] = space_info.runtime['gcTimeout'] or -1 | |
if info['sleep_time'] not in SLEEP_TIME_INT_TO_STR: | |
print(space_id) | |
print(f'Unknown sleep time: {info["sleep_time"]}') | |
continue | |
# `resources` of paused Spaces is `None` | |
resources = space_info.runtime['resources'] | |
info['replicas'] = -1 if resources is None else resources[ | |
'replicas'] | |
res.append(info) | |
return res | |
def get_arxiv_link(self, links: list[str]) -> str: | |
links = [self.create_link(link.split('/')[-1], link) for link in links] | |
return '\n'.join(links) | |
def get_github_link(self, links: list[str]) -> str: | |
links = [self.create_link('github', link) for link in links] | |
return '\n'.join(links) | |
def get_tag_list(self, tags: list[str]) -> str: | |
return ', '.join(tags) | |
def create_link(text: str, url: str) -> str: | |
return f'<a href={url} target="_blank">{text}</a>' | |
def to_div(self, text: str | None, category_name: str) -> str: | |
if text is None: | |
text = '' | |
class_name = f'{category_name}-{text.lower()}' | |
return f'<div class="{class_name}">{text}</div>' | |
def format_timestamp(timestamp: str) -> str: | |
s = datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.000Z') | |
return s.strftime('%Y/%m/%d %H:%M:%S') | |
def add_div_tag_to_replicas(replicas: int) -> str: | |
if replicas == -1: | |
return '' | |
if replicas == 1: | |
return '1' | |
return f'<div class="multiple-replicas">{replicas}</div>' | |
def add_div_tag_to_sleep_time(sleep_time_s: str, hardware: str) -> str: | |
if hardware == 'cpu-basic': | |
return f'<div class="sleep-time-cpu-basic">{sleep_time_s}</div>' | |
s = sleep_time_s.replace(' ', '-') | |
return f'<div class="sleep-time-{s}">{sleep_time_s}</div>' | |
def prettify_df(self) -> pd.DataFrame: | |
new_rows = [] | |
for _, row in self.df_raw.copy().iterrows(): | |
new_row = { | |
'status': | |
self.to_div(row.status, 'status'), | |
'hardware': | |
self.to_div(row.hardware, 'hardware'), | |
'suggested_hardware': | |
self.to_div(row.suggested_hardware, 'hardware'), | |
'title': | |
self.create_link(row.title, row.url), | |
'owner': | |
self.create_link(row.owner, | |
f'https://huggingface.co/{row.owner}'), | |
'arxiv': | |
self.get_arxiv_link(row.arxiv), | |
'github': | |
self.get_github_link(row.github), | |
'likes': | |
row.likes, | |
'tags': | |
self.get_tag_list(row.tags), | |
'last_modified': | |
self.format_timestamp(row.last_modified), | |
'created': | |
self.format_timestamp(row.created), | |
'sdk': | |
self.to_div(row.sdk, 'sdk'), | |
'sdk_version': | |
row.sdk_version, | |
'sleep_time': | |
self.add_div_tag_to_sleep_time( | |
SLEEP_TIME_INT_TO_STR[row.sleep_time], row.hardware), | |
'replicas': | |
self.add_div_tag_to_replicas(row.replicas), | |
} | |
new_rows.append(new_row) | |
df = pd.DataFrame(new_rows).loc[:, self.column_names] | |
return df | |
def apply_filter( | |
self, | |
status: list[str], | |
hardware: list[str], | |
sleep_time: list[str], | |
multiple_replicas: bool, | |
sdk: list[str], | |
visibility: list[str], | |
owner: list[str], | |
) -> pd.DataFrame: | |
df_raw = self.df_raw | |
df = self.df | |
if multiple_replicas: | |
df = df[df_raw.replicas > 1] | |
if visibility == ['public']: | |
df = df[~df_raw.private] | |
elif visibility == ['private']: | |
df = df[df_raw.private] | |
df = df[(df_raw.status.isin(status)) & (df_raw.hardware.isin(hardware)) | |
& (df_raw.sdk.isin(sdk))] | |
sleep_time_int = [SLEEP_TIME_STR_TO_INT[s] for s in sleep_time] | |
df = df[df_raw.sleep_time.isin(sleep_time_int)] | |
if set(owner) == set(OWNER_CHOICES): | |
pass | |
elif WHOAMI in owner: | |
df = df[df_raw.owner == WHOAMI] | |
else: | |
df = df[df_raw.owner != WHOAMI] | |
return df | |