import datetime import operator import pathlib import pandas as pd import tqdm.auto import yaml from huggingface_hub import HfApi from constants import (OWNER_CHOICES, SLEEP_TIME_INT_TO_STR, SLEEP_TIME_STR_TO_INT, WHOAMI) repo_dir = pathlib.Path(__file__).parent class DemoList: COLUMN_INFO = [ ['status', 'markdown'], ['hardware', 'markdown'], ['title', 'markdown'], ['owner', 'markdown'], ['arxiv', 'markdown'], ['github', 'markdown'], ['likes', 'number'], ['tags', 'str'], ['last_modified', 'str'], ['created', 'str'], ['sdk', 'markdown'], ['sdk_version', 'str'], ['suggested_hardware', 'markdown'], ['sleep_time', 'markdown'], ['replicas', 'markdown'], ] def __init__(self): self.api = HfApi() self._raw_data = self.load_data() self.df_raw = pd.DataFrame(self._raw_data) self.df = self.prettify_df() @property def column_names(self): return list(map(operator.itemgetter(0), self.COLUMN_INFO)) @property def column_datatype(self): return list(map(operator.itemgetter(1), self.COLUMN_INFO)) @staticmethod def get_space_id(url: str) -> str: return '/'.join(url.split('/')[-2:]) def load_data(self) -> list[dict]: with open(repo_dir / 'list.yaml') as f: data = yaml.safe_load(f) res = [] for url in tqdm.auto.tqdm(list(data)): space_id = self.get_space_id(url) space_info = self.api.space_info(repo_id=space_id) card = space_info.cardData info: dict = data[url] | { 'url': url, 'title': card['title'] if 'title' in card else space_id, 'owner': space_id.split('/')[0], 'sdk': card['sdk'], 'sdk_version': card.get('sdk_version', ''), 'likes': space_info.likes, 'private': space_info.private, 'last_modified': space_info.lastModified, 'status': space_info.runtime['stage'], 'suggested_hardware': card.get('suggested_hardware', ''), } for tag in ['arxiv', 'github', 'tags']: if tag not in info: info[tag] = [] # `current` of paused Spaces is `None`, but `requested` is not info['hardware'] = space_info.runtime['hardware']['current'] if info['hardware'] is None: info['hardware'] = space_info.runtime['hardware']['requested'] # `gcTimeout` is `None` for `cpu-basic` Spaces and Spaces # with "Don't sleep" sleep time. # We use `-1` to represent it. info['sleep_time'] = space_info.runtime['gcTimeout'] or -1 if info['sleep_time'] not in SLEEP_TIME_INT_TO_STR: print(space_id) print(f'Unknown sleep time: {info["sleep_time"]}') continue # `resources` of paused Spaces is `None` resources = space_info.runtime['resources'] info['replicas'] = -1 if resources is None else resources[ 'replicas'] res.append(info) return res def get_arxiv_link(self, links: list[str]) -> str: links = [self.create_link(link.split('/')[-1], link) for link in links] return '\n'.join(links) def get_github_link(self, links: list[str]) -> str: links = [self.create_link('github', link) for link in links] return '\n'.join(links) def get_tag_list(self, tags: list[str]) -> str: return ', '.join(tags) @staticmethod def create_link(text: str, url: str) -> str: return f'{text}' def to_div(self, text: str | None, category_name: str) -> str: if text is None: text = '' class_name = f'{category_name}-{text.lower()}' return f'