from datasets import load_dataset
import streamlit as st


@st.cache()
def get_repo_names():
    """Return the searchable repository names, each formatted "owner/repo".

    The list is hard-coded in this script; the function is wrapped in
    Streamlit's cache decorator.
    """
    # NOTE(review): newer Streamlit releases deprecate ``st.cache`` in favour
    # of ``st.cache_data`` -- confirm against the Streamlit version in use.
    list_of_repo_names = ["lvwerra/test", "lvwerra/test2", "numpy/numpy"]
    return list_of_repo_names


def _find_user_repos(repo_names, username):
    """Return the entries of ``repo_names`` owned by ``username``.

    Args:
        repo_names: Iterable of "owner/repo" strings.
        username: Raw user input; surrounding whitespace is ignored and the
            comparison is case-insensitive.

    Returns:
        A list of the matching "owner/repo" strings in their original
        spelling and order; empty when ``username`` is blank or nothing
        matches.
    """
    wanted = username.strip().casefold()
    if not wanted:
        return []
    # The owner is everything before the first "/"; partition() also copes
    # with an entry that has no slash at all.
    return [
        repo_name
        for repo_name in repo_names
        if repo_name.partition("/")[0].casefold() == wanted
    ]


st.title("Am I in The Stack?")
st.markdown("This tool lets you check if a repository under a given username is part of [The Stack dataset](https://huggingface.co/datasets/bigcode/the-stack).")

repo_names = get_repo_names()
username = st.text_input("GitHub Username:")

if st.button("Check!"):
    if not username.strip():
        # A blank query would otherwise be reported as "no repository",
        # which reads like a real negative result.
        st.warning("Please enter a GitHub username.")
    else:
        list_of_repos = _find_user_repos(repo_names, username)
        if not list_of_repos:
            st.markdown("**There is no repository under that username in The Stack**")
        else:
            if len(list_of_repos) == 1:
                st.markdown("**There is 1 repository under that username in The Stack:**")
            else:
                st.markdown(f"**There are {len(list_of_repos)} repositories under that username in The Stack:**")
            st.text("\n".join(list_of_repos))