# Streamlit app for browsing Kaggle notebooks that were converted to Python
# scripts. The user picks a row index; the app shows notebook metadata and the
# script source for that row.
import json

import streamlit as st
from datasets import load_dataset

st.set_page_config(page_title="Kaggle Notebooks inspection", layout="wide")
# NOTE(review): unsafe_allow_html=True suggests this heading originally carried
# HTML markup that is no longer present in the literal -- confirm and restore.
st.markdown("\n\nKaggle Notebooks inspection 🔍\n\n", unsafe_allow_html=True)
st.markdown(
    "Here you can inspect Kaggle notebooks that were converted to python "
    "scripts and deduplicated."
)


# NOTE(review): st.cache is deprecated in recent Streamlit releases in favour of
# st.cache_data / st.cache_resource; kept as-is so older versions keep working.
@st.cache()
def load_data():
    """Load the train split of the Kaggle scripts subset (cached by Streamlit)."""
    ds = load_dataset("loubnabnl/subset_kaggle_scripts", split="train")
    return ds


def show_extra_info(e):
    """Build the markdown summary shown above a notebook's script.

    Args:
        e: Mapping for one dataset row. ``e["kversion"]`` must be a JSON string
            encoding a non-empty list whose first item has ``"Title"`` and
            ``"TotalVotes"`` keys. ``e["dataset_versions"]`` is optional: when
            it is missing, not valid JSON, or an empty list, the dataset part
            of the summary is omitted.

    Returns:
        A markdown string with the notebook title and upvote count, followed
        by the attached dataset's title and description when available.
    """
    kv = json.loads(e["kversion"])[0]

    # Dataset metadata is best-effort: catch only the failures that mean
    # "absent or unparsable" (None/non-string value, bad JSON, missing key,
    # empty list) instead of swallowing every exception.
    try:
        data_v = json.loads(e["dataset_versions"])[0]
    except (TypeError, ValueError, LookupError):
        data_v = ""

    if data_v:
        data_title = data_v["Title"]
        # A missing description shows up as the string/float "nan"; treat it
        # as empty rather than printing "nan".
        description = (
            data_v["Description"] if str(data_v["Description"]) != "nan" else ""
        )
        # NOTE(review): the newlines here were probably HTML line breaks in the
        # original markup -- confirm.
        data_text = (
            "\n**📚 Dataset description:**\n"
            f"Title: **{data_title}**, described as: {description}."
        )
    else:
        data_text = ""

    text = (
        f"The title of the notebook is: **{kv['Title']}** and it has "
        f"**{kv['TotalVotes']} ⬆️ upvotes**.{data_text}"
    )
    return text


samples = load_data()
index_example = st.number_input(
    f"Chose a sample from the existing {len(samples)} notebooks:",
    min_value=0,
    max_value=len(samples) - 1,
    value=0,
    step=1,
)
# Fetch the selected row once and reuse it for both the summary and the code.
selected_sample = samples[index_example]
st.markdown(show_extra_info(selected_sample), unsafe_allow_html=True)
st.code(selected_sample["script"])