# Hugging Face Space (status shown on the page at capture time: Runtime error)
from gradio_huggingfacehub_search import HuggingfaceHubSearch | |
from huggingface_hub import HfApi | |
import pandas as pd | |
import gradio as gr | |
import duckdb | |
import requests | |
# Base URL of the datasets-server REST API that query_dataset() calls.
BASE_DATASETS_SERVER_URL: str = "https://datasets-server.huggingface.co"

# Shared clients, created once when the module is imported.
# DuckDB connection opened without a database path; used to run the SQL.
conn = duckdb.connect()
# Hub API client (not referenced by the other code visible in this file).
hf_api = HfApi()
# Upper bound, in seconds, on how long to wait for the datasets-server reply.
_REQUEST_TIMEOUT_SECONDS = 30


def query_dataset(dataset_id: str, query: str) -> pd.DataFrame:
    """Return the first 100 rows of a dataset's first Parquet file.

    The datasets-server ``/parquet`` endpoint is asked for the Parquet files
    of ``dataset_id``; the first listed file is then read through the
    module-level DuckDB connection.

    Args:
        dataset_id: Hub dataset identifier, e.g. ``"user/dataset"``.
        query: Text entered by the user. It is currently NOT used to build
            the SQL statement; the parameter is kept so the UI callback
            signature stays stable.

    Returns:
        A DataFrame holding at most 100 rows of the first Parquet file.

    Raises:
        requests.HTTPError: If the datasets-server answers with an error
            status.
        requests.RequestException: If the request fails or times out.
        ValueError: If the response lists no Parquet files, or the first
            entry carries no URL.
    """
    # Passing the id via ``params`` lets requests URL-encode it correctly.
    response = requests.get(
        f"{BASE_DATASETS_SERVER_URL}/parquet",
        params={"dataset": dataset_id},
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()

    # Guard the empty case explicitly: indexing [0] on an empty list would
    # surface as an opaque IndexError instead of a meaningful message.
    parquet_files = response.json().get("parquet_files") or []
    if not parquet_files:
        raise ValueError(f"No parquet files found for dataset {dataset_id!r}.")

    first_parquet_url = parquet_files[0].get("url")
    if not first_parquet_url:
        raise ValueError("No valid URL found for the first parquet file.")

    # The URL is embedded in a SQL string literal, so double any single
    # quote to keep it from terminating the literal early.
    escaped_url = first_parquet_url.replace("'", "''")
    sql_query = f"SELECT * FROM read_parquet('{escaped_url}') limit 100;"
    return conn.execute(sql_query).fetchdf()
# Build the UI: a Hub dataset picker, a free-text question box, a button,
# and a table that displays whatever query_dataset() returns.
with gr.Blocks() as demo:
    # NOTE(review): the trailing characters in the heading and button label
    # look like mis-decoded emoji; confirm against the original source.
    gr.Markdown("# Query your HF Datasets with Natural Language ππ")

    dataset_name = HuggingfaceHubSearch(
        search_type="dataset",
        label="Hub Dataset ID",
        placeholder="Find your favorite dataset...",
        value="jamescalam/world-cities-geo",
    )
    query_input = gr.Textbox(value="", label="Ask anything...")
    btn = gr.Button("Ask πͺ")
    df = gr.DataFrame(datatype="markdown")

    # Clicking the button sends the selected dataset id and the question
    # text to query_dataset and renders the result in the table.
    btn.click(fn=query_dataset, inputs=[dataset_name, query_input], outputs=[df])

# Launch the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()