# Page header captured along with the source (not part of the program): "Spaces: Sleeping"
import gradio as gr | |
import spaces | |
import torch | |
import vdf_io | |
from sentence_transformers import SentenceTransformer | |
from rich import print as rprint | |
# Module-level tensor that is asked to move to CUDA at import time; `greet`
# and `reembed_dataset` read `zero` / `zero.device` later.
# NOTE(review): the inline device comments here and in `greet` look like they
# come from the ZeroGPU starter template, where the device reports 'cpu' at
# import time and 'cuda:0' inside a GPU-decorated function -- confirm.
zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' 🤗
# Log the installed vdf_io version at startup.
print(vdf_io.__version__)
def greet(n):
    """Return a greeting string that embeds ``zero + n``.

    Prints the device of the module-level ``zero`` tensor before building
    the message.
    """
    current_device = zero.device
    # NOTE(review): the original comment expects 'cuda:0' here; no GPU
    # decorator is visible on this function, so confirm that still holds.
    print(current_device)  # <-- 'cuda:0' 🤗
    total = zero + n
    return f"Hello {total} Tensor"
def reembed_dataset(ds, model):
    """Embed the ``"text"`` field of every row in ``ds`` with ``model``.

    Args:
        ds: A dataset object exposing ``map`` (a single dataset, or a
            dict-like mapping of split name -> dataset). Each row must have
            a ``"text"`` field.
        model: Name or path handed to ``SentenceTransformer``.

    Returns:
        The mapped dataset, with the embeddings stored in a new
        ``"embedding"`` column. The input ``ds`` is not modified.
    """
    encoder = SentenceTransformer(model, device=zero.device)
    rprint(encoder)
    # Smoke-test the encoder on a fixed sentence before the full pass.
    rprint(encoder.encode("Hello, World!"))

    def _add_embedding(row):
        # `map` expects a dict of columns to add/update, not a bare array.
        return {"embedding": encoder.encode(row["text"])}

    # `map` returns a new dataset rather than mutating in place, so the
    # result has to be kept; the original discarded it.
    embedded = ds.map(_add_embedding)

    # A split mapping is keyed by split name, so `embedded[0]` would fail
    # there; preview the first row of the first split instead.
    preview_source = (
        next(iter(embedded.values())) if isinstance(embedded, dict) else embedded
    )
    rprint(preview_source[0])
    return embedded
def reembed_main(dataset_name, embedding_model, output_username):
    """Gradio entry point: download a dataset and re-embed it.

    Args:
        dataset_name: Passed to ``download_dataset``.
        embedding_model: Passed to ``reembed_dataset`` as ``model``.
        output_username: Only echoed in the log line below; nothing in this
            function uploads or otherwise uses it.

    Returns:
        A fixed success message string.
    """
    # Echo the raw inputs so each request is traceable in the logs.
    print(f"{dataset_name=}, {embedding_model=}, {output_username=}")
    reembed_dataset(download_dataset(dataset_name), model=embedding_model)
    status_message = "Dataset re-embedded successfully"
    return status_message
def download_dataset(dataset_name):
    """Load ``dataset_name`` via ``datasets.load_dataset`` and return it.

    Prints ``len()`` of the loaded object before returning.
    NOTE(review): with no ``split`` argument the loaded object is presumably
    a mapping of splits, in which case the printed length is the number of
    splits rather than the number of rows -- confirm.
    """
    # Imported lazily, inside the function, as in the original.
    from datasets import load_dataset

    loaded = load_dataset(dataset_name)
    print(len(loaded))
    return loaded
# Gradio UI: three free-text inputs wired to `reembed_main`, one text output.
demo = gr.Interface(
    fn=reembed_main,
    # One textbox per positional parameter of `reembed_main`, in order:
    # dataset name, embedding model, output username.
    inputs=[
        gr.Textbox(label=field_label)
        for field_label in ("Dataset name", "Embedding model", "Output username")
    ],
    outputs=gr.Textbox(label="Output"),
    title="Re-Embedder",
    description="Re-embed a dataset using a given model and output to a new username's account",
)
# Start serving the interface as soon as the module is executed.
demo.launch()