# dhruv-anand-aintech's picture
# try embed
# b33ec72
import gradio as gr
import spaces
import torch
import vdf_io
from sentence_transformers import SentenceTransformer
from rich import print as rprint
# Module-level tensor that is moved to CUDA at import time; the
# @spaces.GPU-decorated functions below read it (and its .device).
zero = torch.Tensor([0]).cuda()
# NOTE(review): per the original author's note this reports 'cpu' at import
# time despite the .cuda() call above — presumably because ZeroGPU (the
# `spaces` package) defers real GPU placement until a @spaces.GPU function
# runs. Confirm against the ZeroGPU documentation.
print(zero.device) # <-- 'cpu' 🤔
# Log the installed vdf_io version at startup.
print(vdf_io.__version__)
@spaces.GPU
def greet(n):
    """Return a greeting string built from ``zero + n``.

    Also prints the device of the module-level ``zero`` tensor as seen from
    inside this GPU-decorated function.
    """
    print(zero.device)  # <-- 'cuda:0' 🤗
    greeting = f"Hello {zero + n} Tensor"
    return greeting
@spaces.GPU
def reembed_dataset(ds, model):
    """Embed the ``text`` column of *ds* with a SentenceTransformer model.

    Args:
        ds: Object exposing ``map`` (in this file, whatever
            ``datasets.load_dataset`` returned); each row is read via
            ``row["text"]``.
        model: Model name or path handed to ``SentenceTransformer``.

    Returns:
        The result of ``ds.map`` carrying an added ``"embedding"`` column.
        Previously nothing was returned and the mapped result was discarded.
    """
    # Use a separate name so the ``model`` argument (a name/path) is not
    # shadowed by the loaded encoder object.
    encoder = SentenceTransformer(model, device=zero.device)
    rprint(encoder)
    rprint(encoder.encode("Hello, World!"))

    # ``map`` hands back a new dataset instead of mutating ``ds``, and the
    # mapped function has to return a dict of columns — so keep the result
    # and store the vector under an explicit column name.
    embedded = ds.map(lambda row: {"embedding": encoder.encode(row["text"])})

    # A DatasetDict (a dict keyed by split name) cannot be indexed by row
    # number, so preview the first row of its first split in that case.
    if isinstance(embedded, dict):
        first_table = next(iter(embedded.values()))
    else:
        first_table = embedded
    rprint(first_table[0])
    return embedded
def reembed_main(dataset_name, embedding_model, output_username):
    """Gradio callback: download a dataset, re-embed it, report success.

    All three arguments are logged; ``output_username`` is not otherwise
    used inside this function.

    Returns:
        A fixed status string once the embedding step has finished.
    """
    print(f"{dataset_name=}, {embedding_model=}, {output_username=}")
    # Fetch first, then hand the loaded dataset straight to the GPU step.
    reembed_dataset(download_dataset(dataset_name), model=embedding_model)
    return "Dataset re-embedded successfully"
def download_dataset(dataset_name):
    """Load *dataset_name* via ``datasets.load_dataset`` and return it.

    Prints ``len()`` of the loaded object before returning.
    """
    # Function-scope import: `datasets` is only imported when this runs.
    import datasets

    loaded = datasets.load_dataset(dataset_name)
    print(len(loaded))
    return loaded
# Gradio front end: three free-text fields (dataset name, embedding model,
# output username) are passed to reembed_main, whose returned string is
# shown in a single output textbox.
demo = gr.Interface(
    fn=reembed_main,
    inputs=[
        gr.Textbox(label=field_label)
        for field_label in ("Dataset name", "Embedding model", "Output username")
    ],
    outputs=gr.Textbox(label="Output"),
    title="Re-Embedder",
    description="Re-embed a dataset using a given model and output to a new username's account",
)
demo.launch()