Spaces:
Sleeping
Sleeping
File size: 1,441 Bytes
dacec36 93dc552 b33ec72 dacec36 93dc552 dacec36 93dc552 dacec36 93dc552 b33ec72 93dc552 b33ec72 93dc552 b33ec72 93dc552 b33ec72 93dc552 b1d30e1 93dc552 b1d30e1 93dc552 b1d30e1 93dc552 b1d30e1 93dc552 dacec36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import gradio as gr
import spaces
import torch
import vdf_io
from sentence_transformers import SentenceTransformer
from rich import print as rprint
# Module-level tensor moved to CUDA at import time. The GPU-decorated
# functions in this file read it: greet adds to it, and reembed_dataset
# uses its .device to place the embedding model.
zero = torch.Tensor([0]).cuda()
# Original note: this prints 'cpu' here even though .cuda() was just called.
# NOTE(review): presumably ZeroGPU defers real CUDA placement until a
# @spaces.GPU function runs -- confirm against the ZeroGPU documentation.
print(zero.device) # <-- 'cpu'
# Log the installed vdf_io version at startup.
print(vdf_io.__version__)
@spaces.GPU
def greet(n):
    """Return a greeting string that embeds ``zero + n``.

    Prints the device of the module-level ``zero`` tensor first; the
    original note says this shows ``'cuda:0'`` inside the GPU-decorated call.

    Args:
        n: Value added to the module-level ``zero`` tensor.

    Returns:
        The string ``"Hello <zero + n> Tensor"``.
    """
    print(zero.device)
    shifted = zero + n
    return f"Hello {shifted} Tensor"
@spaces.GPU
def reembed_dataset(ds, model):
    """Embed the ``"text"`` field of ``ds`` with a SentenceTransformer model.

    Args:
        ds: Object exposing ``.map`` whose rows carry a ``"text"`` field --
            presumably a ``datasets.Dataset`` or ``DatasetDict`` as returned
            by ``download_dataset``; confirm against callers.
        model: Model name or path handed to ``SentenceTransformer``.

    Returns:
        The mapped dataset with an added ``"embedding"`` column. (The
        previous version discarded the ``map`` result and returned ``None``.)
    """
    # Bind the loaded encoder to its own name instead of shadowing ``model``.
    encoder = SentenceTransformer(model, device=zero.device)
    rprint(encoder)
    rprint(encoder.encode("Hello, World!"))

    # ``map`` returns a new dataset instead of mutating ``ds``, and the mapped
    # function has to return a dict of columns, so keep the result and wrap
    # the vectors under a column name. Batching lets ``encode`` process many
    # texts per call instead of one row at a time.
    embedded = ds.map(
        lambda batch: {"embedding": encoder.encode(batch["text"])},
        batched=True,
    )

    # A dict of splits cannot be indexed with 0; preview the first row of its
    # first split instead, and skip the preview when there is nothing to show.
    if isinstance(embedded, dict):
        first_split = next(iter(embedded.values()), None)
    else:
        first_split = embedded
    if first_split is not None and len(first_split) > 0:
        rprint(first_split[0])
    return embedded
def reembed_main(dataset_name, embedding_model, output_username):
    """Gradio callback: download a dataset and run the re-embedding step.

    Args:
        dataset_name: Forwarded to ``download_dataset``.
        embedding_model: Forwarded to ``reembed_dataset`` as ``model``.
        output_username: Only logged here; not used otherwise in this function.

    Returns:
        The fixed status string ``"Dataset re-embedded successfully"``.
    """
    print(f"{dataset_name=}, {embedding_model=}, {output_username=}")
    reembed_dataset(download_dataset(dataset_name), model=embedding_model)
    return "Dataset re-embedded successfully"
def download_dataset(dataset_name):
    """Load ``dataset_name`` via ``datasets.load_dataset`` and return it.

    Prints ``len()`` of the loaded object before returning.

    Args:
        dataset_name: Identifier passed straight to ``load_dataset``.

    Returns:
        Whatever ``load_dataset`` returns for that identifier.
    """
    # Imported inside the function, so the module is loaded on first call
    # rather than at file import.
    from datasets import load_dataset

    loaded = load_dataset(dataset_name)
    print(len(loaded))
    return loaded
# Gradio UI: three free-text inputs (dataset name, embedding model, output
# username) feed reembed_main; its status string is shown in one output box.
# NOTE(review): the description mentions output to a user's account, but
# reembed_main as written only logs the username -- confirm intended behavior.
demo = gr.Interface(
    title="Re-Embedder",
    description="Re-embed a dataset using a given model and output to a new username's account",
    fn=reembed_main,
    inputs=[
        gr.Textbox(label=field_label)
        for field_label in ("Dataset name", "Embedding model", "Output username")
    ],
    outputs=gr.Textbox(label="Output"),
)
demo.launch()
|