File size: 1,441 Bytes
dacec36
 
 
93dc552
b33ec72
 
dacec36
 
93dc552
 
 
 
dacec36
 
 
93dc552
dacec36
 
93dc552
b33ec72
 
 
 
 
 
 
93dc552
 
b33ec72
 
 
 
 
93dc552
 
b33ec72
93dc552
 
b33ec72
 
 
93dc552
 
 
 
 
 
b1d30e1
93dc552
b1d30e1
93dc552
b1d30e1
93dc552
b1d30e1
93dc552
 
 
dacec36
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import gradio as gr
import spaces
import torch
import vdf_io
from sentence_transformers import SentenceTransformer
from rich import print as rprint

# One-element tensor moved to CUDA at import time; the functions below read
# `zero.device` to decide where to run.
zero = torch.Tensor([0]).cuda()
# Original author's note: at import time this reports 'cpu'.
print(zero.device)  # <-- 'cpu' 🤔

# Log the installed vdf_io version at startup.
print(vdf_io.__version__)


@spaces.GPU
def greet(n):
    """Return a greeting string that embeds ``zero + n``.

    The device of the module-level ``zero`` tensor is printed first; the
    original author's note records 'cuda:0' 🤗 at this point.
    """
    current_device = zero.device
    print(current_device)
    shifted = zero + n
    return f"Hello {shifted} Tensor"


@spaces.GPU
def reembed_dataset(ds, model):
    """Embed the ``"text"`` field of every row in *ds*.

    Args:
        ds: Object exposing ``map`` in the style of the Hugging Face
            ``datasets`` library (a single dataset or a dict of splits).
            Rows are expected to carry a ``"text"`` field.
        model: Identifier passed to ``SentenceTransformer`` to load the
            embedding model.

    Returns:
        The dataset produced by ``map``, with an added ``"embedding"``
        column. The input *ds* is not modified.
    """
    encoder = SentenceTransformer(model, device=zero.device)
    rprint(encoder)
    # Smoke-test the encoder on a fixed string before touching the dataset.
    rprint(encoder.encode("Hello, World!"))

    # `map` returns a new dataset instead of mutating in place, and the
    # callback must return a dict of columns, so wrap the vector under a
    # column name and keep the result.
    embedded = ds.map(lambda row: {"embedding": encoder.encode(row["text"])})

    # A dict of splits is keyed by split name, so integer indexing is only
    # valid on a single split; preview the first row of the first split.
    if isinstance(embedded, dict):
        preview_split = next(iter(embedded.values()), None)
    else:
        preview_split = embedded
    if preview_split is not None and len(preview_split) > 0:
        rprint(preview_split[0])

    return embedded


def reembed_main(dataset_name, embedding_model, output_username):
    """Gradio handler: download a dataset and re-embed it.

    Logs the three inputs, fetches the dataset via ``download_dataset`` and
    passes it to ``reembed_dataset`` with the chosen model. The
    ``output_username`` value is only logged here.

    Returns:
        A fixed status message shown in the UI.
    """
    print(f"{dataset_name=}, {embedding_model=}, {output_username=}")
    reembed_dataset(download_dataset(dataset_name), model=embedding_model)
    status = "Dataset re-embedded successfully"
    return status


def download_dataset(dataset_name):
    """Load *dataset_name* with ``datasets.load_dataset`` and return it.

    The length of the loaded object is printed before returning.
    """
    # Imported lazily so `datasets` is only loaded when a download is requested.
    from datasets import load_dataset

    loaded = load_dataset(dataset_name)
    print(len(loaded))
    return loaded


# Gradio UI: three free-text inputs (dataset name, embedding model, output
# username) wired to `reembed_main`, with a single text output.
demo = gr.Interface(
    title="Re-Embedder",
    description="Re-embed a dataset using a given model and output to a new username's account",
    fn=reembed_main,
    inputs=[
        gr.Textbox(label=caption)
        for caption in ("Dataset name", "Embedding model", "Output username")
    ],
    outputs=gr.Textbox(label="Output"),
)
demo.launch()