File size: 1,659 Bytes
c51e482
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import gradio as gr
from llm2vec import LLM2Vec
from transformers import AutoTokenizer, AutoModel, AutoConfig
from peft import PeftModel
import torch
import os

torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

# Read tokens from environment variables
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
HF_TOKEN = os.getenv('HF_TOKEN')

if not GROQ_API_KEY or not HF_TOKEN:
    raise ValueError("GROQ_API_KEY and HF_TOKEN must be set as environment variables.")

os.environ['GROQ_API_KEY'] = GROQ_API_KEY
os.environ['HF_TOKEN'] = HF_TOKEN


# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp")
config = AutoConfig.from_pretrained("McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp", trust_remote_code=True)
model = AutoModel.from_pretrained("McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp", trust_remote_code=True, config=config, torch_dtype=torch.bfloat16, device_map="cuda" if torch.cuda.is_available() else "cpu")

model = PeftModel.from_pretrained(model, "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp")
model = model.merge_and_unload()

# Load unsupervised SimCSE model
model = PeftModel.from_pretrained(model, "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse")

# Wrapper for encoding and pooling operations
l2v = LLM2Vec(model, tokenizer, pooling_mode="mean", max_length=512)

def encode_text(input_text):
    encoding = l2v.encode(input_text)
    return encoding

# Define Gradio interface
iface = gr.Interface(
    fn=encode_text,
    inputs=gr.inputs.Textbox(lines=2, placeholder="Enter text here..."),
    outputs=gr.outputs.JSON()
)

# Launch Gradio app
iface.launch(share=True)