# example_LLM2Vec / app.py
# Revision 77be9aa ("modified app.py"), committed by Abhijit-192-168-1-1; 1.64 kB.
import os
import gradio as gr
from llm2vec import LLM2Vec
from transformers import AutoTokenizer, AutoModel, AutoConfig
from peft import PeftModel
import torch
# Turn off the memory-efficient and flash scaled-dot-product-attention
# kernels, leaving PyTorch to pick from its remaining SDP backends.
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

# Read tokens from environment variables.
# GROQ_API_KEY is still exposed as a module-level name, but nothing in this
# file consumes it, so its absence no longer prevents the app from starting.
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
HF_TOKEN = os.getenv('HF_TOKEN')
if not HF_TOKEN:
    raise ValueError("HF_TOKEN must be set as an environment variable.")
# NOTE: the values above come straight from os.environ, so writing them back
# would be a no-op. NOTE(review): HF_TOKEN is presumably picked up from the
# environment by the Hugging Face libraries -- confirm if auth is required.

# Model identifiers, named once instead of being repeated at every call site.
BASE_MODEL_ID = "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp"
SIMCSE_ADAPTER_ID = "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and base model.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
config = AutoConfig.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(
    BASE_MODEL_ID,
    trust_remote_code=True,
    config=config,
    torch_dtype=torch.bfloat16,
    device_map=DEVICE,
)

# Apply the adapter stored under the base model id and fold its weights into
# the model, so the next adapter is attached to a plain (non-PEFT) model.
model = PeftModel.from_pretrained(model, BASE_MODEL_ID)
model = model.merge_and_unload()

# Load the unsupervised SimCSE adapter on top (kept as an unmerged adapter).
model = PeftModel.from_pretrained(model, SIMCSE_ADAPTER_ID)

# Wrapper for encoding and pooling operations.
l2v = LLM2Vec(model, tokenizer, pooling_mode="mean", max_length=512)
def encode_text(input_text: str) -> list:
    """Embed the text typed into the Gradio textbox.

    Args:
        input_text: The raw string received from the textbox component.

    Returns:
        The embedding of ``input_text`` as a plain list of floats, so the
        ``gr.JSON`` output can serialize it. An empty list is returned when
        the input is missing or contains only whitespace.
    """
    if not input_text or not input_text.strip():
        # Nothing to embed; avoid handing an empty sequence to the encoder.
        return []

    # The encoder is batch-oriented and iterates over its argument. Wrapping
    # the text in a list makes it one sentence instead of a sequence of
    # individual characters.
    embeddings = l2v.encode([input_text])

    # One input sentence -> one row. Convert it to built-in Python floats,
    # because the raw encoder output is not JSON-serializable.
    return embeddings[0].tolist()
# Build the UI pieces first: a two-line textbox in, a JSON viewer out.
text_input = gr.Textbox(lines=2, placeholder="Enter text here...")
json_output = gr.JSON()

# Wire the components to the embedding function.
iface = gr.Interface(fn=encode_text, inputs=text_input, outputs=json_output)

# Start serving the app, requesting a public share link.
iface.launch(share=True)