# example_LLM2Vec/app.py
import gradio as gr
from llm2vec import LLM2Vec
from transformers import AutoTokenizer, AutoModel, AutoConfig
from peft import PeftModel
import torch
import os
# Disable the fused SDP attention kernels; the LLM2Vec examples disable these,
# as the patched bidirectional attention can misbehave with them.
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)
# Read tokens from environment variables and fail fast if they are missing.
# (They were read from os.environ, so there is no need to write them back.)
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
HF_TOKEN = os.getenv('HF_TOKEN')
if not GROQ_API_KEY or not HF_TOKEN:
    raise ValueError("GROQ_API_KEY and HF_TOKEN must be set as environment variables.")
# Load the tokenizer and the MNTP-trained base model.
tokenizer = AutoTokenizer.from_pretrained("McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp")
config = AutoConfig.from_pretrained("McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp", trust_remote_code=True)
model = AutoModel.from_pretrained(
    "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp",
    trust_remote_code=True,
    config=config,
    torch_dtype=torch.bfloat16,
    device_map="cuda" if torch.cuda.is_available() else "cpu",
)
# Apply the MNTP LoRA weights and merge them into the base model.
model = PeftModel.from_pretrained(model, "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp")
model = model.merge_and_unload()
# Stack the unsupervised SimCSE LoRA weights on top of the merged model.
model = PeftModel.from_pretrained(model, "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse")
# Wrapper for encoding and pooling operations
l2v = LLM2Vec(model, tokenizer, pooling_mode="mean", max_length=512)
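# A minimal sketch (kept as comments, not part of the app's control flow) of how
# these embeddings are typically compared, following the LLM2Vec README; the two
# sentences are placeholders:
#
#   emb = l2v.encode(["first sentence", "second sentence"])  # tensor of shape (2, hidden_size)
#   sim = torch.nn.functional.cosine_similarity(emb[0:1], emb[1:2])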
def encode_text(input_text):
    # l2v.encode expects a list of sentences, so wrap the single input; convert
    # the returned tensor to a plain list so Gradio can serialize it as JSON.
    encoding = l2v.encode([input_text])
    return encoding[0].tolist()
# Define the Gradio interface (gr.inputs / gr.outputs were removed in Gradio 4.x).
iface = gr.Interface(
    fn=encode_text,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
    outputs=gr.JSON(),
)
# Launch Gradio app
iface.launch(share=True)
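
# Hedged usage sketch: once the app is running, the endpoint can be queried with
# gradio_client. The local URL and api_name below are assumptions based on
# Gradio defaults, not something this repo pins down:
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   embedding = client.predict("Hello world", api_name="/predict")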