# example_LLM2Vec/app.py
import gradio as gr
from llm2vec import LLM2Vec
from transformers import AutoTokenizer, AutoModel, AutoConfig
from peft import PeftModel
import torch
import os
# Disable the fused SDP attention kernels; the LLM2Vec examples disable these,
# as the patched bidirectional attention can misbehave with them.
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)
# Read tokens from environment variables and fail fast if they are missing.
# (They were read from os.environ, so there is no need to write them back.)
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
HF_TOKEN = os.getenv('HF_TOKEN')
if not GROQ_API_KEY or not HF_TOKEN:
    raise ValueError("GROQ_API_KEY and HF_TOKEN must be set as environment variables.")
# Load the tokenizer and the MNTP-trained base model.
tokenizer = AutoTokenizer.from_pretrained("McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp")
config = AutoConfig.from_pretrained("McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp", trust_remote_code=True)
model = AutoModel.from_pretrained(
    "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp",
    trust_remote_code=True,
    config=config,
    torch_dtype=torch.bfloat16,
    device_map="cuda" if torch.cuda.is_available() else "cpu",
)
# Apply the MNTP LoRA weights and merge them into the base model.
model = PeftModel.from_pretrained(model, "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp")
model = model.merge_and_unload()
# Stack the unsupervised SimCSE LoRA weights on top of the merged model.
model = PeftModel.from_pretrained(model, "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse")
# Wrapper for encoding and pooling operations
l2v = LLM2Vec(model, tokenizer, pooling_mode="mean", max_length=512)
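# A minimal sketch (kept as comments, not part of the app's control flow) of how
# these embeddings are typically compared, following the LLM2Vec README; the two
# sentences are placeholders:
#
#   emb = l2v.encode(["first sentence", "second sentence"])  # tensor of shape (2, hidden_size)
#   sim = torch.nn.functional.cosine_similarity(emb[0:1], emb[1:2])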
def encode_text(input_text):
    # l2v.encode expects a list of sentences, so wrap the single input; convert
    # the returned tensor to a plain list so Gradio can serialize it as JSON.
    encoding = l2v.encode([input_text])
    return encoding[0].tolist()
# Define the Gradio interface (gr.inputs / gr.outputs were removed in Gradio 4.x).
iface = gr.Interface(
    fn=encode_text,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
    outputs=gr.JSON(),
)
# Launch Gradio app
iface.launch(share=True)
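
# Hedged usage sketch: once the app is running, the endpoint can be queried with
# gradio_client. The local URL and api_name below are assumptions based on
# Gradio defaults, not something this repo pins down:
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   embedding = client.predict("Hello world", api_name="/predict")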