pdf_rag_chatbot / src /services /generate_embedding.py
hohieu's picture
init project
841b0ff
raw
history blame
505 Bytes
from transformers import AutoModel, AutoTokenizer
# Checkpoint identifier shared by the tokenizer and the encoder so the two
# can never drift apart.
_CHECKPOINT = "VoVanPhuc/sup-SimCSE-VietNamese-phobert-base"

# Both objects are created once, when this module is imported, and reused by
# every call that needs them.
PhobertTokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModel.from_pretrained(_CHECKPOINT)
def generate_embedding(sentence: str) -> list:
    """Return the sentence embedding of *sentence* as a plain Python list.

    The sentence is tokenized with the module-level ``PhobertTokenizer``
    (truncating over-long input), passed through the module-level ``model``,
    and the model's ``pooler_output`` for the first (only) sequence in the
    batch is returned.

    Args:
        sentence: Text to embed.

    Returns:
        The pooled output vector for the sentence, converted to a list of
        numbers so it can be serialized or stored without tensor types.
    """
    # Imported locally so this function brings its own dependency into scope.
    import torch

    inputs = PhobertTokenizer(
        sentence, padding=True, truncation=True, return_tensors="pt"
    )

    # Inference only: disabling autograd avoids building a gradient graph on
    # every call. Hidden states of every layer are not requested because only
    # the pooled output is used.
    with torch.no_grad():
        pooled = model(**inputs, return_dict=True).pooler_output

    # Batch contains a single sentence; take its vector.
    return pooled[0].numpy().tolist()