File size: 3,040 Bytes
189a7a7 7ee74de 2adda29 189a7a7 22ad1b8 ee49276 2079e61 63159de 189a7a7 91dd7f4 1c16e2a 189a7a7 5b78f45 63159de 5b78f45 0d8df02 63159de 5b78f45 9f71064 5b78f45 63159de 7ee74de 3c353fd c1e8c7e 2842164 63159de 8ab4ca8 7500084 b5d12e2 7ee74de b5d12e2 63159de 0989a68 5c5dbcf bf4d664 5c5dbcf 7c912db bf4d664 63159de 7039182 63159de b5d12e2 63159de 189a7a7 63159de f8d16dc 85426f6 189a7a7 7ee74de f8d16dc 5b78f45 63159de 5b78f45 c33f2c8 dc4a656 56397d8 63159de 189a7a7 85426f6 6704e38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import os
import spaces
from langchain.memory import ConversationBufferMemory,ConversationSummaryBufferMemory
from langchain.chains import ConversationChain
import langchain.globals
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
#import streamlit as st
# Hugging Face repo id of the model to load; 'Default Value' is only a
# placeholder used when MODEL_REPO_ID is not set in the environment.
my_model_id = os.environ.get('MODEL_REPO_ID', 'Default Value')

# Hugging Face Hub access token (None when the variable is unset).
token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')

# Prompt for the hotel-booking assistant. `{history}` and `{input}` are the
# two variables filled in when the prompt is formatted.
template = """You are an AI having conversation with a human. Below is an instruction that describes a task.
Write a response that appropriately completes the request.
Reply with the most helpful and logic answer. During the conversation you need to ask the user
the following questions to complete the hotel booking task.
1) Where would you like to stay and when?
2) How many people are staying in the room?
3) Do you prefer any ammenities like breakfast included or gym?
4) What is your name, your email address and phone number?
Make sure you receive a logical answer from the user from every question to complete the hotel
booking process.
Relevant Information:
{history}
Current Conversation:
Human: {input}
AI:"""
#@st.cache_resource
@spaces.GPU
def load_model():
    """Load the tokenizer and the 8-bit quantized causal LM named by ``my_model_id``.

    The access token held in the module-level ``token`` is forwarded to both
    ``from_pretrained`` calls so that private or gated repositories can be
    downloaded. When ``token`` is ``None`` the libraries fall back to their
    default authentication behaviour.

    Returns:
        tuple: ``(tokenizer, model)``.
    """
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,
        # bnb_4bit_compute_dtype=torch.bfloat16
    )
    tokenizer = AutoTokenizer.from_pretrained(my_model_id, token=token)
    model = AutoModelForCausalLM.from_pretrained(
        my_model_id,
        device_map="auto",  # let accelerate place the weights on the available device(s)
        quantization_config=quantization_config,
        token=token,
    )
    return tokenizer, model
#@st.cache_resource
@spaces.GPU
def load_pipeline():
    """Wrap the loaded model in a LangChain LLM backed by a text-generation pipeline.

    Returns:
        A ``HuggingFacePipeline`` around a transformers ``text-generation``
        pipeline built from the tokenizer and model that ``load_model`` returns.
    """
    tokenizer, model = load_model()

    # NOTE(review): top_k / top_p / temperature normally only take effect when
    # sampling is enabled (do_sample=True); as written this looks like plain
    # 2-beam search -- confirm which behaviour is intended.
    generation_options = dict(
        #max_new_tokens=50,
        top_k=30,
        top_p=0.7,
        early_stopping=True,
        num_beams=2,
        temperature=0.05,
        repetition_penalty=1.05,
    )
    text_generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        **generation_options,
    )
    return HuggingFacePipeline(pipeline=text_generator)
# def generate_from_pipeline(text, pipe):
# return pipe(text)
# Build the LangChain LLM once, at import time; demo_miny_memory() and
# demo_chain() read this module-level `llm`.
llm = load_pipeline()
def demo_miny_memory():
    """Create a new summary-buffer conversation memory bound to the module-level ``llm``.

    Returns:
        A fresh ``ConversationSummaryBufferMemory`` that exposes its contents
        under the ``history`` key.
    """
    return ConversationSummaryBufferMemory(llm=llm, memory_key="history")
@spaces.GPU
def demo_chain(input_text, history):
    """Run one conversation turn through a ``ConversationChain`` and return its output.

    Args:
        input_text: The user's message; sent to the chain under the ``input`` key.
        history: Sent to the chain under the ``history`` key.

    Returns:
        The value returned by ``ConversationChain.invoke`` when called with
        ``return_only_outputs=True`` (presumably a dict holding the reply under
        ``'response'`` -- verify against the caller).
    """
    booking_prompt = PromptTemplate(template=template, input_variables=["history", "input"])

    # NOTE(review): a brand-new memory is created on every call, and that memory
    # also supplies the `history` prompt variable, so the `history` argument is
    # likely overridden by an empty buffer -- confirm against langchain's
    # Chain.prep_inputs before relying on multi-turn context.
    chain = ConversationChain(
        llm=llm,
        prompt=booking_prompt,
        verbose=True,
        memory=demo_miny_memory(),
    )

    chain_inputs = {"input": input_text, "history": history}
    return chain.invoke(chain_inputs, return_only_outputs=True)