Spaces:

Waflon
/

mixtral_chatbot

Runtime error

File size: 4,270 Bytes

dca217d
 
 
 
 
 
 
 
 
df7bc3a
736bb18
dca217d
9ef29db
736bb18
7b67529
9ef29db
dca217d
 
 
 
 
 
 
 
 
 
 
 
 
 
207b927
f6cd8e7
dca217d
 
 
8400323
dca217d
8400323
dca217d
 
8400323
 
 
 
dca217d
 
df7bc3a
 
dca217d
 
 
8400323
dca217d
 
 
 
 
 
 
 
 
 
8400323
 
 
 
dca217d
 
 
 
 
 
 
 
744d814
dca217d
 
744d814
dca217d

import streamlit as st
from streamlit_chat import message
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI
import os

from huggingface_hub import login
login(token = st.secrets["HF"])
os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]

# Function to load documents 
def load_documents():
    """Load the PDF files found in the ``data/`` directory.

    Returns:
        The documents produced by a ``DirectoryLoader`` that matches
        ``*.pdf`` in ``data/`` and parses each file with ``PyPDFLoader``.
    """
    return DirectoryLoader('data/', glob="*.pdf", loader_cls=PyPDFLoader).load()

# Function to split text into chunks
def split_text_into_chunks(documents):
    """Split the loaded documents into overlapping chunks.

    Args:
        documents: Documents as returned by ``load_documents``.

    Returns:
        The chunks produced by a ``RecursiveCharacterTextSplitter``
        configured with ``chunk_size=500`` and ``chunk_overlap=50``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    return splitter.split_documents(documents)

# Function to create embeddings
def create_embeddings():
    """Build the sentence-embedding model used to index and query the chunks.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured to run on the CPU.
    """
    model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    # Alternative model (currently disabled):
    #   "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': "cpu"})

# Function to create vector store
def create_vector_store(text_chunks, embeddings, nombre_vector):
    """Build a FAISS index from the chunks and save it to disk.

    Args:
        text_chunks: Document chunks to index.
        embeddings: Embedding model used to vectorize the chunks.
        nombre_vector: Folder path under which the index is saved.

    Returns:
        The newly built FAISS vector store.
    """
    vector_store = FAISS.from_documents(text_chunks, embeddings)
    # Save under the caller-supplied name (not a hard-coded folder) so the
    # index can later be loaded back with that same name.
    vector_store.save_local(nombre_vector)
    return vector_store

# Function to load a previously saved vector store
def load_vector_store(nombre_vector, embeddings):
    """Load a FAISS index that was previously saved to disk.

    Args:
        nombre_vector: Folder path the index was saved under.
        embeddings: Embedding model to attach to the loaded index.

    Returns:
        The loaded FAISS vector store. Any error raised by
        ``FAISS.load_local`` (for example when the folder does not exist)
        propagates to the caller.
    """
    vector_store = FAISS.load_local(nombre_vector, embeddings)
    return vector_store

# Function to create LLMS model
def create_llms_model():
    """Build the chat model that answers the user's questions.

    Returns:
        A ``ChatOpenAI`` instance for ``gpt-3.5-turbo-1106`` with a
        temperature of 0.1.
    """
    # Disabled alternative: a local CTransformers model,
    #   CTransformers(model='TheBloke/Mistral-7B-Instruct-v0.1-GGUF',
    #                 config={'max_new_tokens': 128, 'temperature': 0.01})
    return ChatOpenAI(model='gpt-3.5-turbo-1106', temperature=0.1)

# Initialize Streamlit app
st.title("Chatbot usando mistral")

# Create embeddings (needed both to load a saved index and to build a new one)
embeddings = create_embeddings()

# Reuse the FAISS index saved under "cache" when it can be loaded; otherwise
# build it from the PDFs. `except Exception` (rather than a bare `except:`)
# lets BaseException-derived signals such as KeyboardInterrupt and SystemExit
# propagate instead of being mistaken for a missing index.
try:
    vector_store = load_vector_store("cache", embeddings)
except Exception:
    # The documents are only read and split when the index has to be
    # rebuilt, so a normal rerun does not re-parse every PDF.
    documents = load_documents()
    text_chunks = split_text_into_chunks(documents)
    vector_store = create_vector_store(text_chunks, embeddings, "cache")

# Create LLMS model
llm = create_llms_model()

# Initialize conversation history
if 'history' not in st.session_state:
    st.session_state['history'] = []

if 'generated' not in st.session_state:
    st.session_state['generated'] = ["¡Hola! Pregúntame sobre cualquier cosa 🤗"]

if 'past' not in st.session_state:
    st.session_state['past'] = ["¡Hola! 👋"]

# Create memory. Streamlit re-executes this script on every interaction, so
# a memory object created here unconditionally would start empty each time;
# keeping it in session_state lets it accumulate the conversation.
if 'memory' not in st.session_state:
    st.session_state['memory'] = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
memory = st.session_state['memory']

# Create chain: retrieve the 2 most similar chunks and "stuff" them into the
# prompt together with the chat history.
chain = ConversationalRetrievalChain.from_llm(llm=llm, chain_type='stuff',
                                              retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
                                              memory=memory)

# Define chat function
def conversation_chat(query):
    """Ask the retrieval chain a question and record the exchange.

    Args:
        query: The user's question.

    Returns:
        The chain's answer. The ``(query, answer)`` pair is also appended
        to ``st.session_state['history']``.
    """
    history = st.session_state['history']
    response = chain({"question": query, "chat_history": history})
    answer = response["answer"]
    history.append((query, answer))
    return answer

# Display chat history
# The transcript container is created first, so it sits above the
# container that holds the input form.
reply_container = st.container()
container = st.container()

with container:
    with st.form(key='my_form', clear_on_submit=True):
        user_input = st.text_input(
            "Question:",
            placeholder="Ask about your Job Interview",
            key='input',
        )
        submit_button = st.form_submit_button(label='Send')

    # Only query the chain when the form was submitted with non-empty text.
    if submit_button and user_input:
        output = conversation_chat(user_input)
        st.session_state['past'].append(user_input)
        st.session_state['generated'].append(output)

# Replay the whole transcript: each user message followed by its reply.
if st.session_state['generated']:
    with reply_container:
        for i, bot_reply in enumerate(st.session_state['generated']):
            user_message = st.session_state["past"][i]
            message(user_message, is_user=True, key=f"{i}_user", avatar_style="thumbs")
            message(bot_reply, key=str(i), avatar_style="fun-emoji")