"""Streamlit chat application answering questions about the RTE market rules.

The documents found in ``./data`` are embedded into a vector index (persisted
under ``./storage``), and an OpenAI-compatible LLM endpoint answers user
questions using the most similar document chunks as context.
"""
import logging
import os
import sys
import time

import streamlit as st
import torch
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.chat_engine.types import ChatMode
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.openai_like import OpenAILike

# First assistant message displayed in the chat (user-facing, kept in French).
CHAT_TITLE = (
    "Posez-moi une question sur les règles de marché RTE :\n"
    "https://www.services-rte.com/fr/actualites/nouvelles-versions-des-r%C3%A8gles-de-march%C3%A9-applicables-au-01-avril-2024.html"
)
SYSTEM_PROMPT = "Use the context information provided to assist the user. Mention the origins of the informations at the bottom of the response (file and page)."

# Multilingual large model. A faster alternative is
# "sentence-transformers/paraphrase-MiniLM-L6-v2".
EMBEDDING_MODEL = "BAAI/bge-m3"
# Available models are listed on https://chatapi.akash.network/documentation#models
LLM_MODEL = "DeepSeek-R1-Distill-Llama-70B"
NB_DOC_CHUNKS_TO_SEND = 5
MAX_NB_TOKENS_IN_RESPONSE = 1500
# The closer to 1, the less deterministic and the more creative.
TEMPERATURE = 0.2
# Changing this requires adapting the custom_llm initialization below.
API_BASE_URL = "https://chatapi.akash.network/api/v1"

# Adjust the torch.classes path to avoid the conflict.
# NOTE(review): presumably the conflict is with Streamlit's module watcher
# introspecting torch.classes -- confirm before removing this workaround.
torch.classes.__path__ = []

st.set_page_config(
    page_title="Votre expert en règles de marché RTE",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items=None,
)
st.title("Votre expert en règles de marché RTE")


@st.cache_resource(show_spinner=False)
def _load_embedding_model():
    """Instantiate the embedding model once and reuse it across script reruns."""
    return HuggingFaceEmbedding(model_name=EMBEDDING_MODEL)


custom_llm = OpenAILike(
    model=LLM_MODEL,
    api_base=API_BASE_URL,
    api_key=st.secrets["openai_key"],
    max_tokens=MAX_NB_TOKENS_IN_RESPONSE,
    temperature=TEMPERATURE,
)
Settings.embed_model = _load_embedding_model()
Settings.llm = custom_llm

# Send DEBUG logs to stdout. The script is re-executed on every user
# interaction, so the extra stdout handler is attached only when none exists
# yet; otherwise every record would be duplicated, and more so on each rerun.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
_root_logger = logging.getLogger()
if not any(
    getattr(handler, "stream", None) is sys.stdout
    for handler in _root_logger.handlers
):
    _root_logger.addHandler(logging.StreamHandler(stream=sys.stdout))


# Load and index data
@st.cache_resource
def load_data():
    """Return the vector index over the documents, building it if needed.

    When ``./storage`` does not exist, the documents in ``./data`` are read,
    indexed, and the resulting index is persisted to ``./storage``. Otherwise
    the index is loaded back from ``./storage``.

    Returns:
        The document index used to create the chat engine.
    """
    persist_dir = "./storage"
    if not os.path.exists(persist_dir):
        documents = SimpleDirectoryReader(input_dir="./data").load_data()
        document_index = VectorStoreIndex.from_documents(documents)
        document_index.storage_context.persist(persist_dir=persist_dir)
    else:
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        document_index = load_index_from_storage(storage_context)
    return document_index


# perf_counter is monotonic, which makes it the right clock for durations.
load_started = time.perf_counter()
index = load_data()
print(f"Time taken for loading embeddings: {time.perf_counter() - load_started:.4f} seconds")

if "messages" not in st.session_state:  # Initialize the chat messages history
    st.session_state.messages = [
        {
            "role": "assistant",
            "content": CHAT_TITLE,
        }
    ]

if "chat_engine" not in st.session_state:  # Initialize the chat engine
    st.session_state.chat_engine = index.as_chat_engine(
        chat_mode=ChatMode.CONTEXT,
        system_prompt=SYSTEM_PROMPT,
        similarity_top_k=NB_DOC_CHUNKS_TO_SEND,
        verbose=True,
        streaming=True,
    )

# Prompt for user input and save it to the chat history.
if prompt := st.chat_input("Posez votre question"):
    st.session_state.messages.append({"role": "user", "content": prompt})

for message in st.session_state.messages:  # Write message history to UI
    with st.chat_message(message["role"]):
        st.write(message["content"])

# If the last message is not from the assistant, generate a new response.
last_message = st.session_state.messages[-1]
if last_message["role"] != "assistant":
    # Read the question from the history rather than from `prompt`: `prompt`
    # is None on any rerun where the user did not just submit something.
    user_question = last_message["content"]
    with st.chat_message("assistant"):
        response_started = time.perf_counter()
        response_stream = st.session_state.chat_engine.stream_chat(user_question)
        st.write_stream(response_stream.response_gen)
        # Add the completed response to the message history.
        st.session_state.messages.append(
            {"role": "assistant", "content": response_stream.response}
        )
        print(f"Time taken for getting response: {time.perf_counter() - response_started:.4f} seconds")