Spaces:

Tristan107
/

scrum-expert

Running

scrum-expert / app.py

trobet

Scrum BAAI/bge-small-en-v1.5 DeepSeek-R1-Distill-Qwen-32B

3e9ea02 9 days ago

4.03 kB

	import logging
	import os
	import time

	import streamlit as st
	import torch
	import sys

	from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, StorageContext, load_index_from_storage
	from llama_index.core.chat_engine.types import ChatMode
	from llama_index.embeddings.huggingface import HuggingFaceEmbedding
	from llama_index.llms.openai_like import OpenAILike

	# --- User-facing texts (the UI is in French) -------------------------------
	PAGE_TITLE = "Votre expert Scrum"
	# Used as the first assistant message of every new conversation.
	CHAT_TITLE = "Posez-moi une question sur le guide Scrum 2020 (anglais ou français)"

	# --- Prompting ----------------------------------------------------------------
	SYSTEM_PROMPT = "Use the context information provided to assist the user. Mention the origins of the informations at the bottom of the response (file and page)."

	# --- Embedding model ----------------------------------------------------------
	# Alternatives that were tried:
	#   "sentence-transformers/paraphrase-MiniLM-L6-v2"  (fast embedding model)
	#   "BAAI/bge-m3"                                    (multilingual large model)
	EMBEDDING_MODEL = "BAAI/bge-small-en-v1.5"

	# --- LLM ----------------------------------------------------------------------
	# Available models are listed at https://chatapi.akash.network/documentation#models
	# Alternative that was tried: "DeepSeek-R1-Distill-Llama-70B"
	LLM_MODEL = "DeepSeek-R1-Distill-Qwen-32B"
	# Number of retrieved document chunks handed to the LLM (similarity_top_k).
	NB_DOC_CHUNKS_TO_SEND = 5
	# Upper bound on the tokens generated per answer (max_tokens).
	MAX_NB_TOKENS_IN_RESPONSE = 1500
	# The closer to 1, the less deterministic and the more creative.
	TEMPERATURE = 0.5

	# Changing this endpoint requires adapting the custom_llm initialization.
	API_BASE_URL = "https://chatapi.akash.network/api/v1"

	# Adjust the torch.classes path to avoid the conflict
	# (presumably with Streamlit's module inspection -- TODO confirm).
	torch.classes.__path__ = []

	st.set_page_config(page_title=PAGE_TITLE, layout="centered", initial_sidebar_state="auto", menu_items=None)
	st.title(PAGE_TITLE)


	@st.cache_resource
	def _load_embedding_model():
	    """Return the HuggingFace embedding model, instantiated only once.

	    Streamlit re-executes the whole script on every user interaction;
	    caching the model keeps it from being re-created on each rerun.
	    """
	    return HuggingFaceEmbedding(model_name=EMBEDDING_MODEL)


	# LLM served through an OpenAI-compatible endpoint; the API key is read from
	# the Streamlit secrets under "openai_key".
	custom_llm = OpenAILike(
	    model=LLM_MODEL,
	    api_base=API_BASE_URL,
	    api_key=st.secrets["openai_key"],
	    max_tokens=MAX_NB_TOKENS_IN_RESPONSE,
	    temperature=TEMPERATURE,
	)
	Settings.embed_model = _load_embedding_model()
	Settings.llm = custom_llm

	# basicConfig installs a single stdout handler on the root logger and does
	# nothing when the root logger already has handlers, so it is safe to run on
	# every rerun. Adding a second StreamHandler by hand (as was done before)
	# printed every record twice and stacked one more handler per rerun.
	logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

	# Build the vector index, or reload it from disk when it was persisted before
	@st.cache_resource
	def load_data():
	    """Return the vector index over the documents found in ``./data``.

	    When ``./storage`` already exists the index is loaded from it. Otherwise
	    the documents are read, indexed, and the resulting index is persisted to
	    ``./storage`` before being returned.
	    """
	    storage_path = "./storage"

	    # Fast path: a persisted index is available.
	    if os.path.exists(storage_path):
	        persisted_context = StorageContext.from_defaults(persist_dir=storage_path)
	        return load_index_from_storage(persisted_context)

	    # First run: index the source documents and save the result.
	    source_docs = SimpleDirectoryReader(input_dir="./data").load_data()
	    fresh_index = VectorStoreIndex.from_documents(source_docs)
	    fresh_index.storage_context.persist(persist_dir=storage_path)
	    return fresh_index

	# Time how long it takes to obtain the index on this script run.
	start_time = time.time()
	index = load_data()
	end_time = time.time()
	print(f"Time taken for loading embeddings: {end_time - start_time:.4f} seconds")
	start_time = time.time()

	# First run of a session: seed the history with the assistant's greeting.
	if "messages" not in st.session_state:
	    st.session_state["messages"] = [{"role": "assistant", "content": CHAT_TITLE}]

	# First run of a session: create the chat engine and keep it in the session state.
	if "chat_engine" not in st.session_state:
	    st.session_state["chat_engine"] = index.as_chat_engine(
	        chat_mode=ChatMode.CONTEXT,
	        system_prompt=SYSTEM_PROMPT,
	        similarity_top_k=NB_DOC_CHUNKS_TO_SEND,
	        verbose=True,
	        streaming=True,
	    )

	# Read the user's question (None when this rerun was not triggered by a new
	# input) and store it in the chat history.
	if prompt := st.chat_input("Posez votre question"):
	    st.session_state.messages.append({"role": "user", "content": prompt})

	# Render the whole conversation so far.
	for message in st.session_state.messages:
	    with st.chat_message(message["role"]):
	        st.write(message["content"])

	# If the last message is not from the assistant, generate a new response.
	last_message = st.session_state.messages[-1]
	if last_message["role"] != "assistant":
	    with st.chat_message("assistant"):
	        start_time = time.time()

	        # Query with the stored user message, not `prompt`: `prompt` is None
	        # on a rerun without new input, which can happen when an earlier run
	        # stored the question but failed before an answer was appended.
	        response_stream = st.session_state.chat_engine.stream_chat(last_message["content"])
	        st.write_stream(response_stream.response_gen)

	        # Add the response to the message history.
	        st.session_state.messages.append({"role": "assistant", "content": response_stream.response})

	        end_time = time.time()
	        print(f"Time taken for getting response: {end_time - start_time:.4f} seconds")