from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate


def get_text_from_content_for_doc(content):
    # Concatenate the text of every page; `content` maps page ids to
    # dicts carrying the page text under the "texte" key.
    text = ""
    for page in content:
        text += content[page]["texte"]
    return text


def get_text_from_content_for_audio(content):
    # Audio content carries a single transcription string.
    return content["transcription"]


def get_text_chunks(text):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,      # the character length of each chunk
        chunk_overlap=100,   # the character overlap between consecutive chunks
        length_function=len  # measure length in characters (Python's len())
    )
    chunks = text_splitter.split_text(text)
    return chunks


def get_vectorstore(text_chunks, filename, file_type, namespace, index):
    # Embed the chunks and upsert them into the given Pinecone namespace.
    # Returns True on success, False on failure.
    try:
        embedding = OpenAIEmbeddings(model="text-embedding-3-large")
        vector_store = PineconeVectorStore(index=index, embedding=embedding, namespace=namespace)

        # Build a stable ID prefix from the filename: drop the extension,
        # then replace separators with underscores.
        file_name = filename.split(".")[0]
        for separator in (" ", "-", "/", "\\"):
            file_name = file_name.replace(separator, "_")
        file_name = file_name.strip()

        documents = []
        ids = []
        for i, chunk in enumerate(text_chunks):
            documents.append(
                Document(
                    page_content=chunk,
                    metadata={"filename": filename, "file_type": file_type},
                )
            )
            ids.append(f"{file_name}_{i}")

        vector_store.add_documents(documents=documents, ids=ids)
        return True
    except Exception:
        return False


def get_retreive_answer(enterprise_id, prompt, index):
    # Retrieve the chunks most similar to `prompt` from the enterprise's
    # namespace. Returns a list of Documents, or False on failure.
    try:
        embedding = OpenAIEmbeddings(model="text-embedding-3-large")
        vector_store = PineconeVectorStore(index=index, embedding=embedding, namespace=enterprise_id)
        retriever = vector_store.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"k": 3, "score_threshold": 0.6},
        )
        response = retriever.invoke(prompt)
        return response
    except Exception:
        return False


def generate_response_via_langchain(query: str, stream: bool = False, model: str = "gpt-4o-mini", context: str = "", messages=None):
    # Avoid a mutable default argument for the message history.
    if messages is None:
        messages = []

    # Define the prompt template (French for: "Given the following context:
    # {context}, and the conversation history: {messages}, {query}")
    template = "Sachant le contexte suivant: {context}, et l'historique de la conversation: {messages}, {query}"
    prompt = PromptTemplate.from_template(template)

    # Initialize the OpenAI LLM with the specified model
    llm = ChatOpenAI(model=model)

    # Create an LLM chain with the prompt, the LLM, and a string output parser
    llm_chain = prompt | llm | StrOutputParser()

    # The template references all three variables, so all three must be
    # supplied whether streaming or not.
    inputs = {"query": query, "context": context, "messages": messages}

    if stream:
        # Return an async generator that yields streamed response chunks
        return llm_chain.astream(inputs)

    # Invoke the LLM chain and return the result
    return llm_chain.invoke(inputs)


def setup_rag(file_type, content, filename, namespace, index):
    # Extract the raw text, split it into chunks, and index the chunks.
    if file_type == "pdf":
        text = get_text_from_content_for_doc(content)
    elif file_type == "audio":
        text = get_text_from_content_for_audio(content)
    else:
        raise ValueError(f"Unsupported file type: {file_type}")

    chunks = get_text_chunks(text)
    # get_vectorstore needs the filename, namespace, and index as well as
    # the chunks, so setup_rag must receive and forward them.
    return get_vectorstore(chunks, filename, file_type, namespace, index)
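
# --- Usage sketch ---
# A minimal end-to-end example of wiring the helpers above together. The
# index name ("my-index"), the namespace ("enterprise_42"), and the sample
# `content` dict are illustrative assumptions, not part of the original
# module; it also assumes OPENAI_API_KEY and PINECONE_API_KEY are set in
# the environment and that the Pinecone index already exists with a
# dimension matching text-embedding-3-large (3072).
if __name__ == "__main__":
    from pinecone import Pinecone

    pc = Pinecone()  # reads PINECONE_API_KEY from the environment
    index = pc.Index("my-index")  # hypothetical pre-existing index

    # Shape expected by get_text_from_content_for_doc: page id -> {"texte": ...}
    content = {
        0: {"texte": "Page one text."},
        1: {"texte": "Page two text."},
    }

    # Chunk and index the document under the enterprise's namespace.
    setup_rag(
        file_type="pdf",
        content=content,
        filename="report.pdf",
        namespace="enterprise_42",
        index=index,
    )

    # Retrieve the most relevant chunks and use them as context.
    question = "What does page one say?"
    docs = get_retreive_answer("enterprise_42", question, index)
    context = " ".join(doc.page_content for doc in docs) if docs else ""

    print(generate_response_via_langchain(question, context=context))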