from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

def get_text_from_content_for_doc(content):
    # Concatenate the "texte" field of every page in the extracted document content.
    text = ""
    for page in content:
        text += content[page]["texte"]
    return text

def get_text_from_content_for_audio(content):
    # Audio content is expected to carry a single "transcription" field.
    return content["transcription"]

def get_text_chunks(text):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,      # the character length of each chunk
        chunk_overlap=100,   # the character overlap between consecutive chunks
        length_function=len  # the length function - here, character length (the Python len() built-in)
    )
    chunks = text_splitter.split_text(text)
    return chunks

def get_vectorstore(text_chunks, filename, file_type, namespace, index):
    try:
        embedding = OpenAIEmbeddings(model="text-embedding-3-large")
        vector_store = PineconeVectorStore(index=index, embedding=embedding, namespace=namespace)

        # Sanitize the filename (drop the extension, replace separators) to build stable chunk ids.
        file_name = filename.split(".")[0].replace(" ", "_").replace("-", "_").replace("/", "_").replace("\\", "_").strip()

        documents = []
        uuids = []
        for i, chunk in enumerate(text_chunks):
            document = Document(
                page_content=chunk,
                metadata={"filename": filename, "file_type": file_type},
            )
            uuid = f"{file_name}_{i}"
            uuids.append(uuid)
            documents.append(document)

        vector_store.add_documents(documents=documents, ids=uuids)
        return True
    except Exception:
        return False

def get_retreive_answer(enterprise_id, prompt, index):
    try:
        embedding = OpenAIEmbeddings(model="text-embedding-3-large")
        vector_store = PineconeVectorStore(index=index, embedding=embedding, namespace=enterprise_id)

        # Retrieve the 3 most similar chunks above the similarity threshold.
        retriever = vector_store.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"k": 3, "score_threshold": 0.6},
        )
        response = retriever.invoke(prompt)
        return response
    except Exception:
        return False

def generate_response_via_langchain(query: str, stream: bool = False, model: str = "gpt-4o-mini", context: str = "", messages=[]):
    # Define the prompt template (in French: "Given the following context: {context},
    # and the conversation history: {messages}, {query}")
    template = "Sachant le context suivant: {context}, et l'historique de la conversation: {messages}, {query}"
    prompt = PromptTemplate.from_template(template)

    # Initialize the OpenAI LLM with the specified model
    llm = ChatOpenAI(model=model)

    # Create an LLM chain with the prompt and the LLM
    llm_chain = prompt | llm | StrOutputParser()

    if stream:
        # Return an async generator that yields streamed response chunks
        # (the caller must consume it with `async for`)
        return llm_chain.astream({"query": query, "context": context, "messages": messages})

    # Invoke the LLM chain with all template variables and return the result
    return llm_chain.invoke({"query": query, "context": context, "messages": messages})

def setup_rag(file_type, content, filename, namespace, index):
    # Extract raw text from the uploaded content, chunk it, and index it in Pinecone.
    if file_type == "pdf":
        text = get_text_from_content_for_doc(content)
    elif file_type == "audio":
        text = get_text_from_content_for_audio(content)
    else:
        return False

    chunks = get_text_chunks(text)
    vectorstore = get_vectorstore(chunks, filename, file_type, namespace, index)
    return vectorstore
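
# --- Usage sketch (illustrative only) ---
# A minimal example of how these helpers might be wired together end to end.
# The index name, environment-variable handling, and the sample content dict
# below are assumptions for illustration, not part of this module's API.
if __name__ == "__main__":
    import os
    from pinecone import Pinecone

    # Assumes PINECONE_API_KEY and OPENAI_API_KEY are set, and that a Pinecone
    # index sized for text-embedding-3-large (3072 dimensions) already exists.
    pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
    index = pc.Index("example-index")  # hypothetical index name

    # Hypothetical "pdf" content: pages keyed by name, each with a "texte" field,
    # as expected by get_text_from_content_for_doc.
    content = {"page_1": {"texte": "Sample text extracted from a PDF."}}

    setup_rag("pdf", content, filename="example.pdf", namespace="enterprise_123", index=index)

    # Retrieve relevant chunks and use them as context for the LLM answer.
    docs = get_retreive_answer("enterprise_123", "What is the document about?", index)
    context = " ".join(doc.page_content for doc in docs) if docs else ""
    answer = generate_response_via_langchain("What is the document about?", context=context)
    print(answer)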