mohamedashraf11 committed
Commit b2ed690 · verified · 1 Parent(s): 7c8db93

Upload 4 files

Files changed (4):
  1. api.py +81 -0
  2. app.py +62 -105
  3. main.py +70 -0
  4. similarity.py +16 -0
api.py ADDED
@@ -0,0 +1,81 @@
+ from flask import Flask, request, jsonify
+ from langchain_community.vectorstores import Chroma
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_ollama import OllamaLLM
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain_core.prompts import PromptTemplate
+
+ app = Flask(__name__)
+
+ # Initialize the language model
+ llm = OllamaLLM(model="llama3.2")
+
+ # Initialize HuggingFaceEmbeddings and Chroma
+ model_name = "intfloat/multilingual-e5-large"
+ load_from_dir = "Hadith_Chroma_db"
+
+ embedding_llm = HuggingFaceEmbeddings(model_name=model_name)
+
+ loaded_vector_db = Chroma(
+     persist_directory=load_from_dir,
+     embedding_function=embedding_llm
+ )
+
+ def get_similar_docs(query):
+     """Retrieve similar documents based on the query."""
+     similar_docs = loaded_vector_db.similarity_search(query, k=2)
+     return similar_docs
+
+ def ask_llms(query_text):
+     """Ask the LLM to provide an answer based on similar documents."""
+     similar_docs = get_similar_docs(query_text)
+
+     qna_template = '\n'.join([
+         "Answer the following question using the context provided.",
+         "If the answer is not included in the context, say 'No answer available'.",
+         "### Context:",
+         "{context}",
+         "### Question:",
+         "{question}",
+         "### Answer:"
+     ])
+
+     qna_prompt = PromptTemplate(
+         template=qna_template,
+         input_variables=['context', 'question'],
+         verbose=True
+     )
+
+     stuff_chain = load_qa_chain(llm, chain_type="stuff", prompt=qna_prompt)
+
+     final_answer = stuff_chain.invoke({
+         "input_documents": similar_docs,
+         "question": query_text
+     })
+
+     return final_answer['output_text']
+
+ @app.route('/ai', methods=['POST'])
+ def aiPost():
+     """Handle POST requests to the /ai endpoint."""
+     try:
+         json_content = request.json
+         if not json_content or 'query' not in json_content:
+             return jsonify({"error": "Invalid input, 'query' field is required"}), 400
+
+         query = json_content.get('query')
+
+         # Get the response from the LLM based on the query
+         response = ask_llms(query)
+
+         return jsonify({"response": response})
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+ def start_app():
+     """Start the Flask app."""
+     app.run(host="0.0.0.0", port=8080, debug=True)
+
+ if __name__ == '__main__':
+     start_app()
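For reference, a minimal client-side sketch of how the /ai endpoint above could be exercised once api.py is running on port 8080. This assumes the `requests` package is installed and that a local Ollama server with the llama3.2 model is available; the Arabic query string is only an example.

# Hypothetical smoke test for the /ai endpoint; not part of the commit.
import requests

resp = requests.post(
    "http://localhost:8080/ai",
    json={"query": "ما فضل صلاة العصر؟"},  # request body must contain a 'query' field
    timeout=120,  # generation can be slow on CPU
)
resp.raise_for_status()
print(resp.json()["response"])  # the chain's 'output_text' wrapped by the Flask handler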
app.py CHANGED
@@ -1,105 +1,62 @@
- import os
- import re
- import zipfile
- import gradio as gr
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM
- from langchain_community.embeddings import HuggingFaceEmbeddings  # Updated import
- from langchain_chroma import Chroma  # Updated import
-
- class HadithChatApp:
-     def __init__(self):
-         # Load embeddings
-         self.embeddings = HuggingFaceEmbeddings(
-             model_name="intfloat/multilingual-e5-large"
-         )
-         self.model_name = "malhajar/Mistral-7B-v0.1-arabic"
-
-         # Define paths
-         self.zip_file_path = 'Hadith_Chroma_db_compressed.zip'
-         self.extract_dir = 'Hadith_Chroma_db'
-
-         # Extract the ZIP file if not already extracted
-         if not os.path.exists(self.extract_dir):
-             self.extract_zip(self.zip_file_path, self.extract_dir)
-
-         # Load the vector store from the extracted directory
-         self.vectorStore = Chroma(
-             persist_directory=self.extract_dir,
-             embedding_function=self.embeddings
-         )
-
-         # Initialize the language model
-         self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-         self.model = AutoModelForCausalLM.from_pretrained(self.model_name)
-
-     def extract_zip(self, zip_path, extract_to):
-         """Extract a ZIP file to a specified directory."""
-         with zipfile.ZipFile(zip_path, 'r') as zip_ref:
-             zip_ref.extractall(extract_to)
-
-     def clean_text(self, text):
-         """Clean the input text by removing HTML tags and unwanted characters."""
-         text = re.sub(r'<[^>]*>', '', text)
-         text = re.sub(r'[^\w\s]', '', text)
-         text = re.sub(r'\s+', ' ', text)
-         return text.lower().strip()
-
-     def get_relevant_docs(self, question, k=5):
-         """Get relevant documents based on the input question."""
-         relevant_docs = self.vectorStore.similarity_search(question, k=k)
-         return relevant_docs
-
-     def extract_contexts(self, relevant_docs):
-         """Extract contexts from the relevant documents."""
-         contexts = []
-         for doc in relevant_docs:
-             contexts.append(doc.page_content)  # Accessing page_content directly
-         return contexts
-
-     def create_template(self, question, k):
-         """Create a template for the query to the LLM."""
-         relevant_docs = self.get_relevant_docs(question, k)
-         contexts = self.extract_contexts(relevant_docs)
-         template = f"""
-         Engage in a conversation with the user, responding to their question:
-         {question}
-         within this context of Hadiths:
-         {contexts}
-         Encourage the model to provide informative and culturally sensitive answers, reflecting Islamic teachings. Maintain a conversational tone and aim for clarity in responses and make sure they are restricted extracted from the provided contexts and i want you to answer me in arabic."""
-         return template
-
-     def generate_answer(self, question):
-         """Generate an answer using the language model."""
-         cleaned_question = self.clean_text(question)
-         query = self.create_template(cleaned_question, 5)
-
-         # Tokenize the query
-         inputs = self.tokenizer(query, return_tensors="pt", padding=True, truncation=True)
-
-         # Generate the response
-         with torch.no_grad():
-             outputs = self.model.generate(**inputs, max_length=512)
-
-         # Decode the generated text
-         response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-         return self.clean_text(response)
-
-     def greet(self, question):
-         answer = self.generate_answer(question)
-         return answer
-
- if __name__ == "__main__":
-     # Initialize the app
-     hadith_chat_app = HadithChatApp()
-
-     # Set up the Gradio interface
-     iface = gr.Interface(
-         fn=hadith_chat_app.greet,
-         inputs="text",
-         outputs="text",
-         title="Hadith QA App"
-     )
-
-     # Launch the Gradio interface
-     iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
 
+ import streamlit as st
+ import time
+ import main
+
+ # Initialize conversation history
+ if 'conversation' not in st.session_state:
+     st.session_state['conversation'] = []
+
+ # Function to display conversation history
+ def display_conversation():
+     for entry in st.session_state['conversation']:
+         if entry['type'] == 'bot':
+             st.markdown(f"<div style='background-color:#005691; padding:10px; border-radius:10px; margin:10px 0; color:white;'>{entry['message']}</div>", unsafe_allow_html=True)
+         else:
+             st.markdown(f"<div style='background-color:#f1f1f1; padding:10px; border-radius:10px; margin:10px 0; color:black;'>{entry['message']}</div>", unsafe_allow_html=True)
+
+ # Function to get the chatbot response from the QA backend in main.py
+ def get_bot_response(user_input):
+     response = main.ask_llms(user_input)
+     bot_response = f"Echo: {response}"
+     return bot_response
+
+ # Streamlit UI components
+ st.title("Professional Chatbot Interface")
+
+ # Display conversation
+ st.markdown("### Conversation")
+ display_conversation()
+
+ # User input
+ user_input = st.text_input("You", "")
+
+ # Button for user to submit the input
+ if st.button("Send"):
+     if user_input:
+         # Save user's message
+         st.session_state['conversation'].append({"type": "user", "message": user_input})
+
+         # Get bot response and save it
+         bot_message = get_bot_response(user_input)
+         time.sleep(1)  # Simulate a short typing delay
+         st.session_state['conversation'].append({"type": "bot", "message": bot_message})
+
+         # Refresh the page after sending the message
+         st.rerun()
+
+ # Design enhancements: custom CSS to style the input box
+ st.markdown("""
+ <style>
+ .stTextInput > div > input {
+     background-color: #1e1e1e !important; /* Darker background for better contrast */
+     color: white !important; /* Ensure text is white */
+     padding: 10px;
+     border-radius: 10px;
+ }
+ .css-1e5imcs {padding-top: 0rem;} /* Reduce top padding */
+ </style>
+ """, unsafe_allow_html=True)
main.py ADDED
@@ -0,0 +1,70 @@
+ from langchain_ollama import OllamaLLM
+ import similarity
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain_core.prompts import PromptTemplate
+
+ # Initialize an instance of the Ollama model
+ llm = OllamaLLM(model="llama3.2")
+
+ # Earlier inline experiment, kept commented out for reference:
+ # query_text = "ما فضل صلاة العصر؟"
+ # print(f'Query : {query_text}')
+ # similar_docs = similarity.get_similar_docs(query_text)
+ # qna_template = '\n'.join([
+ #     "Answer the following question using the context provided.",
+ #     "Please provide an answer from within the context, with details if they exist.",
+ #     "If the answer is not included in the context, say 'No answer available'.",
+ #     "### Context:",
+ #     "{context}",
+ #     "### Question:",
+ #     "{question}",
+ #     "### Answer:"
+ # ])
+ # qna_prompt = PromptTemplate(
+ #     template=qna_template,
+ #     input_variables=['context', 'question'],
+ #     verbose=True
+ # )
+ # stuff_chain = load_qa_chain(llm, chain_type="stuff", prompt=qna_prompt)
+ # final_answer = stuff_chain({
+ #     "input_documents": similar_docs,
+ #     "question": query_text
+ # }, return_only_outputs=True)
+ # print(final_answer)
+
+ def ask_llms(query_text):
+     """Answer query_text using documents retrieved from the Chroma store."""
+     similar_docs = similarity.get_similar_docs(query_text)
+
+     qna_template = '\n'.join([
+         "Answer the following question using the context provided.",
+         "If the answer is not included in the context, say 'No answer available'.",
+         "### Context:",
+         "{context}",
+         "### Question:",
+         "{question}",
+         "### Answer:"
+     ])
+
+     qna_prompt = PromptTemplate(
+         template=qna_template,
+         input_variables=['context', 'question'],
+         verbose=True
+     )
+
+     stuff_chain = load_qa_chain(llm, chain_type="stuff", prompt=qna_prompt)
+
+     final_answer = stuff_chain.invoke({
+         "input_documents": similar_docs,
+         "question": query_text
+     })
+
+     return final_answer['output_text']
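A quick way to exercise ask_llms from a Python shell, assuming a local Ollama server with the llama3.2 model is running and the Hadith_Chroma_db directory sits next to the scripts; the query string is the example from the commented-out block above.

# Hypothetical usage check for main.ask_llms; not part of the commit.
from main import ask_llms

answer = ask_llms("ما فضل صلاة العصر؟")
print(answer)  # prints the chain's 'output_text'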
similarity.py ADDED
@@ -0,0 +1,16 @@
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import Chroma
+
+ model_name = "intfloat/multilingual-e5-large"
+ load_from_dir = "Hadith_Chroma_db"
+
+ embedding_llm = HuggingFaceEmbeddings(model_name=model_name)
+
+ loaded_vector_db = Chroma(
+     persist_directory=load_from_dir,
+     embedding_function=embedding_llm
+ )
+
+ def get_similar_docs(query):
+     """Return the most similar documents from the Chroma store."""
+     similar_docs = loaded_vector_db.similarity_search(query, k=2)
+     return similar_docs
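A small sketch of how the retriever can be inspected on its own, assuming the same local Hadith_Chroma_db directory; each returned item is a LangChain Document whose text is available through page_content.

# Illustrative check of the retriever in isolation; not part of the commit.
from similarity import get_similar_docs

docs = get_similar_docs("ما فضل صلاة العصر؟")
for doc in docs:
    print(doc.page_content[:200])  # preview the first 200 characters of each match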