mohamedashraf11 committed
Commit b2ed690 · verified · 1 Parent(s): 7c8db93

Upload 4 files

Files changed (4):
  1. api.py +81 -0
  2. app.py +62 -105
  3. main.py +70 -0
  4. similarity.py +16 -0
api.py ADDED
@@ -0,0 +1,81 @@
+ from flask import Flask, request, jsonify
+ from langchain_community.vectorstores import Chroma
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_ollama import OllamaLLM
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain_core.prompts import PromptTemplate
+
+ app = Flask(__name__)
+
+ # Initialize the language model
+ llm = OllamaLLM(model="llama3.2")
+
+ # Initialize HuggingFaceEmbeddings and Chroma
+ model_name = "intfloat/multilingual-e5-large"
+ load_from_dir = "Hadith_Chroma_db"
+
+ embedding_llm = HuggingFaceEmbeddings(model_name=model_name)
+
+ loaded_vector_db = Chroma(
+     persist_directory=load_from_dir,
+     embedding_function=embedding_llm
+ )
+
+ def get_similar_docs(query):
+     """Retrieve similar documents based on the query."""
+     similar_docs = loaded_vector_db.similarity_search(query, k=2)
+     return similar_docs
+
+ def ask_llms(query_text):
+     """Ask the LLM to provide an answer based on similar documents."""
+     similar_docs = get_similar_docs(query_text)
+
+     qna_template = '\n'.join([
+         "Answer the following question using the context provided.",
+         "If the answer is not included in the context, say 'No answer available'.",
+         "### Context:",
+         "{context}",
+         "### Question:",
+         "{question}",
+         "### Answer:"
+     ])
+
+     qna_prompt = PromptTemplate(
+         template=qna_template,
+         input_variables=['context', 'question'],
+         verbose=True
+     )
+
+     stuff_chain = load_qa_chain(llm, chain_type="stuff", prompt=qna_prompt)
+
+     final_answer = stuff_chain.invoke({
+         "input_documents": similar_docs,
+         "question": query_text
+     })
+
+     return final_answer['output_text']
+
+ @app.route('/ai', methods=['POST'])
+ def aiPost():
+     """Handle POST requests to the /ai endpoint."""
+     try:
+         json_content = request.json
+         if not json_content or 'query' not in json_content:
+             return jsonify({"error": "Invalid input, 'query' field is required"}), 400
+
+         query = json_content.get('query')
+
+         # Get the response from the LLM based on the query
+         response = ask_llms(query)
+
+         return jsonify({"response": response})
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+ def start_app():
+     """Start the Flask app."""
+     app.run(host="0.0.0.0", port=8080, debug=True)
+
+ if __name__ == '__main__':
+     start_app()
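For reference, a minimal client-side sketch of how the /ai endpoint above could be exercised once api.py is running on port 8080. This assumes the `requests` package is installed and that a local Ollama server with the llama3.2 model is available; the Arabic query string is only an example.

# Hypothetical smoke test for the /ai endpoint; not part of the commit.
import requests

resp = requests.post(
    "http://localhost:8080/ai",
    json={"query": "ما فضل صلاة العصر؟"},  # request body must contain a 'query' field
    timeout=120,  # generation can be slow on CPU
)
resp.raise_for_status()
print(resp.json()["response"])  # the chain's 'output_text' wrapped by the Flask handler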
app.py CHANGED
@@ -1,105 +1,62 @@
- import os
- import re
- import zipfile
- import gradio as gr
- import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM
- from langchain_community.embeddings import HuggingFaceEmbeddings  # Updated import
- from langchain_chroma import Chroma  # Updated import
-
- class HadithChatApp:
-     def __init__(self):
-         # Load embeddings
-         self.embeddings = HuggingFaceEmbeddings(
-             model_name="intfloat/multilingual-e5-large"
-         )
-         self.model_name = "malhajar/Mistral-7B-v0.1-arabic"
-
-         # Define paths
-         self.zip_file_path = 'Hadith_Chroma_db_compressed.zip'
-         self.extract_dir = 'Hadith_Chroma_db'
-
-         # Extract the ZIP file if not already extracted
-         if not os.path.exists(self.extract_dir):
-             self.extract_zip(self.zip_file_path, self.extract_dir)
-
-         # Load the vector store from the extracted directory
-         self.vectorStore = Chroma(
-             persist_directory=self.extract_dir,
-             embedding_function=self.embeddings
-         )
-
-         # Initialize the language model
-         self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-         self.model = AutoModelForCausalLM.from_pretrained(self.model_name)
-
-     def extract_zip(self, zip_path, extract_to):
-         """Extract a ZIP file to a specified directory."""
-         with zipfile.ZipFile(zip_path, 'r') as zip_ref:
-             zip_ref.extractall(extract_to)
-
-     def clean_text(self, text):
-         """Clean the input text by removing HTML tags and unwanted characters."""
-         text = re.sub(r'<[^>]*>', '', text)
-         text = re.sub(r'[^\w\s]', '', text)
-         text = re.sub(r'\s+', ' ', text)
-         return text.lower().strip()
-
-     def get_relevant_docs(self, question, k=5):
-         """Get relevant documents based on the input question."""
-         relevant_docs = self.vectorStore.similarity_search(question, k=k)
-         return relevant_docs
-
-     def extract_contexts(self, relevant_docs):
-         """Extract contexts from the relevant documents."""
-         contexts = []
-         for doc in relevant_docs:
-             contexts.append(doc.page_content)  # Accessing page_content directly
-         return contexts
-
-     def create_template(self, question, k):
-         """Create a template for the query to the LLM."""
-         relevant_docs = self.get_relevant_docs(question, k)
-         contexts = self.extract_contexts(relevant_docs)
-         template = f"""
-         Engage in a conversation with the user, responding to their question:
-         {question}
-         within this context of Hadiths:
-         {contexts}
-         Encourage the model to provide informative and culturally sensitive answers, reflecting Islamic teachings. Maintain a conversational tone and aim for clarity in responses and make sure they are restricted extracted from the provided contexts and i want you to answer me in arabic."""
-         return template
-
-     def generate_answer(self, question):
-         """Generate an answer using the language model."""
-         cleaned_question = self.clean_text(question)
-         query = self.create_template(cleaned_question, 5)
-
-         # Tokenize the query
-         inputs = self.tokenizer(query, return_tensors="pt", padding=True, truncation=True)
-
-         # Generate the response
-         with torch.no_grad():
-             outputs = self.model.generate(**inputs, max_length=512)
-
-         # Decode the generated text
-         response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-         return self.clean_text(response)
-
-     def greet(self, question):
-         answer = self.generate_answer(question)
-         return answer
-
- if __name__ == "__main__":
-     # Initialize the app
-     hadith_chat_app = HadithChatApp()
-
-     # Set up the Gradio interface
-     iface = gr.Interface(
-         fn=hadith_chat_app.greet,
-         inputs="text",
-         outputs="text",
-         title="Hadith QA App"
-     )
-
-     # Launch the Gradio interface
-     iface.launch(server_name="0.0.0.0", server_port=7860, share=True)
 
+ import streamlit as st
+ import time
+ import main
+
+ # Initialize conversation history
+ if 'conversation' not in st.session_state:
+     st.session_state['conversation'] = []
+
+ # Function to display conversation history
+ def display_conversation():
+     for entry in st.session_state['conversation']:
+         if entry['type'] == 'bot':
+             st.markdown(f"<div style='background-color:#005691; padding:10px; border-radius:10px; margin:10px 0; color:white;'>{entry['message']}</div>", unsafe_allow_html=True)
+         else:
+             st.markdown(f"<div style='background-color:#f1f1f1; padding:10px; border-radius:10px; margin:10px 0; color:black;'>{entry['message']}</div>", unsafe_allow_html=True)
+
+ # Function to get the chatbot response from the QA backend in main.py
+ def get_bot_response(user_input):
+     response = main.ask_llms(user_input)
+     bot_response = f"Echo: {response}"
+     return bot_response
+
+ # Streamlit UI components
+ st.title("Professional Chatbot Interface")
+
+ # Display conversation
+ st.markdown("### Conversation")
+ display_conversation()
+
+ # User input
+ user_input = st.text_input("You", "")
+
+ # Button for user to submit the input
+ if st.button("Send"):
+     if user_input:
+         # Save user's message
+         st.session_state['conversation'].append({"type": "user", "message": user_input})
+
+         # Get bot response and save it
+         bot_message = get_bot_response(user_input)
+         time.sleep(1)  # Simulate a short typing delay
+         st.session_state['conversation'].append({"type": "bot", "message": bot_message})
+
+         # Refresh the page after sending the message
+         st.rerun()
+
+ # Design enhancements: custom CSS to style the input box
+ st.markdown("""
+ <style>
+ .stTextInput > div > input {
+     background-color: #1e1e1e !important; /* Darker background for better contrast */
+     color: white !important; /* Ensure text is white */
+     padding: 10px;
+     border-radius: 10px;
+ }
+ .css-1e5imcs {padding-top: 0rem;} /* Reduce top padding */
+ </style>
+ """, unsafe_allow_html=True)
main.py ADDED
@@ -0,0 +1,70 @@
+ from langchain_ollama import OllamaLLM
+ import similarity
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain_core.prompts import PromptTemplate
+
+ # Initialize an instance of the Ollama model
+ llm = OllamaLLM(model="llama3.2")
+
+ # Earlier inline experiment, kept commented out for reference:
+ # query_text = "ما فضل صلاة العصر؟"
+ # print(f'Query : {query_text}')
+ # similar_docs = similarity.get_similar_docs(query_text)
+ # qna_template = '\n'.join([
+ #     "Answer the following question using the context provided.",
+ #     "Please provide an answer from within the context, with details if they exist.",
+ #     "If the answer is not included in the context, say 'No answer available'.",
+ #     "### Context:",
+ #     "{context}",
+ #     "### Question:",
+ #     "{question}",
+ #     "### Answer:"
+ # ])
+ # qna_prompt = PromptTemplate(
+ #     template=qna_template,
+ #     input_variables=['context', 'question'],
+ #     verbose=True
+ # )
+ # stuff_chain = load_qa_chain(llm, chain_type="stuff", prompt=qna_prompt)
+ # final_answer = stuff_chain({
+ #     "input_documents": similar_docs,
+ #     "question": query_text
+ # }, return_only_outputs=True)
+ # print(final_answer)
+
+ def ask_llms(query_text):
+     """Answer query_text using documents retrieved from the Chroma store."""
+     similar_docs = similarity.get_similar_docs(query_text)
+
+     qna_template = '\n'.join([
+         "Answer the following question using the context provided.",
+         "If the answer is not included in the context, say 'No answer available'.",
+         "### Context:",
+         "{context}",
+         "### Question:",
+         "{question}",
+         "### Answer:"
+     ])
+
+     qna_prompt = PromptTemplate(
+         template=qna_template,
+         input_variables=['context', 'question'],
+         verbose=True
+     )
+
+     stuff_chain = load_qa_chain(llm, chain_type="stuff", prompt=qna_prompt)
+
+     final_answer = stuff_chain.invoke({
+         "input_documents": similar_docs,
+         "question": query_text
+     })
+
+     return final_answer['output_text']
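A quick way to exercise ask_llms from a Python shell, assuming a local Ollama server with the llama3.2 model is running and the Hadith_Chroma_db directory sits next to the scripts; the query string is the example from the commented-out block above.

# Hypothetical usage check for main.ask_llms; not part of the commit.
from main import ask_llms

answer = ask_llms("ما فضل صلاة العصر؟")
print(answer)  # prints the chain's 'output_text'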
similarity.py ADDED
@@ -0,0 +1,16 @@
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import Chroma
+
+ model_name = "intfloat/multilingual-e5-large"
+ load_from_dir = "Hadith_Chroma_db"
+
+ embedding_llm = HuggingFaceEmbeddings(model_name=model_name)
+
+ loaded_vector_db = Chroma(
+     persist_directory=load_from_dir,
+     embedding_function=embedding_llm
+ )
+
+ def get_similar_docs(query):
+     """Return the most similar documents from the Chroma store."""
+     similar_docs = loaded_vector_db.similarity_search(query, k=2)
+     return similar_docs
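A small sketch of how the retriever can be inspected on its own, assuming the same local Hadith_Chroma_db directory; each returned item is a LangChain Document whose text is available through page_content.

# Illustrative check of the retriever in isolation; not part of the commit.
from similarity import get_similar_docs

docs = get_similar_docs("ما فضل صلاة العصر؟")
for doc in docs:
    print(doc.page_content[:200])  # preview the first 200 characters of each match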