Gopikanth123 committed on
Commit
22808ba
·
verified ·
1 Parent(s): 6fe1f69

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +114 -241
main.py CHANGED
@@ -1,247 +1,120 @@
1
- import os
2
- import shutil
3
- from flask import Flask, render_template, request, jsonify
4
- from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings
5
- from llama_index.llms.huggingface import HuggingFaceInferenceAPI
6
- from llama_index.embeddings.huggingface import HuggingFaceEmbedding
7
- from huggingface_hub import InferenceClient
8
- from transformers import AutoTokenizer, AutoModel
9
  from deep_translator import GoogleTranslator
10
-
11
-
12
- # Ensure HF_TOKEN is set
13
- HF_TOKEN = os.getenv("HF_TOKEN")
14
- if not HF_TOKEN:
15
- raise ValueError("HF_TOKEN environment variable not set.")
16
-
17
- repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
18
- llm_client = InferenceClient(
19
- model=repo_id,
20
- token=HF_TOKEN,
21
- )
22
-
23
- # Configure Llama index settings
24
- Settings.llm = HuggingFaceInferenceAPI(
25
- model_name=repo_id,
26
- tokenizer_name=repo_id,
27
- context_window=3000,
28
- token=HF_TOKEN,
29
- max_new_tokens=512,
30
- generate_kwargs={"temperature": 0.1},
31
- )
32
- # Settings.embed_model = HuggingFaceEmbedding(
33
- # model_name="BAAI/bge-small-en-v1.5"
34
- # )
35
- # Replace the embedding model with XLM-R
36
- # Settings.embed_model = HuggingFaceEmbedding(
37
- # model_name="xlm-roberta-base" # XLM-RoBERTa model for multilingual support
38
- # )
39
- Settings.embed_model = HuggingFaceEmbedding(
40
- model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
41
- )
42
-
43
- # Configure tokenizer and model if required
44
- tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
45
- model = AutoModel.from_pretrained("xlm-roberta-base")
46
-
47
- PERSIST_DIR = "db"
48
- PDF_DIRECTORY = 'data'
49
-
50
- # Ensure directories exist
51
- os.makedirs(PDF_DIRECTORY, exist_ok=True)
52
- os.makedirs(PERSIST_DIR, exist_ok=True)
53
- chat_history = []
54
- current_chat_history = []
55
-
56
- def data_ingestion_from_directory():
57
- # Clear previous data by removing the persist directory
58
- if os.path.exists(PERSIST_DIR):
59
- shutil.rmtree(PERSIST_DIR) # Remove the persist directory and all its contents
60
-
61
- # Recreate the persist directory after removal
62
- os.makedirs(PERSIST_DIR, exist_ok=True)
63
-
64
- # Load new documents from the directory
65
- new_documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
66
-
67
- # Create a new index with the new documents
68
- index = VectorStoreIndex.from_documents(new_documents)
69
-
70
- # Persist the new index
71
- index.storage_context.persist(persist_dir=PERSIST_DIR)
72
-
73
- # def handle_query(query):
74
- # context_str = ""
75
-
76
- # # Build context from current chat history
77
- # for past_query, response in reversed(current_chat_history):
78
- # if past_query.strip():
79
- # context_str += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
80
-
81
- # chat_text_qa_msgs = [
82
- # (
83
- # "user",
84
- # """
85
- # You are the Taj Hotel voice chatbot and your name is Taj hotel helper. Your goal is to provide accurate, professional, and helpful answers to user queries based on the Taj hotel data. Always ensure your responses are clear and concise. Give response within 10-15 words only. You need to give an answer in the same language used by the user.
86
- # {context_str}
87
- # Question:
88
- # {query_str}
89
- # """
90
- # )
91
- # ]
92
-
93
-
94
-
95
- # text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
96
-
97
- # storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
98
- # index = load_index_from_storage(storage_context)
99
- # # context_str = ""
100
-
101
- # # # Build context from current chat history
102
- # # for past_query, response in reversed(current_chat_history):
103
- # # if past_query.strip():
104
- # # context_str += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
105
-
106
- # query_engine = index.as_query_engine(text_qa_template=text_qa_template, context_str=context_str)
107
- # print(f"Querying: {query}")
108
- # answer = query_engine.query(query)
109
-
110
- # # Extracting the response
111
- # if hasattr(answer, 'response'):
112
- # response = answer.response
113
- # elif isinstance(answer, dict) and 'response' in answer:
114
- # response = answer['response']
115
- # else:
116
- # response = "I'm sorry, I couldn't find an answer to that."
117
-
118
- # # Append to chat history
119
- # current_chat_history.append((query, response))
120
- # return response
121
  def handle_query(query):
122
- chat_text_qa_msgs = [
123
- (
124
- "user",
125
- """
126
- You are the Hotel voice chatbot and your name is hotel helper. Your goal is to provide accurate, professional, and helpful answers to user queries based on the hotel's data. Always ensure your responses are clear and concise. Give response within 10-15 words only. You need to give an answer in the same language used by the user.
127
- {context_str}
128
- Question:
129
- {query_str}
130
- """
131
- )
132
- ]
133
- text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
134
-
135
- storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
136
- index = load_index_from_storage(storage_context)
137
- context_str = ""
138
- for past_query, response in reversed(current_chat_history):
139
- if past_query.strip():
140
- context_str += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
141
-
142
- query_engine = index.as_query_engine(text_qa_template=text_qa_template, context_str=context_str)
143
- print(query)
144
- answer = query_engine.query(query)
145
-
146
- if hasattr(answer, 'response'):
147
- response = answer.response
148
- elif isinstance(answer, dict) and 'response' in answer:
149
- response = answer['response']
150
- else:
151
- response = "Sorry, I couldn't find an answer."
152
- current_chat_history.append((query, response))
153
- return response
154
-
155
- app = Flask(__name__)
156
-
157
- # Data ingestion
158
- data_ingestion_from_directory()
159
-
160
- # Generate Response
161
- def generate_response(query, language):
162
- try:
163
- # Call the handle_query function to get the response
164
  bot_response = handle_query(query)
165
-
166
- # Map of supported languages
167
- supported_languages = {
168
- "hindi": "hi",
169
- "bengali": "bn",
170
- "telugu": "te",
171
- "marathi": "mr",
172
- "tamil": "ta",
173
- "gujarati": "gu",
174
- "kannada": "kn",
175
- "malayalam": "ml",
176
- "punjabi": "pa",
177
- "odia": "or",
178
- "urdu": "ur",
179
- "assamese": "as",
180
- "sanskrit": "sa",
181
- "arabic": "ar",
182
- "australian": "en-AU",
183
- "bangla-india": "bn-IN",
184
- "chinese": "zh-CN",
185
- "dutch": "nl",
186
- "french": "fr",
187
- "filipino": "tl",
188
- "greek": "el",
189
- "indonesian": "id",
190
- "italian": "it",
191
- "japanese": "ja",
192
- "korean": "ko",
193
- "latin": "la",
194
- "nepali": "ne",
195
- "portuguese": "pt",
196
- "romanian": "ro",
197
- "russian": "ru",
198
- "spanish": "es",
199
- "swedish": "sv",
200
- "thai": "th",
201
- "ukrainian": "uk",
202
- "turkish": "tr"
203
- }
204
-
205
- # Initialize the translated text
206
- translated_text = bot_response
207
-
208
- # Translate only if the language is supported and not English
209
- try:
210
- if language in supported_languages:
211
- target_lang = supported_languages[language]
212
- translated_text = GoogleTranslator(source='en', target=target_lang).translate(bot_response)
213
- print(translated_text)
214
- else:
215
- print(f"Unsupported language: {language}")
216
- except Exception as e:
217
- # Handle translation errors
218
- print(f"Translation error: {e}")
219
- translated_text = "Sorry, I couldn't translate the response."
220
-
221
- # Append to chat history
222
- chat_history.append((query, translated_text))
223
- return translated_text
224
- except Exception as e:
225
  return f"Error fetching the response: {str(e)}"
226
 
227
- # Route for the homepage
228
- @app.route('/')
229
- def index():
230
- return render_template('index.html')
231
-
232
- # Route to handle chatbot messages
233
- @app.route('/chat', methods=['POST'])
234
- def chat():
235
- try:
236
- user_message = request.json.get("message")
237
- language = request.json.get("language")
238
- if not user_message:
239
- return jsonify({"response": "Please say something!"})
240
-
241
- bot_response = generate_response(user_message,language)
242
- return jsonify({"response": bot_response})
243
- except Exception as e:
244
- return jsonify({"response": f"An error occurred: {str(e)}"})
245
-
246
- if __name__ == '__main__':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  app.run(debug=True)
 
1
+ import os
2
+ from flask import Flask, render_template, request, jsonify, send_file
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 
 
 
4
  from deep_translator import GoogleTranslator
5
+ from gtts import gTTS
6
+ import uuid
7
+
8
# Flask application object; the route decorators further down attach to it.
app = Flask(__name__)

# Tokenizer and causal language model, both loaded once when the module is
# imported. trust_remote_code=True lets the checkpoint's own Python code run.
model_name = "deepseek-ai/DeepSeek-V3"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

# Lowercase language name sent by the client -> language code handed to the
# translator and to the speech synthesiser.
SUPPORTED_LANGUAGES = {
    "english": "en",
    "hindi": "hi",
    "bengali": "bn",
    "telugu": "te",
    "marathi": "mr",
    "tamil": "ta",
    "gujarati": "gu",
    "kannada": "kn",
    "malayalam": "ml",
    "punjabi": "pa",
    "odia": "or",
    "urdu": "ur",
    "assamese": "as",
    "sanskrit": "sa",
    "arabic": "ar",
    "chinese": "zh-CN",
    "dutch": "nl",
    "french": "fr",
    "filipino": "tl",
    "greek": "el",
    "indonesian": "id",
    "italian": "it",
    "japanese": "ja",
    "korean": "ko",
    "latin": "la",
    "nepali": "ne",
    "portuguese": "pt",
    "romanian": "ro",
    "russian": "ru",
    "spanish": "es",
    "swedish": "sv",
    "thai": "th",
    "ukrainian": "uk",
    "turkish": "tr",
}

# Directory that receives the generated speech files; created on import.
AUDIO_DIR = "audio"
os.makedirs(AUDIO_DIR, exist_ok=True)
30
+
31
# Handle user queries using DeepSeek-V3
def handle_query(query, max_new_tokens=50, temperature=0.7):
    """Generate a reply to *query* with the module-level causal language model.

    Args:
        query: The user's message as plain text.
        max_new_tokens: Upper bound on the number of tokens generated for the
            reply. The prompt's own tokens do not count towards this limit.
        temperature: Sampling temperature; higher values give more varied text.

    Returns:
        The generated reply with special tokens removed and surrounding
        whitespace stripped. The prompt text is not part of the result.
    """
    inputs = tokenizer(query, return_tensors="pt")
    prompt_length = inputs.input_ids.shape[-1]

    outputs = model.generate(
        input_ids=inputs.input_ids,
        # Forward the mask so padding/EOS handling does not have to be guessed.
        attention_mask=inputs.attention_mask,
        # Budget the reply itself; max_length would also count the prompt and
        # leave nothing to generate for long questions.
        max_new_tokens=max_new_tokens,
        # temperature only has an effect when sampling is switched on.
        do_sample=True,
        temperature=temperature,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )

    # For a causal LM the returned sequence starts with the prompt tokens;
    # drop them so the user does not get their own question echoed back.
    reply_ids = outputs[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True).strip()
48
+
49
# Generate response and translate if needed
def generate_response(query, language):
    """Answer *query* and translate the answer into *language*.

    Args:
        query: The user's message, passed unchanged to ``handle_query``.
        language: Name of the target language, matched case-insensitively
            against the keys of ``SUPPORTED_LANGUAGES``. ``None``, non-string
            values and unknown names fall back to English (no translation).

    Returns:
        The answer text, translated when the target language is not English.
        If translation fails, a fixed apology is returned; if producing the
        answer fails, the returned string describes the error. Errors are
        reported through the return value rather than raised.
    """
    try:
        bot_response = handle_query(query)
    except Exception as e:
        return f"Error fetching the response: {str(e)}"

    # A missing or malformed language must not discard a good answer, so it is
    # normalised here instead of letting .lower() raise on None.
    language_key = language.strip().lower() if isinstance(language, str) else ""
    target_lang = SUPPORTED_LANGUAGES.get(language_key, "en")
    if target_lang == "en":
        return bot_response

    try:
        return GoogleTranslator(source='en', target=target_lang).translate(bot_response)
    except Exception as e:
        print(f"Translation error: {e}")
        return "Sorry, I couldn't translate the response."
67
 
68
# Convert text to speech using gTTS
def text_to_speech(text, lang="en"):
    """Synthesise *text* into an MP3 file inside ``AUDIO_DIR``.

    Args:
        text: The text to speak.
        lang: Language code handed to gTTS.

    Returns:
        The path of the newly written file, or ``None`` when synthesis or
        saving fails (the error is printed).
    """
    # A random UUID keeps files from separate requests from colliding.
    destination = os.path.join(AUDIO_DIR, f"{uuid.uuid4()}.mp3")
    try:
        gTTS(text=text, lang=lang).save(destination)
    except Exception as e:
        print(f"Error generating speech: {e}")
        return None
    return destination
83
+
84
# Flask routes
@app.route('/')
def index():
    """Serve the site root by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
88
+
89
@app.route('/chat', methods=['POST'])
def chat():
    """Answer one chat message and attach a spoken version of the answer.

    Expects a JSON object with a ``message`` string and an optional
    ``language`` name (defaults to English when absent, null or not a string).

    Returns:
        A JSON object with ``response`` (the answer or an error message) and
        ``audio_url`` (URL of the synthesised speech, or ``None`` when no
        audio could be produced).
    """
    try:
        # silent=True yields None for a missing or unparsable body instead of
        # raising, so such requests get the normal "say something" prompt.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        user_message = data.get("message")
        if not isinstance(user_message, str) or not user_message.strip():
            return jsonify({"response": "Please say something!", "audio_url": None})

        # An explicit JSON null must behave like an absent key, not crash.
        language = data.get("language")
        language = language.strip().lower() if isinstance(language, str) else "english"

        bot_response = generate_response(user_message, language)

        # Speak the answer in the same language it was translated into.
        target_lang = SUPPORTED_LANGUAGES.get(language, "en")
        audio_path = text_to_speech(bot_response, lang=target_lang)
        audio_url = f"/audio/{os.path.basename(audio_path)}" if audio_path else None

        return jsonify({"response": bot_response, "audio_url": audio_url})
    except Exception as e:
        return jsonify({"response": f"An error occurred: {str(e)}", "audio_url": None})
114
+
115
@app.route('/audio/<filename>')
def serve_audio(filename):
    """Send one previously generated speech file from ``AUDIO_DIR``.

    Args:
        filename: File name taken from the URL; it is client-controlled.

    Returns:
        The file as an ``audio/mpeg`` response. A name that does not resolve
        to a file inside ``AUDIO_DIR`` produces a 404.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import list being changed.
    from flask import send_from_directory

    # send_from_directory joins the name safely and rejects anything that
    # would escape the directory, unlike a bare os.path.join + send_file.
    return send_from_directory(AUDIO_DIR, filename, mimetype="audio/mpeg")
118
+
119
if __name__ == '__main__':
    # Debug mode enables the interactive debugger, which lets anyone who can
    # reach the server run code on it, so it is opt-in via FLASK_DEBUG rather
    # than always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)