zohaibterminator committed on
Commit
b486b3a
1 Parent(s): 9b594a2

Upload 4 files

Browse files
Files changed (4) hide show
  1. .env +4 -0
  2. app.py +81 -0
  3. llm.py +99 -0
  4. requirements.txt +20 -0
.env ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # SECURITY: live credentials were committed here. These keys are compromised
+ # and must be rotated immediately; never commit a populated .env file.
+ GROQ_API_KEY="<REDACTED - rotate this key>"
2
+ RAPIDAPI_LANG_TRANS="<REDACTED - rotate this key>"
3
+ RAPIDAPI_HOST="microsoft-translator-text.p.rapidapi.com"
4
+ TAVILY_API_KEY="<REDACTED - rotate this key>"
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from dotenv import load_dotenv
3
+ from audiorecorder import audiorecorder
4
+ from langchain_core.messages import HumanMessage, AIMessage
5
+ import requests
6
+ from transformers import pipeline
7
+ from gtts import gTTS
8
+ import io
# Load environment variables from .env (API keys used by the backend services)
load_dotenv()

# NOTE(review): hard-coded demo user id — presumably replaced by real
# authentication later; verify against the intended deployment.
user_id = "1" # example user id

# Initialize the wav2vec2 model for Urdu speech-to-text.
# NOTE(review): model weights download on first run and loading happens at
# import time, so app startup blocks until the model is ready.
pipe = pipeline("automatic-speech-recognition", model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu")
18
def get_response(user_input):
    '''
    Takes user_input in English and invokes the infer API for response.

    Parameters:
        user_input (string): User Query in English.
    Returns:
        res (string): Response from the LLM.
    Raises:
        requests.HTTPError: if the infer endpoint returns a non-2xx status.
        requests.Timeout: if the backend does not answer within the timeout.
    '''
    # NOTE(review): no port in the URL, so this hits port 80; the FastAPI app
    # in llm.py is normally served by uvicorn on :8000 — confirm deployment.
    url = f"http://127.0.0.1/infer/{user_id}"
    data = {"user_input": user_input}
    # `data=` already sends application/x-www-form-urlencoded, so the manual
    # Content-Type header was redundant. The timeout keeps the Streamlit UI
    # from hanging forever when the backend is down.
    response = requests.post(url, data=data, timeout=60)
    response.raise_for_status()  # fail loudly instead of masking backend errors
    res = response.json()
    return res["data"]
33
+
34
+
35
def text_to_speech(text, lang='ur'):
    '''
    Synthesize speech for the given text with gTTS and return it in memory.

    Parameters:
        text (string): Text to be converted to speech.
        lang (string): Language for the speech synthesis. Default is 'ur' (Urdu).
    Returns:
        BytesIO: In-memory buffer holding the generated audio, rewound to the
            start so callers can read it immediately.
    '''
    audio_buffer = io.BytesIO()
    gTTS(text, lang=lang).write_to_fp(audio_buffer)
    audio_buffer.seek(0)
    return audio_buffer
50
+
51
+
52
st.set_page_config(page_title="Urdu Virtual Assistant", page_icon="🤖")  # set the page title and icon
st.title("Urdu Virtual Assistant")  # set the main title of the application

# Record audio from the user's microphone (empty until a recording is made)
audio = audiorecorder()

if len(audio) > 0:
    # Persist the recording so the ASR pipeline can read it from disk
    audio.export("audio.wav", format="wav")

    # Transcribe the Urdu audio. `pipe` accepts a file path directly, so the
    # previous manual open()/read() into an unused `audio_bytes` variable was
    # dead code and has been removed.
    result = pipe("audio.wav")
    user_query = result["text"]

    with st.chat_message("Human"):  # create the message box for human input
        st.audio(audio.export().read())  # display the audio player
        st.markdown(user_query)

    # Forward the transcription to the backend LLM and voice the reply
    response_text = get_response(user_input=user_query)
    response_audio = text_to_speech(response_text, lang='ur')

    # Play the generated speech in the app
    with st.chat_message("AI"):
        st.audio(response_audio.read(), format='audio/mp3')
        st.markdown(response_text)
llm.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Form
2
+ from langchain_core.runnables.base import RunnableSequence
3
+ from langchain_core.prompts import ChatPromptTemplate
4
+ from langchain_groq import ChatGroq
5
+ import os
6
+ import requests
7
+ from dotenv import load_dotenv
8
+ from langgraph.checkpoint.memory import MemorySaver
9
+ from langgraph.prebuilt import create_react_agent
10
+ from langchain_community.tools.tavily_search import TavilySearchResults
11
load_dotenv()  # pull GROQ_API_KEY / TAVILY_API_KEY etc. into the environment

app = FastAPI()

# Groq-hosted Llama 3.1 chat model; temperature 0 for deterministic answers.
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0,
    max_tokens=None,  # no explicit completion-length cap
    timeout=None,     # rely on the client's default timeout behavior
    max_retries=5,
    groq_api_key=os.getenv("GROQ_API_KEY")
)

# Tavily web-search tool the agent may call (top 2 results per query).
search = TavilySearchResults(
    max_results=2,
)
tools = [search]
# In-memory conversation checkpointer — chat state is lost on process restart.
memory = MemorySaver()

# ReAct-style agent that interleaves LLM reasoning with tool calls, keyed by
# thread_id for per-user conversation memory.
agent_executor = create_react_agent(llm, tools, checkpointer=memory)
31
+
32
def translate(target, text):
    '''
    Translates given text into target language via the RapidAPI
    Microsoft Translator endpoint.

    Parameters:
        target (string): 2 character code to specify the target language.
        text (string): Text to be translated.

    Returns:
        res (string): Translated text.

    Raises:
        requests.HTTPError: if the translation API returns a non-2xx status.
        requests.Timeout: if the API does not answer within the timeout.
    '''
    url = "https://microsoft-translator-text.p.rapidapi.com/translate"

    querystring = {"api-version": "3.0", "profanityAction": "NoAction", "textType": "plain", "to": target}

    payload = [{"Text": text}]
    headers = {
        "x-rapidapi-key": os.getenv("RAPIDAPI_LANG_TRANS"),
        "x-rapidapi-host": "microsoft-translator-text.p.rapidapi.com",
        "Content-Type": "application/json"
    }

    # A timeout keeps a stuck translation call from hanging the whole request;
    # raise_for_status surfaces auth/quota failures clearly instead of a
    # confusing IndexError/KeyError on the error-shaped JSON below.
    response = requests.post(url, json=payload, headers=headers, params=querystring, timeout=30)
    response.raise_for_status()
    res = response.json()
    return res[0]["translations"][0]["text"]
57
+
58
+
59
@app.post('/infer/{user_id}')
def infer(user_id: str, user_input: str = Form(...)):
    '''
    Returns the translated response from the LLM in response to a user query.

    Parameters:
        user_id (string): User ID of a user; used as the LangGraph thread_id
            so each user keeps an independent conversation history.
        user_input (string): User query (expected in Urdu).

    Returns:
        JSON Response (Dictionary): {"data": <response text translated to Urdu>}.
    '''
    user_input = translate("en", user_input)  # translate user query to english

    prompt = ChatPromptTemplate.from_messages(  # define a prompt
        [
            (
                "system",
                "You're a compassionate AI virtual Assistant"
            ),
            ("human", "{user_input}")
        ]
    )

    # The original wrapped this chain in RunnableSequence(runnable), which is
    # a no-op around a single runnable — invoke the chain directly instead.
    conversation = prompt | agent_executor

    response = conversation.invoke(  # run the agent with per-user memory
        {"user_input": user_input},
        config={"configurable": {"thread_id": user_id}}
    )

    res = translate("ur", response["messages"][-1].content)  # translate the response to Urdu

    return {
        "data": res
    }
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langgraph
2
+ streamlit-audiorecorder
3
+ fastapi
4
+ langchain
5
+ python-dotenv
6
+ uvicorn
7
+ gTTS
8
+ openai-whisper
9
+ langchain_core
10
+ langchain_community
11
+ langchain_groq
12
+ python-multipart
13
+ kenlm
14
+ pyctcdecode
15
+ transformers
16
+ librosa
17
+ soundfile
18
+ # (duplicate "gTTS" entry removed — already listed above)
19
+ tavily-python
20
+ langgraph-checkpoint-sqlite