Vageesh1 committed
Commit 7ad1106 · 1 Parent(s): 5911589

Upload 3 files

Files changed (3)
  1. app.py +118 -0
  2. helper.py +75 -0
  3. requirements.txt +15 -0
app.py ADDED
@@ -0,0 +1,118 @@
+ import io
+ import os
+ import re
+
+ import streamlit as st
+ from audiorecorder import audiorecorder
+ from pydub import AudioSegment
+
+ from langchain import LLMChain, PromptTemplate
+ from langchain.chat_models import ChatOpenAI
+ from langchain.memory import ConversationBufferWindowMemory
+ from elevenlabs import set_api_key
+
+ from helper import parse_transcription, hindi_to_english, translate_english_to_hindi, hindi_tts
+
+
+ def extract_text_from_html(html):
+     # Strip HTML tags and return the plain text.
+     cleanr = re.compile('<.*?>')
+     return re.sub(cleanr, '', html)
+
+
+ def conversational_chat(llm_chain, query):
+     # Run the chain on the translated query and log the exchange.
+     answer = llm_chain.predict(human_input=query)
+     st.session_state['history'].append((query, answer))
+     return answer
+
+
+ def save_uploaded_file_as_mp3(audio_bytes, output_file_path):
+     # pydub expects a file-like object, so wrap the raw bytes.
+     audio = AudioSegment.from_file(io.BytesIO(audio_bytes))
+     audio.export(output_file_path, format="mp3")
+
+
+ def ui():
+     user_api_key = st.sidebar.text_input(
+         label="#### Your OpenAI API key 👇",
+         placeholder="Paste your OpenAI API key, sk-",
+         type="password")
+
+     eleven_labs_api_key = st.sidebar.text_input(
+         label="#### Your Eleven Labs API key 👇",
+         placeholder="Paste your Eleven Labs API key",
+         type="password")
+
+     if user_api_key is not None and user_api_key.strip() != "":
+         os.environ["OPENAI_API_KEY"] = user_api_key
+         template = """
+         Behave like a telecom customer-service call agent and don't include any website address, company name or any other parameter in your output.
+         {history}
+         Me: {human_input}
+         Jack:
+         """
+
+         prompt = PromptTemplate(
+             input_variables=["history", "human_input"],
+             template=template
+         )
+
+         llm_chain = LLMChain(
+             llm=ChatOpenAI(temperature=0.0, model_name='gpt-3.5-turbo'),
+             prompt=prompt,
+             verbose=True,
+             memory=ConversationBufferWindowMemory(k=2)
+         )
+
+         if 'history' not in st.session_state:
+             st.session_state['history'] = []
+
+         # Start with empty lists: both hold raw audio for st.audio, not greeting strings.
+         if 'generated' not in st.session_state:
+             st.session_state['generated'] = []
+
+         if 'past' not in st.session_state:
+             st.session_state['past'] = []
+
+         if eleven_labs_api_key is not None and eleven_labs_api_key.strip() != "":
+             set_api_key(eleven_labs_api_key)
+
+         # container for the chat history
+         response_container = st.container()
+         # container for the user's audio input
+         container = st.container()
+
+         with container:
+             audio_file = audiorecorder("Click to record", "Recording...")
+             # assumes the installed audiorecorder returns a buffer exposing tobytes()
+             audio_bytes = audio_file.tobytes()
+             # st.form_submit_button only works inside st.form, so use a plain button
+             submit_button = st.button(label='Send')
+             if submit_button and audio_bytes:
+                 output_file_path = "./output_audio.mp3"
+                 save_uploaded_file_as_mp3(audio_bytes, output_file_path)
+                 # apply speech recognition to the recorded Hindi audio
+                 hindi_transcription = parse_transcription(output_file_path)
+                 st.success(f"Audio file saved as {output_file_path}")
+                 # translate Hindi to English
+                 english_input = hindi_to_english(hindi_transcription)
+                 # feed the translated input to the LLM
+                 english_output = conversational_chat(llm_chain, english_input)
+                 # translate the answer back to Hindi
+                 hin_output = translate_english_to_hindi(english_output)
+                 # synthesize the Hindi answer with Eleven Labs
+                 hindi_output_audio = hindi_tts(hin_output)
+
+                 st.session_state['past'].append(audio_bytes)
+                 st.session_state['generated'].append(hindi_output_audio)
+
+         if st.session_state['generated']:
+             with response_container:
+                 for i in range(len(st.session_state['generated'])):
+                     st.audio(st.session_state["past"][i])
+                     st.audio(st.session_state["generated"][i])
+
+
+ if __name__ == '__main__':
+     ui()
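Note: for anyone reading the chain wiring above in isolation, here is a minimal standalone sketch of the same LLMChain prompt/memory pattern, targeting the pre-0.1 LangChain API this commit uses. It assumes OPENAI_API_KEY is already exported; the sample question is invented for illustration.

# minimal sketch of the app's prompt/memory wiring (assumes OPENAI_API_KEY is set)
from langchain import LLMChain, PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferWindowMemory

template = """
Behave like a telecom customer-service call agent.
{history}
Me: {human_input}
Jack:
"""

chain = LLMChain(
    llm=ChatOpenAI(temperature=0.0, model_name="gpt-3.5-turbo"),
    prompt=PromptTemplate(input_variables=["history", "human_input"], template=template),
    memory=ConversationBufferWindowMemory(k=2),  # keeps only the last 2 exchanges
)

print(chain.predict(human_input="My internet has been down since morning."))  # hypothetical query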
helper.py ADDED
@@ -0,0 +1,75 @@
+ import os
+
+ import torch
+ import librosa
+ from huggingface_hub.hf_api import HfFolder
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ from transformers import MarianMTModel, MarianTokenizer
+ from elevenlabs import generate
+
+ # Read the Hugging Face token from the environment instead of hard-coding it in source control.
+ access_token = os.environ.get("HF_TOKEN", "")
+ if access_token:
+     HfFolder.save_token(access_token)
+
+ # Hindi -> English translation model
+ tokenizer_hi_en = AutoTokenizer.from_pretrained("vasudevgupta/mbart-bhasha-hin-eng")
+ model_translate_hi_en = AutoModelForSeq2SeqLM.from_pretrained("vasudevgupta/mbart-bhasha-hin-eng")
+
+ # Hindi speech-to-text model
+ processor = Wav2Vec2Processor.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
+ model_speech = Wav2Vec2ForCTC.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
+
+ # English -> Hindi translation model, loaded once at import rather than on every call
+ model_name_en_hi = "Helsinki-NLP/opus-mt-en-hi"
+ tokenizer_en_hi = MarianTokenizer.from_pretrained(model_name_en_hi)
+ model_translate_en_hi = MarianMTModel.from_pretrained(model_name_en_hi)
+
+
+ def parse_transcription(wav_file):
+     # load audio, resampled to the 16 kHz the model expects
+     audio_input, sample_rate = librosa.load(wav_file, sr=16000)
+
+     # pad input values and return a PyTorch tensor
+     input_values = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt").input_values
+
+     # inference: retrieve logits and take the argmax
+     with torch.no_grad():
+         logits = model_speech(input_values).logits
+     predicted_ids = torch.argmax(logits, dim=-1)
+
+     # transcribe
+     transcription = processor.decode(predicted_ids[0], skip_special_tokens=True)
+     return transcription
+
+
+ def hindi_to_english(text):
+     inputs = tokenizer_hi_en.encode(
+         text, return_tensors="pt", padding=True, max_length=512, truncation=True)
+     outputs = model_translate_hi_en.generate(
+         inputs, max_length=128, num_beams=4, early_stopping=True
+     )
+     translated = tokenizer_hi_en.decode(outputs[0], skip_special_tokens=True).strip().lower()
+     return translated
+
+
+ def translate_english_to_hindi(input_text):
+     # tokenize the input text and generate the translation
+     inputs = tokenizer_en_hi(input_text, return_tensors="pt", padding=True)
+     translated_ids = model_translate_en_hi.generate(inputs.input_ids)
+
+     # decode the translated output
+     translated_text = tokenizer_en_hi.decode(translated_ids[0], skip_special_tokens=True)
+     return translated_text
+
+
+ def hindi_tts(text):
+     # speak the actual reply; the original passed a hard-coded English demo string
+     # and the English-only eleven_monolingual_v1 model, so use the multilingual one
+     audio = generate(
+         text=text,
+         voice="Customer Service",
+         model="eleven_multilingual_v1"
+     )
+     return audio
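Note: taken together, these helpers form a Hindi-in, Hindi-out pipeline. A quick smoke test of the non-TTS stages could look like the sketch below; sample_hi.wav is a hypothetical local recording, and the three models download on first use.

# smoke test for the speech/translation helpers ("sample_hi.wav" is a made-up path)
from helper import parse_transcription, hindi_to_english, translate_english_to_hindi

hindi_text = parse_transcription("sample_hi.wav")       # Hindi speech -> Hindi text
english_text = hindi_to_english(hindi_text)             # Hindi -> English for the LLM
hindi_reply = translate_english_to_hindi(english_text)  # English -> Hindi round trip
print(hindi_text, english_text, hindi_reply, sep="\n")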
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ streamlit
+ streamlit-audiorecorder
+ langchain
+ openai
+ tiktoken
+ faiss-cpu
+ streamlit_chat
+ huggingface_hub
+ librosa
+ sentencepiece
+ elevenlabs
+ pydub
+ torch
+ torchaudio
+ transformers
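Note: the duplicate streamlit entry was dropped, and torch and transformers were added since both files import them directly. With these dependencies installed (pip install -r requirements.txt), the app should start with the standard streamlit run app.py from the repository root.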