j-tobias committed on
Commit
e1e27eb
1 Parent(s): 5282c8d

initial commit

Browse files
Files changed (2) hide show
  1. app.py +82 -0
  2. requirements.txt +79 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ # from gradio import ChatMessage
3
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
4
+ import numpy as np
5
+ import librosa
6
+ import json
7
+ import os
8
+
9
+
10
+ from huggingface_hub import InferenceClient
11
+
12
# Model identifiers used by the module-level clients below.
_CHAT_MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"
_WHISPER_CHECKPOINT = "openai/whisper-large-v2"

# Access token for the Hugging Face Inference API, read from the "HF_Token"
# environment variable (os.getenv yields None when it is unset).
hf_token = os.getenv("HF_Token")

# Alternative kept for reference: load the token from a local credentials file.
# def get_token():
#     with open("credentials.json","r") as f:
#         credentials = json.load(f)
#     return credentials['token']

# hf_token = get_token()

# Remote chat-completion client for the instruction-tuned Llama 3 model.
client = InferenceClient(_CHAT_MODEL_ID, token=hf_token)

# Whisper processor and model, both loaded from the same checkpoint at import time.
processor = WhisperProcessor.from_pretrained(_WHISPER_CHECKPOINT)
model = WhisperForConditionalGeneration.from_pretrained(_WHISPER_CHECKPOINT)
# Clear the forced decoder prompt ids stored in the model config.
model.config.forced_decoder_ids = None
31
+
32
def chat(audio, chat: list):
    """Transcribe a recording, append it to the chat and append the LLM reply.

    Args:
        audio: The value of the audio input component, forwarded unchanged to
            ``transcribe``. May be ``None`` when nothing was recorded
            (assumption about the event payload; guarded below).
        chat: Conversation history as a list of ``{'role', 'content'}`` dicts.
            It is extended in place.

    Returns:
        The same ``chat`` list, with the user transcription and the
        assistant response appended (or untouched when ``audio`` is ``None``).
    """
    # Without a recording there is nothing to transcribe; unpacking None in
    # transcribe() would raise, so return the history as-is.
    if audio is None:
        return chat

    transcription = transcribe(audio)
    chat.append({'role': 'user', 'content': transcription})

    # The whole history is sent so the model sees the prior turns.
    completion = client.chat_completion(
        messages=chat,
        max_tokens=500,
        stream=False,
    )
    response = completion.choices[0].message.content

    chat.append({'role': 'assistant', 'content': response})
    return chat
42
+
43
def transcribe(audio):
    """Transcribe a recorded clip to normalized text with the Whisper model.

    Args:
        audio: A ``(sample_rate, samples)`` pair. ``samples`` is a NumPy
            array, either 1-D or 2-D; a 2-D array is assumed to be laid out
            as (samples, channels) since channels are averaged over axis 1.

    Returns:
        The first decoded sequence after ``processor.tokenizer.normalize``.
    """
    sr, samples = audio

    # Integer PCM (e.g. int16) must be scaled into [-1, 1]; a bare cast to
    # float32 would keep the raw integer magnitudes.
    if np.issubdtype(samples.dtype, np.signedinteger):
        full_scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)

    # Downmix any multi-channel (2-D) input to mono. The previous check
    # (`len(shape) > 2`) could never be true for a 2-D array.
    if samples.ndim > 1:
        samples = np.mean(samples, axis=1)

    # The processor is called with sampling_rate=16000, so resample to match.
    samples = librosa.resample(samples, orig_sr=sr, target_sr=16000)

    input_features = processor(
        samples, sampling_rate=16000, return_tensors="pt"
    ).input_features
    predicted_ids = model.generate(input_features)

    decoded = processor.batch_decode(predicted_ids, skip_special_tokens=False)
    return processor.tokenizer.normalize(decoded[0])
54
+
55
# Build the UI: a chatbot seeded with the system prompt, and a microphone
# input whose stop-recording event drives one chat turn.
with gr.Blocks() as app:

    chatbot = gr.Chatbot(
        value=[{
            # Chat-message roles are lowercase; this history is forwarded
            # verbatim to the chat-completion API by chat().
            'role': 'system',
            'content': 'You are a helpfull assitant for an Audio based Chatbot. You are helping Users to order their notes and thoughts.'
        }],
        bubble_full_width=False,
        type="messages",
    )

    with gr.Row():

        audio_input = gr.Audio(
            sources=['microphone'],
            interactive=True,
            scale=8,
        )

        # mode_option = gr.Radio(
        #     choices=["online", "local"],
        #     scale=1
        # )

    # Event listener for when the audio recording stops
    audio_input.stop_recording(fn=chat, inputs=[audio_input, chatbot], outputs=chatbot)

app.launch()
requirements.txt ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ annotated-types==0.7.0
3
+ anyio==4.4.0
4
+ audioread==3.0.1
5
+ certifi==2024.7.4
6
+ cffi==1.17.0
7
+ charset-normalizer==3.3.2
8
+ click==8.1.7
9
+ contourpy==1.2.1
10
+ cycler==0.12.1
11
+ decorator==5.1.1
12
+ exceptiongroup==1.2.2
13
+ fastapi==0.112.1
14
+ ffmpy==0.4.0
15
+ filelock==3.15.4
16
+ fonttools==4.53.1
17
+ fsspec==2024.6.1
18
+ gradio==4.41.0
19
+ gradio_client==1.3.0
20
+ h11==0.14.0
21
+ httpcore==1.0.5
22
+ httpx==0.27.0
23
+ huggingface-hub==0.24.5
24
+ idna==3.7
25
+ importlib_resources==6.4.2
26
+ Jinja2==3.1.4
27
+ joblib==1.4.2
28
+ kiwisolver==1.4.5
29
+ lazy_loader==0.4
30
+ librosa==0.10.2.post1
31
+ llvmlite==0.43.0
32
+ markdown-it-py==3.0.0
33
+ MarkupSafe==2.1.5
34
+ matplotlib==3.9.2
35
+ mdurl==0.1.2
36
+ msgpack==1.0.8
37
+ numba==0.60.0
38
+ numpy==2.0.1
39
+ orjson==3.10.7
40
+ packaging==24.1
41
+ pandas==2.2.2
42
+ pillow==10.4.0
43
+ platformdirs==4.2.2
44
+ pooch==1.8.2
45
+ pycparser==2.22
46
+ pydantic==2.8.2
47
+ pydantic_core==2.20.1
48
+ pydub==0.25.1
49
+ Pygments==2.18.0
50
+ pyparsing==3.1.2
51
+ python-dateutil==2.9.0.post0
52
+ python-multipart==0.0.9
53
+ pytz==2024.1
54
+ PyYAML==6.0.2
55
+ regex==2024.7.24
56
+ requests==2.32.3
57
+ rich==13.7.1
58
+ ruff==0.6.0
59
+ safetensors==0.4.4
60
+ scikit-learn==1.5.1
61
+ scipy==1.14.0
62
+ semantic-version==2.10.0
63
+ shellingham==1.5.4
64
+ six==1.16.0
65
+ sniffio==1.3.1
66
+ soundfile==0.12.1
67
+ soxr==0.4.0
68
+ starlette==0.38.2
69
+ threadpoolctl==3.5.0
70
+ tokenizers==0.19.1
71
+ tomlkit==0.12.0
72
+ tqdm==4.66.5
73
+ transformers==4.44.0
74
+ typer==0.12.3
75
+ typing_extensions==4.12.2
76
+ tzdata==2024.1
77
+ urllib3==2.2.2
78
+ uvicorn==0.30.6
79
+ websockets==12.0