Delete speech_to_text.py
Browse files- speech_to_text.py +0 -76
speech_to_text.py
DELETED
@@ -1,76 +0,0 @@
"""Live microphone transcription with Vosk, gated by voice commands.

Reads 16 kHz mono audio from the default input device and prints
transcriptions only while "listening" mode is on. Saying
"start listening" / "stop listening" toggles the mode. Ctrl-C exits.

Requires `vosk_model_path` to be set (via a .env file or the
environment) and pointing at an unpacked Vosk model directory.
"""

import json
import os
import sys

import pyaudio
from dotenv import load_dotenv
from vosk import Model, KaldiRecognizer

# The recognizer and the audio stream MUST agree on these values,
# so define them once instead of repeating the magic numbers.
SAMPLE_RATE = 16000      # Hz; matches what KaldiRecognizer is told to expect
CHUNK_FRAMES = 4000      # frames per read (~250 ms at 16 kHz)

# Load environment variables from .env file
load_dotenv()

# Get the Vosk model path from the environment variable
vosk_model_path = os.getenv("vosk_model_path")

if not vosk_model_path:
    print("Error: vosk_model_path is not set in the .env file.")
    # sys.exit(1) instead of bare exit(): exit() comes from the site module
    # (not guaranteed present) and would report success (code 0) on failure.
    sys.exit(1)

# Initialize the Vosk model
try:
    model = Model(vosk_model_path)
    print("Vosk model loaded successfully.")
except Exception as e:
    print(f"Failed to load Vosk model: {e}")
    sys.exit(1)

# Initialize recognizer and audio input
recognizer = KaldiRecognizer(model, SAMPLE_RATE)
audio = pyaudio.PyAudio()

# Open audio stream (16-bit mono, matching the recognizer's sample rate)
stream = audio.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=SAMPLE_RATE,
                    input=True,
                    frames_per_buffer=CHUNK_FRAMES)
stream.start_stream()

print("Say 'start listening' to begin transcription and 'stop listening' to stop.")

# State management: transcription output is suppressed until enabled by voice.
is_listening = False

try:
    while True:
        data = stream.read(CHUNK_FRAMES, exception_on_overflow=False)

        if recognizer.AcceptWaveform(data):
            # Finalized utterance: Result() is a JSON blob with a "text" key.
            text = json.loads(recognizer.Result())["text"]
            lowered = text.lower()  # lowercase once for both command checks

            # Check for commands to start or stop listening
            if "start listening" in lowered:
                is_listening = True
                print("Listening started. Speak into the microphone.")
                continue
            if "stop listening" in lowered:
                is_listening = False
                print("Listening stopped. Say 'start listening' to resume.")
                continue

            # Transcribe if actively listening
            if is_listening:
                print(f"Transcription: {text}")
        else:
            # In-progress utterance: PartialResult() carries a "partial" key.
            chunk_text = json.loads(recognizer.PartialResult())["partial"]

            # Display partial transcription only if actively listening;
            # \r overwrites the line so partials update in place.
            if is_listening and chunk_text:
                print(f"chunk: {chunk_text}", end="\r")
except KeyboardInterrupt:
    print("\nExiting...")
finally:
    # Bug fix: the original released the audio resources only in the
    # KeyboardInterrupt handler, so any other exception (e.g. a device
    # error mid-read) leaked the open stream and the PyAudio instance.
    # `finally` guarantees cleanup on every exit path.
    stream.stop_stream()
    stream.close()
    audio.terminate()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|