File size: 6,791 Bytes
2e6f087
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# server.py remains the same as before

# Updated client.py
import asyncio
import websockets
import sounddevice as sd
import numpy as np
import base64
import queue
import argparse
import requests
import time

class AudioClient:
    """Stream microphone audio to a WebSocket server and play back its replies.

    Recorded blocks are converted to int16, base64-encoded and sent to
    ``<server_url>/audio``; each reply is decoded and queued for the output
    stream.

    NOTE: ``process_audio`` and ``start`` read the module-level names
    ``SAMPLE_RATE`` and ``CHANNELS``; they must be defined before either
    method is called.
    """

    # Pause between polls of the microphone queue while it is empty, so the
    # event loop can run other tasks instead of spinning.
    _IDLE_POLL_SECONDS = 0.005

    # Upper bound (seconds) for the temperature HTTP request.
    _HTTP_TIMEOUT_SECONDS = 10

    def __init__(self, server_url="ws://localhost:8000", token_temp=None, categorical_temp=None, gaussian_temp=None):
        """Create the client and optionally push temperature settings.

        Args:
            server_url: WebSocket base URL (``ws://`` or ``wss://``).
            token_temp: Optional value sent as ``token_temp``.
            categorical_temp: Optional value sent as ``categorical_temp``.
            gaussian_temp: Optional value sent as ``gaussian_temp``.

        If at least one temperature is not ``None`` an HTTP request is made
        to the server during construction.
        """
        # Map the WebSocket scheme onto the matching HTTP scheme. The secure
        # variant is handled explicitly because replacing "ws://" does not
        # match inside "wss://".
        if server_url.startswith("wss://"):
            self.base_url = "https://" + server_url[len("wss://"):]
        else:
            self.base_url = server_url.replace("ws://", "http://")
        self.server_url = f"{server_url}/audio"

        # Defined up front so process_audio()/stop() are safe before start().
        self.running = False

        # Initialize queues: microphone blocks in, server replies out.
        self.audio_queue = queue.Queue()
        self.output_queue = queue.Queue()

        # Set temperatures if provided
        if any(t is not None for t in (token_temp, categorical_temp, gaussian_temp)):
            self.set_temperature_and_echo(token_temp, categorical_temp, gaussian_temp)

    def set_temperature_and_echo(self, token_temp=None, categorical_temp=None, gaussian_temp=None, echo_testing = False):
        """Send temperature settings to the server and print its message.

        Only the temperatures that are not ``None`` are included as query
        parameters of the POST to ``<base_url>/set_temperature``.

        Args:
            token_temp: Optional value sent as ``token_temp``.
            categorical_temp: Optional value sent as ``categorical_temp``.
            gaussian_temp: Optional value sent as ``gaussian_temp``.
            echo_testing: Accepted for interface compatibility; this method
                does not currently use it.
        """
        candidates = {
            'token_temp': token_temp,
            'categorical_temp': categorical_temp,
            'gaussian_temp': gaussian_temp,
        }
        params = {name: value for name, value in candidates.items() if value is not None}

        # A timeout keeps an unreachable server from hanging the caller forever.
        response = requests.post(
            f"{self.base_url}/set_temperature",
            params=params,
            timeout=self._HTTP_TIMEOUT_SECONDS,
        )
        print(response.json()['message'])

    def audio_callback(self, indata, frames, time, status):
        """Input-stream callback: queue one recorded block as int16.

        The ``time`` parameter shadows the ``time`` module inside this
        method only; the name is kept so the signature is unchanged.
        """
        if status:
            print(status)
        # Clip before scaling: a float sample outside [-1, 1] would otherwise
        # wrap around when cast to int16. np.clip returns a new array, so the
        # caller's buffer is not modified or retained.
        indata_int16 = (np.clip(indata, -1.0, 1.0) * 32767).astype(np.int16)
        self.audio_queue.put(indata_int16)

    def output_stream_callback(self, outdata, frames, time, status):
        """Output-stream callback: fill ``outdata`` from the reply queue.

        Writes silence when no reply is queued, zero-pads a reply shorter
        than ``outdata`` and truncates a longer one.
        """
        if status:
            print(status)

        try:
            data = self.output_queue.get_nowait()
        except queue.Empty:
            outdata.fill(0)
            return

        # Back from int16 to float for the output buffer.
        data = data.astype(np.float32) / 32767.0
        if len(data) < len(outdata):
            outdata[:len(data)] = data
            outdata[len(data):] = 0
        else:
            outdata[:] = data[:len(outdata)]

    async def process_audio(self):
        """Exchange audio blocks with the server until ``running`` is false.

        For each queued microphone block: send it as a base64 data URI, wait
        for one reply, decode it to an int16 array of ``CHANNELS`` columns
        and queue it for playback.
        """
        async with websockets.connect(self.server_url) as ws:
            while self.running:
                try:
                    # Get recorded audio
                    audio_data = self.audio_queue.get_nowait()
                except queue.Empty:
                    # Yield to the event loop instead of busy-waiting; a
                    # loop with no await here would starve every other task
                    # on the loop.
                    await asyncio.sleep(self._IDLE_POLL_SECONDS)
                    continue

                print(f'Data from microphone:{audio_data.shape, audio_data.dtype, audio_data.min(), audio_data.max()}')

                # Convert to base64
                audio_b64 = base64.b64encode(audio_data.tobytes()).decode('utf-8')

                # Send to server; a monotonic clock is used for the latency
                # measurement so system clock adjustments cannot skew it.
                time_sent = time.perf_counter()
                await ws.send(f"data:audio/raw;base64,{audio_b64}")

                # Receive processed audio; the payload follows the first comma.
                response = await ws.recv()
                response = response.split(",", 1)[1]
                time_received = time.perf_counter()
                print(f"Data sent: {audio_b64[:10]}. Data received: {response[:10]}. Received in {(time_received - time_sent) * 1000:.2f} ms")
                processed_audio = np.frombuffer(
                    base64.b64decode(response),
                    dtype=np.int16
                ).reshape(-1, CHANNELS)
                print(f'Data from model:{processed_audio.shape, processed_audio.dtype, processed_audio.min(), processed_audio.max()}')

                self.output_queue.put(processed_audio)

    def start(self):
        """List audio devices, prompt for a choice, and run the streaming loop.

        Blocks until ``process_audio`` returns or raises. Both streams are
        opened with ``SAMPLE_RATE``, ``CHANNELS`` and a block size of 2000.
        """
        self.running = True
        # Print audio device information
        devices = sd.query_devices()
        default_input = sd.query_devices(kind='input')
        default_output = sd.query_devices(kind='output')

        print("\nAudio Device Configuration:")
        print("-" * 50)
        print(f"Default Input Device:\n{default_input}\n")
        print(f"Default Output Device:\n{default_output}\n") 
        print("\nAll Available Devices:")
        print("-" * 50)
        for i, device in enumerate(devices):
            print(f"Device {i}:")
            print(f"Name: {device['name']}")
            print(f"Channels (in/out): {device['max_input_channels']}/{device['max_output_channels']}")
            print(f"Sample Rates: {device['default_samplerate']}")
            print()
        input_device = input("Enter the index of the input device or press enter for default: ")
        output_device = input("Enter the index of the output device or press enter for default: ")
        # An empty answer selects the default device reported above.
        if input_device == "":
            input_device = default_input['index']
        if output_device == "":
            output_device = default_output['index']
        with sd.InputStream(callback=self.audio_callback,
                          channels=CHANNELS,
                          samplerate=SAMPLE_RATE,
                          device=int(input_device),
                          blocksize=2000), \
             sd.OutputStream(callback=self.output_stream_callback,
                           channels=CHANNELS,
                           samplerate=SAMPLE_RATE,
                           blocksize=2000,
                           device=int(output_device)):

            asyncio.run(self.process_audio())

    def stop(self):
        """Ask the ``process_audio`` loop to exit at its next iteration."""
        self.running = False

if __name__ == "__main__":
    # Audio settings. These are module-level names on purpose: AudioClient
    # reads SAMPLE_RATE and CHANNELS as globals when opening its streams.
    SAMPLE_RATE = 16000
    CHANNELS = 1

    # Command-line interface: three optional float temperatures plus the
    # server address.
    parser = argparse.ArgumentParser(description='Audio Client with Temperature Control')
    temperature_flags = (
        ('--token_temp', '-t1', 'Token (LM) temperature parameter'),
        ('--categorical_temp', '-t2', 'Categorical (VAE) temperature parameter'),
        ('--gaussian_temp', '-t3', 'Gaussian (VAE) temperature parameter'),
    )
    for long_flag, short_flag, help_text in temperature_flags:
        parser.add_argument(long_flag, short_flag, type=float, help=help_text)
    parser.add_argument(
        '--server',
        '-s',
        default="ws://localhost:8000",
        help='Server URL (default: ws://localhost:8000)',
    )

    args = parser.parse_args()

    client_options = {
        'server_url': args.server,
        'token_temp': args.token_temp,
        'categorical_temp': args.categorical_temp,
        'gaussian_temp': args.gaussian_temp,
    }
    client = AudioClient(**client_options)

    # Ctrl+C while streaming clears the client's running flag.
    try:
        client.start()
    except KeyboardInterrupt:
        client.stop()