Spaces:
Running
on
Zero
Running
on
Zero
| # server.py remains the same as before | |
| # Updated client.py | |
| import asyncio | |
| import websockets | |
| import sounddevice as sd | |
| import numpy as np | |
| import base64 | |
| import queue | |
| import argparse | |
| import requests | |
| import time | |
class AudioClient:
    """Stream microphone audio to a WebSocket server and play back its replies.

    Microphone blocks are captured by a sounddevice input callback, converted
    to int16 and queued.  ``process_audio`` sends each queued block to the
    server's ``/audio`` WebSocket endpoint as a base64 ``data:`` URI, waits
    for the reply, decodes it as int16 samples and queues it for the output
    callback to play.

    Args:
        server_url: WebSocket base URL of the server (``ws://`` or ``wss://``).
        token_temp: Optional ``token_temp`` value sent to ``/set_temperature``.
        categorical_temp: Optional ``categorical_temp`` value sent likewise.
        gaussian_temp: Optional ``gaussian_temp`` value sent likewise.
        sample_rate: Sample rate in Hz used for both audio streams.
        channels: Number of channels used for both audio streams and for
            reshaping the audio received from the server.
        blocksize: Number of frames per audio callback block.
    """

    # How long process_audio sleeps when no microphone block is queued.
    # Sleeping (instead of spinning) yields control to the event loop.
    _IDLE_POLL_SECONDS = 0.005

    # Upper bound for the HTTP temperature request so it cannot hang forever.
    _HTTP_TIMEOUT_SECONDS = 10

    def __init__(self, server_url="ws://localhost:8000", token_temp=None,
                 categorical_temp=None, gaussian_temp=None,
                 sample_rate=16000, channels=1, blocksize=2000):
        server_url = server_url.rstrip("/")

        # Derive the HTTP base URL from the WebSocket URL, keeping TLS-ness:
        # wss:// -> https://, ws:// -> http://; anything else is left as is.
        if server_url.startswith("wss://"):
            self.base_url = "https://" + server_url[len("wss://"):]
        elif server_url.startswith("ws://"):
            self.base_url = "http://" + server_url[len("ws://"):]
        else:
            self.base_url = server_url
        self.server_url = f"{server_url}/audio"

        self.sample_rate = sample_rate
        self.channels = channels
        self.blocksize = blocksize

        # Set explicitly so stop()/process_audio() are safe before start().
        self.running = False

        # Thread-safe queues bridging the audio callbacks and the async loop.
        self.audio_queue = queue.Queue()
        self.output_queue = queue.Queue()

        # Set temperatures if provided
        if any(t is not None for t in [token_temp, categorical_temp, gaussian_temp]):
            self.set_temperature_and_echo(token_temp, categorical_temp, gaussian_temp)

    def set_temperature_and_echo(self, token_temp=None, categorical_temp=None, gaussian_temp=None, echo_testing = False):
        """Send temperature settings to the server and print its message.

        Only the temperatures that are not ``None`` are sent, as query
        parameters of a POST to ``<base_url>/set_temperature``.

        Args:
            token_temp: Value for the ``token_temp`` parameter, or ``None``.
            categorical_temp: Value for ``categorical_temp``, or ``None``.
            gaussian_temp: Value for ``gaussian_temp``, or ``None``.
            echo_testing: Accepted for interface compatibility; it is not
                currently transmitted to the server.
        """
        candidates = {
            'token_temp': token_temp,
            'categorical_temp': categorical_temp,
            'gaussian_temp': gaussian_temp,
        }
        params = {key: value for key, value in candidates.items() if value is not None}
        response = requests.post(
            f"{self.base_url}/set_temperature",
            params=params,
            timeout=self._HTTP_TIMEOUT_SECONDS,
        )
        print(response.json()['message'])

    def audio_callback(self, indata, frames, time, status):
        """Input-stream callback: queue one recorded block as int16 samples.

        Args:
            indata: Recorded float samples for this block.
            frames: Number of frames in the block (unused).
            time: Timing information supplied by the stream (unused).
            status: Stream status flags; printed when set.
        """
        if status:
            print(status)
        # Convert float32 to int16 for efficient transmission.  Clipping first
        # keeps out-of-range samples from wrapping around in the int16 cast.
        indata_int16 = (np.clip(indata, -1.0, 1.0) * 32767).astype(np.int16)
        self.audio_queue.put(indata_int16)

    def output_stream_callback(self, outdata, frames, time, status):
        """Output-stream callback: fill ``outdata`` with the next queued block.

        A queued block shorter than ``outdata`` is zero-padded; a longer one
        is truncated to fit.  When nothing is queued, silence is written.

        Args:
            outdata: Output buffer to fill in place.
            frames: Number of frames requested (unused).
            time: Timing information supplied by the stream (unused).
            status: Stream status flags; printed when set.
        """
        if status:
            print(status)
        try:
            data = self.output_queue.get_nowait()
        except queue.Empty:
            outdata.fill(0)
            return
        # Scale int16 samples back to floats for playback.
        data = data.astype(np.float32) / 32767.0
        if len(data) < len(outdata):
            outdata[:len(data)] = data
            outdata[len(data):] = 0
        else:
            outdata[:] = data[:len(outdata)]

    async def process_audio(self):
        """Exchange audio blocks with the server until ``running`` is cleared.

        Each queued microphone block is sent as
        ``data:audio/raw;base64,<payload>``; the reply's payload (the text
        after the first comma) is base64-decoded as int16 samples, reshaped
        to ``(-1, channels)`` and queued for playback.
        """
        async with websockets.connect(self.server_url) as ws:
            while self.running:
                try:
                    audio_data = self.audio_queue.get_nowait()
                except queue.Empty:
                    # Nothing recorded yet: yield to the event loop instead of
                    # busy-spinning, so the websocket can service keepalives.
                    await asyncio.sleep(self._IDLE_POLL_SECONDS)
                    continue

                print(f'Data from microphone:{audio_data.shape, audio_data.dtype, audio_data.min(), audio_data.max()}')
                # Convert to base64
                audio_b64 = base64.b64encode(audio_data.tobytes()).decode('utf-8')

                # Send to server and wait for the processed block
                time_sent = time.time()
                await ws.send(f"data:audio/raw;base64,{audio_b64}")
                response = await ws.recv()
                response = response.split(",", 1)[1]
                time_received = time.time()
                print(f"Data sent: {audio_b64[:10]}. Data received: {response[:10]}. Received in {(time_received - time_sent) * 1000:.2f} ms")

                processed_audio = np.frombuffer(
                    base64.b64decode(response),
                    dtype=np.int16
                ).reshape(-1, self.channels)
                print(f'Data from model:{processed_audio.shape, processed_audio.dtype, processed_audio.min(), processed_audio.max()}')
                self.output_queue.put(processed_audio)

    def start(self):
        """List audio devices, prompt for a choice, then run the streaming loop.

        Blocks until ``process_audio`` returns or raises.  Both audio streams
        stay open for the duration of the loop.
        """
        self.running = True

        # Print audio device information
        devices = sd.query_devices()
        default_input = sd.query_devices(kind='input')
        default_output = sd.query_devices(kind='output')
        print("\nAudio Device Configuration:")
        print("-" * 50)
        print(f"Default Input Device:\n{default_input}\n")
        print(f"Default Output Device:\n{default_output}\n")
        print("\nAll Available Devices:")
        print("-" * 50)
        for i, device in enumerate(devices):
            print(f"Device {i}:")
            print(f"Name: {device['name']}")
            print(f"Channels (in/out): {device['max_input_channels']}/{device['max_output_channels']}")
            print(f"Sample Rates: {device['default_samplerate']}")
            print()

        input_device = input("Enter the index of the input device or press enter for default: ")
        output_device = input("Enter the index of the output device or press enter for default: ")
        # An empty answer selects the corresponding default device.
        if input_device == "":
            input_device = default_input['index']
        if output_device == "":
            output_device = default_output['index']

        with sd.InputStream(callback=self.audio_callback,
                            channels=self.channels,
                            samplerate=self.sample_rate,
                            device=int(input_device),
                            blocksize=self.blocksize), \
             sd.OutputStream(callback=self.output_stream_callback,
                             channels=self.channels,
                             samplerate=self.sample_rate,
                             blocksize=self.blocksize,
                             device=int(output_device)):
            asyncio.run(self.process_audio())

    def stop(self):
        """Ask the streaming loop to exit after its current iteration."""
        self.running = False
if __name__ == "__main__":
    # Audio settings (module-level: the client reads these names)
    SAMPLE_RATE = 16000
    CHANNELS = 1

    # Command-line options: three optional float temperatures plus the
    # server URL.
    cli = argparse.ArgumentParser(description='Audio Client with Temperature Control')
    temperature_options = (
        ('--token_temp', '-t1', 'Token (LM) temperature parameter'),
        ('--categorical_temp', '-t2', 'Categorical (VAE) temperature parameter'),
        ('--gaussian_temp', '-t3', 'Gaussian (VAE) temperature parameter'),
    )
    for long_flag, short_flag, description in temperature_options:
        cli.add_argument(long_flag, short_flag, type=float, help=description)
    cli.add_argument('--server', '-s', default="ws://localhost:8000",
                     help='Server URL (default: ws://localhost:8000)')
    args = cli.parse_args()

    temperatures = {
        name: getattr(args, name)
        for name in ('token_temp', 'categorical_temp', 'gaussian_temp')
    }
    client = AudioClient(server_url=args.server, **temperatures)

    # Ctrl+C clears the client's running flag.
    try:
        client.start()
    except KeyboardInterrupt:
        client.stop()