Spaces:

fedirz
/

faster-whisper-server-d424

Running on T4

faster-whisper-server-d424 / scripts /client.py

Fedir Zadniprovskyi

chore: misc changes

6eabeb2 3 months ago

2.54 kB

	import os
	from pathlib import Path
	import subprocess
	import threading
	import time

	import httpx
	import keyboard

	# NOTE: this is a very basic implementation. Not really meant for usage by others.
	# Included here in case someone wants to use it as a reference.

	# This script will run in the background and listen for a keybind to start recording audio.
	# It will then wait until the keybind is pressed again to stop recording.
	# The audio file will be sent to the server for transcription.
	# The transcription will be copied to the clipboard.
	# For a short recording of a couple of sentences, with inference running on a GPU, the response time is very fast (less than 2 seconds). # noqa: E501
	# Run this with `sudo -E python scripts/client.py`

	# Buffer size constant (4096). Not referenced anywhere else in the visible script.
	CHUNK = 1 << 12

	# ffmpeg command line used to record audio; the main loop appends "-y" and
	# the output file name before launching it.
	AUDIO_RECORD_CMD = [
	    "ffmpeg",
	    "-hide_banner",
	    # "-loglevel", "quiet",
	    "-f", "alsa",  # input format
	    "-i", "default",  # input device
	    "-f", "wav",  # output format
	]

	# Program that receives the transcription on stdin.
	COPY_TO_CLIPBOARD_CMD = "wl-copy"

	# Server location. The main loop posts to OPENAI_BASE_URL + TRANSCRIBE_PATH.
	# NOTE(review): the script only issues plain HTTP POST requests against this
	# URL - confirm that the `ws://` scheme is intentional.
	OPENAI_BASE_URL = "ws://localhost:8000/v1"
	# NOTE(review): `language=en` is also sent through REQUEST_KWARGS below.
	TRANSCRIBE_PATH = "/audio/transcriptions?language=en"

	# Account the ffmpeg subprocess is started as (passed to Popen's `user=`).
	USER = "nixos"

	# Timeout configuration handed to the HTTP client; built from `None`.
	TIMEOUT = httpx.Timeout(None)

	# Hotkey that starts a recording and, pressed again, stops it.
	KEYBIND = "ctrl+x"

	# Form fields sent together with the uploaded audio file.
	REQUEST_KWARGS = dict(
	    language="en",
	    response_format="text",
	    vad_filter=True,
	)

	# Recording target, relative to the current working directory.
	# HACK: I had a hard time trying to use a temporary file due to permissions issues
	file = Path("test.wav")

	# Event object; not referenced by the visible part of this script.
	is_running = threading.Event()

	# Shared HTTP client used for every transcription request.
	client = httpx.Client(timeout=TIMEOUT, base_url=OPENAI_BASE_URL)


	# Seconds to wait for ffmpeg to exit after terminate() before falling back to kill().
	STOP_TIMEOUT_SECONDS = 5

	# Main loop: wait for the keybind, record with ffmpeg until the keybind is
	# pressed again, upload the recording for transcription and copy the result
	# to the clipboard. Runs until the script is interrupted.
	while True:
	    keyboard.wait(KEYBIND)
	    print("Recording started")
	    # NOTE(review): stdout/stderr are pipes that are only drained by
	    # communicate() after the stop keybind; a very long recording could fill
	    # the pipe buffer and stall ffmpeg - consider `-nostats` or a log file.
	    process = subprocess.Popen(
	        [*AUDIO_RECORD_CMD, "-y", str(file.name)],
	        stdout=subprocess.PIPE,
	        stderr=subprocess.PIPE,
	        user=USER,
	        env=dict(os.environ),
	    )
	    keyboard.wait(KEYBIND)

	    # ffmpeg is expected to keep recording until it is told to stop, so if it
	    # has already exited by now the recording did not succeed.
	    recording_failed = process.poll() is not None

	    # Ask ffmpeg to stop with terminate() instead of kill() so that it gets a
	    # chance to finalize the WAV file; only force-kill if it does not exit.
	    process.terminate()
	    try:
	        stdout, stderr = process.communicate(timeout=STOP_TIMEOUT_SECONDS)
	    except subprocess.TimeoutExpired:
	        process.kill()
	        stdout, stderr = process.communicate()
	    if stdout or stderr:
	        print(f"stdout: {stdout}")
	        print(f"stderr: {stderr}")

	    # Do not upload a missing file or a stale recording left by an earlier run.
	    if recording_failed:
	        print(f"Recording failed: ffmpeg exited with code {process.returncode} before the stop keybind")
	        continue
	    if not file.exists():
	        print(f"Recording failed: {file} was not created")
	        continue
	    print(f"Recording finished. File size: {file.stat().st_size} bytes")

	    try:
	        with file.open("rb") as fd:
	            start = time.perf_counter()
	            res = client.post(
	                OPENAI_BASE_URL + TRANSCRIBE_PATH,
	                files={"file": fd},
	                data=REQUEST_KWARGS,
	            )
	        end = time.perf_counter()
	        # Turn 4xx/5xx responses into an exception so that an error body is
	        # never copied to the clipboard as if it were a transcription.
	        res.raise_for_status()
	    except httpx.ConnectError as e:
	        print(f"Couldn't connect to server: {e}")
	    except httpx.HTTPStatusError as e:
	        print(f"Server returned an error ({e.response.status_code}): {e.response.text}")
	    else:
	        print(f"Transcription took {end - start} seconds")
	        transcription = res.text
	        print(transcription)
	        subprocess.run([COPY_TO_CLIPBOARD_CMD], input=transcription.encode(), check=True)