From 17520f90f9f841c492e4c6e555cb625b9de285fd Mon Sep 17 00:00:00 2001 From: mrsobakin <68982655+mrsobakin@users.noreply.github.com> Date: Tue, 24 Feb 2026 13:21:28 +0300 Subject: [PATCH] feat: configurable audio quality --- pyproject.toml | 2 +- src/speechd/config.py | 3 +++ src/speechd/daemon.py | 1 + src/speechd/transcribe.py | 7 +++++-- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 56d6690..78c993f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "speechd" -version = "1.2.5" +version = "1.2.6" description = "Speech-to-Text daemon with Groq Whisper API" readme = "README.md" requires-python = ">=3.11" diff --git a/src/speechd/config.py b/src/speechd/config.py index 513340e..b63abe0 100644 --- a/src/speechd/config.py +++ b/src/speechd/config.py @@ -7,6 +7,7 @@ model = "whisper-large-v3-turbo" language = "ru" sample_rate = 16000 timeout = 300 +audio_quality = 0.8 """ @@ -18,6 +19,7 @@ class Config: sample_rate: int timeout_seconds: int runtime_dir: str + audio_quality: float @classmethod def load(cls) -> "Config": @@ -47,4 +49,5 @@ class Config: sample_rate=data.get("sample_rate", 16000), timeout_seconds=data.get("timeout", 300), runtime_dir=os.environ.get("XDG_RUNTIME_DIR", "/tmp"), + audio_quality=data.get("audio_quality", 0.8), ) diff --git a/src/speechd/daemon.py b/src/speechd/daemon.py index 97852ca..53670b0 100644 --- a/src/speechd/daemon.py +++ b/src/speechd/daemon.py @@ -28,6 +28,7 @@ class SpeechDaemon: model=config.model, language=config.language, sample_rate=config.sample_rate, + audio_quality=config.audio_quality, ) self.recording = False self.frames: list[np.ndarray] = [] diff --git a/src/speechd/transcribe.py b/src/speechd/transcribe.py index a892570..a5ab261 100644 --- a/src/speechd/transcribe.py +++ b/src/speechd/transcribe.py @@ -17,11 +17,14 @@ class TranscriptionResult: class Transcriber: - def __init__(self, api_key: str, model: str, language: str, sample_rate: int): + def __init__( + self, api_key: str, model: str, language: str, sample_rate: int, audio_quality: float + ): self.client = Groq(api_key=api_key) self.model = model self.language = language self.sample_rate = sample_rate + self.audio_quality = audio_quality def transcribe(self, audio_data: np.ndarray) -> TranscriptionResult: if len(audio_data) == 0: @@ -53,6 +56,6 @@ class Transcriber: self.sample_rate, format="OGG", subtype="OPUS", - compression_level=0.8, + compression_level=self.audio_quality, ) return buf.getvalue()