feat: configurable audio quality

2026-02-24 13:21:28 +03:00 · 2026-02-24 13:21:28 +03:00 · 17520f90f9
commit 17520f90f9
parent 1e5654e591
4 changed files with 10 additions and 3 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "speechd"
-version = "1.2.5"
+version = "1.2.6"
 description = "Speech-to-Text daemon with Groq Whisper API"
 readme = "README.md"
 requires-python = ">=3.11"
--- a/src/speechd/config.py
+++ b/src/speechd/config.py
@@ -7,6 +7,7 @@ model = "whisper-large-v3-turbo"
 language = "ru"
 sample_rate = 16000
 timeout = 300
+audio_quality = 0.8
 """


@@ -18,6 +19,7 @@ class Config:
    sample_rate: int
    timeout_seconds: int
    runtime_dir: str
+    audio_quality: float

    @classmethod
    def load(cls) -> "Config":
@@ -47,4 +49,5 @@ class Config:
            sample_rate=data.get("sample_rate", 16000),
            timeout_seconds=data.get("timeout", 300),
            runtime_dir=os.environ.get("XDG_RUNTIME_DIR", "/tmp"),
+            audio_quality=data.get("audio_quality", 0.8),
        )
--- a/src/speechd/daemon.py
+++ b/src/speechd/daemon.py
@@ -28,6 +28,7 @@ class SpeechDaemon:
            model=config.model,
            language=config.language,
            sample_rate=config.sample_rate,
+            audio_quality=config.audio_quality,
        )
        self.recording = False
        self.frames: list[np.ndarray] = []
--- a/src/speechd/transcribe.py
+++ b/src/speechd/transcribe.py
@@ -17,11 +17,14 @@ class TranscriptionResult:


 class Transcriber:
-    def __init__(self, api_key: str, model: str, language: str, sample_rate: int):
+    def __init__(
+        self, api_key: str, model: str, language: str, sample_rate: int, audio_quality: float
+    ):
        self.client = Groq(api_key=api_key)
        self.model = model
        self.language = language
        self.sample_rate = sample_rate
+        self.audio_quality = audio_quality

    def transcribe(self, audio_data: np.ndarray) -> TranscriptionResult:
        if len(audio_data) == 0:
@@ -53,6 +56,6 @@ class Transcriber:
            self.sample_rate,
            format="OGG",
            subtype="OPUS",
-            compression_level=0.8,
+            compression_level=self.audio_quality,
        )
        return buf.getvalue()