feat: audio normalization

2026-02-24 16:49:04 +03:00 · 2026-02-24 16:49:04 +03:00 · 33e6f229d3
commit 33e6f229d3
parent 17520f90f9
2 changed files with 9 additions and 1 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "speechd"
-version = "1.2.6"
+version = "1.2.7"
 description = "Speech-to-Text daemon with Groq Whisper API"
 readme = "README.md"
 requires-python = ">=3.11"
--- a/src/speechd/transcribe.py
+++ b/src/speechd/transcribe.py
@ -47,8 +47,16 @@ class Transcriber:
            logger.error(f"Transcription failed: {e}")
            return TranscriptionResult(text="", success=False, error=str(e))

+    def _normalize_rms(self, audio: np.ndarray, target_rms: float = 0.1) -> np.ndarray:
+        """Scale audio to target_rms, clipped to [-1, 1]; silent or empty input is returned as is."""
+        # float64 accumulation: integer input cannot overflow; an empty buffer skips the NaN mean.
+        rms = float(np.sqrt(np.mean(np.square(audio, dtype=np.float64)))) if audio.size else 0.0
+        # At or below the 1e-8 floor the signal counts as silence and is not amplified.
+        return np.clip(audio * (target_rms / rms), -1.0, 1.0) if rms > 1e-8 else audio
+
    def _encode_opus(self, audio_data: np.ndarray) -> bytes:
        audio_float = audio_data.astype(np.float32) / 32768.0
+        audio_float = self._normalize_rms(audio_float)
        buf = io.BytesIO()
        sf.write(
            buf,