From 33e6f229d348779aab623f518148c30079ecbe36 Mon Sep 17 00:00:00 2001
From: mrsobakin <68982655+mrsobakin@users.noreply.github.com>
Date: Tue, 24 Feb 2026 16:49:04 +0300
Subject: [PATCH] feat: audio normalization

---
 pyproject.toml            | 2 +-
 src/speechd/transcribe.py | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 78c993f..9ac9ea0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "speechd"
-version = "1.2.6"
+version = "1.2.7"
 description = "Speech-to-Text daemon with Groq Whisper API"
 readme = "README.md"
 requires-python = ">=3.11"
diff --git a/src/speechd/transcribe.py b/src/speechd/transcribe.py
index a5ab261..d3d6eb0 100644
--- a/src/speechd/transcribe.py
+++ b/src/speechd/transcribe.py
@@ -47,8 +47,16 @@ class Transcriber:
             logger.error(f"Transcription failed: {e}")
             return TranscriptionResult(text="", success=False, error=str(e))
 
+    def _normalize_rms(self, audio: np.ndarray, target_rms: float = 0.1) -> np.ndarray:
+        rms = np.sqrt(np.mean(audio**2))  # current root-mean-square level of the signal
+        if rms > 1e-8:  # skip (near-)silent input: avoids dividing by a ~zero level
+            audio = audio * (target_rms / rms)  # gain that brings the RMS to target_rms; yields a new array
+            np.clip(audio, -1.0, 1.0, out=audio)  # clamp the scaled copy in place to [-1.0, 1.0]
+        return audio  # the input array itself when the RMS check above did not pass
+
     def _encode_opus(self, audio_data: np.ndarray) -> bytes:
         audio_float = audio_data.astype(np.float32) / 32768.0
+        audio_float = self._normalize_rms(audio_float)
         buf = io.BytesIO()
         sf.write(
             buf,