diff --git a/pyproject.toml b/pyproject.toml
index 78c993f..9ac9ea0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "speechd"
-version = "1.2.6"
+version = "1.2.7"
 description = "Speech-to-Text daemon with Groq Whisper API"
 readme = "README.md"
 requires-python = ">=3.11"
diff --git a/src/speechd/transcribe.py b/src/speechd/transcribe.py
index a5ab261..d3d6eb0 100644
--- a/src/speechd/transcribe.py
+++ b/src/speechd/transcribe.py
@@ -47,8 +47,25 @@ class Transcriber:
             logger.error(f"Transcription failed: {e}")
             return TranscriptionResult(text="", success=False, error=str(e))
 
+    def _normalize_rms(self, audio: np.ndarray, target_rms: float = 0.1) -> np.ndarray:
+        """Scale ``audio`` to the RMS level ``target_rms``.
+
+        Empty input and input whose RMS is at most 1e-8 are returned
+        unchanged, which avoids dividing by a (near-)zero level. Scaled
+        samples are hard-clipped to [-1.0, 1.0].
+        """
+        if audio.size == 0:
+            return audio
+        # float64 accumulator: a float32 mean can lose precision on long buffers.
+        rms = float(np.sqrt(np.mean(np.square(audio), dtype=np.float64)))
+        if rms > 1e-8:
+            audio = audio * (target_rms / rms)
+            np.clip(audio, -1.0, 1.0, out=audio)
+        return audio
+
     def _encode_opus(self, audio_data: np.ndarray) -> bytes:
         audio_float = audio_data.astype(np.float32) / 32768.0
+        audio_float = self._normalize_rms(audio_float)
         buf = io.BytesIO()
         sf.write(
             buf,