1
0
Fork 0

feat: audio normalization

This commit is contained in:
mrsobakin 2026-02-24 16:49:04 +03:00
parent 17520f90f9
commit 33e6f229d3
No known key found for this signature in database
GPG key ID: 325CBF665E4FFD6E
2 changed files with 9 additions and 1 deletion

View file

@ -1,6 +1,6 @@
[project]
name = "speechd"
version = "1.2.6"
version = "1.2.7"
description = "Speech-to-Text daemon with Groq Whisper API"
readme = "README.md"
requires-python = ">=3.11"

View file

@ -47,8 +47,16 @@ class Transcriber:
logger.error(f"Transcription failed: {e}")
return TranscriptionResult(text="", success=False, error=str(e))
def _normalize_rms(self, audio: np.ndarray, target_rms: float = 0.1) -> np.ndarray:
rms = np.sqrt(np.mean(audio**2))
if rms > 1e-8:
audio = audio * (target_rms / rms)
np.clip(audio, -1.0, 1.0, out=audio)
return audio
def _encode_opus(self, audio_data: np.ndarray) -> bytes:
audio_float = audio_data.astype(np.float32) / 32768.0
audio_float = self._normalize_rms(audio_float)
buf = io.BytesIO()
sf.write(
buf,