feat: audio normalization
This commit is contained in:
parent
17520f90f9
commit
33e6f229d3
2 changed files with 9 additions and 1 deletions
|
|
@ -1,6 +1,6 @@
|
|||
[project]
|
||||
name = "speechd"
|
||||
version = "1.2.6"
|
||||
version = "1.2.7"
|
||||
description = "Speech-to-Text daemon with Groq Whisper API"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
|
|
|
|||
|
|
@ -47,8 +47,16 @@ class Transcriber:
|
|||
logger.error(f"Transcription failed: {e}")
|
||||
return TranscriptionResult(text="", success=False, error=str(e))
|
||||
|
||||
def _normalize_rms(self, audio: np.ndarray, target_rms: float = 0.1) -> np.ndarray:
|
||||
rms = np.sqrt(np.mean(audio**2))
|
||||
if rms > 1e-8:
|
||||
audio = audio * (target_rms / rms)
|
||||
np.clip(audio, -1.0, 1.0, out=audio)
|
||||
return audio
|
||||
|
||||
def _encode_opus(self, audio_data: np.ndarray) -> bytes:
|
||||
audio_float = audio_data.astype(np.float32) / 32768.0
|
||||
audio_float = self._normalize_rms(audio_float)
|
||||
buf = io.BytesIO()
|
||||
sf.write(
|
||||
buf,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue