feat: audio normalization
This commit is contained in:
parent
17520f90f9
commit
33e6f229d3
2 changed files with 9 additions and 1 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
[project]
|
[project]
|
||||||
name = "speechd"
|
name = "speechd"
|
||||||
version = "1.2.6"
|
version = "1.2.7"
|
||||||
description = "Speech-to-Text daemon with Groq Whisper API"
|
description = "Speech-to-Text daemon with Groq Whisper API"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
|
|
|
||||||
|
|
@ -47,8 +47,16 @@ class Transcriber:
|
||||||
logger.error(f"Transcription failed: {e}")
|
logger.error(f"Transcription failed: {e}")
|
||||||
return TranscriptionResult(text="", success=False, error=str(e))
|
return TranscriptionResult(text="", success=False, error=str(e))
|
||||||
|
|
||||||
|
def _normalize_rms(self, audio: np.ndarray, target_rms: float = 0.1) -> np.ndarray:
|
||||||
|
rms = np.sqrt(np.mean(audio**2))
|
||||||
|
if rms > 1e-8:
|
||||||
|
audio = audio * (target_rms / rms)
|
||||||
|
np.clip(audio, -1.0, 1.0, out=audio)
|
||||||
|
return audio
|
||||||
|
|
||||||
def _encode_opus(self, audio_data: np.ndarray) -> bytes:
|
def _encode_opus(self, audio_data: np.ndarray) -> bytes:
|
||||||
audio_float = audio_data.astype(np.float32) / 32768.0
|
audio_float = audio_data.astype(np.float32) / 32768.0
|
||||||
|
audio_float = self._normalize_rms(audio_float)
|
||||||
buf = io.BytesIO()
|
buf = io.BytesIO()
|
||||||
sf.write(
|
sf.write(
|
||||||
buf,
|
buf,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue