1
0
Fork 0

feat: configurable audio quality

This commit is contained in:
mrsobakin 2026-02-24 13:21:28 +03:00
parent 1e5654e591
commit 17520f90f9
No known key found for this signature in database
GPG key ID: 325CBF665E4FFD6E
4 changed files with 10 additions and 3 deletions

View file

@ -1,6 +1,6 @@
[project]
name = "speechd"
version = "1.2.5"
version = "1.2.6"
description = "Speech-to-Text daemon with Groq Whisper API"
readme = "README.md"
requires-python = ">=3.11"

View file

@ -7,6 +7,7 @@ model = "whisper-large-v3-turbo"
language = "ru"
sample_rate = 16000
timeout = 300
audio_quality = 0.8
"""
@ -18,6 +19,7 @@ class Config:
sample_rate: int
timeout_seconds: int
runtime_dir: str
audio_quality: float
@classmethod
def load(cls) -> "Config":
@ -47,4 +49,5 @@ class Config:
sample_rate=data.get("sample_rate", 16000),
timeout_seconds=data.get("timeout", 300),
runtime_dir=os.environ.get("XDG_RUNTIME_DIR", "/tmp"),
audio_quality=data.get("audio_quality", 0.8),
)

View file

@ -28,6 +28,7 @@ class SpeechDaemon:
model=config.model,
language=config.language,
sample_rate=config.sample_rate,
audio_quality=config.audio_quality,
)
self.recording = False
self.frames: list[np.ndarray] = []

View file

@ -17,11 +17,14 @@ class TranscriptionResult:
class Transcriber:
def __init__(self, api_key: str, model: str, language: str, sample_rate: int):
def __init__(
self, api_key: str, model: str, language: str, sample_rate: int, audio_quality: float
):
self.client = Groq(api_key=api_key)
self.model = model
self.language = language
self.sample_rate = sample_rate
self.audio_quality = audio_quality
def transcribe(self, audio_data: np.ndarray) -> TranscriptionResult:
if len(audio_data) == 0:
@ -53,6 +56,6 @@ class Transcriber:
self.sample_rate,
format="OGG",
subtype="OPUS",
compression_level=0.8,
compression_level=self.audio_quality,
)
return buf.getvalue()