feat: configurable audio quality
This commit is contained in:
parent
1e5654e591
commit
17520f90f9
4 changed files with 10 additions and 3 deletions
|
|
@@ -1,6 +1,6 @@
|
|||
[project]
|
||||
name = "speechd"
|
||||
version = "1.2.5"
|
||||
version = "1.2.6"
|
||||
description = "Speech-to-Text daemon with Groq Whisper API"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
|
|
|
|||
|
|
@@ -7,6 +7,7 @@ model = "whisper-large-v3-turbo"
|
|||
language = "ru"
|
||||
sample_rate = 16000
|
||||
timeout = 300
|
||||
audio_quality = 0.8
|
||||
"""
|
||||
|
||||
|
||||
|
|
@@ -18,6 +19,7 @@ class Config:
|
|||
sample_rate: int
|
||||
timeout_seconds: int
|
||||
runtime_dir: str
|
||||
audio_quality: float
|
||||
|
||||
@classmethod
|
||||
def load(cls) -> "Config":
|
||||
|
|
@@ -47,4 +49,5 @@ class Config:
|
|||
sample_rate=data.get("sample_rate", 16000),
|
||||
timeout_seconds=data.get("timeout", 300),
|
||||
runtime_dir=os.environ.get("XDG_RUNTIME_DIR", "/tmp"),
|
||||
audio_quality=data.get("audio_quality", 0.8),
|
||||
)
|
||||
|
|
|
|||
|
|
@@ -28,6 +28,7 @@ class SpeechDaemon:
|
|||
model=config.model,
|
||||
language=config.language,
|
||||
sample_rate=config.sample_rate,
|
||||
audio_quality=config.audio_quality,
|
||||
)
|
||||
self.recording = False
|
||||
self.frames: list[np.ndarray] = []
|
||||
|
|
|
|||
|
|
@@ -17,11 +17,14 @@ class TranscriptionResult:
|
|||
|
||||
|
||||
class Transcriber:
|
||||
def __init__(self, api_key: str, model: str, language: str, sample_rate: int):
|
||||
def __init__(
|
||||
self, api_key: str, model: str, language: str, sample_rate: int, audio_quality: float
|
||||
):
|
||||
self.client = Groq(api_key=api_key)
|
||||
self.model = model
|
||||
self.language = language
|
||||
self.sample_rate = sample_rate
|
||||
self.audio_quality = audio_quality
|
||||
|
||||
def transcribe(self, audio_data: np.ndarray) -> TranscriptionResult:
|
||||
if len(audio_data) == 0:
|
||||
|
|
@@ -53,6 +56,6 @@ class Transcriber:
|
|||
self.sample_rate,
|
||||
format="OGG",
|
||||
subtype="OPUS",
|
||||
compression_level=0.8,
|
||||
compression_level=self.audio_quality,
|
||||
)
|
||||
return buf.getvalue()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue