feat: configurable audio quality
This commit is contained in:
parent
1e5654e591
commit
17520f90f9
4 changed files with 10 additions and 3 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
[project]
|
[project]
|
||||||
name = "speechd"
|
name = "speechd"
|
||||||
version = "1.2.5"
|
version = "1.2.6"
|
||||||
description = "Speech-to-Text daemon with Groq Whisper API"
|
description = "Speech-to-Text daemon with Groq Whisper API"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ model = "whisper-large-v3-turbo"
|
||||||
language = "ru"
|
language = "ru"
|
||||||
sample_rate = 16000
|
sample_rate = 16000
|
||||||
timeout = 300
|
timeout = 300
|
||||||
|
audio_quality = 0.8
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -18,6 +19,7 @@ class Config:
|
||||||
sample_rate: int
|
sample_rate: int
|
||||||
timeout_seconds: int
|
timeout_seconds: int
|
||||||
runtime_dir: str
|
runtime_dir: str
|
||||||
|
audio_quality: float
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def load(cls) -> "Config":
|
def load(cls) -> "Config":
|
||||||
|
|
@ -47,4 +49,5 @@ class Config:
|
||||||
sample_rate=data.get("sample_rate", 16000),
|
sample_rate=data.get("sample_rate", 16000),
|
||||||
timeout_seconds=data.get("timeout", 300),
|
timeout_seconds=data.get("timeout", 300),
|
||||||
runtime_dir=os.environ.get("XDG_RUNTIME_DIR", "/tmp"),
|
runtime_dir=os.environ.get("XDG_RUNTIME_DIR", "/tmp"),
|
||||||
|
audio_quality=data.get("audio_quality", 0.8),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ class SpeechDaemon:
|
||||||
model=config.model,
|
model=config.model,
|
||||||
language=config.language,
|
language=config.language,
|
||||||
sample_rate=config.sample_rate,
|
sample_rate=config.sample_rate,
|
||||||
|
audio_quality=config.audio_quality,
|
||||||
)
|
)
|
||||||
self.recording = False
|
self.recording = False
|
||||||
self.frames: list[np.ndarray] = []
|
self.frames: list[np.ndarray] = []
|
||||||
|
|
|
||||||
|
|
@ -17,11 +17,14 @@ class TranscriptionResult:
|
||||||
|
|
||||||
|
|
||||||
class Transcriber:
|
class Transcriber:
|
||||||
def __init__(self, api_key: str, model: str, language: str, sample_rate: int):
|
def __init__(
|
||||||
|
self, api_key: str, model: str, language: str, sample_rate: int, audio_quality: float
|
||||||
|
):
|
||||||
self.client = Groq(api_key=api_key)
|
self.client = Groq(api_key=api_key)
|
||||||
self.model = model
|
self.model = model
|
||||||
self.language = language
|
self.language = language
|
||||||
self.sample_rate = sample_rate
|
self.sample_rate = sample_rate
|
||||||
|
self.audio_quality = audio_quality
|
||||||
|
|
||||||
def transcribe(self, audio_data: np.ndarray) -> TranscriptionResult:
|
def transcribe(self, audio_data: np.ndarray) -> TranscriptionResult:
|
||||||
if len(audio_data) == 0:
|
if len(audio_data) == 0:
|
||||||
|
|
@ -53,6 +56,6 @@ class Transcriber:
|
||||||
self.sample_rate,
|
self.sample_rate,
|
||||||
format="OGG",
|
format="OGG",
|
||||||
subtype="OPUS",
|
subtype="OPUS",
|
||||||
compression_level=0.8,
|
compression_level=self.audio_quality,
|
||||||
)
|
)
|
||||||
return buf.getvalue()
|
return buf.getvalue()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue