Initial commit

This commit is contained in:
mrsobakin 2023-09-08 14:40:37 +03:00
commit ee3c2358ff
12 changed files with 622 additions and 0 deletions

160
.gitignore vendored Normal file
View file

@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

55
lyrics_dl/__init__.py Normal file
View file

@ -0,0 +1,55 @@
from typing import Optional
import traceback
# Initialize classes from lyrics_dl/providers
import lyrics_dl.providers
from lyrics_dl.core import Song
from lyrics_dl.registry import Registry
from lyrics_dl.config import LyricsDlConfig
from lyrics_dl.logger import DefaultLogger, AbstractLogger
class LyricsDl:
    """Fetch lyrics for a song by asking the configured providers in order.

    Providers are instantiated once, at construction time, from the classes
    returned by ``Registry.get_synced_providers()``. A provider that cannot be
    set up (unknown name, or constructor arguments rejected) is logged and
    skipped instead of aborting the whole setup.
    """

    logger: AbstractLogger

    def __init__(
        self,
        config: Optional[LyricsDlConfig] = None,
        logger: Optional[AbstractLogger] = None,
    ):
        """Build the provider list described by *config*.

        Args:
            config: Provider order and per-provider keyword arguments.
                A fresh ``LyricsDlConfig()`` is used when omitted.
            logger: Destination for progress and error messages.
                ``DefaultLogger()`` is used when omitted.
        """
        # Defaults are created per call instead of in the signature, so they
        # are not evaluated once at definition time and shared by every
        # instance.
        if config is None:
            config = LyricsDlConfig()
        if logger is None:
            logger = DefaultLogger()

        self.logger = logger
        self.providers = []

        providers_classes = Registry.get_synced_providers()

        for name in config.order:
            Provider = providers_classes.get(name)
            if Provider is None:
                # A typo in the configured order should not crash with a bare
                # KeyError; treat it like any other unusable provider.
                self.logger.error(f"[lyrics-dl] Unknown provider \"{name}\", skipping")
                continue

            provider_config = config.providers_configs.get(name) or {}

            try:
                provider = Provider(**provider_config)
            except TypeError as e:
                # Raised when the configured keys do not match the provider's
                # constructor signature (missing or unexpected arguments).
                self.logger.error(f"[lyrics-dl] {e}")
                continue

            self.providers.append(provider)

    def fetch_lyrics(self, song: Song) -> Optional[str]:
        """Return the first non-empty lyrics any provider yields for *song*.

        Providers are tried in the configured order. An exception raised by a
        provider is logged (traceback at debug level) and the next provider is
        tried.

        Returns:
            The lyrics text, or ``None`` when no provider found any.
        """
        self.logger.info(f"[lyrics-dl] Fetching lyrics for \"{song.artist} - {song.title}\"")

        for provider in self.providers:
            self.logger.info(f"[{provider.name}] Fetching lyrics...")

            try:
                lyrics = provider.fetch_lyrics(song)
            except Exception as e:
                # Provider boundary: one failing backend must not prevent the
                # remaining ones from being queried.
                lyrics = None
                self.logger.error(f"[{provider.name}] Got exception while fetching lyrics! ({type(e).__name__}: {e})")
                self.logger.debug(f"[{provider.name}] {traceback.format_exc()}")

            if lyrics:
                self.logger.info(f"[{provider.name}] Found lyrics!")
                return lyrics

            self.logger.info(f"[{provider.name}] No lyrics was found!")

        return None

62
lyrics_dl/__main__.py Normal file
View file

@ -0,0 +1,62 @@
import argparse
from pathlib import Path
from lyrics_dl.core import Song
from lyrics_dl.config import LyricsDlConfig
from lyrics_dl.logger import DefaultLogger
from lyrics_dl import LyricsDl
# Module-level objects shared by process_file() below.
logger = DefaultLogger()
# Default configuration; no configuration file is loaded here.
config = LyricsDlConfig()
# NOTE: from here on the name `lyrics_dl` refers to this LyricsDl instance
# inside this module, not to the package.
lyrics_dl = LyricsDl(config=config, logger=logger)
def process_file(path, force=False):
    """Fetch lyrics for the song at *path* and save them next to it as ``.lrc``.

    Args:
        path: ``pathlib.Path`` of the audio file.
        force: When true, overwrite an existing ``.lrc`` file; otherwise an
            existing file is reported and left untouched.

    Failures (existing file, unreadable metadata, no lyrics found) are logged
    and the function returns without raising.
    """
    lyrics_path = path.with_suffix(".lrc")

    if lyrics_path.exists() and not force:
        logger.error("[lyrics-dl] Lyrics file already exists!")
        return

    try:
        song = Song.from_file(path)
    except Exception as e:
        # Any metadata problem is reported per file, so a directory run can
        # carry on with the remaining files.
        logger.error(f"[lyrics-dl] {path}: {e}")
        return

    lyrics = lyrics_dl.fetch_lyrics(song)

    if not lyrics:
        logger.error("[lyrics-dl] No lyrics was found!")
        return

    # Lyrics routinely contain non-ASCII text; pin the encoding rather than
    # relying on the platform default, which may be unable to encode it.
    with open(lyrics_path, "w", encoding="utf-8") as f:
        f.write(lyrics)
def process_directory(path, extensions, force=False):
    """Run process_file() on every matching file below *path*, recursively.

    Args:
        path: ``pathlib.Path`` of the directory to walk.
        extensions: Collection of file extensions (without the leading dot)
            that identify music files.
        force: Forwarded to process_file(); overwrite existing ``.lrc`` files.
    """
    for file_path in path.rglob("*"):
        # suffix includes the leading dot; strip it before comparing.
        if file_path.suffix[1:] not in extensions:
            continue

        process_file(file_path, force=force)


parser = argparse.ArgumentParser()
parser.add_argument("path", type=Path, help="Path to the song file or directory")
parser.add_argument("-e", "--extensions", type=str, help="Music files extensions, separated by a comma. For example: wav,flac,mp3")
parser.add_argument("-f", "--force-override", action="store_true", help="Force override .lrc file, if it already exists")

args = parser.parse_args()

# --force-override used to be parsed but never passed on, so existing .lrc
# files could not be overwritten; it is now forwarded on both code paths.
if args.path.is_dir():
    if not args.extensions:
        extensions = ["flac", "alac", "mp3", "m4a", "mp4", "aac", "wav", "opus", "ogg"]
    else:
        extensions = args.extensions.split(",")

    process_directory(args.path, extensions, force=args.force_override)
else:
    process_file(args.path, force=args.force_override)

20
lyrics_dl/config.py Normal file
View file

@ -0,0 +1,20 @@
from typing import Self
import tomllib
from dataclasses import dataclass, field
from pathlib import Path
@dataclass
class LyricsDlConfig:
order: list[str] = field(default_factory=lambda: ["kugou", "youtube"])
providers_configs: dict[str, dict] = field(default_factory=lambda: {})
@classmethod
def from_file(cls, path: Path) -> Self:
with open(path, "rb") as f:
config = tomllib.load(f)
return cls(
order=config["providers"].pop("order"),
providers_configs=config["providers"],
)

38
lyrics_dl/core.py Normal file
View file

@ -0,0 +1,38 @@
from dataclasses import dataclass
from abc import ABC, abstractmethod
from typing import Optional, Self
from pathlib import Path
import mutagen
@dataclass
class Song:
title: str
artist: str
album: Optional[str] = None
duration: Optional[int] = None
@classmethod
def from_file(cls, path: Path) -> Self:
metadata = mutagen.File(path)
if "title" not in metadata or "artist" not in metadata:
raise RuntimeError("Song is missing title or artist name")
title = ", ".join(metadata.get("title"))
artist = ", ".join(metadata.get("artist"))
album = metadata.get("album")
if album:
album = ", ".join(album)
duration = metadata.info.length
return cls(title=title, artist=artist, album=album, duration=duration)
class AbstractProvider(ABC):
    """Interface that every lyrics provider implements."""

    # Identifier of the provider; concrete providers override it.
    name: str = ""

    @abstractmethod
    def fetch_lyrics(self, song: Song) -> Optional[str]:
        """Return the lyrics found for *song*, or ``None`` if there are none."""
        ...

55
lyrics_dl/logger.py Normal file
View file

@ -0,0 +1,55 @@
from typing import Self
from abc import ABC, abstractmethod
import logging
class AbstractLogger(ABC):
    """Minimal logging interface: one method per severity level."""

    @abstractmethod
    def debug(self, message: str) -> None:
        """Record *message* at debug level."""
        ...

    @abstractmethod
    def info(self, message: str) -> None:
        """Record *message* at info level."""
        ...

    @abstractmethod
    def warning(self, message: str) -> None:
        """Record *message* at warning level."""
        ...

    @abstractmethod
    def error(self, message: str) -> None:
        """Record *message* at error level."""
        ...
class DefaultLogger(AbstractLogger):
    """Singleton logger backed by the standard ``logging`` module.

    Every construction returns the same object. The underlying "lyrics-dl"
    logger and its stream handler are configured only the first time.
    """

    __instance = None
    __initialized = False

    def __new__(cls) -> Self:
        # Reuse the instance created by an earlier construction, if any.
        if cls.__instance is not None:
            return cls.__instance

        cls.__instance = super().__new__(cls)
        return cls.__instance

    def __init__(self) -> None:
        # __init__ runs on every DefaultLogger() call; only the first one may
        # attach a handler, otherwise messages would be emitted repeatedly.
        if not self.__initialized:
            self.__initialized = True

            backend = logging.getLogger("lyrics-dl")
            backend.setLevel(logging.DEBUG)
            self.logger = backend

            stream_handler = logging.StreamHandler()
            stream_handler.setLevel(logging.DEBUG)
            backend.addHandler(stream_handler)

    def debug(self, message: str) -> None:
        """Forward *message* to the underlying logger at DEBUG level."""
        self.logger.debug(message)

    def info(self, message: str) -> None:
        """Forward *message* to the underlying logger at INFO level."""
        self.logger.info(message)

    def warning(self, message: str) -> None:
        """Forward *message* to the underlying logger at WARNING level."""
        self.logger.warning(message)

    def error(self, message: str) -> None:
        """Forward *message* to the underlying logger at ERROR level."""
        self.logger.error(message)

View file

@ -0,0 +1,3 @@
from lyrics_dl.providers import musixmatch
from lyrics_dl.providers import kugou
from lyrics_dl.providers import youtube

View file

@ -0,0 +1,80 @@
from typing import Optional, Iterable
from base64 import b64decode
import zlib
import re
from itertools import filterfalse, islice
from datetime import datetime
import httpx
from lyrics_dl.core import Song, AbstractProvider
from lyrics_dl.registry import lyrics_provider
# 16-value key that decode_krc() XORs, cyclically, against the KRC payload.
KRC_ENCODE_KEY = [64, 71, 97, 119, 94, 50, 116, 71, 81, 54, 49, 45, 206, 210, 110, 105]
# Lines matching this pattern are filtered out in Kugou.fetch_lyrics(). It
# matches lines starting with "[" followed by one of the listed tag names and
# a colon, the literal "[offset:0]" prefix, or a timed line ("...]<...") that
# contains one of the listed credit phrases.
RE_KRC_JUNK = re.compile(r"^\[((id|ar|ti|by|hash|al|sign|qq|total|language):|offset:0\]|.*\]<.*>?(Written by|Lyrics by|Composed by|Producer|作曲 :|作词 :)).*$")
# Tags of the form <digits,digits,digits>, which reformat_timings() removes
# (presumably per-word timings — confirm against the KRC format).
RE_WORD_TIMING = re.compile(r"<\d+,\d+,\d+>")
def decode_krc(content: bytes) -> str:
    """Decode a base64-encoded KRC blob into its text.

    The blob is base64-decoded, its first four bytes are discarded, the rest
    is XORed with the repeating KRC_ENCODE_KEY, zlib-decompressed and decoded
    as UTF-8 (a leading BOM, if present, is dropped).
    """
    raw = b64decode(content)

    # Sized up front from the raw length, so input shorter than the 4-byte
    # prefix fails here exactly as before.
    payload = bytearray(len(raw) - 4)
    for offset, byte in enumerate(raw[4:]):
        payload[offset] = byte ^ KRC_ENCODE_KEY[offset % 16]

    inflated = zlib.decompress(payload)
    return inflated.decode('utf-8-sig')
def reformat_timings(lines: Iterable[str]) -> Iterable[str]:
    """Convert KRC ``[start_ms,duration_ms]`` line prefixes to LRC ``[mm:ss.xx]``.

    Lines that do not start with ``[`` are yielded unchanged. For the others,
    the ``<...>`` tags matched by RE_WORD_TIMING are removed and the start
    offset (milliseconds) is rendered as minutes, seconds and hundredths.

    Raises:
        ValueError: If a line starting with ``[`` does not carry exactly two
            comma-separated integers before the first ``]``.
    """
    for line in lines:
        if not line.startswith("["):
            yield line
            continue

        line = RE_WORD_TIMING.sub("", line)

        raw_timings, text = line.split("]", 1)
        beginning, _ = map(int, raw_timings[1:].split(","))

        # Plain integer arithmetic instead of datetime.fromtimestamp(): that
        # call converts through the local timezone, which shifts the minutes
        # in zones with a non-whole-hour UTC offset, and it wraps at one hour.
        minutes, remainder = divmod(beginning, 60_000)
        seconds, milliseconds = divmod(remainder, 1000)
        centiseconds = milliseconds // 10

        yield f"[{minutes:02d}:{seconds:02d}.{centiseconds:02d}]{text}"
@lyrics_provider
class Kugou(AbstractProvider):
    """Lyrics provider backed by the krcs.kugou.com search/download endpoints."""

    name = "kugou"

    def fetch_lyrics(self: AbstractProvider, song: Song) -> Optional[str]:
        """Search Kugou for *song* and return the first candidate's lyrics.

        Returns ``None`` when the search yields no candidates.
        """
        # Query parameters shared by both requests.
        shared_params = {"ver": 1, "man": "yes", "client": "mobi"}

        search_result = httpx.get(
            "https://krcs.kugou.com/search",
            params={**shared_params, "keyword": f"{song.artist} - {song.title}"},
        ).json()

        candidates = search_result["candidates"]
        if not candidates:
            return None

        # Only the first candidate is ever considered.
        first = candidates[0]
        download_params = {
            **shared_params,
            "format": "lrc",
            "id": first["id"],
            "accesskey": first["accesskey"],
        }
        downloaded = httpx.get("https://krcs.kugou.com/download", params=download_params).json()

        krc_text = decode_krc(downloaded["content"])
        kept_lines = filterfalse(RE_KRC_JUNK.match, krc_text.splitlines())

        # The first line that survives the junk filter is skipped as well.
        body_lines = islice(kept_lines, 1, None)

        return "\n".join(reformat_timings(body_lines))

View file

@ -0,0 +1,36 @@
from typing import Optional
import httpx
from lyrics_dl.core import Song, AbstractProvider
from lyrics_dl.registry import lyrics_provider
@lyrics_provider
class Musixmatch(AbstractProvider):
    """Lyrics provider backed by the Musixmatch desktop API."""

    name = "musixmatch"

    def __init__(self, token: str) -> None:
        """Store the user *token* sent with every request."""
        self.token = token

    def fetch_lyrics(self, song: Song) -> Optional[str]:
        """Return the LRC subtitle body for *song*, or ``None`` if the API has none."""
        query = {
            "format": "json",
            "namespace": "lyrics_synched",
            "part": "lyrics_crowd,user,lyrics_verified_by",
            "user_language": "en",
            "f_subtitle_length_max_deviation": 1,
            "subtitle_format": "lrc",
            "app_id": "web-desktop-app-v1.0",
            "usertoken": self.token,
            "q_artist": song.artist,
            "q_track": song.title,
            "q_album": song.album,
        }

        payload = httpx.get(
            "https://apic-desktop.musixmatch.com/ws/1.1/macro.subtitles.get",
            params=query,
            follow_redirects=True,
        ).json()

        # Drill down to the body of the nested "track.subtitles.get" call.
        macro_calls = payload["message"]["body"]["macro_calls"]
        subtitles = macro_calls["track.subtitles.get"]["message"]["body"]

        if subtitles:
            return subtitles["subtitle_list"][0]["subtitle"]["subtitle_body"]

        return None

View file

@ -0,0 +1,79 @@
from typing import Optional, Dict
from contextlib import redirect_stdout
import subprocess
import io
import urllib
import unittest.mock
from yt_dlp import YoutubeDL
from lyrics_dl.core import Song, AbstractProvider
from lyrics_dl.registry import lyrics_provider
from lyrics_dl import utils
@lyrics_provider
class Youtube(AbstractProvider):
    """Lyrics provider that converts YouTube subtitles to LRC.

    It searches YouTube through yt-dlp, downloads the subtitles of the first
    suitable video and converts them with an external ``ffmpeg`` process.
    """

    name = "youtube"

    def _craft_search_link(self, song: Song) -> str:
        """Return the YouTube search URL for ``"<artist> - <title>"``."""
        # Imported explicitly: a bare `import urllib` does not guarantee that
        # the `urllib.parse` submodule has been loaded.
        from urllib.parse import quote

        query = quote(f"{song.artist} - {song.title}")

        # sp=... means search only videos with subtitles
        return f"https://www.youtube.com/results?search_query={query}&sp=EgIoAQ%253D%253D"

    def _download_subtitles(self, video_id: str) -> str:
        """Return the subtitles of *video_id* as text, captured from stdout."""
        buffer = io.StringIO()

        options = {"writesubtitles": True, "skip_download": True, "subtitlesformat": "srt/vtt/best", 'logtostderr': True}

        # A dirty monkey patch; youtube-dl does not
        # support "-" filename for subtitles, so we
        # just force it to use it here.
        with unittest.mock.patch("yt_dlp.YoutubeDL.subtitles_filename", new=lambda *_: "-"), \
                redirect_stdout(buffer), \
                YoutubeDL(options) as ydl:
            # download() takes a list of URLs/ids.
            ydl.download([video_id])

        return buffer.getvalue()

    def _subtitles_to_lyrics(self, subtitles: str) -> str:
        """Convert *subtitles* to LRC text by piping them through ffmpeg.

        Returns an empty string when ffmpeg produces no usable output.

        Raises:
            FileNotFoundError: If the ``ffmpeg`` executable is not available.
        """
        # "-fflags +bitexact" prevents ffmpeg from
        # writing metadata to .lrc file
        command = ["ffmpeg", "-loglevel", "quiet", "-i", "-", "-f", "lrc", "-fflags", "+bitexact", "-"]

        # run() feeds stdin and drains stdout concurrently and closes both
        # pipes. Writing all input, then wait(), then reading stdout can
        # deadlock once the output exceeds the OS pipe buffer.
        completed = subprocess.run(command, input=subtitles.encode(), stdout=subprocess.PIPE)

        # The first character of ffmpeg's output is dropped
        # (presumably a leading newline — TODO confirm).
        return completed.stdout.decode()[1:]

    def fetch_lyrics(self, song: Song) -> Optional[str]:
        """Return LRC lyrics built from the subtitles of a matching video.

        Only the first 10 search results are considered. When the song's
        duration is known, videos whose duration differs by more than two
        seconds (or is unknown) are skipped.

        Returns:
            The lyrics, or ``None`` when no video matches or the conversion
            yields nothing.
        """
        search_link = self._craft_search_link(song)

        with YoutubeDL({"extract_flat": True, "playlistend": 10}) as ydl:
            videos = ydl.extract_info(search_link)["entries"]

        if song.duration:
            def match_duration(video: Dict) -> bool:
                # Entries without a duration cannot be compared; skip them
                # instead of failing on a missing key or None.
                duration = video.get("duration")
                if duration is None:
                    return False
                return utils.threshold_equal(duration, song.duration, 2)

            videos = filter(match_duration, videos)

        def match_title(video: Dict) -> bool:
            # Placeholder: every video is currently accepted.
            return True

        videos = filter(match_title, videos)

        video = utils.next_or_none(videos)

        if not video:
            return None

        subtitles = self._download_subtitles(video["id"])
        lyrics = self._subtitles_to_lyrics(subtitles)

        # An empty conversion result counts as "no lyrics".
        return lyrics or None

20
lyrics_dl/registry.py Normal file
View file

@ -0,0 +1,20 @@
from lyrics_dl.core import AbstractProvider
class Registry:
    """Global name-to-class table of the known lyrics providers."""

    # Shared by all users of the class; filled in by register_provider().
    providers: dict[str, type[AbstractProvider]] = {}

    @staticmethod
    def get_synced_providers() -> dict[str, type[AbstractProvider]]:
        """Return a shallow copy of the provider table."""
        # TODO: stub
        return {**Registry.providers}

    @staticmethod
    def register_provider(provider_class: type[AbstractProvider]) -> None:
        """Add *provider_class* to the table under its ``name`` attribute."""
        key = provider_class.name
        Registry.providers[key] = provider_class
def lyrics_provider(cls: type[AbstractProvider]) -> type[AbstractProvider]:
    """Class decorator: register *cls* with the Registry and return it unchanged."""
    Registry.register_provider(cls)
    return cls

14
lyrics_dl/utils.py Normal file
View file

@ -0,0 +1,14 @@
from typing import Iterator, Optional, TypeVar
def threshold_equal(a: float, b: float, epsilon: float) -> bool:
    """Tell whether *a* and *b* differ by no more than *epsilon*."""
    distance = abs(a - b)
    return distance <= epsilon
T = TypeVar('T')


def next_or_none(iterator: Iterator[T]) -> Optional[T]:
    """Return the next item of *iterator*, or ``None`` once it is exhausted."""
    return next(iterator, None)