commit ee3c2358ff0aa7ea104eca3816b5b57e8b5ea588 Author: mrsobakin <68982655+mrsobakin@users.noreply.github.com> Date: Fri Sep 8 14:40:37 2023 +0300 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..68bc17f --- /dev/null +++ b/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
class LyricsDl:
    """Fetch lyrics by asking the configured providers one after another.

    Providers are instantiated once, in the order given by the config, and
    queried in that same order by :meth:`fetch_lyrics`.
    """

    logger: AbstractLogger

    def __init__(
        self,
        config: Optional[LyricsDlConfig] = None,
        logger: Optional[AbstractLogger] = None,
    ):
        """Instantiate every provider named in ``config.order``.

        Args:
            config: Provider order and per-provider keyword arguments.
                A fresh ``LyricsDlConfig()`` is used when omitted.
            logger: Destination for progress and error messages.
                ``DefaultLogger()`` is used when omitted.

        Providers that are unknown to the registry, or whose constructor
        rejects the configured arguments, are logged and skipped.
        """
        # Build the defaults per call: an instance written in the signature is
        # created once at definition time and then shared by every LyricsDl.
        if config is None:
            config = LyricsDlConfig()
        if logger is None:
            logger = DefaultLogger()

        self.logger = logger

        providers_classes = Registry.get_synced_providers()

        self.providers = []

        for name in config.order:
            Provider = providers_classes.get(name)

            # A typo in the configured order should not abort start-up; it is
            # reported the same way as a provider that fails to construct.
            if Provider is None:
                self.logger.error(f"[lyrics-dl] Unknown provider \"{name}\"")
                continue

            provider_config = config.providers_configs.get(name) or {}

            try:
                provider = Provider(**provider_config)
            except TypeError as e:
                # Raised when required constructor arguments are missing or
                # unexpected ones are supplied.
                self.logger.error(f"[lyrics-dl] {e}")
                continue

            self.providers.append(provider)

    def fetch_lyrics(self, song: Song) -> Optional[str]:
        """Return lyrics from the first provider that finds any, else ``None``.

        An exception raised by a provider is logged and treated as "nothing
        found", so the remaining providers are still tried.
        """
        self.logger.info(f"[lyrics-dl] Fetching lyrics for \"{song.artist} - {song.title}\"")
        for provider in self.providers:
            self.logger.info(f"[{provider.name}] Fetching lyrics...")

            try:
                lyrics = provider.fetch_lyrics(song)
            except Exception as e:
                lyrics = None
                self.logger.error(f"[{provider.name}] Got exception while fetching lyrics! ({type(e).__name__}: {e})")
                self.logger.debug(f"[{provider.name}] {traceback.format_exc()}")

            if lyrics:
                self.logger.info(f"[{provider.name}] Found lyrics!")
                return lyrics

            self.logger.info(f"[{provider.name}] No lyrics was found!")

        return None
For example: wav,flac,mp3") +parser.add_argument("-f", "--force-override", action="store_true", help="Force override .lrc file, if it already exists") + +args = parser.parse_args() + +if args.path.is_dir(): + if not args.extensions: + extensions = ["flac", "alac", "mp3", "m4a", "mp4", "aac", "wav", "opus", "ogg"] + else: + extensions = args.extensions.split(",") + + process_directory(args.path, extensions) +else: + process_file(args.path) diff --git a/lyrics_dl/config.py b/lyrics_dl/config.py new file mode 100644 index 0000000..e25a012 --- /dev/null +++ b/lyrics_dl/config.py @@ -0,0 +1,20 @@ +from typing import Self +import tomllib +from dataclasses import dataclass, field +from pathlib import Path + + +@dataclass +class LyricsDlConfig: + order: list[str] = field(default_factory=lambda: ["kugou", "youtube"]) + providers_configs: dict[str, dict] = field(default_factory=lambda: {}) + + @classmethod + def from_file(cls, path: Path) -> Self: + with open(path, "rb") as f: + config = tomllib.load(f) + + return cls( + order=config["providers"].pop("order"), + providers_configs=config["providers"], + ) diff --git a/lyrics_dl/core.py b/lyrics_dl/core.py new file mode 100644 index 0000000..f4e26eb --- /dev/null +++ b/lyrics_dl/core.py @@ -0,0 +1,38 @@ +from dataclasses import dataclass +from abc import ABC, abstractmethod +from typing import Optional, Self +from pathlib import Path +import mutagen + + +@dataclass +class Song: + title: str + artist: str + album: Optional[str] = None + duration: Optional[int] = None + + @classmethod + def from_file(cls, path: Path) -> Self: + metadata = mutagen.File(path) + + if "title" not in metadata or "artist" not in metadata: + raise RuntimeError("Song is missing title or artist name") + + title = ", ".join(metadata.get("title")) + artist = ", ".join(metadata.get("artist")) + + album = metadata.get("album") + if album: + album = ", ".join(album) + + duration = metadata.info.length + + return cls(title=title, artist=artist, album=album, 
class DefaultLogger(AbstractLogger):
    """Singleton logger that forwards messages to the "lyrics-dl" logger.

    Constructing the class repeatedly returns the same object, and the
    underlying ``logging`` logger is configured only once.
    """

    # One cached instance per concrete class. Keying by class keeps a
    # subclass from being handed the instance cached for its base class.
    __instances: dict = {}
    __initialized = False
    # "lyrics-dl" names a single logger object for the whole process, so the
    # console handler must be attached once no matter how many instances exist.
    __handler_attached = False

    def __new__(cls) -> Self:
        instance = cls.__instances.get(cls)
        if instance is None:
            instance = AbstractLogger.__new__(cls)
            cls.__instances[cls] = instance
        return instance

    def __init__(self) -> None:
        # __init__ runs on every DefaultLogger() call; configure only once.
        if self.__initialized:
            return
        self.__initialized = True

        self.logger = logging.getLogger("lyrics-dl")
        self.logger.setLevel(logging.DEBUG)

        if DefaultLogger.__handler_attached:
            return
        DefaultLogger.__handler_attached = True

        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.DEBUG)
        self.logger.addHandler(console_handler)

    def debug(self, message: str) -> None:
        """Log *message* at DEBUG level."""
        self.logger.debug(message)

    def info(self, message: str) -> None:
        """Log *message* at INFO level."""
        self.logger.info(message)

    def warning(self, message: str) -> None:
        """Log *message* at WARNING level."""
        self.logger.warning(message)

    def error(self, message: str) -> None:
        """Log *message* at ERROR level."""
        self.logger.error(message)
@@ +from typing import Optional, Iterable +from base64 import b64decode +import zlib +import re +from itertools import filterfalse, islice +from datetime import datetime + +import httpx + +from lyrics_dl.core import Song, AbstractProvider +from lyrics_dl.registry import lyrics_provider + + +KRC_ENCODE_KEY = [64, 71, 97, 119, 94, 50, 116, 71, 81, 54, 49, 45, 206, 210, 110, 105] + +RE_KRC_JUNK = re.compile(r"^\[((id|ar|ti|by|hash|al|sign|qq|total|language):|offset:0\]|.*\]<.*>?(Written by:|Lyrics by:|Composed by:|Producer:|作曲 :|作词 :)).*$") +RE_WORD_TIMING = re.compile(r"<\d+,\d+,\d+>") + + +def decode_krc(content: bytes) -> str: + content = b64decode(content) + + buf = bytearray(len(content) - 4) + for i in range(4, len(content)): + buf[i - 4] = content[i] ^ KRC_ENCODE_KEY[(i - 4) % 16] + + return zlib.decompress(buf).decode('utf-8-sig') + + +def reformat_timings(lines: Iterable[str]) -> Iterable[str]: + for line in lines: + if not line.startswith("["): + yield line + continue + + line = RE_WORD_TIMING.sub("", line) + + raw_timings, text = line.split("]", 1) + beginning, _ = map(int, raw_timings[1:].split(",")) + + timing = datetime.fromtimestamp(beginning / 1000).strftime("%M:%S.%f")[:8] + + yield f"[{timing}]{text}" + + +@lyrics_provider +class Kugou(AbstractProvider): + name = "kugou" + + def fetch_lyrics(self: AbstractProvider, song: Song) -> Optional[str]: + keyword = f"{song.artist} - {song.title}" + + response = httpx.get("https://krcs.kugou.com/search", params={ + "ver": 1, + "man": "yes", + "client": "mobi", + "keyword": keyword + }).json() + + candidates = response["candidates"] + + if not candidates: + return None + + id_, accesskey = candidates[0]["id"], candidates[0]["accesskey"] + + r = httpx.get("https://krcs.kugou.com/download", params={ + "ver": 1, + "man": "yes", + "client": "mobi", + "format": "lrc", + "id": id_, + "accesskey": accesskey + }).json() + + krc = decode_krc(r["content"]) + + lines = 
@lyrics_provider
class Musixmatch(AbstractProvider):
    """Lyrics provider that queries the Musixmatch desktop API."""

    name = "musixmatch"

    def __init__(self, token: str) -> None:
        """Store the user token sent with every request."""
        self.token = token

    def fetch_lyrics(self, song: Song) -> Optional[str]:
        """Return LRC-formatted subtitles for *song*, or ``None`` if none exist.

        Args:
            song: Track to look up; artist, title and album are sent as the
                query.

        Returns:
            The body of the first subtitle in the response, or ``None`` when
            the response carries no subtitles.
        """
        response = httpx.get("https://apic-desktop.musixmatch.com/ws/1.1/macro.subtitles.get", params={
            "format": "json",
            "namespace": "lyrics_synched",
            "part": "lyrics_crowd,user,lyrics_verified_by",
            "user_language": "en",
            "f_subtitle_length_max_deviation": 1,
            "subtitle_format": "lrc",
            "app_id": "web-desktop-app-v1.0",
            "usertoken": self.token,

            "q_artist": song.artist,
            "q_track": song.title,
            "q_album": song.album,
        }, follow_redirects=True).json()

        body = response["message"]["body"]["macro_calls"]["track.subtitles.get"]["message"]["body"]

        if not body:
            return None

        # An empty or absent list means "nothing found"; report that as None
        # rather than letting the [0] lookup raise.
        subtitle_list = body.get("subtitle_list")

        if not subtitle_list:
            return None

        return subtitle_list[0]["subtitle"]["subtitle_body"]
def _subtitles_to_lyrics(self, subtitles: str) -> str:
    """Convert subtitle text to LRC by piping it through ``ffmpeg``.

    Args:
        subtitles: Subtitle file contents, fed to ffmpeg on stdin.

    Returns:
        ffmpeg's stdout decoded as text with its first character removed;
        an empty string when ffmpeg produced no output.
    """
    # "-fflags +bitexact" prevents ffmpeg from
    # writing metadata to .lrc file
    command = ["ffmpeg", "-loglevel", "quiet", "-i", "-", "-f", "lrc", "-fflags", "+bitexact", "-"]

    # run() feeds stdin and drains stdout together and closes both pipes.
    # Writing all input and calling wait() before reading can deadlock once
    # the output no longer fits in the OS pipe buffer.
    process = subprocess.run(command, input=subtitles.encode(), stdout=subprocess.PIPE)

    # NOTE(review): the first character is dropped as in the original code,
    # presumably a leading newline emitted by ffmpeg; confirm.
    return process.stdout.decode()[1:]
T = TypeVar('T')


def next_or_none(iterator: Iterator[T]) -> Optional[T]:
    """Return the next item of *iterator*, or ``None`` once it is exhausted."""
    # The two-argument form of next() returns the default instead of raising
    # StopIteration, so no try/except is needed.
    return next(iterator, None)