# File: tidal_downloader/new_dl.py
# (283 lines, 9.4 KiB, Python, executable file)

import asyncio
import json
import pprint
import time
from ast import Dict
from enum import Enum
from queue import Empty, Queue
from typing import Any, Callable, Literal, Optional, TypedDict
from urllib.error import URLError
from urllib.parse import quote_plus
from urllib.request import HTTPError, Request, urlopen

import aiohttp
from memorize import Memorize
from musicbrainzngs import musicbrainz
from musicbrainzngs.types import Artist
# NOTE: this rebinds ``pprint`` (the stdlib module imported above) to rich's
# pretty-printer function; keep it after ``import pprint``.
from rich.pretty import pprint
from rich.progress import Progress, TimeElapsedColumn
from rich.status import Status
from textual.app import App, ComposeResult
from textual.reactive import reactive
from textual.widget import Widget
from textual.widgets import Footer, Header, Label, ListView, ListItem
from thefuzz import fuzz
from yt_dlp import YoutubeDL
# Dry-run switch: its value is handed to yt-dlp as the "skip_download" option
# wherever this module builds yt-dlp options.
testRun = True
def falsy(value: Any) -> bool:
    """Return ``True`` when *value* is falsy (``None``, ``False``, ``0``, empty)."""
    # ``not value`` already covers ``False``; the former extra
    # ``or value is False`` clause could never change the result.
    return not value
class JobType(Enum):
    """Kinds of work item that can be carried by a ``Job``."""

    fetch_artists = 1
    fetch_artist_videos = 2
    find_video_sources = 3
    download_video = 4
class Job(TypedDict):
    """One queued unit of work: the kind of job plus the data it operates on."""

    # Selects which handler processes the job.
    type: JobType
    # Handler-specific data; its shape depends on ``type``.
    payload: Any
type MBRelation = {}
class MBRecordingInfo(TypedDict):
    """Typed view of a recording's metadata.

    The ``MB`` prefix presumably refers to MusicBrainz; only ``id`` is
    guaranteed to hold a value, every other field may be ``None``.
    """

    length: int | None
    title: str | None
    id: str
    disambiguation: str | None
    first_release_date: str | None
    video: bool | None
    relations: list[MBRelation] | None
class VideoSource(Enum):
    """Known hosting sites for a video, plus a catch-all ``unknown``."""

    youtube = 1
    vimeo = 2
    tidal = 3
    unknown = 4
class UrlSource(TypedDict):
    """A candidate location for a video: the URL and a label for its kind."""

    # Address of the video.
    url: str
    # Free-form kind label stored as a plain string.
    type: str
class Video(TypedDict):
    """Metadata for one video together with the places it may be fetched from."""

    id: str
    title: str | None
    artist: str | None
    year: str | None
    # Candidate download locations; ``None`` when none are recorded.
    source_urls: list[UrlSource] | None
class ArtistItemInfo(TypedDict):
    """Minimal identification of an artist: an id and a display name."""

    id: str
    name: str
class VideoItemInfo(TypedDict):
id: str
title: Optional[str]
artist: Optional[str]
year: Optional[str]
class Item(TypedDict):
    """Pairs an artist's identification with video information."""

    artist: ArtistItemInfo
    # NOTE: despite the plural name this holds a single ``VideoItemInfo``.
    video_sources: VideoItemInfo
class MVDownloader:
def __init__(self):
self.temp_path = "/mnt/user/data/downloads/temp"
self.download_path = "/mnt/user/data/downloads/downloads"
self.lidarr_api_key = "36fb27b01480452b8e5d01a0a0ce9979"
self.lidarr_url = "http://10.0.0.101:8686"
self.musicbrainzapi = musicbrainz
self.artists = {}
self.videos: dict[str, Video] = {}
self.fetch_log = {}
self.queue:Queue[Job] = Queue()
self.semaphore = asyncio.Semaphore(10)
self.session: aiohttp.ClientSession | None = None
self.data: dict[str, Item] = {}
async def http_get_json(
self, url: str, headers: Optional[dict[str, str]] = None
) -> Any:
async with self.semaphore:
try:
async with self.session.get(url, headers=headers) as resp:
return await resp.json()
except (URLError, HTTPError, json.JSONDecodeError) as e:
self.log(f"ERROR :: HTTP/JSON error for {url}: {e}")
return None
def add_fetch_artist_videos_job(self, artist: Artist):
# self.progress.add_task(f"Fetching videos for {name}")
self.queue.put(
{
"type": JobType.fetch_artist_videos,
"payload": artist,
}
)
# @Memorize
async def fetch_artists(self, lidarr_url: str, lidarr_api_key: str):
base = lidarr_url.rstrip("/")
url = f"{base}/api/v1/artist?apikey={lidarr_api_key}"
artists = await self.http_get_json(url)
for artist in artists:
self.add_fetch_artist_videos_job(artist)
def filter_videos_factory(self, payload: dict[str, str]) -> Callable[..., Literal['Artist or title mismatch'] | None]:
def filter_videos(info_dict, incomplete: bool):
extra_points = {
"music video": 50,
"lyric video": -25,
}
if info_dict.get("title"):
yt_title = info_dict["title"]
if payload["video"]["title"].lower() not in yt_title.lower():
return "Artist or title mismatch"
score = 0
test = f"{payload['video']['title']} - {payload['video']['artist-credit-phrase']}"
score += fuzz.token_sort_ratio(yt_title, test)
for keyword, points in extra_points.items():
if keyword.lower() in yt_title.lower():
score += points
if (
info_dict.get("uploader").lower()
== payload["video"]["artist-credit-phrase"].lower()
):
score += 10
return
return None
return filter_videos
async def download_yt_video(self, url: str):
ydl_opts: dict[str, Any] = {
"noplaylist": True,
"quiet": True,
"no_warnings": True,
"extract-audio": False,
"audio-format": "best",
"skip_download": testRun,
}
with YoutubeDL(ydl_opts) as ydl:
ydl.download([payload["url"]])
async def download_video(self, payload: dict[str, str]):
if payload.get("source_urls") is None:
self.log(f"No source URLs for {payload['title']} by {payload['artist']}")
return
for source in payload["source_urls"]:
if source["target"].includes("youtube.com"):
await self.download_yt_video(source["url"])
async def find_video_sources(self, payload: dict[str, str]):
filter_videos = self.filter_videos_factory(payload)
ydl_opts = {
"t": "sleep",
"noplaylist": True,
"quiet": True,
"no_warnings": True,
"extract_flat": True,
"extract_audio": False,
"audio_format": "best",
"default_search": "",
"skip_download": testRun,
"match_filter": filter_videos,
}
artist = payload["video"]["artist-credit-phrase"]
title = payload["video"]["title"]
if artist == "20SIX Hundred":
return
search_query = f"{artist} - {title}"
with YoutubeDL(ydl_opts) as ydl:
output = ydl.download(
[f"https://youtube.com/results?search_query={search_query}"]
)
# print(output)
# id = payload['id']
# video = payload['video']
# print(video)
async def fetch_mb_relations(self, id: str):
return self.musicbrainzapi.get_recording_by_id(id, includes=["url-rels"])
async def search_mb_recording(self, id: str):
return self.musicbrainzapi.search_recordings(strict=True, arid=id, video=True)
async def check_mb_relations(self, id: str):
pass
async def parse_mb_relation(self, relation: MBRelation):
pprint(["parsing relation: ", relation])
# self.queue.put(
# "type": JobType.download_video, "payload": {"url": relation["url"]["resource"]}}
# )
return_value = {"url": relation["target"], "type": "unknown"}
if "youtube.com" in relation.get("url", {}).get("resource", ""):
return_value["type"] = "youtube"
return return_value
async def fetch_artist_videos(self, payload: dict[str, str]):
self.musicbrainzapi.set_useragent("MVDownloader", "1.0.0")
id = payload["id"]
# await self.check_mb_relations(id)
recordings = await self.search_mb_recording(id)
potential_videos = {}
for recording in recordings["recording-list"]:
info = await self.fetch_mb_relations(recording["id"])
relations = info.get("recording").get("url-relation-list", [])
if relations == []:
pprint("no relations")
return
else:
for relation in relations:
pprint(["relation: ", relation])
parsed_relation = await self.parse_mb_relation(relation)
potential_videos[parsed_relation["type"]] = parsed_relation["url"]
await self.find_video_sources({"id": id, "video": recording})
self.queue.put(
{
"type": JobType.find_video_sources,
"payload": {"id": id, "video": recording},
}
)
async def __aenter__(self):
self.session = aiohttp.ClientSession()
return self
async def __aexit__(self, exc_type, exc_value, traceback):
await self.session.close()
async def main(self):
await self.fetch_artists(self.lidarr_url, self.lidarr_api_key)
async def run_forever(self):
await self.fetch_artists(self.lidarr_url, self.lidarr_api_key)
while True:
task = self.queue.get()
# print(task)
match task["type"]:
case JobType.download_video:
await self.download_video(task["payload"])
case JobType.fetch_artists:
await self.fetch_artists(self.lidarr_url, self.lidarr_api_key)
case JobType.fetch_artist_videos:
await self.fetch_artist_videos(task["payload"])
case JobType.find_video_sources:
await self.find_video_sources(task["payload"])
case _:
raise ValueError(f"Unknown job type: {task.type}")
await asyncio.sleep(3)
async def main():
    """Open an ``MVDownloader`` and run its job loop inside the context manager."""
    async with MVDownloader() as worker:
        await worker.run_forever()
if __name__ == "__main__":
    # Script entry point: run the headless downloader. The TUI variant below
    # is currently disabled.
    # app = MVDownloaderTUI()
    # app.run()
    asyncio.run(main())