initial commit

This commit is contained in:
2026-03-11 09:43:31 -04:00
commit 197b72a951
7 changed files with 3893 additions and 0 deletions

5
.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
.venv
__pycache__
*.pyc
.ruff_cache
.mypy_cache

View File

@@ -0,0 +1,3 @@
2026-03-04 09:51:38 :: Lidarr-MusicVideoAutomator (v2.2) :: Starting...
2026-03-04 09:51:38 :: Lidarr-MusicVideoAutomator (v2.2) :: apk not found, skipping dependency installation...
2026-03-04 09:51:38 :: Lidarr-MusicVideoAutomator (v2.2) :: ERROR :: No config files found, exiting...

1264
dl.py Executable file

File diff suppressed because it is too large Load Diff

308
new_dl.py Executable file
View File

@@ -0,0 +1,308 @@
from ast import Dict
import asyncio
from enum import Enum
import json
from queue import Queue
import time
from typing import Any, Literal, Optional
from urllib.error import URLError
from urllib.request import HTTPError, Request, urlopen
from musicbrainzngs import musicbrainz
import pprint
import aiohttp
from textual.reactive import reactive
from textual.app import App, ComposeResult
from textual.widgets import Footer, Header, Label, ListView, ListItem
from textual.widget import Widget
from yt_dlp import YoutubeDL
from thefuzz import fuzz
def falsy(value: Any) -> bool:
    """Return True when *value* is falsy (None, False, 0, "", [], {}, ...).

    The original also tested ``value is False``, but ``not value`` already
    covers False, so the extra check was redundant.
    """
    return not value
class JobType(Enum):
    """Kinds of work items placed on the downloader's job queue."""

    fetch_artists = 1
    fetch_artist_videos = 2
    find_video_sources = 3
from typing import TypedDict

# NOTE(review): the original used PEP 695 ``type`` statements bound to dict
# *literals*, which are not valid type expressions (only harmless because
# aliases evaluate lazily). TypedDicts express the intended shapes and stay
# usable in annotations and in ``Queue[Job]``.

# A MusicBrainz relationship entry; exact shape not pinned down yet.
MBRelation = TypedDict("MBRelation", {}, total=False)

# A queued unit of work: the job kind plus its payload.
# "type" is a JobType member (forward ref: JobType is defined above at runtime
# in the full module).
Job = TypedDict("Job", {"type": "JobType", "payload": Any})

# Subset of a MusicBrainz recording; keys may be absent in API responses,
# hence total=False. Hyphenated keys force the functional syntax.
MBRecording = TypedDict(
    "MBRecording",
    {
        "length": Optional[int],
        "title": Optional[str],
        "id": str,
        "disambiguation": Optional[str],
        "first-release-date": Optional[str],
        "video": Optional[bool],
        "relations": Optional[list[MBRelation]],
    },
    total=False,
)
class JobWidget(ListItem):
    """List item that renders a single job's display name.

    NOTE(review): this class is immediately shadowed by an identical
    ``JobWidget`` redefinition just below it — one of the two is dead
    code and should be removed.
    """

    def __init__(self, job: Job):
        super().__init__()
        # Job rendered by compose(); expects a "name" key.
        self.job = job

    def compose(self) -> ComposeResult:
        # NOTE(review): the declared Job shape has no "name" key — confirm
        # what payload this widget actually receives.
        yield Label(self.job["name"])
class JobWidget(ListItem):
    """Renders one job entry as a labelled Textual list item."""

    def __init__(self, job: dict):
        super().__init__()
        # Keep the raw job dict; compose() reads its "name" key.
        self.job = job

    def compose(self):
        name = self.job["name"]
        yield Label(name)
class JobList(ListView):
    """List view whose items mirror the reactive ``jobs`` collection."""

    # Any assignment to ``jobs`` triggers a full recompose of the view.
    jobs = reactive([], recompose=True)

    def compose(self):
        # Sole place where job dicts are mapped to ListItem widgets.
        yield from (JobWidget(job) for job in self.jobs)
# class MVDownloaderTUI(App):
# def compose(self):
# yield Header()
# job_list = JobList(id="job-queue")
# yield job_list
# yield Footer()
# class MVDownloaderTUI(App):
# BINDINGS = [
# ("a", "add_item", "Add a new job")
# ]
# jobs = reactive([])
# def compose(self) -> ComposeResult:
# yield Header()
# yield Label("Job Queue")
# job_list = JobList(id="job-queue")
# yield job_list
# yield Footer()
# def watch_jobs(self, jobs):
# list_view = self.query_one("#job-queue", ListView)
# list_view.clear()
# for job in jobs:
# list_view.append(JobWidget(job))
# def _action_add_item(self) -> None:
# job_list = self.query_one("#job-queue", JobList)
# id = len(job_list.jobs)
# job_list.jobs = [*job_list.jobs, {"id": id, "name": "Test Job"}]
from typing import TypedDict

# NOTE(review): as with Job/MBRecording above, the original ``type`` aliases
# bound dict literals, which are not valid type expressions; TypedDicts carry
# the intended shape and remain usable in ``dict[str, Video]`` annotations.

# One candidate download location for a video.
UrlSource = TypedDict("UrlSource", {"url": str, "type": str})

# A music video tracked by the downloader; keys may be absent (total=False).
Video = TypedDict(
    "Video",
    {
        "id": str,
        "title": Optional[str],
        "artist": Optional[str],
        "year": Optional[str],
        "source_urls": Optional[list[UrlSource]],
    },
    total=False,
)
class MVDownloader:
    """Pulls artists from Lidarr, looks up their standalone video recordings
    on MusicBrainz, and searches YouTube (via yt-dlp) for matching sources.

    Work is driven by a job queue: fetch_artists seeds fetch_artist_videos
    jobs, which in turn enqueue find_video_sources jobs (see run_forever).
    Use as an async context manager so the aiohttp session is opened/closed.
    """

    def __init__(self):
        # Filesystem staging/output locations (hard-coded to this host's mounts).
        self.temp_path = "/mnt/user/data/downloads/temp"
        self.download_path = "/mnt/user/data/downloads/downloads"
        # SECURITY(review): API key and server address are hard-coded in
        # source — move to environment variables/config before sharing.
        self.lidarr_api_key = "36fb27b01480452b8e5d01a0a0ce9979"
        self.lidarr_url = "http://10.0.0.101:8686"
        # musicbrainzngs module used as a client object.
        self.musicbrainzapi = musicbrainz
        # artist MBID -> {"name": <artistName>}
        self.artists = {}
        self.videos: dict[str, Video] = {}
        # Per-stage record of what has already been fetched.
        self.fetch_log = {}
        # NOTE(review): queue.Queue is synchronous; get() in run_forever
        # blocks the event loop — consider asyncio.Queue.
        self.queue = Queue[Job]()
        # Caps concurrent HTTP requests at 10.
        self.semaphore = asyncio.Semaphore(10)
        # Created in __aenter__, closed in __aexit__.
        self.session: aiohttp.ClientSession | None = None

    # async def http_get_json(self, url: str, headers: Optional[Dict[str, str]] = None) -> Any:
    #     req = Request(url, headers=headers or {})
    #     try:
    #         with urlopen(req, timeout=None) as resp:
    #             data = resp.read()
    #             return json.loads(data.decode("utf-8"))
    #     except (URLError, HTTPError, json.JSONDecodeError) as e:
    #         self.log(f"ERROR :: HTTP/JSON error for {url}: {e}")
    #         return None

    async def http_get_json(
        self, url: str, headers: Optional[dict[str, str]] = None
    ) -> Any:
        """GET *url* and return the decoded JSON body, or None on error.

        Concurrency is bounded by the shared semaphore; requires the
        aiohttp session created in __aenter__.
        """
        async with self.semaphore:
            try:
                async with self.session.get(url, headers=headers) as resp:
                    return await resp.json()
            except (URLError, HTTPError, json.JSONDecodeError) as e:
                # NOTE(review): aiohttp raises aiohttp.ClientError, not
                # urllib's URLError/HTTPError, so this except likely never
                # matches aiohttp failures; also no ``log`` method is
                # defined on this class, so reaching this line would raise
                # AttributeError. Confirm and fix both.
                self.log(f"ERROR :: HTTP/JSON error for {url}: {e}")
                return None

    async def fetch_artists(self):
        """Fetch all artists from Lidarr and enqueue per-artist video jobs.

        NOTE(review): http_get_json returns None on error, which would make
        the ``for`` loop below raise TypeError — confirm intended.
        """
        base = self.lidarr_url.rstrip("/")
        url = f"{base}/api/v1/artist?apikey={self.lidarr_api_key}"
        artists = await self.http_get_json(url)
        for artist in artists:
            # Lidarr's foreignArtistId is the MusicBrainz artist id.
            id = artist["foreignArtistId"]
            if falsy(self.fetch_log.get("lidarr_artists")):
                self.fetch_log["lidarr_artists"] = {}
            if falsy(self.fetch_log["lidarr_artists"].get(id)):
                # Not processed before: queue a job to fetch this artist's videos.
                self.queue.put(
                    {
                        "type": JobType.fetch_artist_videos,
                        "payload": {"id": id, "name": artist["artistName"]},
                    }
                )
            if falsy(self.artists.get(id)):
                self.artists[id] = {}
            self.artists[id]["name"] = artist["artistName"]
        # print(results)
        # await self.fetch_artist_videos(artists[0]["id"])

    def filter_videos_factory(self, payload: dict[str, str]):
        """Build a yt-dlp ``match_filter`` callable closed over *payload*.

        The filter rejects search results whose title does not contain the
        recording title. It also computes a fuzzy relevance score, but
        NOTE(review): the score is currently unused — the bare ``return``
        accepts the video regardless of score. Confirm intent.
        """

        def filter_videos(info_dict, incomplete: bool):
            # Keyword bonuses/penalties folded into the fuzzy score.
            extra_points = {
                "music video": 50,
                "lyric video": -25,
            }
            # print(payload)
            # if info_dict['playlist']:
            #     return "It's a playlist"
            if info_dict.get("title"):
                yt_title = info_dict["title"]
                if payload["video"]["title"].lower() not in yt_title.lower():
                    # A non-None return value tells yt-dlp to skip the entry.
                    return "Artist or title mismatch"
                score = 0
                # print(info_dict)
                test = f"{payload['video']['title']} - {payload['video']['artist-credit-phrase']}"
                # print(f"YT Title: {yt_title}")
                # print(f"Test: {test}")
                score += fuzz.token_sort_ratio(yt_title, test)
                for keyword, points in extra_points.items():
                    if keyword.lower() in yt_title.lower():
                        score += points
                # NOTE(review): .get("uploader") may return None, making
                # .lower() raise AttributeError — confirm against yt-dlp
                # flat-extraction entries.
                if (
                    info_dict.get("uploader").lower()
                    == payload["video"]["artist-credit-phrase"].lower()
                ):
                    score += 10
                # print(f"Score: {score}")
                # Returning None accepts the video.
                return
            return None

        return filter_videos

    async def find_video_sources(self, payload: dict[str, str]):
        """Search YouTube for one recording; matching is delegated to the
        filter built by filter_videos_factory.

        Runs a flat-extraction, no-download yt-dlp pass over a search
        results URL.
        """
        filter_videos = self.filter_videos_factory(payload)
        ydl_opts = {
            "t": "sleep",
            "noplaylist": True,
            "quiet": True,
            "no_warnings": True,
            "extract_flat": True,
            "extract_audio": False,
            "audio_format": "best",
            "default_search": "",
            "skip_download": True,
            "match_filter": filter_videos,
        }
        # print(payload)
        artist = payload["video"]["artist-credit-phrase"]
        title = payload["video"]["title"]
        # HACK(review): hard-coded skip for one problematic artist — remove
        # once the underlying issue is understood.
        if artist == "20SIX Hundred":
            return
        search_query = f"{artist} - {title}"
        with YoutubeDL(ydl_opts) as ydl:
            output = ydl.download(
                [f"https://youtube.com/results?search_query={search_query}"]
            )
        # print(output)
        # id = payload['id']
        # video = payload['video']
        # print(video)

    async def fetch_mb_relations(self, id: str):
        """Fetch URL relationships for one recording.

        NOTE(review): the result is never returned or stored — this method
        currently has no observable effect beyond the API call.
        """
        info = self.musicbrainzapi.get_recording_by_id(id, includes=["url-rels"])

    async def fetch_artist_videos(self, payload: dict[str, str]):
        """Find an artist's video recordings on MusicBrainz and process them.

        NOTE(review): the completion marker write at the end is commented
        out, so every run refetches; and each recording is both awaited
        directly AND queued as a find_video_sources job — likely duplicate
        work. Confirm which path is intended.
        """
        self.musicbrainzapi.set_useragent("MVDownloader", "1.0.0")
        id = payload["id"]
        if self.fetch_log.get("video_list") is None:
            self.fetch_log["video_list"] = {}
        if falsy(self.fetch_log["video_list"].get(id)):
            # video=True restricts the search to standalone video recordings.
            recordings = self.musicbrainzapi.search_recordings(
                strict=True, arid=id, video=True
            )
            for recording in recordings["recording-list"]:
                info = self.musicbrainzapi.get_recording_by_id(
                    recording["id"], includes=["url-rels"]
                )
                print("inside for recording")
                if info.get("relations"):
                    print("inside info.get('relations')")
                    for relation in info["relations"]:
                        if relation["target-type"] == "url":
                            print("relation: ", relation)
                # print("info: ", info)
                await self.find_video_sources({"id": id, "video": recording})
                self.queue.put(
                    {
                        "type": JobType.find_video_sources,
                        "payload": {"id": id, "video": recording},
                    }
                )
            # for video in videos:
            #     await self.find_video_sources({"id": id, "video": video})
            # self.videos.append({"id": id, "videos": videos})
            # self.videos.put({"type": JobType.find_video_sources, "payload": {"id": id, "videos": videos}})
            # self.fetch_log["video_list"][id] = True

    async def __aenter__(self):
        # Open the shared aiohttp session used by http_get_json.
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        # Always close the session when the context exits.
        await self.session.close()

    async def main(self):
        """One-shot entry point: fetch artists without draining the queue."""
        await self.fetch_artists()

    async def run_forever(self):
        """Seed the queue with artist jobs, then process jobs indefinitely.

        NOTE(review): Queue.get() blocks the event loop; once the queue
        drains this coroutine blocks forever inside a synchronous call.
        """
        await self.fetch_artists()
        while True:
            task = self.queue.get()
            print(task)
            match task["type"]:
                case JobType.fetch_artists:
                    await self.fetch_artists()
                case JobType.fetch_artist_videos:
                    await self.fetch_artist_videos(task["payload"])
                case JobType.find_video_sources:
                    await self.find_video_sources(task["payload"])
                case _:
                    # NOTE(review): jobs are dicts, so ``task.type`` would
                    # raise AttributeError here — should be task["type"].
                    raise ValueError(f"Unknown job type: {task.type}")
            await asyncio.sleep(3)
async def main():
    """Module entry point: run the downloader's job loop inside a managed
    aiohttp session (opened/closed by the async context manager)."""
    async with MVDownloader() as dl:
        await dl.run_forever()
if __name__ == "__main__":
    # CLI entry: drive the async job loop until interrupted.
    asyncio.run(main())

2200
poetry.lock generated Executable file

File diff suppressed because it is too large Load Diff

53
pyproject.toml Executable file
View File

@@ -0,0 +1,53 @@
[project]
name = "tidal-downloader"
version = "0.1.0"
description = ""
authors = [
{name = "David Freitag",email = "david@freitag.site"}
]
requires-python = ">=3.14,<4.0"
dependencies = [
"ffmpeg (>=1.4,<2.0)",
"tidalapi (>=0.8.11,<0.9.0)",
"yt-dlp (>=2026.3.3,<2027.0.0)",
"musicbrainzngs @ git+https://github.com/freitagdavid/python-musicbrainzngs-neo.git",
"python-redux (>=0.25.4,<0.26.0)",
"aiohttp (>=3.13.3,<4.0.0)",
"textual (>=8.0.2,<9.0.0)",
"textual-dev (>=1.8.0,<2.0.0)",
"lxml[cssselect] (>=6.0.2,<7.0.0)",
"cssselect (>=1.4.0,<2.0.0)",
"selenium (>=4.41.0,<5.0.0)",
"thefuzz (>=0.22.1,<0.23.0)",
"ruff (>=0.15.5,<0.16.0)",
]
[tool.ruff.lint]
select = [
# pycodestyle
"E",
# Pyflakes
"F",
# pyupgrade
"UP",
# flake8-bugbear
"B",
# flake8-simplify
"SIM",
# isort
"I",
]
[tool.textual]
default_theme = "textual.themes.tui"
default_palette = "textual.palettes.tui"
default_font = "textual.fonts.tui"
default_font_size = 12
default_font_weight = "normal"
default_font_style = "normal"
default_font_color = "white"
default_background_color = "black"
default_foreground_color = "white"
[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"

60
vimeo_search.py Normal file
View File

@@ -0,0 +1,60 @@
import pprint
import urllib
import requests
import lxml.html
from selenium import webdriver
# Vimeo's public search page (scraped; no official API used here).
BASE_URL = "https://vimeo.com/search"

# Browser-like request headers to reduce bot blocking on direct HTTP fetches.
# NOTE(review): only referenced by the commented-out requests/urllib code
# paths; the Selenium path sends its own headers.
hdr = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
    "Accept-Encoding": "none",
    "Accept-Language": "en-US,en;q=0.8",
    "Connection": "keep-alive",
}
def parse_vimeo_page(page: str) -> list:
    """Extract video entries from a Vimeo search-results HTML page.

    NOTE(review): in its current state this only prints matched nodes and
    always returns the empty ``items`` list — the append logic is commented
    out below.
    """
    tree = lxml.html.fromstring(page)
    items = []
    # NOTE(review): this selector looks malformed — ``data-testid`` is an
    # HTML attribute, not an element name; likely intended something like
    # a.chakra-card[data-testid="clip-result"]. Confirm against live markup.
    videos = tree.cssselect("a.chakra-card > data-testid[clip-result]")
    for item in videos:
        print(item.text_content())
        # title = item.xpath('.//div[@class="video-title"]/a/text()')[0]
        # url = item.xpath('.//div[@class="video-title"]/a/@href')[0]
        # items.append({
        #     "title": title,
        #     "url": url
        # })
    return items
def search_vimeo(type: str, query: str, price: str, resolution: str):
    """Search Vimeo with a real browser and parse the results page.

    Selenium is used because Vimeo renders results client-side, so a plain
    HTTP GET (see the removed requests/urllib attempts) returns an empty
    shell. Parameters map directly onto Vimeo's query-string filters.

    NOTE(review): *query* is interpolated unencoded — queries containing
    spaces or ``&`` should be passed through urllib.parse.quote_plus first;
    confirm before changing, as current callers pass single words.

    Returns whatever parse_vimeo_page extracts (currently an empty list).
    """
    url = f"{BASE_URL}?type={type}&q={query}&price={price}&resolution={resolution}"
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Page source is captured after client-side rendering completes.
        return parse_vimeo_page(driver.page_source)
    finally:
        # Fix: always release the browser, even when navigation or parsing
        # raises — the original leaked a Chrome process on any exception.
        driver.quit()
if __name__ == "__main__":
    # Manual smoke test: search free 4K clips matching "paramore" and dump
    # the parsed results.
    pprint.pprint(
        search_vimeo(type="clip", query="paramore", price="free", resolution="4k")
    )