diff --git a/README.md b/README.md index c475225..4229220 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ serving this scraper will download the gif and mp3 from a ytmnd and write a file embedding these things in addition to zoom text (if any). -The downloaded files cannot be loaded from a `file://` url. In order to view these files, put them online or run a local server. For example, `python -m http.server` from the directory and got to [http://localhost:8000/](http://localhost:8000/). If you host them somewhere, remember to include `ytmnd.js` in the same directory. +The downloaded files cannot be loaded from a `file://` url. In order to view these files, put them online or run a local server. For example, `python -m http.server` from the directory and go to [http://localhost:8000/](http://localhost:8000/). options ------- diff --git a/ytmnd.js b/ytmnd.js deleted file mode 100644 index ed80d77..0000000 --- a/ytmnd.js +++ /dev/null @@ -1,43 +0,0 @@ -(function () { - var audio = new Audio(url); - audio.loop = true; - audio.muted = true; - audio - .play() - .then(function () { - console.log("Audio started (muted). 
Click/tap to unmute!"); - var unmuteMsg = document.createElement("div"); - unmuteMsg.textContent = "Click to unmute"; - unmuteMsg.style.cssText = - "position:fixed;top:10px;right:10px;background:rgba(0,0,0,0.8);color:#fff;padding:10px 20px;border-radius:5px;font-family:sans-serif;z-index:9999;cursor:pointer;"; - document.body.appendChild(unmuteMsg); - function unmute() { - audio.muted = false; - unmuteMsg.remove(); - console.log("Audio unmuted!"); - } - document.addEventListener("click", unmute, { once: true }); - document.addEventListener("keydown", unmute, { once: true }); - document.addEventListener("touchstart", unmute, { once: true }); - unmuteMsg.addEventListener("click", unmute, { once: true }); - }) - .catch(function (error) { - console.error("Autoplay failed even when muted:", error); - function playOnInteraction() { - audio.muted = false; - audio - .play() - .then(function () { - console.log("Audio started after user interaction"); - }) - .catch(function (err) { - console.error("Still couldn't play:", err); - }); - } - document.addEventListener("click", playOnInteraction, { once: true }); - document.addEventListener("keydown", playOnInteraction, { once: true }); - document.addEventListener("touchstart", playOnInteraction, { - once: true, - }); - }); -})(); diff --git a/ytmndd.py b/ytmndd.py index 3d562dc..6f82dcd 100755 --- a/ytmndd.py +++ b/ytmndd.py @@ -1,311 +1,485 @@ #!/usr/bin/env python3 -import sys +import json import os import os.path import re -import time -import json import subprocess +import sys +import time from optparse import OptionParser + import requests from requests.exceptions import RequestException + class YTMND: + def __init__(self): + self.user_mode = False + self.media_only = False + self.html_only = False + self.json_only = False + self.no_web_audio = False + self.print_json = False + self.sleep = 5 - def __init__(self): - self.user_mode = False - self.media_only = False - self.html_only = False - self.json_only = False - 
self.no_web_audio = False - self.print_json = False - self.sleep = 5 + def fetch_user(self, user): + if user == "": + print("expecting one ytmnd name, got " + str(sys.argv)) + return - def fetch_user(self, user): - if user == "": - print("expecting one ytmnd name, got " + str(sys.argv)) - return + ytmnd_name = user + try: + response = requests.get( + "http://ytmnd.com/users/" + ytmnd_name + "/sites", + headers={"User-Agent": "Mozilla/5.0"}, + ) + response.raise_for_status() + ytmnd_html = response.text.splitlines() + except RequestException as e: + print(f"Error fetching user page: {e}") + return - ytmnd_name = user - try: - response = requests.get("http://ytmnd.com/users/" + ytmnd_name + "/sites", - headers={'User-Agent': 'Mozilla/5.0'}) - response.raise_for_status() - ytmnd_html = response.text.splitlines() - except RequestException as e: - print(f"Error fetching user page: {e}") - return + domains = [] - domains = [] + for line in ytmnd_html: + if "profile_link" in line: + expr = r"site_link\" href=\"http://(\S+).ytmn(d|sfw)?.com\"" + match = re.search(expr, line) + if match: + domain = match.group(1) + domains.append(domain) - for line in ytmnd_html: - if 'profile_link' in line: - expr = r"site_link\" href=\"http://(\S+).ytmn(d|sfw)?.com\"" - match = re.search(expr, line) - if match: - domain = match.group(1) - domains.append(domain) + if self.json_only: + if self.media_only: + os.makedirs(user, exist_ok=True) + os.chdir(user) + parsed = [] + for domain in domains: + result = self.fetch_ytmnd(domain) + if result: + parsed.append(result) + if self.media_only: + os.chdir("..") + self.write_json(ytmnd_name, parsed) - if self.json_only: - if self.media_only: - os.makedirs(user, exist_ok=True) - os.chdir(user) - parsed = [] - for domain in domains: - result = self.fetch_ytmnd(domain) - if result: - parsed.append(result) - if self.media_only: - os.chdir("..") - self.write_json(ytmnd_name, parsed) + else: + print(">> found %d domains" % len(domains)) + 
os.makedirs(user, exist_ok=True) + os.chdir(user) + for domain in domains: + self.fetch_ytmnd(domain) + os.chdir("..") + + def fetch_ytmnd(self, domain): + if domain == "": + print("expecting one ytmnd name, got " + str(sys.argv)) + return None + + if not self.print_json: + print("fetching %s" % domain) + if self.sleep: + time.sleep(self.sleep) + + ytmnd_name = domain + try: + response = requests.get( + "http://" + domain + ".ytmnd.com", headers={"User-Agent": "Mozilla/5.0"} + ) + response.raise_for_status() + ytmnd_html = response.text + + expr = r"ytmnd.site_id = (\d+);" + match = re.search(expr, ytmnd_html) + if not match: + print(f"Could not find site_id for {domain}") + return None + ytmnd_id = match.group(1) + + response = requests.get( + "http://" + domain + ".ytmnd.com/info/" + ytmnd_id + "/json", + headers={"User-Agent": "Mozilla/5.0"}, + ) + response.raise_for_status() + ytmnd_info = response.json() + + except RequestException as e: + print(f"Error fetching {domain}: {e}") + return None + + if self.print_json: + print(json.dumps(ytmnd_info, sort_keys=True, indent=4)) + elif self.json_only: + if self.media_only: + self.fetch_media(ytmnd_info) + return self.parse_json(ytmnd_info) + elif self.media_only: + self.fetch_media(ytmnd_info) + elif self.html_only: + self.write_index(ytmnd_info) + else: + self.fetch_media(ytmnd_info) + self.write_index(ytmnd_info) + + return ytmnd_info + + def fetch_media(self, ytmnd_info): + domain = ytmnd_info["site"]["domain"] + original_gif = ytmnd_info["site"]["foreground"]["url"] + gif_type = original_gif.split(".")[-1] + original_wav = ytmnd_info["site"]["sound"]["url"] + wav_type = ytmnd_info["site"]["sound"]["type"] + + if "alternates" in ytmnd_info["site"]["sound"]: + key = list(ytmnd_info["site"]["sound"]["alternates"].keys())[0] + value = ytmnd_info["site"]["sound"]["alternates"][key] + if value["file_type"] != "swf": + original_wav = value["file_url"] + wav_type = ytmnd_info["site"]["sound"]["file_type"] + + 
subprocess.run(["wget", "--quiet", "-O", f"{domain}.{gif_type}", original_gif]) + subprocess.run(["wget", "--quiet", "-O", f"{domain}.{wav_type}", original_wav]) + + def write_index(self, ytmnd_info): + domain = ytmnd_info["site"]["domain"] + bgcolor = ytmnd_info["site"]["background"]["color"] + title = ytmnd_info["site"]["description"] + placement = ytmnd_info["site"]["foreground"]["placement"] + + original_gif = ytmnd_info["site"]["foreground"]["url"] + gif_type = original_gif.split(".")[-1] + wav_type = ytmnd_info["site"]["sound"]["type"] + + if "alternates" in ytmnd_info["site"]["sound"]: + key = list(ytmnd_info["site"]["sound"]["alternates"].keys())[0] + value = ytmnd_info["site"]["sound"]["alternates"][key] + if value["file_type"] != "swf": + original_wav = value["file_url"] + wav_type = ytmnd_info["site"]["sound"]["file_type"] + + with open(domain + ".html", "w", encoding="utf-8") as fn: + fn.write("\n") + fn.write("\n") + fn.write("
\n") + fn.write("\n") + fn.write( + "\n" + ) + fn.write("