Compare commits
10 Commits
2646007c2a
...
6f0866df97
| Author | SHA1 | Date | |
|---|---|---|---|
| 6f0866df97 | |||
| 90583e7a93 | |||
| 57a3b80a33 | |||
| 740db5ce15 | |||
|
|
acae8c71fc | ||
|
|
56fcf1b959 | ||
|
|
740993b19a | ||
|
|
bb52003118 | ||
|
|
6d3d945398 | ||
|
|
ee86ef7130 |
10
.gitignore
vendored
10
.gitignore
vendored
@@ -1,10 +0,0 @@
|
|||||||
.DS_Store
|
|
||||||
*~
|
|
||||||
typogra
|
|
||||||
klasky
|
|
||||||
alas
|
|
||||||
ROY4L
|
|
||||||
directory
|
|
||||||
CarlWinslows
|
|
||||||
Coach
|
|
||||||
|
|
||||||
29
LICENSE
29
LICENSE
@@ -1,29 +0,0 @@
|
|||||||
Jollo LNT license
|
|
||||||
Version 1 - February 2015
|
|
||||||
|
|
||||||
Copyright, 2015. JOLLO NET NA.
|
|
||||||
The Jollo IRC Network. <https://jollo.org/>
|
|
||||||
|
|
||||||
Vu, fare wanderer, confronted with raw, programmatic instruction
|
|
||||||
dans la forme la plus pure. A hesitation, troubled to the terms
|
|
||||||
qui ce license affirme. Par un voyage du explorer le mechanisme
|
|
||||||
et ponder la fabrication. Voila! La remide: egress sans risque.
|
|
||||||
|
|
||||||
Sans trace (Leave No Trace) via sept principales:
|
|
||||||
|
|
||||||
0. Modifique language en advance. L'Apposer Jollo LNT license
|
|
||||||
with copies en distribuer.
|
|
||||||
|
|
||||||
1. Non responsible pour neglige programme du problematique.
|
|
||||||
|
|
||||||
2. Non sympathie pour neglige programme du problematique.
|
|
||||||
|
|
||||||
3. Non permission l'modifique under any circumstance.
|
|
||||||
|
|
||||||
4. Non permission distribution under any circumstance.
|
|
||||||
|
|
||||||
5. Respect les programmatic instructions.
|
|
||||||
|
|
||||||
6. Non interfere avec l'harmonie d'une amitie.
|
|
||||||
|
|
||||||
|
|
||||||
16
README.md
16
README.md
@@ -1,18 +1,18 @@
|
|||||||
ytmnd
|
ytmndd - ytmnd downloader
|
||||||
=====
|
=====
|
||||||
|
|
||||||
ytmnd scraper.
|
An updated ytmnd scraper based on [https://github.com/julescarbon/ytmnd](https://github.com/julescarbon/ytmnd)
|
||||||
|
|
||||||
`./ytmnd.py -u [username]`
|
`./ytmndd.py -u [username]`
|
||||||
|
|
||||||
`./ytmnd.py [domain]`
|
`./ytmndd.py [domain]`
|
||||||
|
|
||||||
serving
|
serving
|
||||||
-------
|
-------
|
||||||
|
|
||||||
this scraper will download the gif and mp3 from a ytmnd and write a file embedding these things in addition to zoom text (if any).
|
this scraper will download the gif and mp3 from a ytmnd and write a file embedding these things in addition to zoom text (if any).
|
||||||
|
|
||||||
the html files use the web audio api in an attempt to get seamless looping (oddly complicated). since they download binary data, they cannot be loaded from a `file://` url.. to view these files, put them online. alternatively, run `python -m SimpleHTTPServer 8000` from the directory and navigate to e.g. http://lvh.me:8000/
|
The downloaded files cannot be loaded from a `file://` URL. To view these files, put them online or run a local server. For example, run `python -m http.server` from the directory and go to [http://localhost:8000/](http://localhost:8000/).
|
||||||
|
|
||||||
options
|
options
|
||||||
-------
|
-------
|
||||||
@@ -25,9 +25,3 @@ options
|
|||||||
| `--json-only` | writes simplified json to a file |
|
| `--json-only` | writes simplified json to a file |
|
||||||
| `--no-web-audio` | uses the `<audio>` tag instead of web audio |
|
| `--no-web-audio` | uses the `<audio>` tag instead of web audio |
|
||||||
| `--print-json` | dumps raw json from ytmnd to stdout |
|
| `--print-json` | dumps raw json from ytmnd to stdout |
|
||||||
|
|
||||||
license
|
|
||||||
-------
|
|
||||||
|
|
||||||
_This software made available under the Jollo LNT License (Leave no trace)_
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,2 +1 @@
|
|||||||
httplib2==0.9
|
requests>=2.31.0
|
||||||
simplejson==3.4.0
|
|
||||||
|
|||||||
61
ytmnd.js
61
ytmnd.js
@@ -1,25 +1,42 @@
|
|||||||
(function () {
|
(function () {
|
||||||
var hasWebKit = ('webkitAudioContext' in window) && !('chrome' in window)
|
var AudioContext = window.AudioContext || window.webkitAudioContext;
|
||||||
var context = new webkitAudioContext()
|
var context = new AudioContext();
|
||||||
var request = new XMLHttpRequest()
|
var request = new XMLHttpRequest();
|
||||||
var source
|
var source;
|
||||||
request.open('GET', url, true)
|
var buffer;
|
||||||
request.responseType = 'arraybuffer'
|
|
||||||
|
request.open("GET", url, true);
|
||||||
|
request.responseType = "arraybuffer";
|
||||||
|
|
||||||
request.onload = function () {
|
request.onload = function () {
|
||||||
context.decodeAudioData(request.response, function(response) {
|
context.decodeAudioData(
|
||||||
(function loop(){
|
request.response,
|
||||||
if (source) {
|
function (response) {
|
||||||
source.start(0)
|
buffer = response;
|
||||||
setTimeout(loop, source.buffer.duration * 1000 - (source.buffer.duration < 2 ? 0 : 60) )
|
|
||||||
|
function playBuffer() {
|
||||||
|
source = context.createBufferSource();
|
||||||
|
source.buffer = buffer;
|
||||||
|
source.connect(context.destination);
|
||||||
|
|
||||||
|
source.onended = function () {
|
||||||
|
playBuffer();
|
||||||
|
};
|
||||||
|
|
||||||
|
source.start(0);
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
setTimeout(loop, 0)
|
playBuffer();
|
||||||
}
|
},
|
||||||
source = context.createBufferSource()
|
function (error) {
|
||||||
source.connect(context.destination)
|
console.error("Audio decoding failed:", error);
|
||||||
source.buffer = response
|
},
|
||||||
})()
|
);
|
||||||
}, function () { console.error('The request failed.') } )
|
};
|
||||||
}
|
|
||||||
request.send()
|
request.onerror = function () {
|
||||||
})()
|
console.error("Failed to load audio file");
|
||||||
|
};
|
||||||
|
|
||||||
|
request.send();
|
||||||
|
})();
|
||||||
|
|||||||
@@ -1,13 +1,15 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import urllib2
|
import json
|
||||||
import simplejson
|
import subprocess
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
|
import requests
|
||||||
|
from requests.exceptions import RequestException
|
||||||
|
|
||||||
class YTMND:
|
class YTMND:
|
||||||
|
|
||||||
@@ -20,37 +22,47 @@ class YTMND:
|
|||||||
self.print_json = False
|
self.print_json = False
|
||||||
self.sleep = 5
|
self.sleep = 5
|
||||||
|
|
||||||
# Scrapes sites from the profile page, then fetches them
|
|
||||||
def fetch_user(self, user):
|
def fetch_user(self, user):
|
||||||
if user == "":
|
if user == "":
|
||||||
print("expecting one ytmnd name, got " + str(sys.argv))
|
print("expecting one ytmnd name, got " + str(sys.argv))
|
||||||
return
|
return
|
||||||
|
|
||||||
ytmnd_name = user
|
ytmnd_name = user
|
||||||
ytmnd_html = urllib2.urlopen("http://ytmnd.com/users/" + ytmnd_name + "/sites").readlines()
|
try:
|
||||||
|
response = requests.get("http://ytmnd.com/users/" + ytmnd_name + "/sites",
|
||||||
|
headers={'User-Agent': 'Mozilla/5.0'})
|
||||||
|
response.raise_for_status()
|
||||||
|
ytmnd_html = response.text.splitlines()
|
||||||
|
except RequestException as e:
|
||||||
|
print(f"Error fetching user page: {e}")
|
||||||
|
return
|
||||||
|
|
||||||
domains = []
|
domains = []
|
||||||
|
|
||||||
for line in ytmnd_html:
|
for line in ytmnd_html:
|
||||||
if 'profile_link' in line:
|
if 'profile_link' in line:
|
||||||
expr = r"site_link\" href=\"http://(\S+).ytmn(d|sfw)?.com\""
|
expr = r"site_link\" href=\"http://(\S+).ytmn(d|sfw)?.com\""
|
||||||
domain = re.search(expr,line).group(1)
|
match = re.search(expr, line)
|
||||||
|
if match:
|
||||||
|
domain = match.group(1)
|
||||||
domains.append(domain)
|
domains.append(domain)
|
||||||
|
|
||||||
if self.json_only:
|
if self.json_only:
|
||||||
if self.media_only:
|
if self.media_only:
|
||||||
os.system("mkdir -p %s" % user)
|
os.makedirs(user, exist_ok=True)
|
||||||
os.chdir(user)
|
os.chdir(user)
|
||||||
parsed = []
|
parsed = []
|
||||||
for domain in domains:
|
for domain in domains:
|
||||||
parsed.append( self.fetch_ytmnd( domain ) )
|
result = self.fetch_ytmnd(domain)
|
||||||
|
if result:
|
||||||
|
parsed.append(result)
|
||||||
if self.media_only:
|
if self.media_only:
|
||||||
os.chdir("..")
|
os.chdir("..")
|
||||||
self.write_json(ytmnd_name, parsed)
|
self.write_json(ytmnd_name, parsed)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
print ">> found %d domains" % len( domains )
|
print(">> found %d domains" % len(domains))
|
||||||
os.system("mkdir -p %s" % user)
|
os.makedirs(user, exist_ok=True)
|
||||||
os.chdir(user)
|
os.chdir(user)
|
||||||
if not self.no_web_audio:
|
if not self.no_web_audio:
|
||||||
self.copy_ytmnd_js()
|
self.copy_ytmnd_js()
|
||||||
@@ -58,26 +70,42 @@ class YTMND:
|
|||||||
self.fetch_ytmnd(domain)
|
self.fetch_ytmnd(domain)
|
||||||
os.chdir("..")
|
os.chdir("..")
|
||||||
|
|
||||||
# Fetches a single subdomain
|
|
||||||
def fetch_ytmnd(self, domain):
|
def fetch_ytmnd(self, domain):
|
||||||
|
|
||||||
if domain == "":
|
if domain == "":
|
||||||
print("expecting one ytmnd name, got " + str(sys.argv))
|
print("expecting one ytmnd name, got " + str(sys.argv))
|
||||||
return
|
return None
|
||||||
|
|
||||||
if not self.print_json:
|
if not self.print_json:
|
||||||
print "fetching %s" % domain
|
print("fetching %s" % domain)
|
||||||
if not self.sleep:
|
if self.sleep:
|
||||||
time.sleep(self.sleep)
|
time.sleep(self.sleep)
|
||||||
|
|
||||||
ytmnd_name = domain
|
ytmnd_name = domain
|
||||||
ytmnd_html = urllib2.urlopen("http://" + domain + ".ytmnd.com").read()
|
try:
|
||||||
|
response = requests.get("http://" + domain + ".ytmnd.com",
|
||||||
|
headers={'User-Agent': 'Mozilla/5.0'})
|
||||||
|
response.raise_for_status()
|
||||||
|
ytmnd_html = response.text
|
||||||
|
|
||||||
expr = r"ytmnd.site_id = (\d+);"
|
expr = r"ytmnd.site_id = (\d+);"
|
||||||
ytmnd_id = re.search(expr,ytmnd_html).group(1)
|
match = re.search(expr, ytmnd_html)
|
||||||
ytmnd_info = simplejson.load(urllib2.urlopen("http://" + domain + ".ytmnd.com/info/" + ytmnd_id + "/json"))
|
if not match:
|
||||||
|
print(f"Could not find site_id for {domain}")
|
||||||
|
return None
|
||||||
|
ytmnd_id = match.group(1)
|
||||||
|
|
||||||
|
response = requests.get("http://" + domain + ".ytmnd.com/info/" + ytmnd_id + "/json",
|
||||||
|
headers={'User-Agent': 'Mozilla/5.0'})
|
||||||
|
response.raise_for_status()
|
||||||
|
ytmnd_info = response.json()
|
||||||
|
|
||||||
|
except RequestException as e:
|
||||||
|
print(f"Error fetching {domain}: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
if self.print_json:
|
if self.print_json:
|
||||||
print simplejson.dumps(ytmnd_info, sort_keys=True, indent=4 * ' ')
|
print(json.dumps(ytmnd_info, sort_keys=True, indent=4))
|
||||||
elif self.json_only:
|
elif self.json_only:
|
||||||
if self.media_only:
|
if self.media_only:
|
||||||
self.fetch_media(ytmnd_info)
|
self.fetch_media(ytmnd_info)
|
||||||
@@ -92,7 +120,6 @@ class YTMND:
|
|||||||
|
|
||||||
return ytmnd_info
|
return ytmnd_info
|
||||||
|
|
||||||
# Fetches the gif and mp3 for a post
|
|
||||||
def fetch_media(self, ytmnd_info):
|
def fetch_media(self, ytmnd_info):
|
||||||
domain = ytmnd_info['site']['domain']
|
domain = ytmnd_info['site']['domain']
|
||||||
original_gif = ytmnd_info['site']['foreground']['url']
|
original_gif = ytmnd_info['site']['foreground']['url']
|
||||||
@@ -101,19 +128,17 @@ class YTMND:
|
|||||||
wav_type = ytmnd_info['site']['sound']['type']
|
wav_type = ytmnd_info['site']['sound']['type']
|
||||||
|
|
||||||
if 'alternates' in ytmnd_info['site']['sound']:
|
if 'alternates' in ytmnd_info['site']['sound']:
|
||||||
key = ytmnd_info['site']['sound']['alternates'].keys()[0]
|
key = list(ytmnd_info['site']['sound']['alternates'].keys())[0]
|
||||||
value = ytmnd_info['site']['sound']['alternates'][key]
|
value = ytmnd_info['site']['sound']['alternates'][key]
|
||||||
if value['file_type'] != 'swf':
|
if value['file_type'] != 'swf':
|
||||||
original_wav = value['file_url']
|
original_wav = value['file_url']
|
||||||
wav_type = ytmnd_info['site']['sound']['file_type']
|
wav_type = ytmnd_info['site']['sound']['file_type']
|
||||||
|
|
||||||
os.system("wget --quiet -O %s %s" % (domain + "." + gif_type, original_gif))
|
subprocess.run(["wget", "--quiet", "-O", f"{domain}.{gif_type}", original_gif])
|
||||||
os.system("wget --quiet -O %s %s" % (domain + "." + wav_type, original_wav))
|
subprocess.run(["wget", "--quiet", "-O", f"{domain}.{wav_type}", original_wav])
|
||||||
|
|
||||||
# Writes an html file emulating the ytmnd format
|
|
||||||
def write_index(self, ytmnd_info):
|
def write_index(self, ytmnd_info):
|
||||||
|
|
||||||
# print simplejson.dumps(ytmnd_info)
|
|
||||||
domain = ytmnd_info['site']['domain']
|
domain = ytmnd_info['site']['domain']
|
||||||
bgcolor = ytmnd_info['site']['background']['color']
|
bgcolor = ytmnd_info['site']['background']['color']
|
||||||
title = ytmnd_info['site']['description']
|
title = ytmnd_info['site']['description']
|
||||||
@@ -124,13 +149,13 @@ class YTMND:
|
|||||||
wav_type = ytmnd_info['site']['sound']['type']
|
wav_type = ytmnd_info['site']['sound']['type']
|
||||||
|
|
||||||
if 'alternates' in ytmnd_info['site']['sound']:
|
if 'alternates' in ytmnd_info['site']['sound']:
|
||||||
key = ytmnd_info['site']['sound']['alternates'].keys()[0]
|
key = list(ytmnd_info['site']['sound']['alternates'].keys())[0]
|
||||||
value = ytmnd_info['site']['sound']['alternates'][key]
|
value = ytmnd_info['site']['sound']['alternates'][key]
|
||||||
if value['file_type'] != 'swf':
|
if value['file_type'] != 'swf':
|
||||||
original_wav = value['file_url']
|
original_wav = value['file_url']
|
||||||
wav_type = ytmnd_info['site']['sound']['file_type']
|
wav_type = ytmnd_info['site']['sound']['file_type']
|
||||||
|
|
||||||
fn = open(domain + ".html", 'w')
|
with open(domain + ".html", 'w', encoding='utf-8') as fn:
|
||||||
fn.write("<html>\n")
|
fn.write("<html>\n")
|
||||||
fn.write("<head>\n")
|
fn.write("<head>\n")
|
||||||
fn.write("<title>%s</title>\n" % title)
|
fn.write("<title>%s</title>\n" % title)
|
||||||
@@ -160,13 +185,10 @@ class YTMND:
|
|||||||
fn.write("<script>var url = '%s.%s'</script>\n" % (domain, wav_type))
|
fn.write("<script>var url = '%s.%s'</script>\n" % (domain, wav_type))
|
||||||
fn.write("<script src='ytmnd.js'></script>\n")
|
fn.write("<script src='ytmnd.js'></script>\n")
|
||||||
fn.write("<script type='application/json'>\n")
|
fn.write("<script type='application/json'>\n")
|
||||||
fn.write(simplejson.dumps(ytmnd_info, sort_keys=True, indent=4 * ' ') + "\n")
|
fn.write(json.dumps(ytmnd_info, sort_keys=True, indent=4) + "\n")
|
||||||
fn.write("</script>\n")
|
fn.write("</script>\n")
|
||||||
fn.write("</html>")
|
fn.write("</html>")
|
||||||
|
|
||||||
fn.close()
|
|
||||||
|
|
||||||
# print out the zoom text
|
|
||||||
def write_zoom_text(self, fn, ytmnd_info):
|
def write_zoom_text(self, fn, ytmnd_info):
|
||||||
if 'zoom_text' not in ytmnd_info['site']:
|
if 'zoom_text' not in ytmnd_info['site']:
|
||||||
return
|
return
|
||||||
@@ -177,24 +199,23 @@ class YTMND:
|
|||||||
|
|
||||||
offset = 100
|
offset = 100
|
||||||
if "line_3" in zoom_text and len(zoom_text["line_3"]) > 0:
|
if "line_3" in zoom_text and len(zoom_text["line_3"]) > 0:
|
||||||
self.write_zoom_layers( fn, zoom_text['line_3'], offset, 500 )
|
self.write_zoom_layers(fn, zoom_text['line_3'], offset, 269)
|
||||||
offset += 50
|
offset += 21
|
||||||
if "line_2" in zoom_text and len(zoom_text["line_2"]) > 0:
|
if "line_2" in zoom_text and len(zoom_text["line_2"]) > 0:
|
||||||
self.write_zoom_layers( fn, zoom_text['line_2'], offset, 250 )
|
self.write_zoom_layers(fn, zoom_text['line_2'], offset, 135)
|
||||||
offset += 50
|
offset += 21
|
||||||
if "line_1" in zoom_text and len(zoom_text["line_1"]) > 0:
|
if "line_1" in zoom_text and len(zoom_text["line_1"]) > 0:
|
||||||
self.write_zoom_layers( fn, zoom_text['line_1'], offset, 0 )
|
self.write_zoom_layers(fn, zoom_text['line_1'], offset, 1)
|
||||||
|
|
||||||
fn.write('</div>')
|
fn.write('</div>')
|
||||||
|
|
||||||
# print the layers of zoom text
|
|
||||||
def write_zoom_layers(self, fn, text, offset, top):
|
def write_zoom_layers(self, fn, text, offset, top):
|
||||||
for i in xrange(1, 51):
|
for i in range(1, 22):
|
||||||
z_index = offset + i
|
z_index = offset + i
|
||||||
row_left = i * 2
|
row_left = i * 2
|
||||||
row_top = top + i
|
row_top = top + i
|
||||||
font_size = i * 2
|
font_size = i * 2
|
||||||
if i == 50:
|
if i == 21:
|
||||||
color = 0
|
color = 0
|
||||||
else:
|
else:
|
||||||
color = i * 4
|
color = i * 4
|
||||||
@@ -202,12 +223,12 @@ class YTMND:
|
|||||||
fn.write("<div style='z-index: %d; left: %dpx; top: %dpx; color: rgb(%d, %d, %d); font-size: %dpt;'>%s</div>"
|
fn.write("<div style='z-index: %d; left: %dpx; top: %dpx; color: rgb(%d, %d, %d); font-size: %dpt;'>%s</div>"
|
||||||
% (z_index, row_left, row_top, color, color, color, font_size, text))
|
% (z_index, row_left, row_top, color, color, color, font_size, text))
|
||||||
|
|
||||||
# Copies the looping audio JS into place
|
|
||||||
def copy_ytmnd_js(self):
|
def copy_ytmnd_js(self):
|
||||||
if not os.path.isfile("ytmnd.js"):
|
if not os.path.isfile("ytmnd.js"):
|
||||||
os.system("cp ../ytmnd.js .")
|
parent_js = os.path.join("..", "ytmnd.js")
|
||||||
|
if os.path.isfile(parent_js):
|
||||||
|
subprocess.run(["cp", parent_js, "."])
|
||||||
|
|
||||||
# Parses data we need out of JSON
|
|
||||||
def parse_json(self, ytmnd_info):
|
def parse_json(self, ytmnd_info):
|
||||||
domain = ytmnd_info['site']['domain']
|
domain = ytmnd_info['site']['domain']
|
||||||
bgcolor = ytmnd_info['site']['background']['color']
|
bgcolor = ytmnd_info['site']['background']['color']
|
||||||
@@ -227,7 +248,7 @@ class YTMND:
|
|||||||
zoom_text = ""
|
zoom_text = ""
|
||||||
|
|
||||||
if 'alternates' in ytmnd_info['site']['sound']:
|
if 'alternates' in ytmnd_info['site']['sound']:
|
||||||
key = ytmnd_info['site']['sound']['alternates'].keys()[0]
|
key = list(ytmnd_info['site']['sound']['alternates'].keys())[0]
|
||||||
value = ytmnd_info['site']['sound']['alternates'][key]
|
value = ytmnd_info['site']['sound']['alternates'][key]
|
||||||
if value['file_type'] != 'swf':
|
if value['file_type'] != 'swf':
|
||||||
wav_type = ytmnd_info['site']['sound']['file_type']
|
wav_type = ytmnd_info['site']['sound']['file_type']
|
||||||
@@ -250,11 +271,9 @@ class YTMND:
|
|||||||
|
|
||||||
return simplified_info
|
return simplified_info
|
||||||
|
|
||||||
# Writes site JSON to a file
|
|
||||||
def write_json(self, domain, data):
|
def write_json(self, domain, data):
|
||||||
fn = open(domain + '.json', 'w')
|
with open(domain + '.json', 'w', encoding='utf-8') as fn:
|
||||||
fn.write( simplejson.dumps(data) )
|
fn.write(json.dumps(data))
|
||||||
fn.close()
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
@@ -290,4 +309,3 @@ if __name__ == '__main__':
|
|||||||
else:
|
else:
|
||||||
name = args[0].replace("http://","").replace(".ytmnsfw.com","").replace(".ytmnd.com","").replace("/","")
|
name = args[0].replace("http://","").replace(".ytmnsfw.com","").replace(".ytmnd.com","").replace("/","")
|
||||||
ytmnd.fetch_ytmnd(name)
|
ytmnd.fetch_ytmnd(name)
|
||||||
|
|
||||||
Reference in New Issue
Block a user