#!/usr/bin/python import sys import os import os.path import re import time import urllib2 import simplejson from optparse import OptionParser class YTMND: def __init__ (self): self.user_mode = False self.media_only = False self.html_only = False self.json_only = False self.no_web_audio = False self.print_json = False self.sleep = 5 # Scrapes sites from the profile page, then fetches them def fetch_user(self, user): if user == "": print("expecting one ytmnd name, got "+str(sys.argv)) return ytmnd_name = user ytmnd_html = urllib2.urlopen("http://ytmnd.com/users/" + ytmnd_name + "/sites").readlines() domains = [] for line in ytmnd_html: if 'profile_link' in line: expr = r"site_link\" href=\"http://(\S+).ytmn(d|sfw)?.com\"" domain = re.search(expr,line).group(1) domains.append(domain) if self.json_only: if self.media_only: os.system("mkdir -p %s" % user) os.chdir(user) parsed = [] for domain in domains: parsed.append( self.fetch_ytmnd( domain ) ) if self.media_only: os.chdir("..") self.write_json(ytmnd_name, parsed) else: print ">> found %d domains" % len( domains ) os.system("mkdir -p %s" % user) os.chdir(user) if not self.no_web_audio: self.copy_ytmnd_js() for domain in domains: self.fetch_ytmnd( domain ) os.chdir("..") # Fetches a single subdomain def fetch_ytmnd(self, domain): if domain == "": print("expecting one ytmnd name, got "+str(sys.argv)) return if not self.print_json: print "fetching %s" % domain if not self.sleep: time.sleep(self.sleep) ytmnd_name = domain ytmnd_html = urllib2.urlopen("http://" + domain + ".ytmnd.com").read() expr = r"ytmnd.site_id = (\d+);" ytmnd_id = re.search(expr,ytmnd_html).group(1) ytmnd_info = simplejson.load(urllib2.urlopen("http://" + domain + ".ytmnd.com/info/" + ytmnd_id + "/json")) if self.print_json: print simplejson.dumps(ytmnd_info, sort_keys=True, indent=4 * ' ') elif self.json_only: if self.media_only: self.fetch_media(ytmnd_info) return self.parse_json(ytmnd_info) elif self.media_only: self.fetch_media(ytmnd_info) elif self.html_only: self.write_index(ytmnd_info) else: self.fetch_media(ytmnd_info) self.write_index(ytmnd_info) return ytmnd_info # Fetches the gif and mp3 for a post def fetch_media(self, ytmnd_info): domain = ytmnd_info['site']['domain'] original_gif = ytmnd_info['site']['foreground']['url'] gif_type = original_gif.split(".")[-1] original_wav = ytmnd_info['site']['sound']['url'] wav_type = ytmnd_info['site']['sound']['type'] if 'alternates' in ytmnd_info['site']['sound']: key = ytmnd_info['site']['sound']['alternates'].keys()[0] value = ytmnd_info['site']['sound']['alternates'][key] if value['file_type'] != 'swf': original_wav = value['file_url'] wav_type = ytmnd_info['site']['sound']['file_type'] os.system("wget --quiet -O %s %s" % (domain + "." + gif_type, original_gif)) os.system("wget --quiet -O %s %s" % (domain + "." + wav_type, original_wav)) # Writes an html file emulating the ytmnd format def write_index(self, ytmnd_info): # print simplejson.dumps(ytmnd_info) domain = ytmnd_info['site']['domain'] bgcolor = ytmnd_info['site']['background']['color'] title = ytmnd_info['site']['description'] placement = ytmnd_info['site']['foreground']['placement'] original_gif = ytmnd_info['site']['foreground']['url'] gif_type = original_gif.split(".")[-1] wav_type = ytmnd_info['site']['sound']['type'] if 'alternates' in ytmnd_info['site']['sound']: key = ytmnd_info['site']['sound']['alternates'].keys()[0] value = ytmnd_info['site']['sound']['alternates'][key] if value['file_type'] != 'swf': original_wav = value['file_url'] wav_type = ytmnd_info['site']['sound']['file_type'] fn = open(domain + ".html", 'w') fn.write("\n") fn.write("
\n") fn.write("