diff --git a/youtube-dl b/youtube-dl index fba11fab03..b94fe4ce2d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1085,6 +1085,123 @@ class MetacafeIE(InfoExtractor): self._downloader.trouble(u'ERROR: format not available for video') +class DailymotionIE(InfoExtractor): + """Information Extractor for Dailymotion""" + + _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' + _DISCLAIMER = '' + _FILTER_POST = '' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(DailymotionIE._VALID_URL, url) is not None) + + def report_disclaimer(self): + """Report disclaimer retrieval.""" + self._downloader.to_stdout(u'[dailymotion] Retrieving disclaimer') + + def report_age_confirmation(self): + """Report attempt to confirm age.""" + self._downloader.to_stdout(u'[dailymotion] Confirming age') + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + # Retrieve disclaimer + request = urllib2.Request(self._DISCLAIMER, None, std_headers) + try: + self.report_disclaimer() + disclaimer = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) + return + + # Confirm age + disclaimer_form = { + 'filters': '0', + 'submit': "Continue - I'm over 18", + } + request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers) + try: + self.report_age_confirmation() + disclaimer = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + return + + def _real_extract(self, url): + # Extract id and simplified title from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + video_id = mobj.group(1) + + simple_title = mobj.group(2).decode('utf-8') + video_extension = 'flv' + + # Retrieve video webpage to extract further information + request = urllib2.Request(url) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + return + + # Extract URL, uploader and title from webpage + self.report_extraction(video_id) + mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)) + + # if needed add http://www.dailymotion.com/ if relative URL + + video_url = mediaURL + + # '' + mobj = re.search(r'(?im)