Reorganize request code to make it a bit more robust

This commit is contained in:
Ricardo Garcia 2010-07-27 20:11:06 +02:00
parent f95f29fd25
commit 101e0d1e91

View File

@ -287,16 +287,6 @@ class FileDownloader(object):
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
return long(round(number * multiplier)) return long(round(number * multiplier))
@staticmethod
def verify_url(url):
    """Verify a URL is valid and data could be downloaded. Return real data URL.

    A request for `url` is opened with the module-level `std_headers`, a
    single byte is read from the response, and the URL reported by the
    response object's geturl() is returned.

    Errors raised while opening or reading the URL are not caught here;
    they propagate to the caller.
    """
    request = urllib2.Request(url, None, std_headers)
    data = urllib2.urlopen(request)
    try:
        # Read a single byte to check that data can actually be retrieved,
        # not merely that the connection could be opened.
        data.read(1)
        return data.geturl()
    finally:
        # Always release the connection, even when read()/geturl() fails.
        data.close()
def add_info_extractor(self, ie): def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list.""" """Add an InfoExtractor object to the end of the list."""
self._ies.append(ie) self._ies.append(ie)
@ -396,13 +386,6 @@ class FileDownloader(object):
"""Process a single dictionary returned by an InfoExtractor.""" """Process a single dictionary returned by an InfoExtractor."""
# Do nothing else if in simulate mode # Do nothing else if in simulate mode
if self.params.get('simulate', False): if self.params.get('simulate', False):
# Verify URL if it's an HTTP one
if info_dict['url'].startswith('http'):
try:
self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
raise UnavailableVideoError
# Forced printings # Forced printings
if self.params.get('forcetitle', False): if self.params.get('forcetitle', False):
print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
@ -539,32 +522,43 @@ class FileDownloader(object):
count = 0 count = 0
retries = self.params.get('retries', 0) retries = self.params.get('retries', 0)
while True: while count <= retries:
# Establish connection # Establish connection
try: try:
data = urllib2.urlopen(request) data = urllib2.urlopen(request)
break break
except (urllib2.HTTPError, ), err: except (urllib2.HTTPError, ), err:
if err.code == 503: if err.code != 503 and err.code != 416:
# Retry in case of HTTP error 503 # Unexpected HTTP error
count += 1
if count <= retries:
self.report_retry(count, retries)
continue
if err.code != 416: # 416 is 'Requested range not satisfiable'
raise raise
# Unable to resume elif err.code == 416:
data = urllib2.urlopen(basic_request) # Unable to resume (requested range not satisfiable)
content_length = data.info()['Content-Length'] try:
# Open the connection again without the range header
data = urllib2.urlopen(basic_request)
content_length = data.info()['Content-Length']
except (urllib2.HTTPError, ), err:
if err.code != 503:
raise
else:
# Examine the reported length
if content_length is not None and long(content_length) == resume_len:
# The file had already been fully downloaded
self.report_file_already_downloaded(filename)
return True
else:
# The length does not match, we start the download over
self.report_unable_to_resume()
open_mode = 'wb'
break
# Retry
count += 1
if count <= retries:
self.report_retry(count, retries)
if content_length is not None and long(content_length) == resume_len: if count > retries:
# Because the file had already been fully downloaded self.trouble(u'ERROR: giving up after %s retries' % retries)
self.report_file_already_downloaded(filename) return False
return True
else:
# Because the server didn't let us
self.report_unable_to_resume()
open_mode = 'wb'
data_len = data.info().get('Content-length', None) data_len = data.info().get('Content-length', None)
data_len_str = self.format_bytes(data_len) data_len_str = self.format_bytes(data_len)