mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-15 13:43:04 +00:00
Compare commits
No commits in common. "61544381781d35276e1e7831456c653107ac8909" and "385f7f38957e21701593ff1229295bf4ca00eba0" have entirely different histories.
6154438178
...
385f7f3895
@ -146,8 +146,8 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
|
|||||||
* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead
|
* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead
|
||||||
* Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
|
* Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
|
||||||
* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
|
* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
|
||||||
* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
|
* `certifi` will be used for SSL root certificates, if installed. If you want to use only system certificates, use `--compat-options no-certifi`
|
||||||
* youtube-dl tries to remove some superfluous punctuations from filenames. While this can sometimes be helpfull, it is often undesirable. So yt-dlp tries to keep the fields in the filenames as close to their original values as possible. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
|
* youtube-dl tries to remove some superfluous punctuations from filenames. While this can sometimes be helpful, it is often undesirable. So yt-dlp tries to keep the fields in the filenames as close to their original values as possible. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
|
||||||
|
|
||||||
For ease of use, a few more compat options are available:
|
For ease of use, a few more compat options are available:
|
||||||
|
|
||||||
|
@ -2377,18 +2377,13 @@ class YoutubeDL:
|
|||||||
self.report_warning('"duration" field is negative, there is an error in extractor')
|
self.report_warning('"duration" field is negative, there is an error in extractor')
|
||||||
|
|
||||||
chapters = info_dict.get('chapters') or []
|
chapters = info_dict.get('chapters') or []
|
||||||
if chapters and chapters[0].get('start_time'):
|
|
||||||
chapters.insert(0, {'start_time': 0})
|
|
||||||
|
|
||||||
dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
|
dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
|
||||||
for idx, (prev, current, next_) in enumerate(zip(
|
for prev, current, next_ in zip(
|
||||||
(dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
|
(dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)):
|
||||||
if current.get('start_time') is None:
|
if current.get('start_time') is None:
|
||||||
current['start_time'] = prev.get('end_time')
|
current['start_time'] = prev.get('end_time')
|
||||||
if not current.get('end_time'):
|
if not current.get('end_time'):
|
||||||
current['end_time'] = next_.get('start_time')
|
current['end_time'] = next_.get('start_time')
|
||||||
if not current.get('title'):
|
|
||||||
current['title'] = f'<Untitled Chapter {idx}>'
|
|
||||||
|
|
||||||
if 'playlist' not in info_dict:
|
if 'playlist' not in info_dict:
|
||||||
# It isn't part of a playlist
|
# It isn't part of a playlist
|
||||||
|
@ -111,6 +111,7 @@ from ..compat import compat_etree_fromstring
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
KNOWN_EXTENSIONS,
|
KNOWN_EXTENSIONS,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
HEADRequest,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
dict_get,
|
dict_get,
|
||||||
@ -123,6 +124,7 @@ from ..utils import (
|
|||||||
orderedSet,
|
orderedSet,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
|
sanitized_Request,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_call,
|
try_call,
|
||||||
@ -2805,30 +2807,49 @@ class GenericIE(InfoExtractor):
|
|||||||
else:
|
else:
|
||||||
video_id = self._generic_id(url)
|
video_id = self._generic_id(url)
|
||||||
|
|
||||||
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
self.to_screen('%s: Requesting header' % video_id)
|
||||||
# making it impossible to download only a chunk of the file (yet we need only 512kB to
|
|
||||||
# test whether it's HTML or not). According to yt-dlp default Accept-Encoding
|
head_req = HEADRequest(url)
|
||||||
# that will always result in downloading the whole file that is not desirable.
|
head_response = self._request_webpage(
|
||||||
# Therefore for extraction pass we have to override Accept-Encoding to any in order
|
head_req, video_id,
|
||||||
# to accept raw bytes and be able to download only a chunk.
|
note=False, errnote='Could not send HEAD request to %s' % url,
|
||||||
# It would probably be better to solve this by checking Content-Type for application/octet-stream
|
fatal=False)
|
||||||
# after a HEAD request, but not sure if we can rely on this.
|
|
||||||
full_response = self._request_webpage(url, video_id, headers={'Accept-Encoding': '*'})
|
if head_response is not False:
|
||||||
new_url = full_response.geturl()
|
# Check for redirect
|
||||||
if url != new_url:
|
new_url = head_response.geturl()
|
||||||
self.report_following_redirect(new_url)
|
if url != new_url:
|
||||||
if force_videoid:
|
self.report_following_redirect(new_url)
|
||||||
new_url = smuggle_url(new_url, {'force_videoid': force_videoid})
|
if force_videoid:
|
||||||
return self.url_result(new_url)
|
new_url = smuggle_url(
|
||||||
|
new_url, {'force_videoid': force_videoid})
|
||||||
|
return self.url_result(new_url)
|
||||||
|
|
||||||
|
def request_webpage():
|
||||||
|
request = sanitized_Request(url)
|
||||||
|
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||||
|
# making it impossible to download only a chunk of the file (yet we need only 512kB to
|
||||||
|
# test whether it's HTML or not). According to yt-dlp default Accept-Encoding
|
||||||
|
# that will always result in downloading the whole file that is not desirable.
|
||||||
|
# Therefore for extraction pass we have to override Accept-Encoding to any in order
|
||||||
|
# to accept raw bytes and be able to download only a chunk.
|
||||||
|
# It would probably be better to solve this by checking Content-Type for application/octet-stream
|
||||||
|
# after HEAD request finishes, but not sure if we can rely on this.
|
||||||
|
request.add_header('Accept-Encoding', '*')
|
||||||
|
return self._request_webpage(request, video_id)
|
||||||
|
|
||||||
|
full_response = None
|
||||||
|
if head_response is False:
|
||||||
|
head_response = full_response = request_webpage()
|
||||||
|
|
||||||
info_dict = {
|
info_dict = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._generic_title(url),
|
'title': self._generic_title(url),
|
||||||
'timestamp': unified_timestamp(full_response.headers.get('Last-Modified'))
|
'timestamp': unified_timestamp(head_response.headers.get('Last-Modified'))
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check for direct link to a video
|
# Check for direct link to a video
|
||||||
content_type = full_response.headers.get('Content-Type', '').lower()
|
content_type = head_response.headers.get('Content-Type', '').lower()
|
||||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||||
if m:
|
if m:
|
||||||
self.report_detected('direct video link')
|
self.report_detected('direct video link')
|
||||||
@ -2857,6 +2878,7 @@ class GenericIE(InfoExtractor):
|
|||||||
self.report_warning(
|
self.report_warning(
|
||||||
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
|
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
|
||||||
|
|
||||||
|
full_response = full_response or request_webpage()
|
||||||
first_bytes = full_response.read(512)
|
first_bytes = full_response.read(512)
|
||||||
|
|
||||||
# Is it an M3U playlist?
|
# Is it an M3U playlist?
|
||||||
@ -4081,7 +4103,7 @@ class GenericIE(InfoExtractor):
|
|||||||
webpage)
|
webpage)
|
||||||
if not found:
|
if not found:
|
||||||
# Look also in Refresh HTTP header
|
# Look also in Refresh HTTP header
|
||||||
refresh_header = full_response.headers.get('Refresh')
|
refresh_header = head_response.headers.get('Refresh')
|
||||||
if refresh_header:
|
if refresh_header:
|
||||||
found = re.search(REDIRECT_REGEX, refresh_header)
|
found = re.search(REDIRECT_REGEX, refresh_header)
|
||||||
if found:
|
if found:
|
||||||
|
@ -2764,15 +2764,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if not strict:
|
if not strict:
|
||||||
chapter_list.sort(key=lambda c: c['start_time'] or 0)
|
chapter_list.sort(key=lambda c: c['start_time'] or 0)
|
||||||
|
|
||||||
chapters = [{'start_time': 0}]
|
chapters = [{'start_time': 0, 'title': '<Untitled>'}]
|
||||||
for idx, chapter in enumerate(chapter_list):
|
for idx, chapter in enumerate(chapter_list):
|
||||||
if chapter['start_time'] is None:
|
if chapter['start_time'] is None or not chapter['title']:
|
||||||
self.report_warning(f'Incomplete chapter {idx}')
|
self.report_warning(f'Incomplete chapter {idx}')
|
||||||
elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
|
elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
|
||||||
|
chapters[-1]['end_time'] = chapter['start_time']
|
||||||
chapters.append(chapter)
|
chapters.append(chapter)
|
||||||
else:
|
else:
|
||||||
self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
|
self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
|
||||||
return chapters[1:]
|
chapters[-1]['end_time'] = duration
|
||||||
|
return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]
|
||||||
|
|
||||||
def _extract_comment(self, comment_renderer, parent=None):
|
def _extract_comment(self, comment_renderer, parent=None):
|
||||||
comment_id = comment_renderer.get('commentId')
|
comment_id = comment_renderer.get('commentId')
|
||||||
|
@ -950,18 +950,17 @@ def make_HTTPS_handler(params, **kwargs):
|
|||||||
if opts_check_certificate:
|
if opts_check_certificate:
|
||||||
if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
|
if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
|
||||||
context.load_verify_locations(cafile=certifi.where())
|
context.load_verify_locations(cafile=certifi.where())
|
||||||
else:
|
try:
|
||||||
try:
|
context.load_default_certs()
|
||||||
context.load_default_certs()
|
# Work around the issue in load_default_certs when there are bad certificates. See:
|
||||||
# Work around the issue in load_default_certs when there are bad certificates. See:
|
# https://github.com/yt-dlp/yt-dlp/issues/1060,
|
||||||
# https://github.com/yt-dlp/yt-dlp/issues/1060,
|
# https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
|
||||||
# https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
|
except ssl.SSLError:
|
||||||
except ssl.SSLError:
|
# enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
|
||||||
# enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
|
if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
|
||||||
if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
|
for storename in ('CA', 'ROOT'):
|
||||||
for storename in ('CA', 'ROOT'):
|
_ssl_load_windows_store_certs(context, storename)
|
||||||
_ssl_load_windows_store_certs(context, storename)
|
context.set_default_verify_paths()
|
||||||
context.set_default_verify_paths()
|
|
||||||
|
|
||||||
client_certfile = params.get('client_certificate')
|
client_certfile = params.get('client_certificate')
|
||||||
if client_certfile:
|
if client_certfile:
|
||||||
|
Loading…
Reference in New Issue
Block a user