Compare commits

..

No commits in common. "09b49e1f688831c3ad7181decf38c90f8451e6c4" and "77cc7c6e6093c7b16a96870098edec70f943c62c" have entirely different histories.

9 changed files with 62 additions and 54 deletions

View File

@ -951,7 +951,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi
(currently supported: srt|vtt|ass|lrc) (currently supported: srt|vtt|ass|lrc)
(Alias: --convert-subtitles) (Alias: --convert-subtitles)
--convert-thumbnails FORMAT Convert the thumbnails to another format --convert-thumbnails FORMAT Convert the thumbnails to another format
(currently supported: jpg|png|webp) (currently supported: jpg|png)
--split-chapters Split video into multiple files based on --split-chapters Split video into multiple files based on
internal chapters. The "chapter:" prefix internal chapters. The "chapter:" prefix
can be used with "--paths" and "--output" can be used with "--paths" and "--output"
@ -982,17 +982,15 @@ You can also fork the project on github and run your fork's [build workflow](.gi
semicolon ";" delimited list of NAME=VALUE. semicolon ";" delimited list of NAME=VALUE.
The "when" argument determines when the The "when" argument determines when the
postprocessor is invoked. It can be one of postprocessor is invoked. It can be one of
"pre_process" (after video extraction), "pre_process" (after extraction),
"after_filter" (after video passes filter), "before_dl" (before video download),
"before_dl" (before each video download), "post_process" (after video download;
"post_process" (after each video download; default), "after_move" (after moving file
default), "after_move" (after moving video to their final locations), "after_video"
file to it's final locations), (after downloading and processing all
"after_video" (after downloading and formats of a video), or "playlist" (end of
processing all formats of a video), or playlist). This option can be used multiple
"playlist" (at end of playlist). This times to add different postprocessors
option can be used multiple times to add
different postprocessors
## SponsorBlock Options: ## SponsorBlock Options:
Make chapter entries for, or remove various segments (sponsor, Make chapter entries for, or remove various segments (sponsor,
@ -1665,7 +1663,6 @@ The following extractors use this feature:
#### youtubetab (YouTube playlists, channels, feeds, etc.) #### youtubetab (YouTube playlists, channels, feeds, etc.)
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
* `approximate_date`: Extract approximate `upload_date` in flat-playlist. This may cause date-based filters to be slightly off
#### funimation #### funimation
* `language`: Languages to extract. Eg: `funimation:language=english,japanese` * `language`: Languages to extract. Eg: `funimation:language=english,japanese`

View File

@ -30,7 +30,9 @@ class YDL(FakeYDL):
self.msgs = [] self.msgs = []
def process_info(self, info_dict): def process_info(self, info_dict):
self.downloaded_info_dicts.append(info_dict.copy()) info_dict = info_dict.copy()
info_dict.pop('__original_infodict', None)
self.downloaded_info_dicts.append(info_dict)
def to_screen(self, msg): def to_screen(self, msg):
self.msgs.append(msg) self.msgs.append(msg)
@ -896,6 +898,20 @@ class TestYoutubeDL(unittest.TestCase):
os.unlink(filename) os.unlink(filename)
def test_match_filter(self): def test_match_filter(self):
class FilterYDL(YDL):
def __init__(self, *args, **kwargs):
super(FilterYDL, self).__init__(*args, **kwargs)
self.params['simulate'] = True
def process_info(self, info_dict):
super(YDL, self).process_info(info_dict)
def _match_entry(self, info_dict, incomplete=False):
res = super(FilterYDL, self)._match_entry(info_dict, incomplete)
if res is None:
self.downloaded_info_dicts.append(info_dict.copy())
return res
first = { first = {
'id': '1', 'id': '1',
'url': TEST_URL, 'url': TEST_URL,
@ -923,7 +939,7 @@ class TestYoutubeDL(unittest.TestCase):
videos = [first, second] videos = [first, second]
def get_videos(filter_=None): def get_videos(filter_=None):
ydl = YDL({'match_filter': filter_, 'simulate': True}) ydl = FilterYDL({'match_filter': filter_})
for v in videos: for v in videos:
ydl.process_ie_result(v, download=True) ydl.process_ie_result(v, download=True)
return [v['id'] for v in ydl.downloaded_info_dicts] return [v['id'] for v in ydl.downloaded_info_dicts]

View File

@ -1037,7 +1037,8 @@ class YoutubeDL(object):
@staticmethod @staticmethod
def _copy_infodict(info_dict): def _copy_infodict(info_dict):
info_dict = dict(info_dict) info_dict = dict(info_dict)
info_dict.pop('__postprocessors', None) for key in ('__original_infodict', '__postprocessors'):
info_dict.pop(key, None)
return info_dict return info_dict
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
@ -2511,6 +2512,8 @@ class YoutubeDL(object):
if '__x_forwarded_for_ip' in info_dict: if '__x_forwarded_for_ip' in info_dict:
del info_dict['__x_forwarded_for_ip'] del info_dict['__x_forwarded_for_ip']
# TODO Central sorting goes here
if self.params.get('check_formats') is True: if self.params.get('check_formats') is True:
formats = LazyList(self._check_formats(formats[::-1]), reverse=True) formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
@ -2523,12 +2526,6 @@ class YoutubeDL(object):
info_dict, _ = self.pre_process(info_dict) info_dict, _ = self.pre_process(info_dict)
if self._match_entry(info_dict) is not None:
return info_dict
self.post_extract(info_dict)
info_dict, _ = self.pre_process(info_dict, 'after_filter')
# The pre-processors may have modified the formats # The pre-processors may have modified the formats
formats = info_dict.get('formats', [info_dict]) formats = info_dict.get('formats', [info_dict])
@ -2613,12 +2610,15 @@ class YoutubeDL(object):
+ ', '.join([f['format_id'] for f in formats_to_download])) + ', '.join([f['format_id'] for f in formats_to_download]))
max_downloads_reached = False max_downloads_reached = False
for i, fmt in enumerate(formats_to_download): for i, fmt in enumerate(formats_to_download):
formats_to_download[i] = new_info = self._copy_infodict(info_dict) formats_to_download[i] = new_info = dict(info_dict)
# Save a reference to the original info_dict so that it can be modified in process_info if needed
new_info.update(fmt) new_info.update(fmt)
new_info['__original_infodict'] = info_dict
try: try:
self.process_info(new_info) self.process_info(new_info)
except MaxDownloadsReached: except MaxDownloadsReached:
max_downloads_reached = True max_downloads_reached = True
new_info.pop('__original_infodict')
# Remove copied info # Remove copied info
for key, val in tuple(new_info.items()): for key, val in tuple(new_info.items()):
if info_dict.get(key) == val: if info_dict.get(key) == val:
@ -2826,7 +2826,7 @@ class YoutubeDL(object):
return None return None
def process_info(self, info_dict): def process_info(self, info_dict):
"""Process a single resolved IE result. (Modifies it in-place)""" """Process a single resolved IE result. (Modified it in-place)"""
assert info_dict.get('_type', 'video') == 'video' assert info_dict.get('_type', 'video') == 'video'
original_infodict = info_dict original_infodict = info_dict
@ -2834,22 +2834,18 @@ class YoutubeDL(object):
if 'format' not in info_dict and 'ext' in info_dict: if 'format' not in info_dict and 'ext' in info_dict:
info_dict['format'] = info_dict['ext'] info_dict['format'] = info_dict['ext']
# This is mostly just for backward compatibility of process_info
# As a side-effect, this allows for format-specific filters
if self._match_entry(info_dict) is not None: if self._match_entry(info_dict) is not None:
info_dict['__write_download_archive'] = 'ignore' info_dict['__write_download_archive'] = 'ignore'
return return
# Does nothing under normal operation - for backward compatibility of process_info
self.post_extract(info_dict) self.post_extract(info_dict)
self._num_downloads += 1
# info_dict['_filename'] needs to be set for backward compatibility # info_dict['_filename'] needs to be set for backward compatibility
info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True) info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
temp_filename = self.prepare_filename(info_dict, 'temp') temp_filename = self.prepare_filename(info_dict, 'temp')
files_to_move = {} files_to_move = {}
self._num_downloads += 1
# Forced printings # Forced printings
self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict)) self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
@ -3263,14 +3259,17 @@ class YoutubeDL(object):
return info_dict return info_dict
info_dict.setdefault('epoch', int(time.time())) info_dict.setdefault('epoch', int(time.time()))
info_dict.setdefault('_type', 'video') info_dict.setdefault('_type', 'video')
remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
keep_keys = ['_type'] # Always keep this to facilitate load-info-json
if remove_private_keys: if remove_private_keys:
reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in { remove_keys |= {
'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
} }
reject = lambda k, v: k not in keep_keys and (
k.startswith('_') or k in remove_keys or v is None)
else: else:
reject = lambda k, v: False reject = lambda k, v: k in remove_keys
def filter_fn(obj): def filter_fn(obj):
if isinstance(obj, dict): if isinstance(obj, dict):
@ -3297,8 +3296,14 @@ class YoutubeDL(object):
actual_post_extract(video_dict or {}) actual_post_extract(video_dict or {})
return return
post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {}) post_extractor = info_dict.get('__post_extractor') or (lambda: {})
info_dict.update(post_extractor()) extra = post_extractor().items()
info_dict.update(extra)
info_dict.pop('__post_extractor', None)
original_infodict = info_dict.get('__original_infodict') or {}
original_infodict.update(extra)
original_infodict.pop('__post_extractor', None)
actual_post_extract(info_dict or {}) actual_post_extract(info_dict or {})

View File

@ -474,8 +474,8 @@ def _real_main(argv=None):
'key': 'SponsorBlock', 'key': 'SponsorBlock',
'categories': sponsorblock_query, 'categories': sponsorblock_query,
'api': opts.sponsorblock_api, 'api': opts.sponsorblock_api,
# Run this after filtering videos # Run this immediately after extraction is complete
'when': 'after_filter' 'when': 'pre_process'
}) })
if opts.parse_metadata: if opts.parse_metadata:
postprocessors.append({ postprocessors.append({

View File

@ -7,7 +7,7 @@ from .jwplatform import JWPlatformIE
class SpiegelIE(InfoExtractor): class SpiegelIE(InfoExtractor):
_UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
_VALID_URL = r'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|%s)(?:-embed|-iframe)?(?:\.html)?(?:$|[#?])' % _UUID_RE _VALID_URL = r'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|%s)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$' % _UUID_RE
_TESTS = [{ _TESTS = [{
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
'md5': '50c7948883ec85a3e431a0a44b7ad1d6', 'md5': '50c7948883ec85a3e431a0a44b7ad1d6',

View File

@ -844,7 +844,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'uploader': uploader, 'uploader': uploader,
'channel_id': channel_id, 'channel_id': channel_id,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'upload_date': strftime_or_none(timestamp, '%Y%m%d') if self._configuration_arg('approximate_date', ie_key='youtubetab') else None, # 'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
'live_status': ('is_upcoming' if scheduled_timestamp is not None 'live_status': ('is_upcoming' if scheduled_timestamp is not None
else 'was_live' if 'streamed' in time_text.lower() else 'was_live' if 'streamed' in time_text.lower()
else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
@ -4244,16 +4244,6 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
if 'webpage' not in self._configuration_arg('skip'): if 'webpage' not in self._configuration_arg('skip'):
webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal) webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage) ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
# Reject webpage data if redirected to home page without explicitly requesting
selected_tab = self._extract_selected_tab(traverse_obj(
data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])) or {}
if (url != 'https://www.youtube.com/feed/recommended'
and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch' # Home page
and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])):
msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
if fatal:
raise ExtractorError(msg, expected=True)
self.report_warning(msg, only_once=True)
if not data: if not data:
if not ytcfg and self.is_authenticated: if not ytcfg and self.is_authenticated:
msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.' msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'

View File

@ -1550,11 +1550,11 @@ def create_parser():
'and (optionally) arguments to be passed to it, separated by a colon ":". ' 'and (optionally) arguments to be passed to it, separated by a colon ":". '
'ARGS are a semicolon ";" delimited list of NAME=VALUE. ' 'ARGS are a semicolon ";" delimited list of NAME=VALUE. '
'The "when" argument determines when the postprocessor is invoked. ' 'The "when" argument determines when the postprocessor is invoked. '
'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), ' 'It can be one of "pre_process" (after extraction), '
'"before_dl" (before each video download), "post_process" (after each video download; default), ' '"before_dl" (before video download), "post_process" (after video download; default), '
'"after_move" (after moving video file to it\'s final locations), ' '"after_move" (after moving file to their final locations), '
'"after_video" (after downloading and processing all formats of a video), ' '"after_video" (after downloading and processing all formats of a video), '
'or "playlist" (at end of playlist). ' 'or "playlist" (end of playlist). '
'This option can be used multiple times to add different postprocessors')) 'This option can be used multiple times to add different postprocessors'))
sponsorblock = optparse.OptionGroup(parser, 'SponsorBlock Options', description=( sponsorblock = optparse.OptionGroup(parser, 'SponsorBlock Options', description=(

View File

@ -1055,7 +1055,7 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor):
class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor): class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
SUPPORTED_EXTS = ('jpg', 'png', 'webp') SUPPORTED_EXTS = ('jpg', 'png')
def __init__(self, downloader=None, format=None): def __init__(self, downloader=None, format=None):
super(FFmpegThumbnailsConvertorPP, self).__init__(downloader) super(FFmpegThumbnailsConvertorPP, self).__init__(downloader)

View File

@ -3166,7 +3166,7 @@ def qualities(quality_ids):
return q return q
POSTPROCESS_WHEN = {'pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'} POSTPROCESS_WHEN = {'pre_process', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'}
DEFAULT_OUTTMPL = { DEFAULT_OUTTMPL = {