[youtube,twitch] Allow waiting for channels to become live

Closes #2597
Reject entire playlists faster with --match-filter
2024-11-14 21:23:05 +00:00 · 2022-07-26 09:33:19 +05:30 · 2022-07-26 09:33:12 +05:30 · 2022-07-26 09:11:52 +05:30 · 2022-07-26 09:09:40 +05:30
8 changed files with 116 additions and 74 deletions
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -80,6 +80,7 @@ from .utils import (
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
@@ -1309,7 +1310,7 @@ class YoutubeDL:
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """
-        video_title = info_dict.get('title', info_dict.get('id', 'video'))
+        video_title = info_dict.get('title', info_dict.get('id', 'entry'))
        def check_filter():
            if 'title' in info_dict:
@@ -1456,7 +1457,7 @@ class YoutubeDL:
                break
        return wrapper
-    def _wait_for_video(self, ie_result):
+    def _wait_for_video(self, ie_result={}):
        if (not self.params.get('wait_for_video')
                or ie_result.get('_type', 'video') != 'video'
                or ie_result.get('formats') or ie_result.get('url')):
@@ -1480,7 +1481,7 @@ class YoutubeDL:
        if diff is None and ie_result.get('live_status') == 'is_upcoming':
            diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
            self.report_warning('Release time of video is not known')
-        elif (diff or 0) <= 0:
+        elif ie_result and (diff or 0) <= 0:
            self.report_warning('Video should already be available according to extracted info')
        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
        self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
@@ -1504,7 +1505,14 @@ class YoutubeDL:
    @_handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
-        ie_result = ie.extract(url)
+        try:
            ie_result = ie.extract(url)
        except UserNotLive as e:
            if process:
                if self.params.get('wait_for_video'):
                    self.report_warning(e)
                self._wait_for_video()
            raise
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
            return
@@ -1677,23 +1685,37 @@ class YoutubeDL:
        return make_dir(path, self.report_error)
    @staticmethod
-    def _playlist_infodict(ie_result, **kwargs):
+    def _playlist_infodict(ie_result, strict=False, **kwargs):
-        return {
+        info = {
-            **ie_result,
+            'playlist_count': ie_result.get('playlist_count'),
            'playlist': ie_result.get('title') or ie_result.get('id'),
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0,
            **kwargs,
        }
        if strict:
            return info
        return {
            **info,
            'playlist_index': 0,
            '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'webpage_url_domain': get_domain(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }
    def __process_playlist(self, ie_result, download):
        """Process each entry in the playlist"""
        assert ie_result['_type'] in ('playlist', 'multi_video')
-        title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
+        common_info = self._playlist_infodict(ie_result, strict=True)
        title = common_info.get('title') or '<Untitled>'
        if self._match_entry(common_info, incomplete=True) is not None:
            return
        self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
        all_entries = PlaylistEntries(self, ie_result)
@@ -1711,12 +1733,14 @@ class YoutubeDL:
            # Better to do this after potentially exhausting entries
            ie_result['playlist_count'] = all_entries.get_full_count()
        common_info = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
        ie_copy = collections.ChainMap(ie_result, common_info)
        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
            ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
@@ -1725,7 +1749,7 @@ class YoutubeDL:
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
-            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
+            self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
        if lazy:
            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
@@ -1749,35 +1773,26 @@ class YoutubeDL:
        for i, (playlist_index, entry) in enumerate(entries):
            if lazy:
                resolved_entries.append((playlist_index, entry))
-
+            if not entry:
            # TODO: Add auto-generated fields
            if not entry or self._match_entry(entry, incomplete=True) is not None:
                continue
            self.to_screen('[download] Downloading video %s of %s' % (
                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
            if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
                playlist_index = ie_result['requested_entries'][i]
-            entry_result = self.__process_iterable_entry(entry, download, {
+            extra = {
-                'n_entries': int_or_none(n_entries),
+                **common_info,
                '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
                'playlist_count': ie_result.get('playlist_count'),
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
-                'playlist': title,
+            }
-                'playlist_id': ie_result.get('id'),
+
-                'playlist_title': ie_result.get('title'),
+            if self._match_entry(collections.ChainMap(entry, extra), incomplete=True) is not None:
-                'playlist_uploader': ie_result.get('uploader'),
+                continue
-                'playlist_uploader_id': ie_result.get('uploader_id'),
+
-                'extractor': ie_result['extractor'],
+            self.to_screen('[download] Downloading video %s of %s' % (
-                'webpage_url': ie_result['webpage_url'],
+                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
-                'webpage_url_basename': url_basename(ie_result['webpage_url']),
+
-                'webpage_url_domain': get_domain(ie_result['webpage_url']),
+            entry_result = self.__process_iterable_entry(entry, download, extra)
                'extractor_key': ie_result['extractor_key'],
            })
            if not entry_result:
                failures += 1
            if failures >= max_failures:
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -408,6 +408,7 @@ from .dplay import (
    DiscoveryLifeIE,
    AnimalPlanetIE,
    TLCIE,
    MotorTrendIE,
    DiscoveryPlusIndiaIE,
    DiscoveryNetworksDeIE,
    DiscoveryPlusItalyIE,
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -813,56 +813,36 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
        episode_response = self._download_json(
            f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
-            note='Retrieving episode metadata',
+            note='Retrieving episode metadata', query=params)
            query=params)
        if episode_response.get('is_premium_only') and not episode_response.get('playback'):
            raise ExtractorError('This video is for premium members only.', expected=True)
        stream_response = self._download_json(
            episode_response['playback'], display_id,
            note='Retrieving stream info')
-        thumbnails = []
+        stream_response = self._download_json(episode_response['playback'], display_id, note='Retrieving stream info')
-        for thumbnails_data in traverse_obj(episode_response, ('images', 'thumbnail')):
+        get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items()
            for thumbnail_data in thumbnails_data:
                thumbnails.append({
                    'url': thumbnail_data.get('source'),
                    'width': thumbnail_data.get('width'),
                    'height': thumbnail_data.get('height'),
                })
        subtitles = {}
        for lang, subtitle_data in stream_response.get('subtitles').items():
            subtitles[lang] = [{
                'url': subtitle_data.get('url'),
                'ext': subtitle_data.get('format')
            }]
        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
        hardsub_preference = qualities(requested_hardsubs[::-1])
        requested_formats = self._configuration_arg('format') or ['adaptive_hls']
        formats = []
-        for stream_type, streams in stream_response.get('streams', {}).items():
+        for stream_type, streams in get_streams('streams'):
            if stream_type not in requested_formats:
                continue
            for stream in streams.values():
                hardsub_lang = stream.get('hardsub_locale') or ''
                if hardsub_lang.lower() not in requested_hardsubs:
                    continue
-                format_id = join_nonempty(
+                format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
                    stream_type,
                    format_field(stream, 'hardsub_locale', 'hardsub-%s'))
                if not stream.get('url'):
                    continue
-                if stream_type.split('_')[-1] == 'hls':
+                if stream_type.endswith('hls'):
                    adaptive_formats = self._extract_m3u8_formats(
                        stream['url'], display_id, 'mp4', m3u8_id=format_id,
-                        note='Downloading %s information' % format_id,
+                        fatal=False, note=f'Downloading {format_id} HLS manifest')
-                        fatal=False)
+                elif stream_type.endswith('dash'):
                elif stream_type.split('_')[-1] == 'dash':
                    adaptive_formats = self._extract_mpd_formats(
                        stream['url'], display_id, mpd_id=format_id,
-                        note='Downloading %s information' % format_id,
+                        fatal=False, note=f'Downloading {format_id} MPD manifest')
                        fatal=False)
                for f in adaptive_formats:
                    if f.get('acodec') != 'none':
                        f['language'] = stream_response.get('audio_locale')
@@ -872,10 +852,10 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
        return {
            'id': internal_id,
-            'title': '%s Episode %s – %s' % (episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
+            'title': '%s Episode %s – %s' % (
-            'description': episode_response.get('description').replace(r'\r\n', '\n'),
+                episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
            'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
            'duration': float_or_none(episode_response.get('duration_ms'), 1000),
            'thumbnails': thumbnails,
            'series': episode_response.get('series_title'),
            'series_id': episode_response.get('series_id'),
            'season': episode_response.get('season_title'),
@@ -883,8 +863,18 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
            'season_number': episode_response.get('season_number'),
            'episode': episode_response.get('title'),
            'episode_number': episode_response.get('sequence_number'),
-            'subtitles': subtitles,
+            'formats': formats,
-            'formats': formats
+            'thumbnails': [{
                'url': thumb.get('source'),
                'width': thumb.get('width'),
                'height': thumb.get('height'),
            } for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
            'subtitles': {
                lang: [{
                    'url': subtitle_data.get('url'),
                    'ext': subtitle_data.get('format')
                }] for lang, subtitle_data in get_streams('subtitles')
            },
        }
--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@@ -718,6 +718,33 @@ class TLCIE(DiscoveryPlusBaseIE):
    }
 class MotorTrendIE(DiscoveryPlusBaseIE):
    # Declarative extractor settings for MotorTrend video pages. This class
    # defines only attributes and no methods; presumably the inherited
    # DiscoveryPlusBaseIE logic (not visible here) consumes them - TODO confirm.
    # URL pattern: optional "watch." host prefix, motortrend.com, "/video",
    # then the shared path pattern taken from DPlayBaseIE
    _VALID_URL = r'https?://(?:watch\.)?motortrend\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
        'info_dict': {
            # NOTE(review): the expected id contains literal double quotes - confirm this is intended
            'id': '"4859182"',
            'display_id': 'double-dakotas',
            'ext': 'mp4',
            'title': 'Double Dakotas',
            'description': 'Tylers buy-one-get-one Dakota deal has the Wizard pulling double duty.',
            'season_number': 2,
            'episode_number': 3,
        },
        'skip': 'Available for Premium users',
    }, {
        # NOTE(review): same URL as the test above, only marked as only_matching - possibly redundant
        'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
        'only_matching': True,
    }]
    # presumably a product identifier read by the base class - meaning not visible here; TODO confirm
    _PRODUCT = 'vel'
    # presumably API host/realm/country settings used by the base class - verify against DiscoveryPlusBaseIE
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.motortrend.com',
        'realm': 'go',
        'country': 'us',
    }
 class DiscoveryPlusIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@@ -12,10 +12,11 @@ from ..compat import (
    compat_urllib_parse_urlparse,
 )
 from ..utils import (
    ExtractorError,
    UserNotLive,
    base_url,
    clean_html,
    dict_get,
    ExtractorError,
    float_or_none,
    int_or_none,
    parse_duration,
@@ -940,7 +941,7 @@ class TwitchStreamIE(TwitchBaseIE):
        stream = user['stream']
        if not stream:
-            raise ExtractorError('%s is offline' % channel_name, expected=True)
+            raise UserNotLive(video_id=channel_name)
        access_token = self._download_access_token(
            channel_name, 'stream', 'channelName')
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -22,6 +22,7 @@ from ..jsinterp import JSInterpreter
 from ..utils import (
    NO_DEFAULT,
    ExtractorError,
    UserNotLive,
    bug_reports_message,
    classproperty,
    clean_html,
@@ -5383,9 +5384,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
                selected_tab_name = 'featured'
            requested_tab_name = mobj['tab'][1:]
            if 'no-youtube-channel-redirect' not in compat_opts:
-                if requested_tab_name == 'live':
+                if requested_tab_name == 'live':  # Live tab should have redirected to the video
-                    # Live tab should have redirected to the video
+                    raise UserNotLive(video_id=mobj['id'])
                    raise ExtractorError('The channel is not currently live', expected=True)
                if requested_tab_name not in ('', selected_tab_name):
                    redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                    if not original_tab_name:
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -1149,9 +1149,9 @@ class FFmpegConcatPP(FFmpegPostProcessor):
        if len(in_files) < len(entries):
            raise PostProcessingError('Aborting concatenation because some downloads failed')
        ie_copy = self._downloader._playlist_infodict(info)
        exts = traverse_obj(entries, (..., 'requested_downloads', 0, 'ext'), (..., 'ext'))
-        ie_copy['ext'] = exts[0] if len(set(exts)) == 1 else 'mkv'
+        ie_copy = collections.ChainMap({'ext': exts[0] if len(set(exts)) == 1 else 'mkv'},
                                       info, self._downloader._playlist_infodict(info))
        out_file = self._downloader.prepare_filename(ie_copy, 'pl_video')
        files_to_delete = self.concat_files(in_files, out_file)
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -1072,6 +1072,14 @@ class GeoRestrictedError(ExtractorError):
        self.countries = countries
 class UserNotLive(ExtractorError):
    """Extraction error signalling that the requested channel/user is not live

    The error is always flagged as expected, whatever the caller passes.
    @param msg      Optional message; a generic "not live" text is used when it is empty
    @param kwargs   Forwarded to the parent constructor after 'expected' is forced to True
    """
    def __init__(self, msg=None, **kwargs):
        message = msg if msg else 'The channel is not currently live'
        # 'expected' is placed last so it overrides any caller-supplied value
        super().__init__(message, **{**kwargs, 'expected': True})
 class DownloadError(YoutubeDLError):
    """Download Error exception.
@@ -3666,7 +3674,7 @@ def match_filter_func(filters):
        if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
            return NO_DEFAULT if interactive and not incomplete else None
        else:
-            video_title = info_dict.get('title') or info_dict.get('id') or 'video'
+            video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
            filter_str = ') | ('.join(map(str.strip, filters))
            return f'{video_title} does not pass filter ({filter_str}), skipping ..'
    return _match_func
Author	SHA1	Message	Date
pukkandan	693f060040	[youtube,twitch] Allow waiting for channels to become live Closes #2597	2022-07-26 09:33:19 +05:30
pukkandan	3bec830a59	Reject entire playlists faster with `--match-filter` Rejection based on `playlist_id` etc. can be checked before any entries are extracted Related: #4383	2022-07-26 09:33:12 +05:30
Burve	7d0f6f0c45	[extractor/Crunchyroll] Handle missing metadata correctly (#4405) Closes #4399 Authored by: pukkandan, Burve	2022-07-26 09:11:52 +05:30
Sipherdrakon	26bafe7028	[extractor/dplay] Add MotorTrend extractor (#4446) Authored by: Sipherdrakon	2022-07-26 09:09:40 +05:30