[youtube,twitch] Allow waiting for channels to become live

Closes #2597
Reject entire playlists faster with --match-filter
2024-11-14 13:13:06 +00:00 · 2022-07-26 09:33:19 +05:30 · 2022-07-26 09:33:12 +05:30 · 2022-07-26 09:11:52 +05:30 · 2022-07-26 09:09:40 +05:30
8 changed files with 116 additions and 74 deletions
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@ -80,6 +80,7 @@ from .utils import (
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
+    UserNotLive,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
@ -1309,7 +1310,7 @@ class YoutubeDL:
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

-        video_title = info_dict.get('title', info_dict.get('id', 'video'))
+        video_title = info_dict.get('title', info_dict.get('id', 'entry'))

        def check_filter():
            if 'title' in info_dict:
@ -1456,7 +1457,7 @@ class YoutubeDL:
                break
        return wrapper

-    def _wait_for_video(self, ie_result):
+    def _wait_for_video(self, ie_result={}):
        if (not self.params.get('wait_for_video')
                or ie_result.get('_type', 'video') != 'video'
                or ie_result.get('formats') or ie_result.get('url')):
@ -1480,7 +1481,7 @@ class YoutubeDL:
        if diff is None and ie_result.get('live_status') == 'is_upcoming':
            diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
            self.report_warning('Release time of video is not known')
-        elif (diff or 0) <= 0:
+        elif ie_result and (diff or 0) <= 0:
            self.report_warning('Video should already be available according to extracted info')
        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
        self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
@ -1504,7 +1505,14 @@ class YoutubeDL:

    @_handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
+        try:
            ie_result = ie.extract(url)
+        except UserNotLive as e:
+            if process:
+                if self.params.get('wait_for_video'):
+                    self.report_warning(e)
+                self._wait_for_video()
+            raise
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
            return
@ -1677,23 +1685,37 @@ class YoutubeDL:
        return make_dir(path, self.report_error)

    @staticmethod
-    def _playlist_infodict(ie_result, **kwargs):
-        return {
-            **ie_result,
+    def _playlist_infodict(ie_result, strict=False, **kwargs):
+        info = {
+            'playlist_count': ie_result.get('playlist_count'),
            'playlist': ie_result.get('title') or ie_result.get('id'),
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
-            'playlist_index': 0,
            **kwargs,
        }
+        if strict:
+            return info
+        return {
+            **info,
+            'playlist_index': 0,
+            '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
+            'extractor': ie_result['extractor'],
+            'webpage_url': ie_result['webpage_url'],
+            'webpage_url_basename': url_basename(ie_result['webpage_url']),
+            'webpage_url_domain': get_domain(ie_result['webpage_url']),
+            'extractor_key': ie_result['extractor_key'],
+        }

    def __process_playlist(self, ie_result, download):
        """Process each entry in the playlist"""
        assert ie_result['_type'] in ('playlist', 'multi_video')

-        title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
+        common_info = self._playlist_infodict(ie_result, strict=True)
+        title = common_info.get('title') or '<Untitled>'
+        if self._match_entry(common_info, incomplete=True) is not None:
+            return
        self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

        all_entries = PlaylistEntries(self, ie_result)
@ -1711,12 +1733,14 @@ class YoutubeDL:
            # Better to do this after potentially exhausting entries
            ie_result['playlist_count'] = all_entries.get_full_count()

+        common_info = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
+        ie_copy = collections.ChainMap(ie_result, common_info)
+
        _infojson_written = False
        write_playlist_files = self.params.get('allow_playlist_files', True)
        if write_playlist_files and self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if write_playlist_files and not self.params.get('simulate'):
-            ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
            _infojson_written = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
            if _infojson_written is None:
@ -1725,7 +1749,7 @@ class YoutubeDL:
                                       self.prepare_filename(ie_copy, 'pl_description')) is None:
                return
            # TODO: This should be passed to ThumbnailsConvertor if necessary
-            self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
+            self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if lazy:
            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
@ -1749,35 +1773,26 @@ class YoutubeDL:
        for i, (playlist_index, entry) in enumerate(entries):
            if lazy:
                resolved_entries.append((playlist_index, entry))
-
-            # TODO: Add auto-generated fields
-            if not entry or self._match_entry(entry, incomplete=True) is not None:
+            if not entry:
                continue

-            self.to_screen('[download] Downloading video %s of %s' % (
-                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
-
            entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
            if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
                playlist_index = ie_result['requested_entries'][i]

-            entry_result = self.__process_iterable_entry(entry, download, {
-                'n_entries': int_or_none(n_entries),
-                '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
-                'playlist_count': ie_result.get('playlist_count'),
+            extra = {
+                **common_info,
                'playlist_index': playlist_index,
                'playlist_autonumber': i + 1,
-                'playlist': title,
-                'playlist_id': ie_result.get('id'),
-                'playlist_title': ie_result.get('title'),
-                'playlist_uploader': ie_result.get('uploader'),
-                'playlist_uploader_id': ie_result.get('uploader_id'),
-                'extractor': ie_result['extractor'],
-                'webpage_url': ie_result['webpage_url'],
-                'webpage_url_basename': url_basename(ie_result['webpage_url']),
-                'webpage_url_domain': get_domain(ie_result['webpage_url']),
-                'extractor_key': ie_result['extractor_key'],
-            })
+            }
+
+            if self._match_entry(collections.ChainMap(entry, extra), incomplete=True) is not None:
+                continue
+
+            self.to_screen('[download] Downloading video %s of %s' % (
+                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
+
+            entry_result = self.__process_iterable_entry(entry, download, extra)
            if not entry_result:
                failures += 1
            if failures >= max_failures:
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@ -408,6 +408,7 @@ from .dplay import (
    DiscoveryLifeIE,
    AnimalPlanetIE,
    TLCIE,
+    MotorTrendIE,
    DiscoveryPlusIndiaIE,
    DiscoveryNetworksDeIE,
    DiscoveryPlusItalyIE,
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@ -813,56 +813,36 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):

        episode_response = self._download_json(
            f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
-            note='Retrieving episode metadata',
-            query=params)
+            note='Retrieving episode metadata', query=params)
        if episode_response.get('is_premium_only') and not episode_response.get('playback'):
            raise ExtractorError('This video is for premium members only.', expected=True)
-        stream_response = self._download_json(
-            episode_response['playback'], display_id,
-            note='Retrieving stream info')

-        thumbnails = []
-        for thumbnails_data in traverse_obj(episode_response, ('images', 'thumbnail')):
-            for thumbnail_data in thumbnails_data:
-                thumbnails.append({
-                    'url': thumbnail_data.get('source'),
-                    'width': thumbnail_data.get('width'),
-                    'height': thumbnail_data.get('height'),
-                })
-        subtitles = {}
-        for lang, subtitle_data in stream_response.get('subtitles').items():
-            subtitles[lang] = [{
-                'url': subtitle_data.get('url'),
-                'ext': subtitle_data.get('format')
-            }]
+        stream_response = self._download_json(episode_response['playback'], display_id, note='Retrieving stream info')
+        get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items()

        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
        hardsub_preference = qualities(requested_hardsubs[::-1])
        requested_formats = self._configuration_arg('format') or ['adaptive_hls']

        formats = []
-        for stream_type, streams in stream_response.get('streams', {}).items():
+        for stream_type, streams in get_streams('streams'):
            if stream_type not in requested_formats:
                continue
            for stream in streams.values():
                hardsub_lang = stream.get('hardsub_locale') or ''
                if hardsub_lang.lower() not in requested_hardsubs:
                    continue
-                format_id = join_nonempty(
-                    stream_type,
-                    format_field(stream, 'hardsub_locale', 'hardsub-%s'))
+                format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
                if not stream.get('url'):
                    continue
-                if stream_type.split('_')[-1] == 'hls':
+                if stream_type.endswith('hls'):
                    adaptive_formats = self._extract_m3u8_formats(
                        stream['url'], display_id, 'mp4', m3u8_id=format_id,
-                        note='Downloading %s information' % format_id,
-                        fatal=False)
-                elif stream_type.split('_')[-1] == 'dash':
+                        fatal=False, note=f'Downloading {format_id} HLS manifest')
+                elif stream_type.endswith('dash'):
                    adaptive_formats = self._extract_mpd_formats(
                        stream['url'], display_id, mpd_id=format_id,
-                        note='Downloading %s information' % format_id,
-                        fatal=False)
+                        fatal=False, note=f'Downloading {format_id} MPD manifest')
                for f in adaptive_formats:
                    if f.get('acodec') != 'none':
                        f['language'] = stream_response.get('audio_locale')
@ -872,10 +852,10 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):

        return {
            'id': internal_id,
-            'title': '%s Episode %s – %s' % (episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
-            'description': episode_response.get('description').replace(r'\r\n', '\n'),
+            'title': '%s Episode %s – %s' % (
+                episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
+            'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
            'duration': float_or_none(episode_response.get('duration_ms'), 1000),
-            'thumbnails': thumbnails,
            'series': episode_response.get('series_title'),
            'series_id': episode_response.get('series_id'),
            'season': episode_response.get('season_title'),
@ -883,8 +863,18 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
            'season_number': episode_response.get('season_number'),
            'episode': episode_response.get('title'),
            'episode_number': episode_response.get('sequence_number'),
-            'subtitles': subtitles,
-            'formats': formats
+            'formats': formats,
+            'thumbnails': [{
+                'url': thumb.get('source'),
+                'width': thumb.get('width'),
+                'height': thumb.get('height'),
+            } for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
+            'subtitles': {
+                lang: [{
+                    'url': subtitle_data.get('url'),
+                    'ext': subtitle_data.get('format')
+                }] for lang, subtitle_data in get_streams('subtitles')
+            },
        }


--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@ -718,6 +718,33 @@ class TLCIE(DiscoveryPlusBaseIE):
    }


+class MotorTrendIE(DiscoveryPlusBaseIE):
+    _VALID_URL = r'https?://(?:watch\.)?motortrend\.com/video' + DPlayBaseIE._PATH_REGEX
+    _TESTS = [{
+        'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
+        'info_dict': {
+            'id': '"4859182"',
+            'display_id': 'double-dakotas',
+            'ext': 'mp4',
+            'title': 'Double Dakotas',
+            'description': 'Tylers buy-one-get-one Dakota deal has the Wizard pulling double duty.',
+            'season_number': 2,
+            'episode_number': 3,
+        },
+        'skip': 'Available for Premium users',
+    }, {
+        'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
+        'only_matching': True,
+    }]
+
+    _PRODUCT = 'vel'
+    _DISCO_API_PARAMS = {
+        'disco_host': 'us1-prod-direct.watch.motortrend.com',
+        'realm': 'go',
+        'country': 'us',
+    }
+
+
 class DiscoveryPlusIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@ -12,10 +12,11 @@ from ..compat import (
    compat_urllib_parse_urlparse,
 )
 from ..utils import (
+    ExtractorError,
+    UserNotLive,
    base_url,
    clean_html,
    dict_get,
-    ExtractorError,
    float_or_none,
    int_or_none,
    parse_duration,
@ -940,7 +941,7 @@ class TwitchStreamIE(TwitchBaseIE):
        stream = user['stream']

        if not stream:
-            raise ExtractorError('%s is offline' % channel_name, expected=True)
+            raise UserNotLive(video_id=channel_name)

        access_token = self._download_access_token(
            channel_name, 'stream', 'channelName')
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@ -22,6 +22,7 @@ from ..jsinterp import JSInterpreter
 from ..utils import (
    NO_DEFAULT,
    ExtractorError,
+    UserNotLive,
    bug_reports_message,
    classproperty,
    clean_html,
@ -5383,9 +5384,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
                selected_tab_name = 'featured'
            requested_tab_name = mobj['tab'][1:]
            if 'no-youtube-channel-redirect' not in compat_opts:
-                if requested_tab_name == 'live':
-                    # Live tab should have redirected to the video
-                    raise ExtractorError('The channel is not currently live', expected=True)
+                if requested_tab_name == 'live':  # Live tab should have redirected to the video
+                    raise UserNotLive(video_id=mobj['id'])
                if requested_tab_name not in ('', selected_tab_name):
                    redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                    if not original_tab_name:
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@ -1149,9 +1149,9 @@ class FFmpegConcatPP(FFmpegPostProcessor):
        if len(in_files) < len(entries):
            raise PostProcessingError('Aborting concatenation because some downloads failed')

-        ie_copy = self._downloader._playlist_infodict(info)
        exts = traverse_obj(entries, (..., 'requested_downloads', 0, 'ext'), (..., 'ext'))
-        ie_copy['ext'] = exts[0] if len(set(exts)) == 1 else 'mkv'
+        ie_copy = collections.ChainMap({'ext': exts[0] if len(set(exts)) == 1 else 'mkv'},
+                                       info, self._downloader._playlist_infodict(info))
        out_file = self._downloader.prepare_filename(ie_copy, 'pl_video')

        files_to_delete = self.concat_files(in_files, out_file)
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@ -1072,6 +1072,14 @@ class GeoRestrictedError(ExtractorError):
        self.countries = countries


+class UserNotLive(ExtractorError):
+    """Error when a channel/user is not live"""
+
+    def __init__(self, msg=None, **kwargs):
+        kwargs['expected'] = True
+        super().__init__(msg or 'The channel is not currently live', **kwargs)
+
+
 class DownloadError(YoutubeDLError):
    """Download Error exception.

@ -3666,7 +3674,7 @@ def match_filter_func(filters):
        if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
            return NO_DEFAULT if interactive and not incomplete else None
        else:
-            video_title = info_dict.get('title') or info_dict.get('id') or 'video'
+            video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
            filter_str = ') | ('.join(map(str.strip, filters))
            return f'{video_title} does not pass filter ({filter_str}), skipping ..'
    return _match_func
Author	SHA1	Message	Date
pukkandan	693f060040	[youtube,twitch] Allow waiting for channels to become live Closes #2597	2022-07-26 09:33:19 +05:30
pukkandan	3bec830a59	Reject entire playlists faster with `--match-filter`. Rejection based on `playlist_id` etc. can be checked before any entries are extracted. Related: #4383	2022-07-26 09:33:12 +05:30
Burve	7d0f6f0c45	[extractor/Crunchyroll] Handle missing metadata correctly (#4405) Closes #4399 Authored by: pukkandan, Burve	2022-07-26 09:11:52 +05:30
Sipherdrakon	26bafe7028	[extractor/dplay] Add MotorTrend extractor (#4446) Authored by: Sipherdrakon	2022-07-26 09:09:40 +05:30