Compare commits

...

4 Commits

Author SHA1 Message Date
pukkandan
693f060040
[youtube,twitch] Allow waiting for channels to become live
Closes #2597
2022-07-26 09:33:19 +05:30
pukkandan
3bec830a59
Reject entire playlists faster with --match-filter
Rejection based on `playlist_id` etc. can be checked before any entries are extracted

Related: #4383
2022-07-26 09:33:12 +05:30
Burve
7d0f6f0c45
[extractor/Crunchyroll] Handle missing metadata correctly (#4405)
Closes #4399

Authored by: pukkandan, Burve
2022-07-26 09:11:52 +05:30
Sipherdrakon
26bafe7028
[extractor/dplay] Add MotorTrend extractor (#4446)
Authored by: Sipherdrakon
2022-07-26 09:09:40 +05:30
8 changed files with 116 additions and 74 deletions

View File

@ -80,6 +80,7 @@ from .utils import (
RejectedVideoReached, RejectedVideoReached,
SameFileError, SameFileError,
UnavailableVideoError, UnavailableVideoError,
UserNotLive,
YoutubeDLCookieProcessor, YoutubeDLCookieProcessor,
YoutubeDLHandler, YoutubeDLHandler,
YoutubeDLRedirectHandler, YoutubeDLRedirectHandler,
@ -1309,7 +1310,7 @@ class YoutubeDL:
def _match_entry(self, info_dict, incomplete=False, silent=False): def _match_entry(self, info_dict, incomplete=False, silent=False):
""" Returns None if the file should be downloaded """ """ Returns None if the file should be downloaded """
video_title = info_dict.get('title', info_dict.get('id', 'video')) video_title = info_dict.get('title', info_dict.get('id', 'entry'))
def check_filter(): def check_filter():
if 'title' in info_dict: if 'title' in info_dict:
@ -1456,7 +1457,7 @@ class YoutubeDL:
break break
return wrapper return wrapper
def _wait_for_video(self, ie_result): def _wait_for_video(self, ie_result={}):
if (not self.params.get('wait_for_video') if (not self.params.get('wait_for_video')
or ie_result.get('_type', 'video') != 'video' or ie_result.get('_type', 'video') != 'video'
or ie_result.get('formats') or ie_result.get('url')): or ie_result.get('formats') or ie_result.get('url')):
@ -1480,7 +1481,7 @@ class YoutubeDL:
if diff is None and ie_result.get('live_status') == 'is_upcoming': if diff is None and ie_result.get('live_status') == 'is_upcoming':
diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0) diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
self.report_warning('Release time of video is not known') self.report_warning('Release time of video is not known')
elif (diff or 0) <= 0: elif ie_result and (diff or 0) <= 0:
self.report_warning('Video should already be available according to extracted info') self.report_warning('Video should already be available according to extracted info')
diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf')) diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now') self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
@ -1504,7 +1505,14 @@ class YoutubeDL:
@_handle_extraction_exceptions @_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process): def __extract_info(self, url, ie, download, extra_info, process):
ie_result = ie.extract(url) try:
ie_result = ie.extract(url)
except UserNotLive as e:
if process:
if self.params.get('wait_for_video'):
self.report_warning(e)
self._wait_for_video()
raise
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}') self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
return return
@ -1677,23 +1685,37 @@ class YoutubeDL:
return make_dir(path, self.report_error) return make_dir(path, self.report_error)
@staticmethod @staticmethod
def _playlist_infodict(ie_result, **kwargs): def _playlist_infodict(ie_result, strict=False, **kwargs):
return { info = {
**ie_result, 'playlist_count': ie_result.get('playlist_count'),
'playlist': ie_result.get('title') or ie_result.get('id'), 'playlist': ie_result.get('title') or ie_result.get('id'),
'playlist_id': ie_result.get('id'), 'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'), 'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'), 'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'), 'playlist_uploader_id': ie_result.get('uploader_id'),
'playlist_index': 0,
**kwargs, **kwargs,
} }
if strict:
return info
return {
**info,
'playlist_index': 0,
'__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'webpage_url_domain': get_domain(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
}
def __process_playlist(self, ie_result, download): def __process_playlist(self, ie_result, download):
"""Process each entry in the playlist""" """Process each entry in the playlist"""
assert ie_result['_type'] in ('playlist', 'multi_video') assert ie_result['_type'] in ('playlist', 'multi_video')
title = ie_result.get('title') or ie_result.get('id') or '<Untitled>' common_info = self._playlist_infodict(ie_result, strict=True)
title = common_info.get('title') or '<Untitled>'
if self._match_entry(common_info, incomplete=True) is not None:
return
self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}') self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
all_entries = PlaylistEntries(self, ie_result) all_entries = PlaylistEntries(self, ie_result)
@ -1711,12 +1733,14 @@ class YoutubeDL:
# Better to do this after potentially exhausting entries # Better to do this after potentially exhausting entries
ie_result['playlist_count'] = all_entries.get_full_count() ie_result['playlist_count'] = all_entries.get_full_count()
common_info = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
ie_copy = collections.ChainMap(ie_result, common_info)
_infojson_written = False _infojson_written = False
write_playlist_files = self.params.get('allow_playlist_files', True) write_playlist_files = self.params.get('allow_playlist_files', True)
if write_playlist_files and self.params.get('list_thumbnails'): if write_playlist_files and self.params.get('list_thumbnails'):
self.list_thumbnails(ie_result) self.list_thumbnails(ie_result)
if write_playlist_files and not self.params.get('simulate'): if write_playlist_files and not self.params.get('simulate'):
ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
_infojson_written = self._write_info_json( _infojson_written = self._write_info_json(
'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson')) 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
if _infojson_written is None: if _infojson_written is None:
@ -1725,7 +1749,7 @@ class YoutubeDL:
self.prepare_filename(ie_copy, 'pl_description')) is None: self.prepare_filename(ie_copy, 'pl_description')) is None:
return return
# TODO: This should be passed to ThumbnailsConvertor if necessary # TODO: This should be passed to ThumbnailsConvertor if necessary
self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail')) self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
if lazy: if lazy:
if self.params.get('playlistreverse') or self.params.get('playlistrandom'): if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
@ -1749,35 +1773,26 @@ class YoutubeDL:
for i, (playlist_index, entry) in enumerate(entries): for i, (playlist_index, entry) in enumerate(entries):
if lazy: if lazy:
resolved_entries.append((playlist_index, entry)) resolved_entries.append((playlist_index, entry))
if not entry:
# TODO: Add auto-generated fields
if not entry or self._match_entry(entry, incomplete=True) is not None:
continue continue
self.to_screen('[download] Downloading video %s of %s' % (
self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip') entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
if not lazy and 'playlist-index' in self.params.get('compat_opts', []): if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
playlist_index = ie_result['requested_entries'][i] playlist_index = ie_result['requested_entries'][i]
entry_result = self.__process_iterable_entry(entry, download, { extra = {
'n_entries': int_or_none(n_entries), **common_info,
'__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
'playlist_count': ie_result.get('playlist_count'),
'playlist_index': playlist_index, 'playlist_index': playlist_index,
'playlist_autonumber': i + 1, 'playlist_autonumber': i + 1,
'playlist': title, }
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'), if self._match_entry(collections.ChainMap(entry, extra), incomplete=True) is not None:
'playlist_uploader': ie_result.get('uploader'), continue
'playlist_uploader_id': ie_result.get('uploader_id'),
'extractor': ie_result['extractor'], self.to_screen('[download] Downloading video %s of %s' % (
'webpage_url': ie_result['webpage_url'], self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'webpage_url_domain': get_domain(ie_result['webpage_url']), entry_result = self.__process_iterable_entry(entry, download, extra)
'extractor_key': ie_result['extractor_key'],
})
if not entry_result: if not entry_result:
failures += 1 failures += 1
if failures >= max_failures: if failures >= max_failures:

View File

@ -408,6 +408,7 @@ from .dplay import (
DiscoveryLifeIE, DiscoveryLifeIE,
AnimalPlanetIE, AnimalPlanetIE,
TLCIE, TLCIE,
MotorTrendIE,
DiscoveryPlusIndiaIE, DiscoveryPlusIndiaIE,
DiscoveryNetworksDeIE, DiscoveryNetworksDeIE,
DiscoveryPlusItalyIE, DiscoveryPlusItalyIE,

View File

@ -813,56 +813,36 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
episode_response = self._download_json( episode_response = self._download_json(
f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id, f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
note='Retrieving episode metadata', note='Retrieving episode metadata', query=params)
query=params)
if episode_response.get('is_premium_only') and not episode_response.get('playback'): if episode_response.get('is_premium_only') and not episode_response.get('playback'):
raise ExtractorError('This video is for premium members only.', expected=True) raise ExtractorError('This video is for premium members only.', expected=True)
stream_response = self._download_json(
episode_response['playback'], display_id,
note='Retrieving stream info')
thumbnails = [] stream_response = self._download_json(episode_response['playback'], display_id, note='Retrieving stream info')
for thumbnails_data in traverse_obj(episode_response, ('images', 'thumbnail')): get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items()
for thumbnail_data in thumbnails_data:
thumbnails.append({
'url': thumbnail_data.get('source'),
'width': thumbnail_data.get('width'),
'height': thumbnail_data.get('height'),
})
subtitles = {}
for lang, subtitle_data in stream_response.get('subtitles').items():
subtitles[lang] = [{
'url': subtitle_data.get('url'),
'ext': subtitle_data.get('format')
}]
requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])] requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
hardsub_preference = qualities(requested_hardsubs[::-1]) hardsub_preference = qualities(requested_hardsubs[::-1])
requested_formats = self._configuration_arg('format') or ['adaptive_hls'] requested_formats = self._configuration_arg('format') or ['adaptive_hls']
formats = [] formats = []
for stream_type, streams in stream_response.get('streams', {}).items(): for stream_type, streams in get_streams('streams'):
if stream_type not in requested_formats: if stream_type not in requested_formats:
continue continue
for stream in streams.values(): for stream in streams.values():
hardsub_lang = stream.get('hardsub_locale') or '' hardsub_lang = stream.get('hardsub_locale') or ''
if hardsub_lang.lower() not in requested_hardsubs: if hardsub_lang.lower() not in requested_hardsubs:
continue continue
format_id = join_nonempty( format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
stream_type,
format_field(stream, 'hardsub_locale', 'hardsub-%s'))
if not stream.get('url'): if not stream.get('url'):
continue continue
if stream_type.split('_')[-1] == 'hls': if stream_type.endswith('hls'):
adaptive_formats = self._extract_m3u8_formats( adaptive_formats = self._extract_m3u8_formats(
stream['url'], display_id, 'mp4', m3u8_id=format_id, stream['url'], display_id, 'mp4', m3u8_id=format_id,
note='Downloading %s information' % format_id, fatal=False, note=f'Downloading {format_id} HLS manifest')
fatal=False) elif stream_type.endswith('dash'):
elif stream_type.split('_')[-1] == 'dash':
adaptive_formats = self._extract_mpd_formats( adaptive_formats = self._extract_mpd_formats(
stream['url'], display_id, mpd_id=format_id, stream['url'], display_id, mpd_id=format_id,
note='Downloading %s information' % format_id, fatal=False, note=f'Downloading {format_id} MPD manifest')
fatal=False)
for f in adaptive_formats: for f in adaptive_formats:
if f.get('acodec') != 'none': if f.get('acodec') != 'none':
f['language'] = stream_response.get('audio_locale') f['language'] = stream_response.get('audio_locale')
@ -872,10 +852,10 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
return { return {
'id': internal_id, 'id': internal_id,
'title': '%s Episode %s %s' % (episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')), 'title': '%s Episode %s %s' % (
'description': episode_response.get('description').replace(r'\r\n', '\n'), episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
'duration': float_or_none(episode_response.get('duration_ms'), 1000), 'duration': float_or_none(episode_response.get('duration_ms'), 1000),
'thumbnails': thumbnails,
'series': episode_response.get('series_title'), 'series': episode_response.get('series_title'),
'series_id': episode_response.get('series_id'), 'series_id': episode_response.get('series_id'),
'season': episode_response.get('season_title'), 'season': episode_response.get('season_title'),
@ -883,8 +863,18 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
'season_number': episode_response.get('season_number'), 'season_number': episode_response.get('season_number'),
'episode': episode_response.get('title'), 'episode': episode_response.get('title'),
'episode_number': episode_response.get('sequence_number'), 'episode_number': episode_response.get('sequence_number'),
'subtitles': subtitles, 'formats': formats,
'formats': formats 'thumbnails': [{
'url': thumb.get('source'),
'width': thumb.get('width'),
'height': thumb.get('height'),
} for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
'subtitles': {
lang: [{
'url': subtitle_data.get('url'),
'ext': subtitle_data.get('format')
}] for lang, subtitle_data in get_streams('subtitles')
},
} }

View File

@ -718,6 +718,33 @@ class TLCIE(DiscoveryPlusBaseIE):
} }
class MotorTrendIE(DiscoveryPlusBaseIE):
    # Extractor configuration for motortrend.com video pages. The class holds
    # only declarative data; all behaviour comes from DiscoveryPlusBaseIE.

    # Matches `motortrend.com/video` (the `watch.` subdomain is optional),
    # followed by the path pattern shared via DPlayBaseIE._PATH_REGEX.
    _VALID_URL = r'https?://(?:watch\.)?motortrend\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
        'info_dict': {
            # NOTE(review): the expected id contains literal double quotes
            # inside the string - confirm this is intended and not a paste error.
            'id': '"4859182"',
            'display_id': 'double-dakotas',
            'ext': 'mp4',
            'title': 'Double Dakotas',
            'description': 'Tylers buy-one-get-one Dakota deal has the Wizard pulling double duty.',
            'season_number': 2,
            'episode_number': 3,
        },
        # This case is marked to be skipped, with the reason given here.
        'skip': 'Available for Premium users',
    }, {
        # NOTE(review): same URL as the first test case, so this URL-matching
        # check adds no coverage beyond it - TODO confirm whether a different
        # URL form (e.g. without `watch.`) was meant.
        'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
        'only_matching': True,
    }]

    # Site-specific settings; presumably consumed by DiscoveryPlusBaseIE when
    # talking to the backing API - verify against the base class.
    _PRODUCT = 'vel'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.motortrend.com',
        'realm': 'go',
        'country': 'us',
    }
class DiscoveryPlusIE(DiscoveryPlusBaseIE): class DiscoveryPlusIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{ _TESTS = [{

View File

@ -12,10 +12,11 @@ from ..compat import (
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
) )
from ..utils import ( from ..utils import (
ExtractorError,
UserNotLive,
base_url, base_url,
clean_html, clean_html,
dict_get, dict_get,
ExtractorError,
float_or_none, float_or_none,
int_or_none, int_or_none,
parse_duration, parse_duration,
@ -940,7 +941,7 @@ class TwitchStreamIE(TwitchBaseIE):
stream = user['stream'] stream = user['stream']
if not stream: if not stream:
raise ExtractorError('%s is offline' % channel_name, expected=True) raise UserNotLive(video_id=channel_name)
access_token = self._download_access_token( access_token = self._download_access_token(
channel_name, 'stream', 'channelName') channel_name, 'stream', 'channelName')

View File

@ -22,6 +22,7 @@ from ..jsinterp import JSInterpreter
from ..utils import ( from ..utils import (
NO_DEFAULT, NO_DEFAULT,
ExtractorError, ExtractorError,
UserNotLive,
bug_reports_message, bug_reports_message,
classproperty, classproperty,
clean_html, clean_html,
@ -5383,9 +5384,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
selected_tab_name = 'featured' selected_tab_name = 'featured'
requested_tab_name = mobj['tab'][1:] requested_tab_name = mobj['tab'][1:]
if 'no-youtube-channel-redirect' not in compat_opts: if 'no-youtube-channel-redirect' not in compat_opts:
if requested_tab_name == 'live': if requested_tab_name == 'live': # Live tab should have redirected to the video
# Live tab should have redirected to the video raise UserNotLive(video_id=mobj['id'])
raise ExtractorError('The channel is not currently live', expected=True)
if requested_tab_name not in ('', selected_tab_name): if requested_tab_name not in ('', selected_tab_name):
redirect_warning = f'The channel does not have a {requested_tab_name} tab' redirect_warning = f'The channel does not have a {requested_tab_name} tab'
if not original_tab_name: if not original_tab_name:

View File

@ -1149,9 +1149,9 @@ class FFmpegConcatPP(FFmpegPostProcessor):
if len(in_files) < len(entries): if len(in_files) < len(entries):
raise PostProcessingError('Aborting concatenation because some downloads failed') raise PostProcessingError('Aborting concatenation because some downloads failed')
ie_copy = self._downloader._playlist_infodict(info)
exts = traverse_obj(entries, (..., 'requested_downloads', 0, 'ext'), (..., 'ext')) exts = traverse_obj(entries, (..., 'requested_downloads', 0, 'ext'), (..., 'ext'))
ie_copy['ext'] = exts[0] if len(set(exts)) == 1 else 'mkv' ie_copy = collections.ChainMap({'ext': exts[0] if len(set(exts)) == 1 else 'mkv'},
info, self._downloader._playlist_infodict(info))
out_file = self._downloader.prepare_filename(ie_copy, 'pl_video') out_file = self._downloader.prepare_filename(ie_copy, 'pl_video')
files_to_delete = self.concat_files(in_files, out_file) files_to_delete = self.concat_files(in_files, out_file)

View File

@ -1072,6 +1072,14 @@ class GeoRestrictedError(ExtractorError):
self.countries = countries self.countries = countries
class UserNotLive(ExtractorError):
    """Raised when the requested channel/user is not live at the moment

    @param msg      Optional custom message. Any falsy value falls back to
                    'The channel is not currently live'
    @param kwargs   Forwarded unchanged to ExtractorError, except that
                    `expected` is always set to True
    """

    def __init__(self, msg=None, **kwargs):
        message = msg or 'The channel is not currently live'
        # `expected` is listed last so it overrides any value the caller passed
        super().__init__(message, **{**kwargs, 'expected': True})
class DownloadError(YoutubeDLError): class DownloadError(YoutubeDLError):
"""Download Error exception. """Download Error exception.
@ -3666,7 +3674,7 @@ def match_filter_func(filters):
if not filters or any(match_str(f, info_dict, incomplete) for f in filters): if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
return NO_DEFAULT if interactive and not incomplete else None return NO_DEFAULT if interactive and not incomplete else None
else: else:
video_title = info_dict.get('title') or info_dict.get('id') or 'video' video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
filter_str = ') | ('.join(map(str.strip, filters)) filter_str = ') | ('.join(map(str.strip, filters))
return f'{video_title} does not pass filter ({filter_str}), skipping ..' return f'{video_title} does not pass filter ({filter_str}), skipping ..'
return _match_func return _match_func