Compare commits

...

4 Commits

Author SHA1 Message Date
pukkandan
693f060040
[youtube,twitch] Allow waiting for channels to become live
Closes #2597
2022-07-26 09:33:19 +05:30
pukkandan
3bec830a59
Reject entire playlists faster with --match-filter
Rejected based on `playlist_id` etc can be checked before any entries are extracted

Related: #4383
2022-07-26 09:33:12 +05:30
Burve
7d0f6f0c45
[extractor/Crunchyroll] Handle missing metadata correctly (#4405)
Closes #4399

Authored by pukkandan, Burve
2022-07-26 09:11:52 +05:30
Sipherdrakon
26bafe7028
[extractor/dplay] Add MotorTrend extractor (#4446)
Authored by: Sipherdrakon
2022-07-26 09:09:40 +05:30
8 changed files with 116 additions and 74 deletions

View File

@ -80,6 +80,7 @@ from .utils import (
RejectedVideoReached,
SameFileError,
UnavailableVideoError,
UserNotLive,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
YoutubeDLRedirectHandler,
@ -1309,7 +1310,7 @@ class YoutubeDL:
def _match_entry(self, info_dict, incomplete=False, silent=False):
""" Returns None if the file should be downloaded """
video_title = info_dict.get('title', info_dict.get('id', 'video'))
video_title = info_dict.get('title', info_dict.get('id', 'entry'))
def check_filter():
if 'title' in info_dict:
@ -1456,7 +1457,7 @@ class YoutubeDL:
break
return wrapper
def _wait_for_video(self, ie_result):
def _wait_for_video(self, ie_result={}):
if (not self.params.get('wait_for_video')
or ie_result.get('_type', 'video') != 'video'
or ie_result.get('formats') or ie_result.get('url')):
@ -1480,7 +1481,7 @@ class YoutubeDL:
if diff is None and ie_result.get('live_status') == 'is_upcoming':
diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
self.report_warning('Release time of video is not known')
elif (diff or 0) <= 0:
elif ie_result and (diff or 0) <= 0:
self.report_warning('Video should already be available according to extracted info')
diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
@ -1504,7 +1505,14 @@ class YoutubeDL:
@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
try:
ie_result = ie.extract(url)
except UserNotLive as e:
if process:
if self.params.get('wait_for_video'):
self.report_warning(e)
self._wait_for_video()
raise
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
return
@ -1677,23 +1685,37 @@ class YoutubeDL:
return make_dir(path, self.report_error)
@staticmethod
def _playlist_infodict(ie_result, **kwargs):
return {
**ie_result,
def _playlist_infodict(ie_result, strict=False, **kwargs):
info = {
'playlist_count': ie_result.get('playlist_count'),
'playlist': ie_result.get('title') or ie_result.get('id'),
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
'playlist_index': 0,
**kwargs,
}
if strict:
return info
return {
**info,
'playlist_index': 0,
'__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'webpage_url_domain': get_domain(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
}
def __process_playlist(self, ie_result, download):
"""Process each entry in the playlist"""
assert ie_result['_type'] in ('playlist', 'multi_video')
title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
common_info = self._playlist_infodict(ie_result, strict=True)
title = common_info.get('title') or '<Untitled>'
if self._match_entry(common_info, incomplete=True) is not None:
return
self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
all_entries = PlaylistEntries(self, ie_result)
@ -1711,12 +1733,14 @@ class YoutubeDL:
# Better to do this after potentially exhausting entries
ie_result['playlist_count'] = all_entries.get_full_count()
common_info = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
ie_copy = collections.ChainMap(ie_result, common_info)
_infojson_written = False
write_playlist_files = self.params.get('allow_playlist_files', True)
if write_playlist_files and self.params.get('list_thumbnails'):
self.list_thumbnails(ie_result)
if write_playlist_files and not self.params.get('simulate'):
ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
_infojson_written = self._write_info_json(
'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
if _infojson_written is None:
@ -1725,7 +1749,7 @@ class YoutubeDL:
self.prepare_filename(ie_copy, 'pl_description')) is None:
return
# TODO: This should be passed to ThumbnailsConvertor if necessary
self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
if lazy:
if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
@ -1749,35 +1773,26 @@ class YoutubeDL:
for i, (playlist_index, entry) in enumerate(entries):
if lazy:
resolved_entries.append((playlist_index, entry))
# TODO: Add auto-generated fields
if not entry or self._match_entry(entry, incomplete=True) is not None:
if not entry:
continue
self.to_screen('[download] Downloading video %s of %s' % (
self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
playlist_index = ie_result['requested_entries'][i]
entry_result = self.__process_iterable_entry(entry, download, {
'n_entries': int_or_none(n_entries),
'__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
'playlist_count': ie_result.get('playlist_count'),
extra = {
**common_info,
'playlist_index': playlist_index,
'playlist_autonumber': i + 1,
'playlist': title,
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'webpage_url_domain': get_domain(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
})
}
if self._match_entry(collections.ChainMap(entry, extra), incomplete=True) is not None:
continue
self.to_screen('[download] Downloading video %s of %s' % (
self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
entry_result = self.__process_iterable_entry(entry, download, extra)
if not entry_result:
failures += 1
if failures >= max_failures:

View File

@ -408,6 +408,7 @@ from .dplay import (
DiscoveryLifeIE,
AnimalPlanetIE,
TLCIE,
MotorTrendIE,
DiscoveryPlusIndiaIE,
DiscoveryNetworksDeIE,
DiscoveryPlusItalyIE,

View File

@ -813,56 +813,36 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
episode_response = self._download_json(
f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
note='Retrieving episode metadata',
query=params)
note='Retrieving episode metadata', query=params)
if episode_response.get('is_premium_only') and not episode_response.get('playback'):
raise ExtractorError('This video is for premium members only.', expected=True)
stream_response = self._download_json(
episode_response['playback'], display_id,
note='Retrieving stream info')
thumbnails = []
for thumbnails_data in traverse_obj(episode_response, ('images', 'thumbnail')):
for thumbnail_data in thumbnails_data:
thumbnails.append({
'url': thumbnail_data.get('source'),
'width': thumbnail_data.get('width'),
'height': thumbnail_data.get('height'),
})
subtitles = {}
for lang, subtitle_data in stream_response.get('subtitles').items():
subtitles[lang] = [{
'url': subtitle_data.get('url'),
'ext': subtitle_data.get('format')
}]
stream_response = self._download_json(episode_response['playback'], display_id, note='Retrieving stream info')
get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items()
requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
hardsub_preference = qualities(requested_hardsubs[::-1])
requested_formats = self._configuration_arg('format') or ['adaptive_hls']
formats = []
for stream_type, streams in stream_response.get('streams', {}).items():
for stream_type, streams in get_streams('streams'):
if stream_type not in requested_formats:
continue
for stream in streams.values():
hardsub_lang = stream.get('hardsub_locale') or ''
if hardsub_lang.lower() not in requested_hardsubs:
continue
format_id = join_nonempty(
stream_type,
format_field(stream, 'hardsub_locale', 'hardsub-%s'))
format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
if not stream.get('url'):
continue
if stream_type.split('_')[-1] == 'hls':
if stream_type.endswith('hls'):
adaptive_formats = self._extract_m3u8_formats(
stream['url'], display_id, 'mp4', m3u8_id=format_id,
note='Downloading %s information' % format_id,
fatal=False)
elif stream_type.split('_')[-1] == 'dash':
fatal=False, note=f'Downloading {format_id} HLS manifest')
elif stream_type.endswith('dash'):
adaptive_formats = self._extract_mpd_formats(
stream['url'], display_id, mpd_id=format_id,
note='Downloading %s information' % format_id,
fatal=False)
fatal=False, note=f'Downloading {format_id} MPD manifest')
for f in adaptive_formats:
if f.get('acodec') != 'none':
f['language'] = stream_response.get('audio_locale')
@ -872,10 +852,10 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
return {
'id': internal_id,
'title': '%s Episode %s %s' % (episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
'description': episode_response.get('description').replace(r'\r\n', '\n'),
'title': '%s Episode %s %s' % (
episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')),
'duration': float_or_none(episode_response.get('duration_ms'), 1000),
'thumbnails': thumbnails,
'series': episode_response.get('series_title'),
'series_id': episode_response.get('series_id'),
'season': episode_response.get('season_title'),
@ -883,8 +863,18 @@ class CrunchyrollBetaIE(CrunchyrollBetaBaseIE):
'season_number': episode_response.get('season_number'),
'episode': episode_response.get('title'),
'episode_number': episode_response.get('sequence_number'),
'subtitles': subtitles,
'formats': formats
'formats': formats,
'thumbnails': [{
'url': thumb.get('source'),
'width': thumb.get('width'),
'height': thumb.get('height'),
} for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []],
'subtitles': {
lang: [{
'url': subtitle_data.get('url'),
'ext': subtitle_data.get('format')
}] for lang, subtitle_data in get_streams('subtitles')
},
}

View File

@ -718,6 +718,33 @@ class TLCIE(DiscoveryPlusBaseIE):
}
class MotorTrendIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:watch\.)?motortrend\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
'info_dict': {
'id': '"4859182"',
'display_id': 'double-dakotas',
'ext': 'mp4',
'title': 'Double Dakotas',
'description': 'Tylers buy-one-get-one Dakota deal has the Wizard pulling double duty.',
'season_number': 2,
'episode_number': 3,
},
'skip': 'Available for Premium users',
}, {
'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
'only_matching': True,
}]
_PRODUCT = 'vel'
_DISCO_API_PARAMS = {
'disco_host': 'us1-prod-direct.watch.motortrend.com',
'realm': 'go',
'country': 'us',
}
class DiscoveryPlusIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{

View File

@ -12,10 +12,11 @@ from ..compat import (
compat_urllib_parse_urlparse,
)
from ..utils import (
ExtractorError,
UserNotLive,
base_url,
clean_html,
dict_get,
ExtractorError,
float_or_none,
int_or_none,
parse_duration,
@ -940,7 +941,7 @@ class TwitchStreamIE(TwitchBaseIE):
stream = user['stream']
if not stream:
raise ExtractorError('%s is offline' % channel_name, expected=True)
raise UserNotLive(video_id=channel_name)
access_token = self._download_access_token(
channel_name, 'stream', 'channelName')

View File

@ -22,6 +22,7 @@ from ..jsinterp import JSInterpreter
from ..utils import (
NO_DEFAULT,
ExtractorError,
UserNotLive,
bug_reports_message,
classproperty,
clean_html,
@ -5383,9 +5384,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
selected_tab_name = 'featured'
requested_tab_name = mobj['tab'][1:]
if 'no-youtube-channel-redirect' not in compat_opts:
if requested_tab_name == 'live':
# Live tab should have redirected to the video
raise ExtractorError('The channel is not currently live', expected=True)
if requested_tab_name == 'live': # Live tab should have redirected to the video
raise UserNotLive(video_id=mobj['id'])
if requested_tab_name not in ('', selected_tab_name):
redirect_warning = f'The channel does not have a {requested_tab_name} tab'
if not original_tab_name:

View File

@ -1149,9 +1149,9 @@ class FFmpegConcatPP(FFmpegPostProcessor):
if len(in_files) < len(entries):
raise PostProcessingError('Aborting concatenation because some downloads failed')
ie_copy = self._downloader._playlist_infodict(info)
exts = traverse_obj(entries, (..., 'requested_downloads', 0, 'ext'), (..., 'ext'))
ie_copy['ext'] = exts[0] if len(set(exts)) == 1 else 'mkv'
ie_copy = collections.ChainMap({'ext': exts[0] if len(set(exts)) == 1 else 'mkv'},
info, self._downloader._playlist_infodict(info))
out_file = self._downloader.prepare_filename(ie_copy, 'pl_video')
files_to_delete = self.concat_files(in_files, out_file)

View File

@ -1072,6 +1072,14 @@ class GeoRestrictedError(ExtractorError):
self.countries = countries
class UserNotLive(ExtractorError):
"""Error when a channel/user is not live"""
def __init__(self, msg=None, **kwargs):
kwargs['expected'] = True
super().__init__(msg or 'The channel is not currently live', **kwargs)
class DownloadError(YoutubeDLError):
"""Download Error exception.
@ -3666,7 +3674,7 @@ def match_filter_func(filters):
if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
return NO_DEFAULT if interactive and not incomplete else None
else:
video_title = info_dict.get('title') or info_dict.get('id') or 'video'
video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
filter_str = ') | ('.join(map(str.strip, filters))
return f'{video_title} does not pass filter ({filter_str}), skipping ..'
return _match_func