Compare commits

...

4 Commits

Author SHA1 Message Date

coletdev
ad210f4fd4
[youtube:search] Support hashtag entries (#3265)
Authored by: coletdjnz
2022-04-02 06:11:14 +00:00

coletdjnz
c8e856a551
[web.archive:youtube] Make CDX API requests non-fatal
Partial fix for https://github.com/yt-dlp/yt-dlp/issues/3278
Authored by: coletdjnz
2022-04-02 19:07:13 +13:00

nixxo
c085e4ec47
[rai] Fix extraction of http formats (#3272)
Closes #3270
Authored by: nixxo
2022-04-01 22:57:56 -07:00

pukkandan
4c268f9cb7
[Nebula] Fix bug in 52efa4b312
2022-04-02 11:22:17 +05:30
4 changed files with 25 additions and 5 deletions

View File

@@ -457,7 +457,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
     _OLDEST_CAPTURE_DATE = 20050214000000
     _NEWEST_CAPTURE_DATE = 20500101000000
 
-    def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note='Downloading CDX API JSON'):
+    def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False):
         # CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
         query = {
             'url': url,
@@ -468,7 +468,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'collapse': collapse or [],
             **(query or {})
         }
-        res = self._download_json('https://web.archive.org/cdx/search/cdx', item_id, note, query=query)
+        res = self._download_json(
+            'https://web.archive.org/cdx/search/cdx', item_id,
+            note or 'Downloading CDX API JSON', query=query, fatal=fatal)
         if isinstance(res, list) and len(res) >= 2:
             # format response to make it easier to use
             return list(dict(zip(res[0], v)) for v in res[1:])
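
Net effect of the two hunks above: a failing CDX request no longer aborts the whole extraction. With fatal=fatal (defaulting to False) passed through to _download_json, a request error is reported as a warning and the call returns a falsy value instead of raising, so the existing isinstance(res, list) guard simply produces no captures. A minimal sketch of the same pattern in plain Python follows; fetch_cdx_json is a hypothetical stand-in for _download_json(..., fatal=False), not yt-dlp's API.

# Sketch of the non-fatal request pattern used above (hypothetical helper, not yt-dlp code):
# on failure it warns and returns None, and the caller falls back to an empty capture list.
import json
import urllib.error
import urllib.request


def fetch_cdx_json(url, fatal=False):
    try:
        with urllib.request.urlopen(url) as resp:
            return json.load(resp)
    except (urllib.error.URLError, ValueError) as exc:
        if fatal:
            raise
        print(f'WARNING: CDX API request failed: {exc}')
        return None


res = fetch_cdx_json('https://web.archive.org/cdx/search/cdx?url=example.com&output=json&limit=5')
if isinstance(res, list) and len(res) >= 2:
    # same post-processing as the extractor: zip the header row with each data row
    captures = [dict(zip(res[0], row)) for row in res[1:]]
else:
    captures = []  # non-fatal failure (or empty result): continue with no captures
print(captures)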

View File

@@ -86,7 +86,7 @@ class NebulaBaseIE(InfoExtractor):
             # if 401 or 403, attempt credential re-auth and retry
             if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403):
                 self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
-                self._login()
+                self._perform_login()
                 return inner_call()
             else:
                 raise
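
Context for the one-line fix above: the referenced commit 52efa4b312 switched Nebula's login to the standard _perform_login() hook, but this retry path still called the removed _login(), so re-authentication after a 401/403 would itself raise instead of retrying. Below is a rough, self-contained sketch of the retry-on-auth-error pattern; AuthError, call_api and re_login are hypothetical stand-ins for ExtractorError and the extractor's own methods.

# Hypothetical sketch of the retry-on-auth-error pattern, not Nebula extractor code.
class AuthError(Exception):
    def __init__(self, code):
        super().__init__(f'HTTP {code}')
        self.code = code


def call_with_reauth(call_api, re_login):
    try:
        return call_api()
    except AuthError as exc:
        if exc.code in (401, 403):
            re_login()          # must point at a method that still exists --
            return call_api()   # calling a renamed/removed one breaks the retry (the bug fixed here)
        raise


# toy usage: the first call fails with 401, re-login fixes it, the retry succeeds
state = {'authed': False}


def call_api():
    if not state['authed']:
        raise AuthError(401)
    return 'ok'


def re_login():
    state['authed'] = True


print(call_with_reauth(call_api, re_login))  # ok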

View File

@@ -118,7 +118,7 @@ class RaiBaseIE(InfoExtractor):
         })
 
     def _create_http_urls(self, relinker_url, fmts):
-        _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\d+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
+        _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
         _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
         _QUALITY = {
             # tbr: w, h
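
The only change here is the id capture group, widened from \d+ to \w+: relinker URLs whose media id contains letters previously failed to match _RELINKER_REG, so no HTTP formats were generated (#3270). A quick illustration with an invented URL (not taken from the issue):

# Why the capture group was widened: an alphanumeric id is rejected by \d+ but accepted by \w+.
import re

OLD = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\d+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
NEW = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'

url = 'https://example.invalid/i/extra/some/path/Abc123_1800.mp4?x=1'  # hypothetical URL
print(bool(re.match(OLD, url)))  # False - id containing letters does not match \d+
print(bool(re.match(NEW, url)))  # True  - \w+ accepts letters, digits and underscore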

View File

@@ -3903,6 +3903,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
         if video_id:
             return self._extract_video(video_renderer)
 
+    def _hashtag_tile_entry(self, hashtag_tile_renderer):
+        url = urljoin('https://youtube.com', traverse_obj(
+            hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url')))
+        if url:
+            return self.url_result(
+                url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
+
     def _post_thread_entries(self, post_thread_renderer):
         post_renderer = try_get(
             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
@@ -3991,6 +3998,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
             'videoRenderer': lambda x: [self._video_entry(x)],
             'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
             'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
+            'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
         }
         for key, renderer in isr_content.items():
             if key not in known_renderers:
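
Taken together, the two hunks above add handling for hashtag tiles in search results: _hashtag_tile_entry() reads the tab URL out of the renderer with traverse_obj and returns a url_result, and registering it in known_renderers makes the dispatch loop call it for hashtagTileRenderer items instead of skipping them. A hedged sketch of the data it consumes follows; the field values are invented for illustration, and traverse() is a simplified stand-in for yt_dlp.utils.traverse_obj.

# Hypothetical hashtagTileRenderer shape; only the keys used by the new method matter here.
from urllib.parse import urljoin

sample_tile = {
    'hashtag': {'simpleText': '#cats'},
    'onTapCommand': {
        'commandMetadata': {
            'webCommandMetadata': {'url': '/hashtag/cats'},
        },
    },
}


def traverse(obj, path):
    # simplified stand-in for traverse_obj: walk nested keys, return None if any level is missing
    for key in path:
        if not isinstance(obj, dict):
            return None
        obj = obj.get(key)
    return obj


relative = traverse(sample_tile, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
print(urljoin('https://youtube.com', relative))  # https://youtube.com/hashtag/cats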
@@ -5520,7 +5528,17 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
             'id': 'python',
             'title': 'python',
         }
+    }, {
+        'url': 'https://www.youtube.com/results?search_query=%23cats',
+        'playlist_mincount': 1,
+        'info_dict': {
+            'id': '#cats',
+            'title': '#cats',
+            'entries': [{
+                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
+                'title': '#cats',
+            }],
+        },
     }, {
         'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
         'only_matching': True,
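
The new test above pins the behaviour down: a search for %23cats should produce a playlist with at least one flat entry pointing at the /hashtag/cats tab. For reference, something like the following sketch exercises the same path outside the test suite; it uses yt-dlp's embedding API, requires yt-dlp installed plus network access, and the exact fields may differ.

# Hedged sketch: reproduce what the new search-URL test covers.
import yt_dlp

with yt_dlp.YoutubeDL({'extract_flat': True, 'quiet': True}) as ydl:
    info = ydl.extract_info('https://www.youtube.com/results?search_query=%23cats', download=False)
    for entry in info.get('entries') or []:
        # expected per the test: a URL matching https://(www.)youtube.com/hashtag/cats with title '#cats'
        print(entry.get('url'), entry.get('title'))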