Compare commits

..

No commits in common. "0c36dc00d7b9f43238bacb0e03730f31117d0b38" and "1ac4fd80c87d4e566ae680076e788a63d187199b" have entirely different histories.

3 changed files with 33 additions and 27 deletions

View File

@@ -403,8 +403,6 @@ def validate_options(opts):
default_downloader = None default_downloader = None
for proto, path in opts.external_downloader.items(): for proto, path in opts.external_downloader.items():
if path == 'native':
continue
ed = get_external_downloader(path) ed = get_external_downloader(path)
if ed is None: if ed is None:
raise ValueError( raise ValueError(

View File

@@ -1392,25 +1392,27 @@ class InfoExtractor:
return self._html_search_meta('twitter:player', html, return self._html_search_meta('twitter:player', html,
'twitter card player') 'twitter card player')
def _yield_json_ld(self, html, video_id, *, fatal=True, default=NO_DEFAULT): def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
"""Yield all json ld objects in the html""" json_ld_list = list(re.finditer(JSON_LD_RE, html))
if default is not NO_DEFAULT: default = kwargs.get('default', NO_DEFAULT)
fatal = False # JSON-LD may be malformed and thus `fatal` should be respected.
for mobj in re.finditer(JSON_LD_RE, html): # At the same time `default` may be passed that assumes `fatal=False`
json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal) # for _search_regex. Let's simulate the same behavior here as well.
for json_ld in variadic(json_ld_item): fatal = kwargs.get('fatal', True) if default is NO_DEFAULT else False
if isinstance(json_ld, dict): json_ld = []
yield json_ld for mobj in json_ld_list:
json_ld_item = self._parse_json(
def _search_json_ld(self, html, video_id, expected_type=None, *, fatal=True, default=NO_DEFAULT): mobj.group('json_ld'), video_id, fatal=fatal)
"""Search for a video in any json ld in the html""" if not json_ld_item:
if default is not NO_DEFAULT: continue
fatal = False if isinstance(json_ld_item, dict):
info = self._json_ld( json_ld.append(json_ld_item)
list(self._yield_json_ld(html, video_id, fatal=fatal, default=default)), elif isinstance(json_ld_item, (list, tuple)):
video_id, fatal=fatal, expected_type=expected_type) json_ld.extend(json_ld_item)
if info: if json_ld:
return info json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
if json_ld:
return json_ld
if default is not NO_DEFAULT: if default is not NO_DEFAULT:
return default return default
elif fatal: elif fatal:
@@ -1498,7 +1500,7 @@ class InfoExtractor:
assert is_type(e, 'VideoObject') assert is_type(e, 'VideoObject')
author = e.get('author') author = e.get('author')
info.update({ info.update({
'url': url_or_none(e.get('contentUrl')), 'url': traverse_obj(e, 'contentUrl', 'embedUrl', expected_type=url_or_none),
'title': unescapeHTML(e.get('name')), 'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')), 'description': unescapeHTML(e.get('description')),
'thumbnails': [{'url': url} 'thumbnails': [{'url': url}

View File

@@ -1,5 +1,9 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none, qualities, traverse_obj, url_or_none from ..utils import (
int_or_none,
qualities,
url_or_none,
)
class NprIE(InfoExtractor): class NprIE(InfoExtractor):
@@ -70,6 +74,10 @@ class NprIE(InfoExtractor):
})['list']['story'][0] })['list']['story'][0]
playlist_title = story.get('title', {}).get('$text') playlist_title = story.get('title', {}).get('$text')
# Fetch the JSON-LD from the npr page.
json_ld = self._search_json_ld(
self._download_webpage(url, playlist_id), playlist_id, 'NewsArticle', fatal=False)
KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3') KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
quality = qualities(KNOWN_FORMATS) quality = qualities(KNOWN_FORMATS)
@@ -116,10 +124,8 @@ class NprIE(InfoExtractor):
stream_url, stream_id, 'mp4', 'm3u8_native', stream_url, stream_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False))
if not formats: if not formats and json_ld.get('url'):
raw_json_ld = self._yield_json_ld(self._download_webpage(url, playlist_id), playlist_id, fatal=False) formats.extend(self._extract_m3u8_formats(json_ld['url'], media_id, 'mp4', m3u8_id='hls', fatal=False))
m3u8_url = traverse_obj(list(raw_json_ld), (..., 'subjectOf', ..., 'embedUrl'), get_all=False)
formats = self._extract_m3u8_formats(m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
self._sort_formats(formats) self._sort_formats(formats)