Compare commits

...

3 Commits

Author SHA1 Message Date
Elyse
a076c1f97a
[extractor] Update manifest_urls after redirect (#3575)
Authored by: elyse0
2022-04-27 15:50:01 -07:00
Evan Spensley
b3602f6824
[InfoQ] Don't fail on missing audio format (#3573)
Closes #3441 
Authored by: evansp
2022-04-27 14:30:24 -07:00
Elyse
779da8e31b
[extractor] Update dash manifest_url after redirects (#3563)
Closes #2696 
Authored by: elyse0
2022-04-27 11:01:35 -07:00
2 changed files with 34 additions and 14 deletions

View File

@ -1982,17 +1982,19 @@ class InfoExtractor:
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True, m3u8_id=None, data=None, headers={}, query={}):
manifest = self._download_xml(
res = self._download_xml_handle(
manifest_url, video_id, 'Downloading f4m manifest',
'Unable to download f4m manifest',
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
transform_source=transform_source,
fatal=fatal, data=data, headers=headers, query=query)
if manifest is False:
if res is False:
return []
manifest, urlh = res
manifest_url = urlh.geturl()
return self._parse_f4m_formats(
manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
@ -2400,12 +2402,14 @@ class InfoExtractor:
return '/'.join(out)
def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
if smil is False:
res = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
if res is False:
assert not fatal
return [], {}
smil, urlh = res
smil_url = urlh.geturl()
namespace = self._parse_smil_namespace(smil)
fmts = self._parse_smil_formats(
@ -2422,13 +2426,17 @@ class InfoExtractor:
return fmts
def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
smil = self._download_smil(smil_url, video_id, fatal=fatal)
if smil is False:
res = self._download_smil(smil_url, video_id, fatal=fatal)
if res is False:
return {}
smil, urlh = res
smil_url = urlh.geturl()
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
return self._download_xml(
return self._download_xml_handle(
smil_url, video_id, 'Downloading SMIL file',
'Unable to download SMIL file', fatal=fatal, transform_source=transform_source)
@ -2607,11 +2615,15 @@ class InfoExtractor:
return subtitles
def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
xspf = self._download_xml(
res = self._download_xml_handle(
xspf_url, playlist_id, 'Downloading xpsf playlist',
'Unable to download xspf manifest', fatal=fatal)
if xspf is False:
if res is False:
return []
xspf, urlh = res
xspf_url = urlh.geturl()
return self._parse_xspf(
xspf, playlist_id, xspf_url=xspf_url,
xspf_base_url=base_url(xspf_url))
@ -2676,7 +2688,10 @@ class InfoExtractor:
mpd_doc, urlh = res
if mpd_doc is None:
return [], {}
mpd_base_url = base_url(urlh.geturl())
# The MPD request may have been redirected, so use the final URL reported by the response handle from here on.
mpd_url = urlh.geturl()
mpd_base_url = base_url(mpd_url)
return self._parse_mpd_formats_and_subtitles(
mpd_doc, mpd_id, mpd_base_url, mpd_url)

View File

@ -4,8 +4,10 @@ from ..compat import (
compat_urlparse,
)
from ..utils import (
ExtractorError,
determine_ext,
update_url_query,
traverse_obj,
)
from .bokecc import BokeCCBaseIE
@ -34,6 +36,7 @@ class InfoQIE(BokeCCBaseIE):
'ext': 'flv',
'description': 'md5:308d981fb28fa42f49f9568322c683ff',
},
'skip': 'Sorry, the page you visited does not exist',
}, {
'url': 'https://www.infoq.com/presentations/Simple-Made-Easy',
'md5': '0e34642d4d9ef44bf86f66f6399672db',
@ -86,8 +89,10 @@ class InfoQIE(BokeCCBaseIE):
}]
def _extract_http_audio(self, webpage, video_id):
fields = self._form_hidden_inputs('mp3Form', webpage)
http_audio_url = fields.get('filename')
try:
http_audio_url = traverse_obj(self._form_hidden_inputs('mp3Form', webpage), 'filename')
except ExtractorError:
http_audio_url = None
if not http_audio_url:
return []