[ie/facebook] Fix extraction (#10531)

Closes #10532
Authored by: bashonly
This commit is contained in:
bashonly 2024-07-23 18:08:24 -05:00 committed by GitHub
parent a0a1bc3d8d
commit 1a34a802f4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -571,16 +571,21 @@ class FacebookIE(InfoExtractor):
                 # Formats larger than ~500MB will return error 403 unless chunk size is regulated
                 f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20

-        def extract_relay_data(_filter):
-            return self._parse_json(self._search_regex(
-                rf'data-sjs>({{.*?{_filter}.*?}})</script>',
-                webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
+        def yield_all_relay_data(_filter):
+            for relay_data in re.findall(rf'data-sjs>({{.*?{_filter}.*?}})</script>', webpage):
+                yield self._parse_json(relay_data, video_id, fatal=False) or {}

-        def extract_relay_prefetched_data(_filter):
-            return traverse_obj(extract_relay_data(_filter), (
-                'require', (None, (..., ..., ..., '__bbox', 'require')),
+        def extract_relay_data(_filter):
+            return next(filter(None, yield_all_relay_data(_filter)), {})
+
+        def extract_relay_prefetched_data(_filter, target_keys=None):
+            path = 'data'
+            if target_keys is not None:
+                path = lambda k, v: k == 'data' and any(target in v for target in variadic(target_keys))
+            return traverse_obj(yield_all_relay_data(_filter), (
+                ..., 'require', (None, (..., ..., ..., '__bbox', 'require')),
                 lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
-                ..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
+                ..., ..., '__bbox', 'result', path, {dict}), get_all=False) or {}

         if not video_data:
             server_js_data = self._parse_json(self._search_regex([
@@ -591,7 +596,8 @@ class FacebookIE(InfoExtractor):

         if not video_data:
             data = extract_relay_prefetched_data(
-                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
+                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)',
+                target_keys=('video', 'event', 'nodes', 'node', 'mediaset'))
             if data:
                 entries = []