mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-09 10:50:38 +00:00
[ie/facebook:reel] Improve metadata extraction
Closes #9057, Closes #10824
Authored by: lengzuo
This commit is contained in:
parent
3dfd720d09
commit
0e1b941c6b
@ -84,7 +84,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
'timestamp': 1692346159,
|
'timestamp': 1692346159,
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'uploader_id': '100063551323670',
|
'uploader_id': '100063551323670',
|
||||||
'duration': 3132.184,
|
'duration': 3133.583,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'concurrent_view_count': 0,
|
'concurrent_view_count': 0,
|
||||||
},
|
},
|
||||||
@ -112,9 +112,10 @@ class FacebookIE(InfoExtractor):
|
|||||||
'upload_date': '20140506',
|
'upload_date': '20140506',
|
||||||
'timestamp': 1399398998,
|
'timestamp': 1399398998,
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
|
'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl',
|
||||||
'duration': 131.03,
|
'duration': 131.03,
|
||||||
'concurrent_view_count': int,
|
'concurrent_view_count': int,
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'note': 'Video with DASH manifest',
|
'note': 'Video with DASH manifest',
|
||||||
@ -167,7 +168,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
# have 1080P, but only up to 720p in swf params
|
# have 1080P, but only up to 720p in swf params
|
||||||
# data.video.story.attachments[].media
|
# data.video.story.attachments[].media
|
||||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||||
'md5': 'ca63897a90c9452efee5f8c40d080e25',
|
'md5': '1659aa21fb3dd1585874f668e81a72c8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '10155529876156509',
|
'id': '10155529876156509',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -180,9 +181,10 @@ class FacebookIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'uploader_id': '100059479812265',
|
'uploader_id': '100059479812265',
|
||||||
'concurrent_view_count': int,
|
'concurrent_view_count': int,
|
||||||
'duration': 44.478,
|
'duration': 44.181,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
# FIXME: unable to extract uploader, no formats found
|
||||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||||
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||||
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
||||||
@ -241,9 +243,9 @@ class FacebookIE(InfoExtractor):
|
|||||||
'timestamp': 1511548260,
|
'timestamp': 1511548260,
|
||||||
'upload_date': '20171124',
|
'upload_date': '20171124',
|
||||||
'uploader': 'Vickie Gentry',
|
'uploader': 'Vickie Gentry',
|
||||||
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
|
'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l',
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'duration': 148.435,
|
'duration': 148.224,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||||
@ -271,7 +273,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
|
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'uploader': 'Lela Evans',
|
'uploader': 'Lela Evans',
|
||||||
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
|
'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl',
|
||||||
'upload_date': '20231228',
|
'upload_date': '20231228',
|
||||||
'timestamp': 1703804085,
|
'timestamp': 1703804085,
|
||||||
'duration': 394.347,
|
'duration': 394.347,
|
||||||
@ -322,7 +324,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
'upload_date': '20180523',
|
'upload_date': '20180523',
|
||||||
'uploader': 'ESL One Dota 2',
|
'uploader': 'ESL One Dota 2',
|
||||||
'uploader_id': '100066514874195',
|
'uploader_id': '100066514874195',
|
||||||
'duration': 4524.212,
|
'duration': 4524.001,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'concurrent_view_count': int,
|
'concurrent_view_count': int,
|
||||||
@ -339,9 +341,9 @@ class FacebookIE(InfoExtractor):
|
|||||||
'title': 'Josef',
|
'title': 'Josef',
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'concurrent_view_count': int,
|
'concurrent_view_count': int,
|
||||||
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
|
'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl',
|
||||||
'timestamp': 1549275572,
|
'timestamp': 1549275572,
|
||||||
'duration': 3.413,
|
'duration': 3.283,
|
||||||
'uploader': 'Josef Novak',
|
'uploader': 'Josef Novak',
|
||||||
'description': '',
|
'description': '',
|
||||||
'upload_date': '20190204',
|
'upload_date': '20190204',
|
||||||
@ -396,6 +398,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
'playlist_count': 1,
|
'playlist_count': 1,
|
||||||
'skip': 'Requires logging in',
|
'skip': 'Requires logging in',
|
||||||
}, {
|
}, {
|
||||||
|
# FIXME: Cannot parse data error
|
||||||
# data.event.cover_media_renderer.cover_video
|
# data.event.cover_media_renderer.cover_video
|
||||||
'url': 'https://m.facebook.com/events/1509582499515440',
|
'url': 'https://m.facebook.com/events/1509582499515440',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -498,7 +501,8 @@ class FacebookIE(InfoExtractor):
|
|||||||
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
|
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
|
||||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||||
or get_first(post, ('node', 'actors', ..., {dict}))
|
or get_first(post, ('node', 'actors', ..., {dict}))
|
||||||
or get_first(post, ('event', 'event_creator', {dict})) or {})
|
or get_first(post, ('event', 'event_creator', {dict}))
|
||||||
|
or get_first(post, ('video', 'creation_story', 'short_form_video_context', 'video_owner', {dict})) or {})
|
||||||
uploader = uploader_data.get('name') or (
|
uploader = uploader_data.get('name') or (
|
||||||
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||||
or self._search_regex(
|
or self._search_regex(
|
||||||
@ -524,6 +528,11 @@ class FacebookIE(InfoExtractor):
|
|||||||
webpage, 'view count', default=None)),
|
webpage, 'view count', default=None)),
|
||||||
'concurrent_view_count': get_first(post, (
|
'concurrent_view_count': get_first(post, (
|
||||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||||
|
**traverse_obj(post, (lambda _, v: video_id in v['url'], 'feedback', {
|
||||||
|
'like_count': ('likers', 'count', {int}),
|
||||||
|
'comment_count': ('total_comment_count', {int}),
|
||||||
|
'repost_count': ('share_count_reduced', {parse_count}),
|
||||||
|
}), get_all=False),
|
||||||
}
|
}
|
||||||
|
|
||||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||||
@ -932,18 +941,21 @@ class FacebookReelIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.facebook.com/reel/1195289147628387',
|
'url': 'https://www.facebook.com/reel/1195289147628387',
|
||||||
'md5': 'f13dd37f2633595982db5ed8765474d3',
|
'md5': 'a53256d10fc2105441fe0c4212ed8cea',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1195289147628387',
|
'id': '1195289147628387',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
|
'title': r're:9\.6K views · 355 reactions .+ Let the “Slapathon” commence!! .+ LL COOL J · Mama Said Knock You Out$',
|
||||||
'description': 'md5:22f03309b216ac84720183961441d8db',
|
'description': r're:When your trying to help your partner .+ LL COOL J · Mama Said Knock You Out$',
|
||||||
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
|
'uploader': 'Beast Camp Training',
|
||||||
'uploader_id': '100040874179269',
|
'uploader_id': '100040874179269',
|
||||||
'duration': 9.579,
|
'duration': 9.579,
|
||||||
'timestamp': 1637502609,
|
'timestamp': 1637502609,
|
||||||
'upload_date': '20211121',
|
'upload_date': '20211121',
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user