Compare commits

...

2 Commits

Author SHA1 Message Date
coletdjnz
cea4b857f0
[patreon] Ignore erroneous media attachments (#4638)
Fixes https://github.com/yt-dlp/yt-dlp/issues/4608
Authored by: coletdjnz
2022-08-13 00:25:20 +00:00
shirt
ffcd62c289
[extractor/tubitv] Extract additional formats (#4646)
Authored by: shirt-dev
2022-08-13 05:10:49 +05:30
2 changed files with 38 additions and 10 deletions

View File

@@ -154,6 +154,28 @@ class PatreonIE(PatreonBaseIE):
'channel_url': 'https://www.patreon.com/loish', 'channel_url': 'https://www.patreon.com/loish',
'channel_follower_count': int, 'channel_follower_count': int,
} }
}, {
# bad videos under media (if media is included). Real one is under post_file
'url': 'https://www.patreon.com/posts/premium-access-70282931',
'info_dict': {
'id': '70282931',
'ext': 'mp4',
'title': '[Premium Access + Uncut] The Office - 2x6 The Fight - Group Reaction',
'channel_url': 'https://www.patreon.com/thenormies',
'channel_id': '573397',
'uploader_id': '2929435',
'uploader': 'The Normies',
'description': 'md5:79c9fd8778e2cef84049a94c058a5e23',
'comment_count': int,
'upload_date': '20220809',
'thumbnail': r're:^https?://.*$',
'channel_follower_count': int,
'like_count': int,
'timestamp': 1660052820,
'tags': ['The Office', 'early access', 'uncut'],
'uploader_url': 'https://www.patreon.com/thenormies',
},
'skip': 'Patron-only content',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@@ -166,7 +188,7 @@ class PatreonIE(PatreonBaseIE):
'fields[post_tag]': 'value', 'fields[post_tag]': 'value',
'fields[campaign]': 'url,name,patron_count', 'fields[campaign]': 'url,name,patron_count',
'json-api-use-default-includes': 'false', 'json-api-use-default-includes': 'false',
'include': 'media,user,user_defined_tags,campaign', 'include': 'audio,user,user_defined_tags,campaign,attachments_media',
}) })
attributes = post['data']['attributes'] attributes = post['data']['attributes']
title = attributes['title'].strip() title = attributes['title'].strip()
@@ -190,11 +212,16 @@ class PatreonIE(PatreonBaseIE):
media_attributes = i.get('attributes') or {} media_attributes = i.get('attributes') or {}
download_url = media_attributes.get('download_url') download_url = media_attributes.get('download_url')
ext = mimetype2ext(media_attributes.get('mimetype')) ext = mimetype2ext(media_attributes.get('mimetype'))
if download_url and ext in KNOWN_EXTENSIONS:
# if size_bytes is None, this media file is likely unavailable
# See: https://github.com/yt-dlp/yt-dlp/issues/4608
size_bytes = int_or_none(media_attributes.get('size_bytes'))
if download_url and ext in KNOWN_EXTENSIONS and size_bytes is not None:
# XXX: what happens if there are multiple attachments?
return { return {
**info, **info,
'ext': ext, 'ext': ext,
'filesize': int_or_none(media_attributes.get('size_bytes')), 'filesize': size_bytes,
'url': download_url, 'url': download_url,
} }
elif i_type == 'user': elif i_type == 'user':

View File

@@ -70,16 +70,17 @@ class TubiTvIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json( video_data = self._download_json(
'http://tubitv.com/oz/videos/%s/content' % video_id, video_id) 'https://tubitv.com/oz/videos/%s/content?video_resources=dash&video_resources=hlsv3&video_resources=hlsv6' % video_id, video_id)
title = video_data['title'] title = video_data['title']
formats = [] formats = []
url = video_data['url']
# URL can be sometimes empty. Does this only happen when there is DRM? for resource in video_data['video_resources']:
if url: if resource['type'] in ('dash', ):
formats = self._extract_m3u8_formats( formats += self._extract_mpd_formats(resource['manifest']['url'], video_id, mpd_id=resource['type'], fatal=False)
self._proto_relative_url(url), elif resource['type'] in ('hlsv3', 'hlsv6'):
video_id, 'mp4', 'm3u8_native') formats += self._extract_m3u8_formats(resource['manifest']['url'], video_id, 'mp4', m3u8_id=resource['type'], fatal=False)
self._sort_formats(formats) self._sort_formats(formats)
thumbnails = [] thumbnails = []