[patreon] Ignore erroneous media attachments (#4638)

Fixes https://github.com/yt-dlp/yt-dlp/issues/4608

Authored by: coletdjnz
[extractor/tubitv] Extract additional formats (#4646)
2024-11-15 13:43:04 +00:00 · 2022-08-13 00:25:20 +00:00 · 2022-08-13 05:10:49 +05:30
2 changed files with 38 additions and 10 deletions
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -154,6 +154,28 @@ class PatreonIE(PatreonBaseIE):
            'channel_url': 'https://www.patreon.com/loish',
            'channel_follower_count': int,
        }
+    }, {
+        # bad videos under media (if media is included). Real one is under post_file
+        'url': 'https://www.patreon.com/posts/premium-access-70282931',
+        'info_dict': {
+            'id': '70282931',
+            'ext': 'mp4',
+            'title': '[Premium Access + Uncut] The Office - 2x6 The Fight - Group Reaction',
+            'channel_url': 'https://www.patreon.com/thenormies',
+            'channel_id': '573397',
+            'uploader_id': '2929435',
+            'uploader': 'The Normies',
+            'description': 'md5:79c9fd8778e2cef84049a94c058a5e23',
+            'comment_count': int,
+            'upload_date': '20220809',
+            'thumbnail': r're:^https?://.*$',
+            'channel_follower_count': int,
+            'like_count': int,
+            'timestamp': 1660052820,
+            'tags': ['The Office', 'early access', 'uncut'],
+            'uploader_url': 'https://www.patreon.com/thenormies',
+        },
+        'skip': 'Patron-only content',
    }]

    def _real_extract(self, url):
@@ -166,7 +188,7 @@ class PatreonIE(PatreonBaseIE):
                'fields[post_tag]': 'value',
                'fields[campaign]': 'url,name,patron_count',
                'json-api-use-default-includes': 'false',
-                'include': 'media,user,user_defined_tags,campaign',
+                'include': 'audio,user,user_defined_tags,campaign,attachments_media',
            })
        attributes = post['data']['attributes']
        title = attributes['title'].strip()
@@ -190,11 +212,16 @@ class PatreonIE(PatreonBaseIE):
                media_attributes = i.get('attributes') or {}
                download_url = media_attributes.get('download_url')
                ext = mimetype2ext(media_attributes.get('mimetype'))
-                if download_url and ext in KNOWN_EXTENSIONS:
+
+                # if size_bytes is None, this media file is likely unavailable
+                # See: https://github.com/yt-dlp/yt-dlp/issues/4608
+                size_bytes = int_or_none(media_attributes.get('size_bytes'))
+                if download_url and ext in KNOWN_EXTENSIONS and size_bytes is not None:
+                    # XXX: what happens if there are multiple attachments?
                    return {
                        **info,
                        'ext': ext,
-                        'filesize': int_or_none(media_attributes.get('size_bytes')),
+                        'filesize': size_bytes,
                        'url': download_url,
                    }
            elif i_type == 'user':
--- a/yt_dlp/extractor/tubitv.py
+++ b/yt_dlp/extractor/tubitv.py
@@ -70,16 +70,17 @@ class TubiTvIE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
        video_data = self._download_json(
-            'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
+            'https://tubitv.com/oz/videos/%s/content?video_resources=dash&video_resources=hlsv3&video_resources=hlsv6' % video_id, video_id)
        title = video_data['title']

        formats = []
-        url = video_data['url']
-        # URL can be sometimes empty. Does this only happen when there is DRM?
-        if url:
-            formats = self._extract_m3u8_formats(
-                self._proto_relative_url(url),
-                video_id, 'mp4', 'm3u8_native')
+
+        for resource in video_data['video_resources']:
+            if resource['type'] in ('dash', ):
+                formats += self._extract_mpd_formats(resource['manifest']['url'], video_id, mpd_id=resource['type'], fatal=False)
+            elif resource['type'] in ('hlsv3', 'hlsv6'):
+                formats += self._extract_m3u8_formats(resource['manifest']['url'], video_id, 'mp4', m3u8_id=resource['type'], fatal=False)
+
        self._sort_formats(formats)

        thumbnails = []
Author	SHA1	Message	Date
coletdjnz	cea4b857f0	[patreon] Ignore erroneous media attachments (#4638) Fixes https://github.com/yt-dlp/yt-dlp/issues/4608 Authored by: coletdjnz	2022-08-13 00:25:20 +00:00
shirt	ffcd62c289	[extractor/tubitv] Extract additional formats (#4646) Authored by: shirt-dev	2022-08-13 05:10:49 +05:30