[extractor/Dumpert] Fix m3u8 and support new URL pattern (#6091)

Authored by: DataGhost, pukkandan
Closes #5032
This commit is contained in:
DataGhost 2023-06-11 17:17:26 +02:00 committed by GitHub
parent b4a252fba8
commit f8ae441501
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

41
yt_dlp/extractor/dumpert.py Normal file → Executable file
View File

@@ -1,12 +1,17 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
int_or_none, int_or_none,
qualities, qualities,
) )
class DumpertIE(InfoExtractor): class DumpertIE(InfoExtractor):
_VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)' _VALID_URL = r'''(?x)
(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl(?:
/(?:mediabase|embed|item)/|
(?:/toppers|/latest|/?)\?selectedId=
)(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.dumpert.nl/item/6646981_951bc60f', 'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
'md5': '1b9318d7d5054e7dcb9dc7654f21d643', 'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
@@ -16,6 +21,9 @@ class DumpertIE(InfoExtractor):
'title': 'Ik heb nieuws voor je', 'title': 'Ik heb nieuws voor je',
'description': 'Niet schrikken hoor', 'description': 'Niet schrikken hoor',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 9,
'view_count': int,
'like_count': int,
} }
}, { }, {
'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7', 'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
@@ -26,6 +34,28 @@ class DumpertIE(InfoExtractor):
}, { }, {
'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7', 'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.dumpert.nl/item/100031688_b317a185',
'info_dict': {
'id': '100031688/b317a185',
'ext': 'mp4',
'title': 'Epic schijnbeweging',
'description': '<p>Die zag je niet eh</p>',
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'duration': 12,
'view_count': int,
'like_count': int,
},
'params': {'skip_download': 'm3u8'}
}, {
'url': 'https://www.dumpert.nl/toppers?selectedId=100031688_b317a185',
'only_matching': True,
}, {
'url': 'https://www.dumpert.nl/latest?selectedId=100031688_b317a185',
'only_matching': True,
}, {
'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@@ -36,17 +66,22 @@ class DumpertIE(InfoExtractor):
title = item['title'] title = item['title']
media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO') media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')
quality = qualities(['flv', 'mobile', 'tablet', '720p']) quality = qualities(['flv', 'mobile', 'tablet', '720p', '1080p'])
formats = [] formats = []
for variant in media.get('variants', []): for variant in media.get('variants', []):
uri = variant.get('uri') uri = variant.get('uri')
if not uri: if not uri:
continue continue
version = variant.get('version') version = variant.get('version')
preference = quality(version)
if determine_ext(uri) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
uri, video_id, 'mp4', m3u8_id=version, quality=preference))
else:
formats.append({ formats.append({
'url': uri, 'url': uri,
'format_id': version, 'format_id': version,
'quality': quality(version), 'quality': preference,
}) })
thumbnails = [] thumbnails = []