Compare commits

...

2 Commits

Author SHA1 Message Date
mehq
4d57133095
[Jable] Add extractor (#3341)
Closes #3284
Authored by: mehq
2022-04-07 23:49:14 -07:00
Alexander Seiler
9b8b7a7b5e
[Zattoo] Fix extractors (#2288)
Closes: #1244
Authored by: goggle
2022-04-07 23:44:58 -07:00
3 changed files with 280 additions and 127 deletions

View File

@ -701,6 +701,10 @@ from .ivi import (
from .ivideon import IvideonIE
from .iwara import IwaraIE
from .izlesene import IzleseneIE
from .jable import (
JableIE,
JablePlaylistIE,
)
from .jamendo import (
JamendoIE,
JamendoAlbumIE,
@ -2119,18 +2123,17 @@ from .zattoo import (
EWETVIE,
GlattvisionTVIE,
MNetTVIE,
MyVisionTVIE,
NetPlusIE,
OsnatelTVIE,
QuantumTVIE,
QuicklineIE,
QuicklineLiveIE,
SaltTVIE,
SAKTVIE,
VTXTVIE,
WalyTVIE,
ZattooIE,
ZattooLiveIE,
ZattooMoviesIE,
ZattooRecordingsIE,
)
from .zdf import ZDFIE, ZDFChannelIE
from .zee5 import (

107
yt_dlp/extractor/jable.py Normal file
View File

@ -0,0 +1,107 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
InAdvancePagedList,
int_or_none,
orderedSet,
unified_strdate,
)
class JableIE(InfoExtractor):
    """Extract a single video from a jable.tv ``/videos/<id>/`` page."""

    # The dot of the host name is escaped so that only the literal domain
    # "jable.tv" matches, rather than any character in that position.
    _VALID_URL = r'https?://(?:www\.)?jable\.tv/videos/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://jable.tv/videos/pppd-812/',
        'md5': 'f1537283a9bc073c31ff86ca35d9b2a6',
        'info_dict': {
            'id': 'pppd-812',
            'ext': 'mp4',
            'title': 'PPPD-812 只要表現好巨乳女教師吉根柚莉愛就獎勵學生們在白虎穴內射出精液',
            'description': 'md5:5b6d4199a854f62c5e56e26ccad19967',
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
            'like_count': int,
            'view_count': int,
        },
    }, {
        'url': 'https://jable.tv/videos/apak-220/',
        'md5': '71f9239d69ced58ab74a816908847cc1',
        'info_dict': {
            'id': 'apak-220',
            'ext': 'mp4',
            'title': 'md5:5c3861b7cf80112a6e2b70bccf170824',
            'description': '',
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
            'like_count': int,
            'view_count': int,
            'upload_date': '20220319',
        },
    }]

    def _real_extract(self, url):
        """Download the video page and build the info dict from it.

        The HLS manifest URL is read from the ``hlsUrl`` JavaScript variable
        in the page. Every lookup that passes a ``default`` falls back to
        that default when its pattern is not found in the page.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        hls_url = self._search_regex(
            r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url')
        formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls')
        self._sort_formats(formats)

        upload_date = self._search_regex(
            r'class="inactive-color">\D+\s+(\d{4}-\d+-\d+)', webpage, 'upload_date', default=None)
        # The pattern allows spaces between the digits of the view counter,
        # so they are removed before the numeric conversion.
        view_count = self._search_regex(
            r'#icon-eye"></use></svg>\n*<span class="mr-3">([\d ]+)',
            webpage, 'view_count', default='').replace(' ', '')
        like_count = self._search_regex(
            r'#icon-heart"></use></svg><span class="count">(\d+)', webpage, 'like_count', default=None)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage, default=''),
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'formats': formats,
            'age_limit': 18,
            'upload_date': unified_strdate(upload_date),
            'view_count': int_or_none(view_count),
            'like_count': int_or_none(like_count),
        }
class JablePlaylistIE(InfoExtractor):
    """Extract the video listing of a jable.tv category, model or tag page."""

    # The dot of the host name is escaped so that only the literal domain
    # "jable.tv" matches, rather than any character in that position.
    _VALID_URL = r'https?://(?:www\.)?jable\.tv/(?:categories|models|tags)/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://jable.tv/models/kaede-karen/',
        'info_dict': {
            'id': 'kaede-karen',
            'title': '楓カレン',
        },
        'playlist_count': 34,
    }, {
        'url': 'https://jable.tv/categories/roleplay/',
        'only_matching': True,
    }, {
        'url': 'https://jable.tv/tags/girl/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a paged playlist from the listing page at ``url``.

        The first download supplies the last page number (1 when no pager
        link is found) and the playlist title (None when it is not found).
        The individual pages are fetched through ``page_func``.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        def page_func(page_num):
            """Return url_result entries for the videos linked on one listing page."""
            listing = self._download_webpage(url, playlist_id, query={
                'mode': 'async',
                'from': page_num + 1,
                'function': 'get_block',
                'block_id': 'list_videos_common_videos_list',
            }, note=f'Downloading page {page_num + 1}')
            # orderedSet drops repeated links while keeping their first-seen order.
            video_urls = orderedSet(re.findall(
                r'href="(https://jable\.tv/videos/[\w-]+/?)"', listing))
            return [self.url_result(video_url, JableIE) for video_url in video_urls]

        page_count = int_or_none(self._search_regex(
            r'from:(\d+)">[^<]+\s*&raquo;', webpage, 'last page number', default=1))
        playlist_title = self._search_regex(
            r'<h2 class="h3-md mb-1">([^<]+)', webpage, 'playlist title', default=None)

        return self.playlist_result(
            InAdvancePagedList(page_func, page_count, 24), playlist_id, playlist_title)

View File

@ -51,25 +51,30 @@ class ZattooPlatformBaseIE(InfoExtractor):
self._power_guide_hash = data['session']['power_guide_hash']
def _initialize_pre_login(self):
webpage = self._download_webpage(
self._host_url(), None, 'Downloading app token')
app_token = self._html_search_regex(
r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
webpage, 'app token', group='token')
app_version = self._html_search_regex(
r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2')
session_token = self._download_json(
f'{self._host_url()}/token.json', None, 'Downloading session token')['session_token']
# Will setup appropriate cookies
self._request_webpage(
'%s/zapi/v2/session/hello' % self._host_url(), None,
'%s/zapi/v3/session/hello' % self._host_url(), None,
'Opening session', data=urlencode_postdata({
'client_app_token': app_token,
'uuid': compat_str(uuid4()),
'lang': 'en',
'app_version': app_version,
'app_version': '1.8.2',
'format': 'json',
'client_app_token': session_token,
}))
def _extract_video_id_from_recording(self, recid):
    """Return the program id (as a string) of the recording ``recid``.

    The playlist is downloaded and its 'recordings' entries are scanned for
    the first one whose 'id' equals ``recid`` and whose 'program_id' is
    truthy.

    Raises ExtractorError when no such entry exists or an expected key is
    missing from the playlist.
    """
    playlist = self._download_json(
        f'{self._host_url()}/zapi/v2/playlist', recid, 'Downloading playlist')
    try:
        for entry in playlist['recordings']:
            if entry.get('program_id') and str(entry.get('id')) == recid:
                return str(entry['program_id'])
    except KeyError:
        raise ExtractorError('Could not extract video id from recording')
    # The loop finished without finding a matching recording.
    raise ExtractorError('Could not extract video id from recording')
def _extract_cid(self, video_id, channel_name):
channel_groups = self._download_json(
'%s/zapi/v2/cached/channels/%s' % (self._host_url(),
@ -118,7 +123,26 @@ class ZattooPlatformBaseIE(InfoExtractor):
return cid, info_dict
def _extract_formats(self, cid, video_id, record_id=None, is_live=False):
def _extract_ondemand_info(self, ondemand_id):
    """
    Download the metadata of the on-demand item ``ondemand_id``.

    @returns (ondemand_token, ondemand_type, info_dict)
    """
    movie = self._download_json(
        '%s/zapi/vod/movies/%s' % (self._host_url(), ondemand_id),
        ondemand_id, 'Downloading ondemand information')

    metadata = {
        'id': ondemand_id,
        'title': movie.get('title'),
        'description': movie.get('description'),
    }
    # Numeric fields: info-dict key -> key in the API response.
    numeric_fields = (
        ('duration', 'duration'),
        ('release_year', 'year'),
        ('episode_number', 'episode_number'),
        ('season_number', 'season_number'),
    )
    for field, source_key in numeric_fields:
        metadata[field] = int_or_none(movie.get(source_key))
    metadata['categories'] = try_get(movie, lambda x: x['categories'], list)

    # The token is read from the first term of the first terms catalog;
    # these lookups are not guarded, so a response without them raises.
    term_token = movie['terms_catalog'][0]['terms'][0]['token']
    return term_token, movie['type'], metadata
def _extract_formats(self, cid, video_id, record_id=None, ondemand_id=None, ondemand_termtoken=None, ondemand_type=None, is_live=False):
postdata_common = {
'https_watch_urls': True,
}
@ -128,11 +152,18 @@ class ZattooPlatformBaseIE(InfoExtractor):
url = '%s/zapi/watch/live/%s' % (self._host_url(), cid)
elif record_id:
url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id)
elif ondemand_id:
postdata_common.update({
'teasable_id': ondemand_id,
'term_token': ondemand_termtoken,
'teasable_type': ondemand_type
})
url = '%s/zapi/watch/vod/video' % self._host_url()
else:
url = '%s/zapi/watch/recall/%s/%s' % (self._host_url(), cid, video_id)
url = '%s/zapi/v3/watch/replay/%s/%s' % (self._host_url(), cid, video_id)
formats = []
for stream_type in ('dash', 'hls', 'hls5', 'hds'):
subtitles = {}
for stream_type in ('dash', 'hls7'):
postdata = postdata_common.copy()
postdata['stream_type'] = stream_type
@ -156,14 +187,16 @@ class ZattooPlatformBaseIE(InfoExtractor):
audio_channel = watch.get('audio_channel')
preference = 1 if audio_channel == 'A' else None
format_id = join_nonempty(stream_type, watch.get('maxrate'), audio_channel)
if stream_type in ('dash', 'dash_widevine', 'dash_playready'):
this_formats = self._extract_mpd_formats(
if stream_type.startswith('dash'):
this_formats, subs = self._extract_mpd_formats_and_subtitles(
watch_url, video_id, mpd_id=format_id, fatal=False)
elif stream_type in ('hls', 'hls5', 'hls5_fairplay'):
this_formats = self._extract_m3u8_formats(
self._merge_subtitles(subs, target=subtitles)
elif stream_type.startswith('hls'):
this_formats, subs = self._extract_m3u8_formats_and_subtitles(
watch_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id=format_id,
fatal=False)
self._merge_subtitles(subs, target=subtitles)
elif stream_type == 'hds':
this_formats = self._extract_f4m_formats(
watch_url, video_id, f4m_id=format_id, fatal=False)
@ -176,109 +209,131 @@ class ZattooPlatformBaseIE(InfoExtractor):
this_format['quality'] = preference
formats.extend(this_formats)
self._sort_formats(formats)
return formats
return formats, subtitles
def _extract_video(self, channel_name, video_id, record_id=None, is_live=False):
if is_live:
cid = self._extract_cid(video_id, channel_name)
info_dict = {
'id': channel_name,
'title': channel_name,
'is_live': True,
}
else:
cid, info_dict = self._extract_cid_and_video_info(video_id)
formats = self._extract_formats(
cid, video_id, record_id=record_id, is_live=is_live)
info_dict['formats'] = formats
def _extract_video(self, video_id, record_id=None):
    """Return the info dict of ``video_id`` with formats and subtitles added.

    ``record_id`` is forwarded unchanged to ``_extract_formats``.
    """
    channel_id, result = self._extract_cid_and_video_info(video_id)
    stream_formats, stream_subtitles = self._extract_formats(
        channel_id, video_id, record_id=record_id)
    result['formats'] = stream_formats
    result['subtitles'] = stream_subtitles
    return result
def _extract_live(self, channel_name):
    """Return the info dict for the live stream of ``channel_name``.

    The channel name is used both as the id and as the title of the result.
    The channel id resolved from it is passed to ``_extract_formats`` as both
    the channel id and the video id, with ``is_live=True``.
    """
    cid = self._extract_cid(channel_name, channel_name)
    formats, subtitles = self._extract_formats(cid, cid, is_live=True)
    return {
        'id': channel_name,
        'title': channel_name,
        'is_live': True,
        # The list of streams belongs under 'formats'; 'format' is a
        # different info-dict field, so storing the list there would leave
        # the result without any usable formats.
        'formats': formats,
        'subtitles': subtitles,
    }
def _extract_record(self, record_id):
    """Return the info dict of the recording ``record_id``.

    The recording is first mapped to its program (video) id, which is then
    used to look up the channel id and the video metadata.
    """
    program_id = self._extract_video_id_from_recording(record_id)
    channel_id, result = self._extract_cid_and_video_info(program_id)
    stream_formats, stream_subtitles = self._extract_formats(
        channel_id, program_id, record_id=record_id)
    result['formats'] = stream_formats
    result['subtitles'] = stream_subtitles
    return result
def _extract_ondemand(self, ondemand_id):
    """Return the info dict of the on-demand item ``ondemand_id``.

    No channel id is involved: ``None`` is passed in its place and the
    on-demand id doubles as the video id.
    """
    term_token, teasable_type, result = self._extract_ondemand_info(ondemand_id)
    stream_formats, stream_subtitles = self._extract_formats(
        None, ondemand_id, ondemand_id=ondemand_id,
        ondemand_termtoken=term_token, ondemand_type=teasable_type)
    result['formats'] = stream_formats
    result['subtitles'] = stream_subtitles
    return result
class QuicklineBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'quickline'
_HOST = 'mobiltv.quickline.com'
class QuicklineIE(QuicklineBaseIE):
_VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)' % re.escape(QuicklineBaseIE._HOST)
_TEST = {
'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
'only_matching': True,
}
def _real_extract(self, url):
channel_name, video_id = self._match_valid_url(url).groups()
return self._extract_video(channel_name, video_id)
class QuicklineLiveIE(QuicklineBaseIE):
_VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<id>[^/]+)' % re.escape(QuicklineBaseIE._HOST)
_TEST = {
'url': 'https://mobiltv.quickline.com/watch/srf1',
'only_matching': True,
}
@classmethod
def suitable(cls, url):
return False if QuicklineIE.suitable(url) else super(QuicklineLiveIE, cls).suitable(url)
def _real_extract(self, url):
channel_name = video_id = self._match_id(url)
return self._extract_video(channel_name, video_id, is_live=True)
def _make_valid_url(host):
    """Return the ``/watch/...`` URL pattern for ``host``.

    The host is regex-escaped. The pattern has an ``id`` group (the leading
    digits of the program path segment) and an optional ``recid`` group (a
    numeric segment after it).
    """
    escaped_host = re.escape(host)
    return (
        r'https?://(?:www\.)?' + escaped_host
        + r'/watch/[^/]+?/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?')
class ZattooBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'zattoo'
_HOST = 'zattoo.com'
@staticmethod
def _create_valid_url(match, qs, base_re=None):
match_base = fr'|{base_re}/(?P<vid1>{match})' if base_re else '(?P<vid1>)'
return rf'''(?x)https?://(?:www\.)?zattoo\.com/(?:
[^?#]+\?(?:[^#]+&)?{qs}=(?P<vid2>{match})
{match_base}
)'''
def _make_valid_url(tmpl, host):
return tmpl % re.escape(host)
def _real_extract(self, url):
vid1, vid2 = self._match_valid_url(url).group('vid1', 'vid2')
return getattr(self, f'_extract_{self._TYPE}')(vid1 or vid2)
class ZattooIE(ZattooBaseIE):
_VALID_URL_TEMPLATE = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
_VALID_URL = _make_valid_url(_VALID_URL_TEMPLATE, ZattooBaseIE._HOST)
# Since regular videos are only available for 7 days and recorded videos
# are only available for a specific user, we cannot have detailed tests.
_VALID_URL = ZattooBaseIE._create_valid_url(r'\d+', 'program', '(?:program|watch)/[^/]+')
_TYPE = 'video'
_TESTS = [{
'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
'url': 'https://zattoo.com/program/zdf/250170418',
'info_dict': {
'id': '250170418',
'ext': 'mp4',
'title': 'Markus Lanz',
'description': 'md5:e41cb1257de008ca62a73bb876ffa7fc',
'thumbnail': 're:http://images.zattic.com/cms/.+/format_480x360.jpg',
'creator': 'ZDF HD',
'release_year': 2022,
'episode': 'Folge 1655',
'categories': 'count:1',
'tags': 'count:2'
},
'params': {'skip_download': 'm3u8'}
}, {
'url': 'https://zattoo.com/program/daserste/210177916',
'only_matching': True,
}, {
'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000',
'url': 'https://zattoo.com/guide/german?channel=srf1&program=169860555',
'only_matching': True,
}]
def _real_extract(self, url):
channel_name, video_id, record_id = self._match_valid_url(url).groups()
return self._extract_video(channel_name, video_id, record_id)
class ZattooLiveIE(ZattooBaseIE):
_VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)'
_TEST = {
'url': 'https://zattoo.com/watch/srf1',
_VALID_URL = ZattooBaseIE._create_valid_url(r'[^/?&#]+', 'channel', 'live')
_TYPE = 'live'
_TESTS = [{
'url': 'https://zattoo.com/channels/german?channel=srf_zwei',
'only_matching': True,
}
}, {
'url': 'https://zattoo.com/live/srf1',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if ZattooIE.suitable(url) else super(ZattooLiveIE, cls).suitable(url)
def _real_extract(self, url):
channel_name = video_id = self._match_id(url)
return self._extract_video(channel_name, video_id, is_live=True)
return False if ZattooIE.suitable(url) else super().suitable(url)
class NetPlusIE(ZattooIE):
class ZattooMoviesIE(ZattooBaseIE):
    # On-demand movies: the id is either the path segment after "vod/movies"
    # or the value of the "movie_id" query parameter.
    _VALID_URL = ZattooBaseIE._create_valid_url(r'\w+', 'movie_id', 'vod/movies')
    # Names the extraction routine: the base class calls _extract_<_TYPE>.
    _TYPE = 'ondemand'
    _TESTS = [
        {
            'url': 'https://zattoo.com/vod/movies/7521',
            'only_matching': True,
        },
        {
            'url': 'https://zattoo.com/ondemand?movie_id=7521&term_token=9f00f43183269484edde',
            'only_matching': True,
        },
    ]
class ZattooRecordingsIE(ZattooBaseIE):
    # Recordings are addressed only through the "recording" query parameter;
    # no path-based form is passed to _create_valid_url.
    _VALID_URL = ZattooBaseIE._create_valid_url(r'\d+', 'recording')
    # Names the extraction routine: the base class calls _extract_<_TYPE>.
    _TYPE = 'record'
    _TESTS = [
        {
            'url': 'https://zattoo.com/recordings?recording=193615508',
            'only_matching': True,
        },
        {
            'url': 'https://zattoo.com/tc/ptc_recordings_all_recordings?recording=193615420',
            'only_matching': True,
        },
    ]
class NetPlusIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'netplus'
_HOST = 'netplus.tv'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_VALID_URL = _make_valid_url(_HOST)
_TESTS = [{
'url': 'https://www.netplus.tv/watch/abc/123-abc',
@ -286,10 +341,10 @@ class NetPlusIE(ZattooIE):
}]
class MNetTVIE(ZattooIE):
class MNetTVIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'mnettv'
_HOST = 'tvplus.m-net.de'
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_VALID_URL = _make_valid_url(_HOST)
_TESTS = [{
'url': 'https://tvplus.m-net.de/watch/abc/123-abc',
@ -297,10 +352,10 @@ class MNetTVIE(ZattooIE):
}]
class WalyTVIE(ZattooIE):
class WalyTVIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'walytv'
_HOST = 'player.waly.tv'
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_VALID_URL = _make_valid_url(_HOST)
_TESTS = [{
'url': 'https://player.waly.tv/watch/abc/123-abc',
@ -308,11 +363,11 @@ class WalyTVIE(ZattooIE):
}]
class BBVTVIE(ZattooIE):
class BBVTVIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'bbvtv'
_HOST = 'bbv-tv.net'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_VALID_URL = _make_valid_url(_HOST)
_TESTS = [{
'url': 'https://www.bbv-tv.net/watch/abc/123-abc',
@ -320,11 +375,11 @@ class BBVTVIE(ZattooIE):
}]
class VTXTVIE(ZattooIE):
class VTXTVIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'vtxtv'
_HOST = 'vtxtv.ch'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_VALID_URL = _make_valid_url(_HOST)
_TESTS = [{
'url': 'https://www.vtxtv.ch/watch/abc/123-abc',
@ -332,22 +387,10 @@ class VTXTVIE(ZattooIE):
}]
class MyVisionTVIE(ZattooIE):
_NETRC_MACHINE = 'myvisiontv'
_HOST = 'myvisiontv.ch'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{
'url': 'https://www.myvisiontv.ch/watch/abc/123-abc',
'only_matching': True,
}]
class GlattvisionTVIE(ZattooIE):
class GlattvisionTVIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'glattvisiontv'
_HOST = 'iptv.glattvision.ch'
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_VALID_URL = _make_valid_url(_HOST)
_TESTS = [{
'url': 'https://iptv.glattvision.ch/watch/abc/123-abc',
@ -355,11 +398,11 @@ class GlattvisionTVIE(ZattooIE):
}]
class SAKTVIE(ZattooIE):
class SAKTVIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'saktv'
_HOST = 'saktv.ch'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_VALID_URL = _make_valid_url(_HOST)
_TESTS = [{
'url': 'https://www.saktv.ch/watch/abc/123-abc',
@ -367,10 +410,10 @@ class SAKTVIE(ZattooIE):
}]
class EWETVIE(ZattooIE):
class EWETVIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'ewetv'
_HOST = 'tvonline.ewe.de'
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_VALID_URL = _make_valid_url(_HOST)
_TESTS = [{
'url': 'https://tvonline.ewe.de/watch/abc/123-abc',
@ -378,11 +421,11 @@ class EWETVIE(ZattooIE):
}]
class QuantumTVIE(ZattooIE):
class QuantumTVIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'quantumtv'
_HOST = 'quantum-tv.com'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_VALID_URL = _make_valid_url(_HOST)
_TESTS = [{
'url': 'https://www.quantum-tv.com/watch/abc/123-abc',
@ -390,10 +433,10 @@ class QuantumTVIE(ZattooIE):
}]
class OsnatelTVIE(ZattooIE):
class OsnatelTVIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'osnateltv'
_HOST = 'tvonline.osnatel.de'
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_VALID_URL = _make_valid_url(_HOST)
_TESTS = [{
'url': 'https://tvonline.osnatel.de/watch/abc/123-abc',
@ -401,11 +444,11 @@ class OsnatelTVIE(ZattooIE):
}]
class EinsUndEinsTVIE(ZattooIE):
class EinsUndEinsTVIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = '1und1tv'
_HOST = '1und1.tv'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_VALID_URL = _make_valid_url(_HOST)
_TESTS = [{
'url': 'https://www.1und1.tv/watch/abc/123-abc',
@ -413,10 +456,10 @@ class EinsUndEinsTVIE(ZattooIE):
}]
class SaltTVIE(ZattooIE):
class SaltTVIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'salttv'
_HOST = 'tv.salt.ch'
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_VALID_URL = _make_valid_url(_HOST)
_TESTS = [{
'url': 'https://tv.salt.ch/watch/abc/123-abc',