Compare commits

..

6 Commits

Author SHA1 Message Date
Tim Weber
eb2333bce1
[extractor/StarTrek] Add extractor (#4191)
Authored by: scy
2022-07-13 23:59:44 +05:30
u-spec-png
660c0c4efd
[extractor/Trovo] Fix extractor (#4208)
Authored by: u-spec-png
2022-07-13 23:46:47 +05:30
Locke
fe588ce8ef
[extractor/acfun] Add extractors (#4228)
Closes #3545
Authored by: lockmatrix
2022-07-13 23:32:18 +05:30
HobbyistDev
26b92a919d
[extractor/tviplayer] Add extractor (#4281)
Closes #2134
Authored by: HobbyistDev
2022-07-13 23:26:57 +05:30
HobbyistDev
8f47b39b27
[extractor/detik] Add extractor (#4284)
Closes #4283
Authored by: HobbyistDev
2022-07-13 23:25:45 +05:30
llamasblade
2f1b7afe32
[extractor/hytale] Add extractor (#4326)
Authored by: llamasblade, pukkandan
2022-07-13 23:23:22 +05:30
8 changed files with 571 additions and 3 deletions

View File

@ -22,6 +22,7 @@ from .acast import (
ACastIE, ACastIE,
ACastChannelIE, ACastChannelIE,
) )
from .acfun import AcFunVideoIE, AcFunBangumiIE
from .adn import ADNIE from .adn import ADNIE
from .adobeconnect import AdobeConnectIE from .adobeconnect import AdobeConnectIE
from .adobetv import ( from .adobetv import (
@ -381,6 +382,7 @@ from .deezer import (
DeezerAlbumIE, DeezerAlbumIE,
) )
from .democracynow import DemocracynowIE from .democracynow import DemocracynowIE
from .detik import Detik20IE
from .dfb import DFBIE from .dfb import DFBIE
from .dhm import DHMIE from .dhm import DHMIE
from .digg import DiggIE from .digg import DiggIE
@ -665,6 +667,7 @@ from .hungama import (
HungamaAlbumPlaylistIE, HungamaAlbumPlaylistIE,
) )
from .hypem import HypemIE from .hypem import HypemIE
from .hytale import HytaleIE
from .icareus import IcareusIE from .icareus import IcareusIE
from .ichinanalive import ( from .ichinanalive import (
IchinanaLiveIE, IchinanaLiveIE,
@ -1615,6 +1618,7 @@ from .spike import (
BellatorIE, BellatorIE,
ParamountNetworkIE, ParamountNetworkIE,
) )
from .startrek import StarTrekIE
from .stitcher import ( from .stitcher import (
StitcherIE, StitcherIE,
StitcherShowIE, StitcherShowIE,
@ -1832,6 +1836,7 @@ from .tvc import (
) )
from .tver import TVerIE from .tver import TVerIE
from .tvigle import TvigleIE from .tvigle import TvigleIE
from .tviplayer import TVIPlayerIE
from .tvland import TVLandIE from .tvland import TVLandIE
from .tvn24 import TVN24IE from .tvn24 import TVN24IE
from .tvnet import TVNetIE from .tvnet import TVNetIE

200
yt_dlp/extractor/acfun.py Normal file
View File

@ -0,0 +1,200 @@
from .common import InfoExtractor
from ..utils import (
float_or_none,
format_field,
int_or_none,
traverse_obj,
parse_codecs,
parse_qs,
)
class AcFunVideoBaseIE(InfoExtractor):
    """Shared format/metadata extraction for the AcFun extractors."""

    def _extract_metadata(self, video_id, video_info):
        """Build the common part of the info dict for one AcFun video.

        video_info must contain 'ksPlayJson' (a JSON string describing the
        available representations); 'durationMillis' and 'uploadTime' are
        read when present.

        Returns a dict with 'id', 'formats', 'subtitles', 'duration',
        'timestamp' and 'http_headers'.
        """
        playjson = self._parse_json(video_info['ksPlayJson'], video_id)

        formats, subtitles = [], {}
        # The path may be absent from the play JSON; fall back to an empty
        # list so a missing path yields no formats instead of a TypeError
        representations = traverse_obj(playjson, ('adaptationSet', 0, 'representation')) or []
        for video in representations:
            video_url = video.get('url')
            if not video_url:
                # Nothing to download for this representation
                continue

            fmts, subs = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

            # These values describe the whole representation, so compute
            # them once and apply them to every format extracted from it
            representation_fields = {
                'fps': float_or_none(video.get('frameRate')),
                'width': int_or_none(video.get('width')),
                'height': int_or_none(video.get('height')),
                'tbr': float_or_none(video.get('avgBitrate')),
                **parse_codecs(video.get('codecs', ''))
            }
            for f in fmts:
                f.update(representation_fields)

        self._sort_formats(formats)
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            # Both values are divided by 1000 (second argument is the scale)
            'duration': float_or_none(video_info.get('durationMillis'), 1000),
            'timestamp': int_or_none(video_info.get('uploadTime'), 1000),
            'http_headers': {'Referer': 'https://www.acfun.cn/'},
        }
class AcFunVideoIE(AcFunVideoBaseIE):
    """Extractor for AcFun video pages (https://www.acfun.cn/v/ac...)."""

    _VALID_URL = r'https?://www\.acfun\.cn/v/ac(?P<id>[_\d]+)'
    _TESTS = [{
        'url': 'https://www.acfun.cn/v/ac35457073',
        'info_dict': {
            'id': '35457073',
            'ext': 'mp4',
            'duration': 174.208,
            'timestamp': 1656403967,
            'title': '1 8 岁 现 状',
            'description': '“赶紧回去!班主任查班了!”',
            'uploader': '锤子game',
            'uploader_id': '51246077',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
            'upload_date': '20220628',
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'tags': list,
        },
    }, {
        # example for len(video_list) > 1
        'url': 'https://www.acfun.cn/v/ac35468952_2',
        'info_dict': {
            'id': '35468952_2',
            'ext': 'mp4',
            'title': '【动画剧集】Rocket & Groot Season 12022/火箭浣熊与格鲁特第1季 P02 S01E02 十拿九穩',
            'duration': 90.459,
            'uploader': '比令',
            'uploader_id': '37259967',
            'upload_date': '20220629',
            'timestamp': 1656479962,
            'tags': list,
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
            'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
        }
    }]

    def _real_extract(self, url):
        """Extract the video described by the page's window.videoInfo JSON.

        For multi-part uploads the part number and part title are appended
        to the title.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        json_all = self._search_json(r'window\.videoInfo\s*=\s*', webpage, 'videoInfo', video_id)

        title = json_all.get('title')
        video_list = json_all.get('videoList') or []
        video_internal_id = traverse_obj(json_all, ('currentVideoInfo', 'id'))
        if video_internal_id and len(video_list) > 1:
            # Locate the current part (1-based). A default is supplied so a
            # page whose videoList lacks the current id does not abort the
            # extraction with StopIteration; the plain title is kept then
            part_idx, part_video_info = next((
                (idx, v) for idx, v in enumerate(video_list, 1)
                if v.get('id') == video_internal_id), (None, None))
            if part_video_info is not None:
                title = f'{title} P{part_idx:02d} {part_video_info["title"]}'

        return {
            **self._extract_metadata(video_id, json_all['currentVideoInfo']),
            'title': title,
            'thumbnail': json_all.get('coverUrl'),
            'description': json_all.get('description'),
            'uploader': traverse_obj(json_all, ('user', 'name')),
            'uploader_id': traverse_obj(json_all, ('user', 'href')),
            'tags': traverse_obj(json_all, ('tagList', ..., 'name')),
            'view_count': int_or_none(json_all.get('viewCount')),
            'like_count': int_or_none(json_all.get('likeCountShow')),
            'comment_count': int_or_none(json_all.get('commentCountShow')),
        }
class AcFunBangumiIE(AcFunVideoBaseIE):
    """Extractor for AcFun bangumi pages (https://www.acfun.cn/bangumi/aa...)."""

    _VALID_URL = r'https?://www\.acfun\.cn/bangumi/(?P<id>aa[_\d]+)'
    _TESTS = [{
        'url': 'https://www.acfun.cn/bangumi/aa6002917_36188_1745457?ac=2',
        'info_dict': {
            'id': 'aa6002917_36188_1745457__2',
            'ext': 'mp4',
            'title': '【7月】租借女友 水原千鹤角色曲『DATE』特别PV',
            'upload_date': '20200916',
            'timestamp': 1600243813,
            'duration': 92.091,
        },
    }, {
        'url': 'https://www.acfun.cn/bangumi/aa5023171_36188_1750645',
        'info_dict': {
            'id': 'aa5023171_36188_1750645',
            'ext': 'mp4',
            'title': '红孩儿之趴趴蛙寻石记 第5话 ',
            'duration': 760.0,
            'season': '红孩儿之趴趴蛙寻石记',
            'season_id': 5023171,
            'season_number': 1,  # series has only 1 season
            'episode': 'Episode 5',
            'episode_number': 5,
            'upload_date': '20181223',
            'timestamp': 1545552185,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'comment_count': int,
        },
    }, {
        'url': 'https://www.acfun.cn/bangumi/aa6065485_36188_1885061',
        'info_dict': {
            'id': 'aa6065485_36188_1885061',
            'ext': 'mp4',
            'title': '叽歪老表(第二季) 第5话 坚不可摧',
            'season': '叽歪老表(第二季)',
            'season_number': 2,
            'season_id': 6065485,
            'episode': '坚不可摧',
            'episode_number': 5,
            'upload_date': '20220324',
            'timestamp': 1648082786,
            'duration': 105.002,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
            'comment_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract a bangumi episode, or the 'ac' highlight video if requested.

        When the URL carries an 'ac' query parameter, the video under
        'hlVideoInfo' is returned and its id gets a '__<ac>' suffix.
        Otherwise the episode under 'currentVideoInfo' is returned together
        with season/episode metadata.
        """
        video_id = self._match_id(url)
        # Last value of the 'ac' query parameter, or None when absent
        ac_idx = parse_qs(url).get('ac', [None])[-1]
        video_id = f'{video_id}{format_field(ac_idx, template="__%s")}'

        webpage = self._download_webpage(url, video_id)
        json_bangumi_data = self._search_json(r'window\.bangumiData\s*=\s*', webpage, 'bangumiData', video_id)

        if ac_idx:
            video_info = json_bangumi_data['hlVideoInfo']
            return {
                **self._extract_metadata(video_id, video_info),
                'title': video_info.get('title'),
            }

        video_info = json_bangumi_data['currentVideoInfo']

        # Season number is the 1-based position of this bangumi among the
        # related ones; defaults to 1 when it is not listed there
        season_id = json_bangumi_data.get('bangumiId')
        season_number = season_id and next((
            idx for idx, v in enumerate(json_bangumi_data.get('relatedBangumis') or [], 1)
            if v.get('id') == season_id), 1)

        # The episode list is optional (non-fatal search); guard against a
        # falsy result so the lookup below cannot fail on a missing list
        json_bangumi_list = self._search_json(
            r'window\.bangumiList\s*=\s*', webpage, 'bangumiList', video_id, fatal=False) or {}
        video_internal_id = int_or_none(traverse_obj(json_bangumi_data, ('currentVideoInfo', 'id')))
        # Episode number is the 1-based position of the current video in the list
        episode_number = video_internal_id and next((
            idx for idx, v in enumerate(json_bangumi_list.get('items') or [], 1)
            if v.get('videoId') == video_internal_id), None)

        return {
            **self._extract_metadata(video_id, video_info),
            'title': json_bangumi_data.get('showTitle'),
            'thumbnail': json_bangumi_data.get('image'),
            'season': json_bangumi_data.get('bangumiTitle'),
            'season_id': season_id,
            'season_number': season_number,
            'episode': json_bangumi_data.get('title'),
            'episode_number': episode_number,
            'comment_count': int_or_none(json_bangumi_data.get('commentCount')),
        }

View File

@ -1506,7 +1506,7 @@ class InfoExtractor:
'url': url_or_none(e.get('contentUrl')), 'url': url_or_none(e.get('contentUrl')),
'title': unescapeHTML(e.get('name')), 'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')), 'description': unescapeHTML(e.get('description')),
'thumbnails': [{'url': url} 'thumbnails': [{'url': unescapeHTML(url)}
for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL')) for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
if url_or_none(url)], if url_or_none(url)],
'duration': parse_duration(e.get('duration')), 'duration': parse_duration(e.get('duration')),

122
yt_dlp/extractor/detik.py Normal file
View File

@ -0,0 +1,122 @@
from .common import InfoExtractor
from ..utils import merge_dicts, str_or_none
class Detik20IE(InfoExtractor):
    """Extractor for 20.detik.com video pages (excluding 'program' pages)."""

    IE_NAME = '20.detik.com'
    _VALID_URL = r'https?://20\.detik\.com/((?!program)[\w-]+)/[\d-]+/(?P<id>[\w-]+)'
    _TESTS = [{
        # detikflash
        'url': 'https://20.detik.com/detikflash/20220705-220705098/zulhas-klaim-sukses-turunkan-harga-migor-jawa-bali',
        'info_dict': {
            'id': '220705098',
            'ext': 'mp4',
            'duration': 157,
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/05/bfe0384db04f4bbb9dd5efc869c5d4b1-20220705164334-0s.jpg?w=650&q=80',
            'description': 'md5:ac18dcee5b107abbec1ed46e0bf400e3',
            'title': 'Zulhas Klaim Sukses Turunkan Harga Migor Jawa-Bali',
            'tags': ['zulkifli hasan', 'menteri perdagangan', 'minyak goreng'],
            'timestamp': 1657039548,
            'upload_date': '20220705'
        }
    }, {
        # e-flash
        'url': 'https://20.detik.com/e-flash/20220705-220705109/ahli-level-ppkm-jadi-payung-strategi-protokol-kesehatan',
        'info_dict': {
            'id': '220705109',
            'ext': 'mp4',
            'tags': ['ppkm jabodetabek', 'dicky budiman', 'ppkm'],
            'upload_date': '20220705',
            'duration': 110,
            'title': 'Ahli: Level PPKM Jadi Payung Strategi Protokol Kesehatan',
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/05/Ahli-_Level_PPKM_Jadi_Payung_Strat_jOgUMCN-20220705182313-custom.jpg?w=650&q=80',
            'description': 'md5:4eb825a9842e6bdfefd66f47b364314a',
            'timestamp': 1657045255,
        }
    }, {
        # otobuzz
        'url': 'https://20.detik.com/otobuzz/20220704-220704093/mulai-rp-10-jutaan-ini-skema-kredit-mitsubishi-pajero-sport',
        'info_dict': {
            'id': '220704093',
            'ext': 'mp4',
            'tags': ['cicilan mobil', 'mitsubishi pajero sport', 'mitsubishi', 'pajero sport'],
            'timestamp': 1656951521,
            'duration': 83,
            'upload_date': '20220704',
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/04/5d6187e402ec4a91877755a5886ff5b6-20220704161859-0s.jpg?w=650&q=80',
            'description': 'md5:9b2257341b6f375cdcf90106146d5ffb',
            'title': 'Mulai Rp 10 Jutaan! Ini Skema Kredit Mitsubishi Pajero Sport',
        }
    }, {
        # sport-buzz
        'url': 'https://20.detik.com/sport-buzz/20220704-220704054/crash-crash-horor-di-paruh-pertama-motogp-2022',
        'info_dict': {
            'id': '220704054',
            'ext': 'mp4',
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/04/6b172c6fb564411996ea145128315630-20220704090746-0s.jpg?w=650&q=80',
            'title': 'Crash-crash Horor di Paruh Pertama MotoGP 2022',
            'description': 'md5:fbcc6687572ad7d16eb521b76daa50e4',
            'timestamp': 1656925591,
            'duration': 107,
            'tags': ['marc marquez', 'fabio quartararo', 'francesco bagnaia', 'motogp crash', 'motogp 2022'],
            'upload_date': '20220704',
        }
    }, {
        # adu-perspektif
        'url': 'https://20.detik.com/adu-perspektif/20220518-220518144/24-tahun-reformasi-dan-alarm-demokrasi-dari-filipina',
        'info_dict': {
            'id': '220518144',
            'ext': 'mp4',
            'title': '24 Tahun Reformasi dan Alarm Demokrasi dari Filipina',
            'upload_date': '20220518',
            'timestamp': 1652913823,
            'duration': 185.0,
            'tags': ['politik', 'adu perspektif', 'indonesia', 'filipina', 'demokrasi'],
            'description': 'md5:8eaaf440b839c3d02dca8c9bbbb099a9',
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/05/18/adpers_18_mei_compressed-20220518230458-custom.jpg?w=650&q=80',
        }
    }, {
        # sosok
        'url': 'https://20.detik.com/sosok/20220702-220703032/resa-boenard-si-princess-bantar-gebang',
        'info_dict': {
            'id': '220703032',
            'ext': 'mp4',
            'timestamp': 1656824438,
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/07/02/SOSOK_BGBJ-20220702191138-custom.jpg?w=650&q=80',
            'title': 'Resa Boenard Si \'Princess Bantar Gebang\'',
            'description': 'md5:84ea66306a0285330de6a13fc6218b78',
            'tags': ['sosok', 'sosok20d', 'bantar gebang', 'bgbj', 'resa boenard', 'bantar gebang bgbj', 'bgbj bantar gebang', 'sosok bantar gebang', 'sosok bgbj', 'bgbj resa boenard'],
            'upload_date': '20220703',
            'duration': 650,
        }
    }, {
        # viral
        'url': 'https://20.detik.com/viral/20220603-220603135/merasakan-bus-imut-tanpa-pengemudi-muter-muter-di-kawasan-bsd-city',
        'info_dict': {
            'id': '220603135',
            'ext': 'mp4',
            'description': 'md5:4771fe101aa303edb829c59c26f9e7c6',
            'timestamp': 1654304305,
            'title': 'Merasakan Bus Imut Tanpa Pengemudi, Muter-muter di Kawasan BSD City',
            'tags': ['viral', 'autonomous vehicle', 'electric', 'shuttle bus'],
            'thumbnail': 'https://cdnv.detik.com/videoservice/AdminTV/2022/06/03/VIRAL_BUS_NO_SUPIR-20220604004707-custom.jpg?w=650&q=80',
            'duration': 593,
            'upload_date': '20220604',
        }
    }]

    def _real_extract(self, url):
        """Extract a video from its page's JSON-LD, videoUrl and meta tags.

        The JSON-LD data takes part in the result via merge_dicts; the video
        id, formats, subtitles and tags are taken from the page itself.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        json_ld_data = self._search_json_ld(webpage, display_id)

        video_url = self._html_search_regex(
            r'videoUrl\s*:\s*"(?P<video_url>[^"]+)', webpage, 'videoUrl')
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, display_id, ext='mp4')
        # Sort formats like the other extractors added alongside this one
        self._sort_formats(formats)

        keywords = str_or_none(self._html_search_meta(
            ['keywords', 'keyword', 'dtk:keywords'], webpage), '')

        return merge_dicts(json_ld_data, {
            'id': self._html_search_meta('video_id', webpage),
            'formats': formats,
            'subtitles': subtitles,
            # ''.split(',') yields [''], so drop blank entries to avoid
            # reporting a single empty tag when no keywords are present
            'tags': [tag for tag in keywords.split(',') if tag.strip()],
        })

View File

@ -0,0 +1,58 @@
import re
from .common import InfoExtractor
from ..utils import traverse_obj
class HytaleIE(InfoExtractor):
    """Playlist extractor for the videos embedded in hytale.com news posts."""

    _VALID_URL = r'https?://(?:www\.)?hytale\.com/news/\d+/\d+/(?P<id>[a-z0-9-]+)'
    _TESTS = [{
        'url': 'https://hytale.com/news/2021/07/summer-2021-development-update',
        'info_dict': {
            'id': 'summer-2021-development-update',
            'title': 'Summer 2021 Development Update',
        },
        'playlist_count': 4,
        'playlist': [{
            'md5': '0854ebe347d233ee19b86ab7b2ead610',
            'info_dict': {
                'id': 'ed51a2609d21bad6e14145c37c334999',
                'ext': 'mp4',
                'title': 'Avatar Personalization',
                'thumbnail': r're:https://videodelivery\.net/\w+/thumbnails/thumbnail\.jpg',
            }
        }]
    }, {
        'url': 'https://www.hytale.com/news/2019/11/hytale-graphics-update',
        'info_dict': {
            'id': 'hytale-graphics-update',
            'title': 'Hytale graphics update',
        },
        'playlist_count': 2,
    }]

    def _real_initialize(self):
        """Build self._titles, a mapping from clip 'src' to clip 'caption'.

        The mapping comes from the media page and is only used to provide
        titles, so every step here is best-effort: on any failure the
        mapping is simply left empty.
        """
        media_webpage = self._download_webpage(
            'https://hytale.com/media', None, note='Downloading list of media', fatal=False) or ''

        # The download above is non-fatal, so the search must be non-fatal
        # too; otherwise a failed optional download would still abort here
        clips_json = traverse_obj(
            self._search_json(
                r'window\.__INITIAL_COMPONENTS_STATE__\s*=\s*\[',
                media_webpage, 'clips json', None, fatal=False),
            ('media', 'clips')) or []

        self._titles = {clip.get('src'): clip.get('caption') for clip in clips_json}

    def _real_extract(self, url):
        """Return a playlist with one entry per stream embedded in the post."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        entries = [
            self.url_result(
                f'https://cloudflarestream.com/{video_hash}/manifest/video.mpd?parentOrigin=https%3A%2F%2Fhytale.com',
                title=self._titles.get(video_hash), url_transparent=True)
            # Each embedded stream is identified by a 32-character hex hash
            for video_hash in re.findall(
                r'<stream\s+class\s*=\s*"ql-video\s+cf-stream"\s+src\s*=\s*"([a-f0-9]{32})"',
                webpage)
        ]

        return self.playlist_result(entries, playlist_id, self._og_search_title(webpage))

View File

@ -0,0 +1,76 @@
from .common import InfoExtractor
from ..utils import int_or_none, urljoin
class StarTrekIE(InfoExtractor):
    """Extractor for video pages on intl.startrek.com and www.startrek.com."""

    _VALID_URL = r'(?P<base>https?://(?:intl|www)\.startrek\.com)/videos/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://intl.startrek.com/videos/watch-welcoming-jess-bush-to-the-ready-room',
        'md5': '491df5035c9d4dc7f63c79caaf9c839e',
        'info_dict': {
            'id': 'watch-welcoming-jess-bush-to-the-ready-room',
            'ext': 'mp4',
            'title': 'WATCH: Welcoming Jess Bush to The Ready Room',
            'duration': 1888,
            'timestamp': 1655388000,
            'upload_date': '20220616',
            'description': 'md5:1ffee884e3920afbdd6dd04e926a1221',
            'thumbnail': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/styles/video_1920x1080/public/images/2022-06/pp_14794_rr_thumb_107_yt_16x9\.jpg(?:\?.+)?',
            'subtitles': {'en-US': [{
                'url': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/video/captions/2022-06/TRR_SNW_107_v4\.vtt',
            }, {
                'url': 'https://media.startrek.com/2022/06/16/2043801155561/1069981_hls/trr_snw_107_v4-c4bfc25d/stream_vtt.m3u8',
            }]},
        }
    }, {
        'url': 'https://www.startrek.com/videos/watch-ethan-peck-and-gia-sandhu-beam-down-to-the-ready-room',
        'md5': 'f5ad74fbb86e91e0882fc0a333178d1d',
        'info_dict': {
            'id': 'watch-ethan-peck-and-gia-sandhu-beam-down-to-the-ready-room',
            'ext': 'mp4',
            'title': 'WATCH: Ethan Peck and Gia Sandhu Beam Down to The Ready Room',
            'duration': 1986,
            'timestamp': 1654221600,
            'upload_date': '20220603',
            'description': 'md5:b3aa0edacfe119386567362dec8ed51b',
            'thumbnail': r're:https://www\.startrek\.com/sites/default/files/styles/video_1920x1080/public/images/2022-06/pp_14792_rr_thumb_105_yt_16x9_1.jpg(?:\?.+)?',
            'subtitles': {'en-US': [{
                'url': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/video/captions/2022-06/TRR_SNW_105_v5\.vtt',
            }]},
        }
    }]

    def _real_extract(self, url):
        """Extract the video from the page's "cvp-player-" div attributes.

        Formats come from the player's data-hls URL; the captions file, if
        any, is placed first among the 'en-US' subtitles. Relative captions
        and poster URLs are resolved against the site base of the input URL.
        """
        site_base, video_id = self._match_valid_url(url).group('base', 'id')
        webpage = self._download_webpage(url, video_id)

        # All data-* attributes below are read from this player element
        player_html = self._search_regex(
            r'(<\s*div\s+id\s*=\s*"cvp-player-[^<]+<\s*/div\s*>)', webpage, 'player')

        hls_url = self._html_search_regex(r'\bdata-hls\s*=\s*"([^"]+)"', player_html, 'HLS URL')
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id, 'mp4')
        self._sort_formats(formats)

        captions_url = self._html_search_regex(
            r'\bdata-captions-url\s*=\s*"([^"]+)"', player_html, 'captions URL', fatal=False)
        if captions_url:
            # Put the captions file ahead of any subtitles found in the manifest
            subtitles.setdefault('en-US', []).insert(0, {'url': urljoin(site_base, captions_url)})

        # NB: Most of the data in the json_ld is undesirable
        json_ld = self._search_json_ld(webpage, video_id, fatal=False)

        title = self._html_search_regex(
            r'\bdata-title\s*=\s*"([^"]+)"', player_html, 'title', json_ld.get('title'))
        description = self._html_search_regex(
            r'(?s)<\s*div\s+class\s*=\s*"header-body"\s*>(.+?)<\s*/div\s*>',
            webpage, 'description', fatal=False)
        duration = int_or_none(self._html_search_regex(
            r'\bdata-duration\s*=\s*"(\d+)"', player_html, 'duration', fatal=False))
        poster_url = self._html_search_regex(
            r'\bdata-poster-url\s*=\s*"([^"]+)"', player_html, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': urljoin(site_base, poster_url),
            'timestamp': json_ld.get('timestamp'),
        }

View File

@ -43,7 +43,27 @@ class TrovoBaseIE(InfoExtractor):
class TrovoIE(TrovoBaseIE): class TrovoIE(TrovoBaseIE):
_VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)' _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:s/)?(?!(?:clip|video)/)(?P<id>(?!s/)[^/?&#]+(?![^#]+[?&]vid=))'
_TESTS = [{
'url': 'https://trovo.live/Exsl',
'only_matching': True,
}, {
'url': 'https://trovo.live/s/SkenonSLive/549759191497',
'only_matching': True,
}, {
'url': 'https://trovo.live/s/zijo987/208251706',
'info_dict': {
'id': '104125853_104125853_1656439572',
'ext': 'flv',
'uploader_url': 'https://trovo.live/zijo987',
'uploader_id': '104125853',
'thumbnail': 'https://livecover.trovo.live/screenshot/73846_104125853_104125853-2022-06-29-04-00-22-852x480.jpg',
'uploader': 'zijo987',
'title': '💥IGRAMO IGRICE UPADAJTE💥2500/5000 2022-06-28 22:01',
'live_status': 'is_live',
},
'skip': 'May not be live'
}]
def _real_extract(self, url): def _real_extract(self, url):
username = self._match_id(url) username = self._match_id(url)
@ -71,6 +91,7 @@ class TrovoIE(TrovoBaseIE):
'format_id': format_id, 'format_id': format_id,
'height': int_or_none(format_id[:-1]) if format_id else None, 'height': int_or_none(format_id[:-1]) if format_id else None,
'url': play_url, 'url': play_url,
'tbr': stream_info.get('bitrate'),
'http_headers': self._HEADERS, 'http_headers': self._HEADERS,
}) })
self._sort_formats(formats) self._sort_formats(formats)
@ -87,7 +108,7 @@ class TrovoIE(TrovoBaseIE):
class TrovoVodIE(TrovoBaseIE): class TrovoVodIE(TrovoBaseIE):
_VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)' _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video|s)/(?:[^/]+/\d+[^#]*[?&]vid=)?(?P<id>(?<!/s/)[^/?&#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://trovo.live/clip/lc-5285890818705062210?ltab=videos', 'url': 'https://trovo.live/clip/lc-5285890818705062210?ltab=videos',
'params': {'getcomments': True}, 'params': {'getcomments': True},
@ -108,9 +129,30 @@ class TrovoVodIE(TrovoBaseIE):
'uploader_url': 'https://trovo.live/OneTappedYou', 'uploader_url': 'https://trovo.live/OneTappedYou',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
}, },
}, {
'url': 'https://trovo.live/s/SkenonSLive/549759191497?vid=ltv-100829718_100829718_387702301737980280',
'info_dict': {
'id': 'ltv-100829718_100829718_387702301737980280',
'ext': 'mp4',
'timestamp': 1654909624,
'thumbnail': 'http://vod.trovo.live/1f09baf0vodtransger1301120758/ef9ea3f0387702301737980280/coverBySnapshot/coverBySnapshot_10_0.jpg',
'uploader_id': '100829718',
'uploader': 'SkenonSLive',
'title': 'Trovo u secanju, uz par modova i muzike :)',
'uploader_url': 'https://trovo.live/SkenonSLive',
'duration': 10830,
'view_count': int,
'like_count': int,
'upload_date': '20220611',
'comment_count': int,
'categories': ['Minecraft'],
}
}, { }, {
'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043', 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://trovo.live/s/SkenonSLive/549759191497?foo=bar&vid=ltv-100829718_100829718_387702301737980280',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -0,0 +1,65 @@
from .common import InfoExtractor
from ..utils import traverse_obj
class TVIPlayerIE(InfoExtractor):
    """Extractor for tviplayer.iol.pt video pages."""

    _VALID_URL = r'https?://tviplayer\.iol\.pt(/programa/[\w-]+/[a-f0-9]+)?/video/(?P<id>[a-f0-9]+)'
    _TESTS = [{
        'url': 'https://tviplayer.iol.pt/programa/jornal-das-8/53c6b3903004dc006243d0cf/video/61c8e8b90cf2c7ea0f0f71a9',
        'info_dict': {
            'id': '61c8e8b90cf2c7ea0f0f71a9',
            'ext': 'mp4',
            'duration': 4167,
            'title': 'Jornal das 8 - 26 de dezembro de 2021',
            'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/61c8ee630cf2cc58e7d98d9f/',
            'season_number': 8,
            'season': 'Season 8',
        }
    }, {
        'url': 'https://tviplayer.iol.pt/programa/isabel/62b471090cf26256cd2a8594/video/62be445f0cf2ea4f0a5218e5',
        'info_dict': {
            'id': '62be445f0cf2ea4f0a5218e5',
            'ext': 'mp4',
            'duration': 3255,
            'season': 'Season 1',
            'title': 'Isabel - Episódio 1',
            'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62beac200cf2f9a86eab856b/',
            'season_number': 1,
        }
    }, {
        'url': 'https://tviplayer.iol.pt/video/62c4131c0cf2f9a86eac06bb',
        'info_dict': {
            'id': '62c4131c0cf2f9a86eac06bb',
            'ext': 'mp4',
            'title': 'David e Mickael Carreira respondem: «Qual é o próximo a ser pai?»',
            'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62c416490cf2ea367d4433fd/',
            'season': 'Season 2',
            'duration': 148,
            'season_number': 2,
        }
    }]

    def _real_initialize(self):
        """Download the token that is later sent as the wmsAuthSign parameter."""
        self.wms_auth_sign_token = self._download_webpage(
            'https://services.iol.pt/matrix?userId=', 'wmsAuthSign',
            note='Trying to get wmsAuthSign token')

    def _real_extract(self, url):
        """Extract the video described by the page's inline jsonData object."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        json_data = self._search_json(
            r'<script>\s*jsonData\s*=\s*', webpage, 'json_data', video_id)

        # The manifest URL is requested with the token from _real_initialize
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            f'{json_data["videoUrl"]}?wmsAuthSign={self.wms_auth_sign_token}',
            video_id, ext='mp4')
        # Sort formats like the other extractors added alongside this one
        self._sort_formats(formats)

        return {
            'id': video_id,
            # Fall back to the OpenGraph values when jsonData lacks them
            'title': json_data.get('title') or self._og_search_title(webpage),
            'thumbnail': json_data.get('cover') or self._og_search_thumbnail(webpage),
            'duration': json_data.get('duration'),
            'formats': formats,
            'subtitles': subtitles,
            'season_number': traverse_obj(json_data, ('program', 'seasonNum')),
        }