Compare commits

...

26 Commits

Author SHA1 Message Date
Laurent FAVOLE
581dc9dff3
Merge 0588bd7c82 into a06bb58679 2024-09-16 01:27:01 +05:30
Khaoklong51
a06bb58679
[ie/BiliIntl] Fix referer header (#11003)
Closes #10996
Authored by: Khaoklong51
2024-09-14 16:19:17 +00:00
bashonly
a555389c9b
[ie/HGTVDe] Fix extractor (#10992)
Closes #10984
Authored by: bashonly, rdamas

Co-authored-by: Robert Damas <robert.damas@byom.de>
2024-09-14 00:23:22 +00:00
N/Ame
173d54c151
[ie/kick:vod] Support new URL format (#10988)
Closes #10975
Authored by: grqz, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-09-14 00:21:07 +00:00
Oto Valek
4a27b8f092
[ie/IPrima] Fix zoom URL support (#10959)
Closes #6100
Authored by: otovalek
2024-09-14 00:19:03 +00:00
sepro
41a241ca6f
[ie/Sen] Add extractor (#10952)
Closes #10951
Authored by: seproDev
2024-09-14 00:16:34 +00:00
sepro
3aa0156e05
[ie/Xinpianchang] Fix extractor (#10950)
Authored by: seproDev
2024-09-14 00:15:07 +00:00
sepro
300c91274f
[ie/Servus] Fix extractor (#10944)
Closes #10941
Authored by: seproDev
2024-09-14 00:14:09 +00:00
aarubui
d8d473002b
[ie/tenplay] Fix extractor (#10928)
Closes #10926
Authored by: aarubui
2024-09-14 00:09:15 +00:00
naglis
36f9e602ad
[ie/screenrec] Add extractor (#10917)
Closes #9780
Authored by: naglis
2024-09-13 23:27:10 +00:00
ischmidt20
7adff8caf1
[ie/WatchESPN] Improve auth support (#10910)
Authored by: ischmidt20
2024-09-13 23:25:12 +00:00
naglis
fa83d0b36b
[ie/LnkGo] Remove extractor (#10904)
Authored by: naglis
2024-09-13 23:23:19 +00:00
Sahil Singh
c8c078fe28
[ie/pinterest] Extend _VALID_URL (#10867)
Closes #10850
Authored by: sahilsinghss73, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-09-13 23:22:14 +00:00
bashonly
325001317d
[ie] Handle decode errors when reading responses (#10868)
Authored by: bashonly
2024-09-13 23:20:17 +00:00
bashonly
cc85596d5b
[utils] mimetype2ext: Recognize aacp as aac (#10860)
Authored by: bashonly
2024-09-13 23:19:18 +00:00
Leng
0e1b941c6b
[ie/facebook:reel] Improve metadata extraction
Closes #9057, Closes #10824
Authored by: lengzuo
2024-09-13 23:18:13 +00:00
Xingchen Song(宋星辰)
3dfd720d09
[ie/ximalaya] Add VIP support (#10832)
Closes #6928
Authored by: xingchensong, seproDev

Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
2024-09-13 23:16:34 +00:00
hugepower
25c1cdaa26
[ie/huya:video] Add extractor (#10686)
Closes #10679
Authored by: hugepower
2024-09-13 23:12:38 +00:00
Cosmin Tanislav
d02df303d8
[ie/RTP] Support more subpages (#10787)
Authored by: Demon000
2024-09-13 23:09:52 +00:00
Scott Robinson
5d0176547f
[ie/Bandcamp:user] Fix extraction (#10328)
Authored by: quad, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
2024-09-13 23:02:54 +00:00
sepro
409f8e9e3b
[ie] Fix JW Player format parsing (#10956)
Authored by: seproDev
2024-09-13 22:54:41 +00:00
Deukhoofd
b4760c778d
[ie/beacon] Add extractor (#9901)
Authored by: Deukhoofd
2024-09-13 22:50:15 +00:00
sepro
9431777b4c
[ie/youtube:tab] Fix shorts tab extraction (#10938)
Closes #10936
Authored by: seproDev
2024-09-13 22:46:44 +00:00
sepro
3a3bd00037
[ie/youtube] Add po_token, visitor_data, data_sync_id extractor args (#10648)
Authored by:  seproDev, coletdjnz, bashonly
2024-09-13 22:51:58 +12:00
bashonly
0588bd7c82
Merge branch 'master' into digiview-extractor 2024-05-30 22:36:11 -05:00
Laurent FAVOLE
205826121d
Add Digiview extractor 2024-05-10 16:01:57 +02:00
24 changed files with 896 additions and 329 deletions

View File

@ -1777,6 +1777,9 @@ The following extractors use this feature:
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
* `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
* `data_sync_id`: Overrides the account Data Sync ID used in Innertube API requests. This may be needed if you are using an account with `youtube:player_skip=webpage,configs` or `youtubetab:skip=webpage`
* `visitor_data`: Overrides the Visitor Data used in Innertube API requests. This should be used with `player_skip=webpage,configs` and without cookies. Note: this may have adverse effects if used improperly. If a session from a browser is wanted, you should pass cookies instead (which contain the Visitor ID)
* `po_token`: Proof of Origin (PO) Token(s) to use for requesting video playback. Comma separated list of PO Tokens in the format `CLIENT+PO_TOKEN`, e.g. `youtube:po_token=web+XXX,android+YYY`
#### youtubetab (YouTube playlists, channels, feeds, etc.)
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)

View File

@ -217,6 +217,7 @@ from .bbc import (
BBCCoUkIPlayerGroupIE,
BBCCoUkPlaylistIE,
)
from .beacon import BeaconTvIE
from .beatbump import (
BeatBumpPlaylistIE,
BeatBumpVideoIE,
@ -503,6 +504,7 @@ from .dfb import DFBIE
from .dhm import DHMIE
from .digitalconcerthall import DigitalConcertHallIE
from .digiteka import DigitekaIE
from .digiview import DigiviewIE
from .discogs import DiscogsReleasePlaylistIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
@ -822,7 +824,10 @@ from .hungama import (
HungamaIE,
HungamaSongIE,
)
from .huya import HuyaLiveIE
from .huya import (
HuyaLiveIE,
HuyaVideoIE,
)
from .hypem import HypemIE
from .hypergryph import MonsterSirenHypergryphMusicIE
from .hytale import HytaleIE
@ -1037,10 +1042,7 @@ from .livestream import (
LivestreamShortenerIE,
)
from .livestreamfails import LivestreamfailsIE
from .lnkgo import (
LnkGoIE,
LnkIE,
)
from .lnk import LnkIE
from .loom import (
LoomFolderIE,
LoomIE,
@ -1811,6 +1813,7 @@ from .screen9 import Screen9IE
from .screencast import ScreencastIE
from .screencastify import ScreencastifyIE
from .screencastomatic import ScreencastOMaticIE
from .screenrec import ScreenRecIE
from .scrippsnetworks import (
ScrippsNetworksIE,
ScrippsNetworksWatchIE,
@ -1821,6 +1824,7 @@ from .scte import (
SCTECourseIE,
)
from .sejmpl import SejmIE
from .sen import SenIE
from .senalcolombia import SenalColombiaLiveIE
from .senategov import (
SenateGovIE,

View File

@ -1,3 +1,5 @@
import functools
import json
import random
import re
import time
@ -6,7 +8,9 @@ from .common import InfoExtractor
from ..utils import (
KNOWN_EXTENSIONS,
ExtractorError,
extract_attributes,
float_or_none,
get_element_html_by_id,
int_or_none,
parse_filesize,
str_or_none,
@ -17,6 +21,7 @@ from ..utils import (
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class BandcampIE(InfoExtractor):
@ -459,7 +464,7 @@ class BandcampUserIE(InfoExtractor):
},
}, {
'url': 'https://coldworldofficial.bandcamp.com/music',
'playlist_mincount': 10,
'playlist_mincount': 7,
'info_dict': {
'id': 'coldworldofficial',
'title': 'Discography of coldworldofficial',
@ -473,12 +478,19 @@ class BandcampUserIE(InfoExtractor):
},
}]
def _yield_items(self, webpage):
yield from (
re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
yield from traverse_obj(webpage, (
{functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes},
'data-client-items', {json.loads}, ..., 'page_url', {str}))
def _real_extract(self, url):
uploader = self._match_id(url)
webpage = self._download_webpage(url, uploader)
discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
return self.playlist_from_matches(
discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
self._yield_items(webpage), uploader, f'Discography of {uploader}',
getter=functools.partial(urljoin, url))

View File

@ -0,0 +1,68 @@
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_iso8601,
traverse_obj,
)
class BeaconTvIE(InfoExtractor):
    """Extractor for beacon.tv content pages (``/content/<slug>``)."""

    _VALID_URL = r'https?://(?:www\.)?beacon\.tv/content/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://beacon.tv/content/welcome-to-beacon',
        'md5': 'b3f5932d437f288e662f10f3bfc5bd04',
        'info_dict': {
            'id': 'welcome-to-beacon',
            'ext': 'mp4',
            'upload_date': '20240509',
            'description': 'md5:ea2bd32e71acf3f9fca6937412cc3563',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/I4CkkEvN/poster.jpg?width=720',
            'title': 'Your home for Critical Role!',
            'timestamp': 1715227200,
            'duration': 105.494,
        },
    }, {
        'url': 'https://beacon.tv/content/re-slayers-take-trailer',
        'md5': 'd879b091485dbed2245094c8152afd89',
        'info_dict': {
            'id': 're-slayers-take-trailer',
            'ext': 'mp4',
            'title': 'The Re-Slayers Take | Official Trailer',
            'timestamp': 1715189040,
            'upload_date': '20240508',
            'duration': 53.249,
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/PW5ApIw3/poster.jpg?width=720',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the content page at *url*.

        Raises ExtractorError when no matching content entry or no playable
        media is found; members-only content is reported through
        ``raise_login_required``.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        nextjs_data = self._search_nextjs_data(page, slug)

        def is_requested_content(key, value):
            # Apollo state entries for content are keyed 'Content:...';
            # keep only the one whose slug matches the URL
            return key.startswith('Content:') and value['slug'] == slug

        content = traverse_obj(nextjs_data, (
            'props', 'pageProps', '__APOLLO_STATE__', is_requested_content, any))
        if not content:
            raise ExtractorError('Failed to extract content data')

        # The JW Player payload is stored as a JSON string under either the
        # video or the podcast branch; the first one that decodes to a dict wins
        media_paths = (
            ('contentVideo', 'video', 'videoData'),
            ('contentPodcast', 'podcast', 'audioData'),
        )
        player_payload = traverse_obj(content, (media_paths, {json.loads}, {dict}, any))

        if not player_payload:
            content_type = content.get('contentType')
            if content_type not in ('videoPodcast', 'video', 'podcast'):
                raise ExtractorError('Content is not a video/podcast', expected=True)
            # NOTE(review): this tier reference presumably identifies the
            # free tier; any other tier is treated as members-only - confirm
            tier_ref = traverse_obj(content, ('contentTier', '__ref'))
            if tier_ref != 'MemberTier:65b258d178f89be87b4dc0a4':
                self.raise_login_required('This video/podcast is for members only')
            raise ExtractorError('Failed to extract content')

        player_info = self._parse_jwplayer_data(player_payload, slug)
        # Page-level metadata takes precedence over what JW Player reports
        page_metadata = traverse_obj(content, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('publishedAt', {parse_iso8601}),
        })
        return {**player_info, **page_metadata}

View File

@ -1852,7 +1852,7 @@ class BiliBiliPlayerIE(InfoExtractor):
class BiliIntlBaseIE(InfoExtractor):
_API_URL = 'https://api.bilibili.tv/intl/gateway'
_NETRC_MACHINE = 'biliintl'
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
_HEADERS = {'Referer': 'https://www.bilibili.tv/'}
def _call_api(self, endpoint, *args, **kwargs):
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)

View File

@ -35,6 +35,7 @@ from ..networking import HEADRequest, Request
from ..networking.exceptions import (
HTTPError,
IncompleteRead,
TransportError,
network_exceptions,
)
from ..networking.impersonate import ImpersonateTarget
@ -965,6 +966,9 @@ class InfoExtractor:
return False
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
encoding=encoding, data=data)
if content is False:
assert not fatal
return False
return (content, urlh)
@staticmethod
@ -1039,7 +1043,15 @@ class InfoExtractor:
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
prefix=None, encoding=None, data=None):
webpage_bytes = urlh.read()
try:
webpage_bytes = urlh.read()
except TransportError as err:
errmsg = f'{video_id}: Error reading response: {err.msg}'
if fatal:
raise ExtractorError(errmsg, cause=err)
self.report_warning(errmsg)
return False
if prefix is not None:
webpage_bytes = prefix + webpage_bytes
if self.get_param('dump_intermediate_pages', False):
@ -3489,7 +3501,7 @@ class InfoExtractor:
continue
urls.add(source_url)
source_type = source.get('type') or ''
ext = mimetype2ext(source_type) or determine_ext(source_url)
ext = determine_ext(source_url, default_ext=mimetype2ext(source_type))
if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', entry_protocol='m3u8_native',

View File

@ -0,0 +1,113 @@
import urllib.parse
from yt_dlp.utils import int_or_none
from ..networking import Request
from .youtube import YoutubeIE
class DigiviewIE(YoutubeIE):
    """Extractor for Digiview pages (``ladigitale.dev/digiview/#/v/<id>``).

    A Digiview entry points at a YouTube video and may override its title
    and description and define start/end cut points. Extraction is
    delegated to YoutubeIE and the Digiview metadata is layered on top.
    """

    IE_DESC = 'Digiview'
    IE_NAME = 'digiview'
    _VALID_URL = r'https?://(?:www\.)?ladigitale\.dev/digiview/#/v/(?P<id>[0-9a-f]+)'
    _TESTS = [
        {
            # normal video
            'url': 'https://ladigitale.dev/digiview/#/v/663e17b35e979',
            'md5': 'acdf2c99c1e4d67664c9fbc5695986a9',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'channel': 'Philipp Hagemeister',
                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                'upload_date': '20121002',
                'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'duration': 10,
                'view_count': int,
                'like_count': int,
                'availability': 'public',
                'playable_in_embed': True,
                'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
                'live_status': 'not_live',
                'age_limit': 0,
                'comment_count': int,
                'channel_follower_count': int,
                'uploader': 'Philipp Hagemeister',
                'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
                'uploader_id': '@PhilippHagemeister',
                'heatmap': 'count:100',
            },
        },
        {
            # cut video
            'url': 'https://ladigitale.dev/digiview/#/v/663e17f2f3f18',
            'md5': 'acdf2c99c1e4d67664c9fbc5695986a9',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'channel': 'Philipp Hagemeister',
                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                'upload_date': '20121002',
                'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'duration': 3,
                'view_count': int,
                'like_count': int,
                'availability': 'public',
                'playable_in_embed': True,
                'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
                'live_status': 'not_live',
                'age_limit': 0,
                'comment_count': int,
                'channel_follower_count': int,
                'uploader': 'Philipp Hagemeister',
                'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
                'uploader_id': '@PhilippHagemeister',
                'heatmap': 'count:100',
            },
        },
    ]

    def _real_extract(self, url):
        """Resolve a Digiview URL to the info dict of the wrapped YouTube video.

        The Digiview backend is queried (POST) for the entry's JSON record;
        its ``videoId`` is handed to a fresh YoutubeIE, and ``titre`` /
        ``description`` replace the YouTube values when present. When the
        record defines a cut (``debut`` / ``fin``), matching ``-ss`` / ``-t``
        arguments are appended to the ``merger`` postprocessor arguments.
        """
        video_id = self._match_id(url)

        webpage_data = self._download_json(
            Request(
                'https://ladigitale.dev/digiview/inc/recuperer_video.php',
                data=urllib.parse.urlencode({'id': video_id}).encode(),
                method='POST',
            ),
            video_id,
        )

        # A separate YoutubeIE instance is required: this class overrides
        # _VALID_URL, so the inherited _real_extract could not match the ID
        youtube_ie = YoutubeIE()
        youtube_ie.set_downloader(self._downloader)
        info = youtube_ie._real_extract(webpage_data['videoId'])

        # replace the YouTube metadata by the Digiview one; .get() avoids a
        # KeyError when the YouTube result lacks the field
        info['title'] = webpage_data.get('titre') or info.get('title')
        info['description'] = webpage_data.get('description') or info.get('description')

        start_time = int_or_none(webpage_data.get('debut')) or 0
        end_time = int_or_none(webpage_data.get('fin'))

        ffmpeg_args = []
        if start_time:
            ffmpeg_args.extend(['-ss', str(start_time)])
        # duration may be missing from the YouTube result, hence .get()
        if end_time is not None and end_time != info.get('duration'):
            ffmpeg_args.extend(['-t', str(end_time - start_time)])

        if ffmpeg_args and self._downloader:
            # cut the video if specified in the Digiview webpage
            # NOTE(review): this mutates the shared downloader params, so the
            # cut also applies to any later download in the same session
            params = self._downloader.params
            ppargs = params.get('postprocessor_args')
            if ppargs is None:
                # option not configured: create the mapping instead of
                # failing with AttributeError on None
                ppargs = params['postprocessor_args'] = {}
            if isinstance(ppargs, dict):
                ppargs.setdefault('merger', []).extend(ffmpeg_args)
            else:
                # the legacy list form has no per-postprocessor keys to extend
                self.report_warning(
                    'Unable to apply the Digiview cut: postprocessor_args is not a dictionary')

        return info

View File

@ -319,32 +319,6 @@ class DPlayIE(DPlayBaseIE):
url, display_id, host, 'dplay' + country, country, domain)
class HGTVDeIE(DPlayBaseIE):
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
'info_dict': {
'id': '151205',
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
'ext': 'mp4',
'title': 'Wer braucht schon eine Toilette',
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
'duration': 1177.024,
'timestamp': 1595705400,
'upload_date': '20200725',
'creator': 'HGTV',
'series': 'Tiny House - klein, aber oho',
'season_number': 3,
'episode_number': 3,
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
return self._get_disco_api_info(
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
class DiscoveryPlusBaseIE(DPlayBaseIE):
"""Subclasses must set _PRODUCT, _DISCO_API_PARAMS"""
@ -373,6 +347,45 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)
class HGTVDeIE(DiscoveryPlusBaseIE):
    """Extractor for de.hgtv.com show pages (``/sendungen/...``)."""

    _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://de.hgtv.com/sendungen/mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
        'info_dict': {
            'id': '7332936',
            'ext': 'mp4',
            'display_id': 'mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
            'title': 'Vom Landleben ins Loft',
            'description': 'md5:e5f72c02c853970796dd3818f2e25745',
            'episode': 'Episode 7',
            'episode_number': 7,
            'season': 'Season 7',
            'season_number': 7,
            'series': 'Mein Kleinstadt-Traumhaus',
            'duration': 2645.0,
            'timestamp': 1725998100,
            'upload_date': '20240910',
            'creators': ['HGTV'],
            'tags': [],
            'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/08/09/82a386b9-c688-32c7-b9ff-0b13865f0bae.jpeg',
        },
    }]

    # Both attributes are required by DiscoveryPlusBaseIE; the params are
    # forwarded as keyword arguments to _get_disco_api_info
    _PRODUCT = 'hgtv'
    _DISCO_API_PARAMS = {
        'disco_host': 'eu1-prod.disco-api.com',
        'realm': 'hgtv',
        'country': 'de',
    }

    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
        """Add the realm, client and authorization headers to *headers* in place."""
        disco_headers = {
            'x-disco-params': f'realm={realm}',
            'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
            'Authorization': self._get_auth(disco_base, display_id, realm),
        }
        headers.update(disco_headers)
class GoDiscoveryIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{

View File

@ -294,37 +294,37 @@ class ESPNCricInfoIE(InfoExtractor):
class WatchESPNIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
_TESTS = [{
'url': 'https://www.espn.com/watch/player/_/id/dbbc6b1d-c084-4b47-9878-5f13c56ce309',
'url': 'https://www.espn.com/watch/player/_/id/11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
'info_dict': {
'id': 'dbbc6b1d-c084-4b47-9878-5f13c56ce309',
'id': '11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
'ext': 'mp4',
'title': 'Huddersfield vs. Burnley',
'duration': 7500,
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/dbbc6b1d-c084-4b47-9878-5f13c56ce309/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
'title': 'Abilene Chrstn vs. Texas Tech',
'duration': 14166,
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/11ce417a-6ac9-42b6-8a15-46aeb9ad5710/16x9.jpg?timestamp=202407252343&showBadge=true&cb=12&package=ESPN_PLUS',
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.espn.com/watch/player/_/id/a049a56e-a7ce-477e-aef3-c7e48ef8221c',
'url': 'https://www.espn.com/watch/player/_/id/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
'info_dict': {
'id': 'a049a56e-a7ce-477e-aef3-c7e48ef8221c',
'id': '90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
'ext': 'mp4',
'title': 'Dynamo Dresden vs. VfB Stuttgart (Round #1) (German Cup)',
'duration': 8335,
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
'title': 'UC Davis vs. California',
'duration': 9547,
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.espn.com/espnplus/player/_/id/317f5fd1-c78a-4ebe-824a-129e0d348421',
'url': 'https://www.espn.com/watch/player/_/id/c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
'info_dict': {
'id': '317f5fd1-c78a-4ebe-824a-129e0d348421',
'id': 'c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
'ext': 'mp4',
'title': 'The Wheel - Episode 10',
'duration': 3352,
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/317f5fd1-c78a-4ebe-824a-129e0d348421/16x9.jpg?timestamp=202205031523&showBadge=true&cb=12&package=ESPN_PLUS',
'title': 'The College Football Show',
'duration': 3639,
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/c4313bbe-95b5-4bb8-b251-ac143ea0fc54/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
},
'params': {
'skip_download': True,
@ -353,6 +353,13 @@ class WatchESPNIE(AdobePassIE):
if not cookie:
self.raise_login_required(method='cookies')
jwt = self._search_regex(r'=([^|]+)\|', cookie.value, 'cookie jwt')
id_token = self._download_json(
'https://registerdisney.go.com/jgc/v6/client/ESPN-ONESITE.WEB-PROD/guest/refresh-auth',
None, 'Refreshing token', headers={'Content-Type': 'application/json'}, data=json.dumps({
'refreshToken': json.loads(base64.urlsafe_b64decode(f'{jwt}==='))['refresh_token'],
}).encode())['data']['token']['id_token']
assertion = self._call_bamgrid_api(
'devices', video_id,
headers={'Content-Type': 'application/json; charset=UTF-8'},
@ -371,7 +378,7 @@ class WatchESPNIE(AdobePassIE):
})['access_token']
assertion = self._call_bamgrid_api(
'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
'accounts/grant', video_id, payload={'id_token': id_token},
headers={
'Authorization': token,
'Content-Type': 'application/json; charset=UTF-8',

View File

@ -84,7 +84,7 @@ class FacebookIE(InfoExtractor):
'timestamp': 1692346159,
'thumbnail': r're:^https?://.*',
'uploader_id': '100063551323670',
'duration': 3132.184,
'duration': 3133.583,
'view_count': int,
'concurrent_view_count': 0,
},
@ -112,9 +112,10 @@ class FacebookIE(InfoExtractor):
'upload_date': '20140506',
'timestamp': 1399398998,
'thumbnail': r're:^https?://.*',
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl',
'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl',
'duration': 131.03,
'concurrent_view_count': int,
'view_count': int,
},
}, {
'note': 'Video with DASH manifest',
@ -167,7 +168,7 @@ class FacebookIE(InfoExtractor):
# have 1080P, but only up to 720p in swf params
# data.video.story.attachments[].media
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
'md5': 'ca63897a90c9452efee5f8c40d080e25',
'md5': '1659aa21fb3dd1585874f668e81a72c8',
'info_dict': {
'id': '10155529876156509',
'ext': 'mp4',
@ -180,9 +181,10 @@ class FacebookIE(InfoExtractor):
'view_count': int,
'uploader_id': '100059479812265',
'concurrent_view_count': int,
'duration': 44.478,
'duration': 44.181,
},
}, {
# FIXME: unable to extract uploader, no formats found
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
@ -241,9 +243,9 @@ class FacebookIE(InfoExtractor):
'timestamp': 1511548260,
'upload_date': '20171124',
'uploader': 'Vickie Gentry',
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl',
'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l',
'thumbnail': r're:^https?://.*',
'duration': 148.435,
'duration': 148.224,
},
}, {
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
@ -271,7 +273,7 @@ class FacebookIE(InfoExtractor):
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
'thumbnail': r're:^https?://.*',
'uploader': 'Lela Evans',
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl',
'upload_date': '20231228',
'timestamp': 1703804085,
'duration': 394.347,
@ -322,7 +324,7 @@ class FacebookIE(InfoExtractor):
'upload_date': '20180523',
'uploader': 'ESL One Dota 2',
'uploader_id': '100066514874195',
'duration': 4524.212,
'duration': 4524.001,
'view_count': int,
'thumbnail': r're:^https?://.*',
'concurrent_view_count': int,
@ -339,9 +341,9 @@ class FacebookIE(InfoExtractor):
'title': 'Josef',
'thumbnail': r're:^https?://.*',
'concurrent_view_count': int,
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl',
'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl',
'timestamp': 1549275572,
'duration': 3.413,
'duration': 3.283,
'uploader': 'Josef Novak',
'description': '',
'upload_date': '20190204',
@ -396,6 +398,7 @@ class FacebookIE(InfoExtractor):
'playlist_count': 1,
'skip': 'Requires logging in',
}, {
# FIXME: Cannot parse data error
# data.event.cover_media_renderer.cover_video
'url': 'https://m.facebook.com/events/1509582499515440',
'info_dict': {
@ -498,7 +501,8 @@ class FacebookIE(InfoExtractor):
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
or get_first(post, ('node', 'actors', ..., {dict}))
or get_first(post, ('event', 'event_creator', {dict})) or {})
or get_first(post, ('event', 'event_creator', {dict}))
or get_first(post, ('video', 'creation_story', 'short_form_video_context', 'video_owner', {dict})) or {})
uploader = uploader_data.get('name') or (
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
or self._search_regex(
@ -524,6 +528,11 @@ class FacebookIE(InfoExtractor):
webpage, 'view count', default=None)),
'concurrent_view_count': get_first(post, (
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
**traverse_obj(post, (lambda _, v: video_id in v['url'], 'feedback', {
'like_count': ('likers', 'count', {int}),
'comment_count': ('total_comment_count', {int}),
'repost_count': ('share_count_reduced', {parse_count}),
}), get_all=False),
}
info_json_ld = self._search_json_ld(webpage, video_id, default={})
@ -932,18 +941,21 @@ class FacebookReelIE(InfoExtractor):
_TESTS = [{
'url': 'https://www.facebook.com/reel/1195289147628387',
'md5': 'f13dd37f2633595982db5ed8765474d3',
'md5': 'a53256d10fc2105441fe0c4212ed8cea',
'info_dict': {
'id': '1195289147628387',
'ext': 'mp4',
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
'description': 'md5:22f03309b216ac84720183961441d8db',
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
'title': r're:9\.6K views · 355 reactions .+ Let the “Slapathon” commence!! .+ LL COOL J · Mama Said Knock You Out$',
'description': r're:When your trying to help your partner .+ LL COOL J · Mama Said Knock You Out$',
'uploader': 'Beast Camp Training',
'uploader_id': '100040874179269',
'duration': 9.579,
'timestamp': 1637502609,
'upload_date': '20211121',
'thumbnail': r're:^https?://.*',
'like_count': int,
'comment_count': int,
'repost_count': int,
},
}]

View File

@ -8,15 +8,19 @@ from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_duration,
str_or_none,
try_get,
unescapeHTML,
unified_strdate,
update_url_query,
url_or_none,
)
from ..utils.traversal import traverse_obj
class HuyaLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)'
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P<id>[^/#?&]+)(?:\D|$)'
IE_NAME = 'huya:live'
IE_DESC = 'huya.com'
TESTS = [{
@ -24,6 +28,7 @@ class HuyaLiveIE(InfoExtractor):
'info_dict': {
'id': '572329',
'title': str,
'ext': 'flv',
'description': str,
'is_live': True,
'view_count': int,
@ -131,3 +136,76 @@ class HuyaLiveIE(InfoExtractor):
fm = base64.b64decode(params['fm']).decode().split('_', 1)[0]
ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']]))
return fm, ss
class HuyaVideoIE(InfoExtractor):
    """Extractor for huya.com video pages (``/video/play/<id>.html``)."""

    _VALID_URL = r'https?://(?:www\.)?huya\.com/video/play/(?P<id>\d+)\.html'
    IE_NAME = 'huya:video'
    IE_DESC = '虎牙视频'

    _TESTS = [{
        'url': 'https://www.huya.com/video/play/1002412640.html',
        'info_dict': {
            'id': '1002412640',
            'ext': 'mp4',
            'title': '8月3日',
            'thumbnail': r're:https?://.*\.jpg',
            'duration': 14,
            'uploader': '虎牙-ATS欧卡车队青木',
            'uploader_id': '1564376151',
            'upload_date': '20240803',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
        },
    }, {
        'url': 'https://www.huya.com/video/play/556054543.html',
        'info_dict': {
            'id': '556054543',
            'ext': 'mp4',
            'title': '我不挑事 也不怕事',
            'thumbnail': r're:https?://.*\.jpg',
            'duration': 1864,
            'uploader': '卡尔',
            'uploader_id': '367138632',
            'upload_date': '20210811',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
        },
    }]

    def _real_extract(self, url: str):
        """Fetch the moment-content record for the video and map it to an info dict."""
        video_id = self._match_id(url)
        api_response = self._download_json(
            'https://liveapi.huya.com/moment/getMomentContent', video_id,
            query={'videoId': video_id})
        video_info = api_response['data']['moment']['videoInfo']

        # Only definitions carrying a usable URL become formats
        playable_definitions = traverse_obj(
            video_info, ('definitions', lambda _, v: url_or_none(v['url'])))
        formats = [{
            'url': entry['url'],
            **traverse_obj(entry, {
                'format_id': ('defName', {str}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
                'filesize': ('size', {int_or_none}),
            }),
        } for entry in playable_definitions]

        metadata = traverse_obj(video_info, {
            'title': ('videoTitle', {str}),
            'thumbnail': ('videoCover', {url_or_none}),
            'duration': ('videoDuration', {parse_duration}),
            'uploader': ('nickName', {str}),
            'uploader_id': ('uid', {str_or_none}),
            'upload_date': ('videoUploadTime', {unified_strdate}),
            'view_count': ('videoPlayNum', {int_or_none}),
            'comment_count': ('videoCommentNum', {int_or_none}),
            'like_count': ('favorCount', {int_or_none}),
        })
        return {
            'id': video_id,
            'formats': formats,
            **metadata,
        }

View File

@ -25,9 +25,29 @@ class IPrimaIE(InfoExtractor):
'id': 'p51388',
'ext': 'mp4',
'title': 'Partička (92)',
'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
'upload_date': '20201103',
'timestamp': 1604437480,
'description': 'md5:57943f6a50d6188288c3a579d2fd5f01',
'episode': 'Partička (92)',
'season': 'Partička',
'series': 'Prima Partička',
'episode_number': 92,
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-ef6cf9de-c980-4443-92e4-17fe8bccd45c-16x9.jpeg',
},
'params': {
'skip_download': True, # m3u8 download
},
}, {
'url': 'https://zoom.iprima.cz/porady/krasy-kanarskych-ostrovu/tenerife-v-risi-ohne',
'info_dict': {
'id': 'p1412199',
'ext': 'mp4',
'episode_number': 3,
'episode': 'Tenerife: V říši ohně',
'description': 'md5:4b4a05c574b5eaef130e68d4811c3f2c',
'duration': 3111.0,
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-f66dd7fb-c1a0-47d1-b3bc-7db328d566c5-16x9-1711636518.jpg/t_16x9_medium_1366_768',
'title': 'Tenerife: V říši ohně',
'timestamp': 1711825800,
'upload_date': '20240330',
},
'params': {
'skip_download': True, # m3u8 download
@ -131,6 +151,7 @@ class IPrimaIE(InfoExtractor):
video_id = self._search_regex((
r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
r'let\s+videos\s*=\s*([\'"])(?P<id>p\d+)\1',
), webpage, 'real id', group='id', default=None)
if not video_id:
@ -176,7 +197,7 @@ class IPrimaIE(InfoExtractor):
final_result = self._search_json_ld(webpage, video_id, default={})
final_result.update({
'id': video_id,
'title': title,
'title': final_result.get('title') or title,
'thumbnail': self._html_search_meta(
['thumbnail', 'og:image', 'twitter:image'],
webpage, 'thumbnail', default=None),

View File

@ -67,7 +67,7 @@ class KickIE(KickBaseIE):
@classmethod
def suitable(cls, url):
return False if KickClipIE.suitable(url) else super().suitable(url)
return False if (KickVODIE.suitable(url) or KickClipIE.suitable(url)) else super().suitable(url)
def _real_extract(self, url):
channel = self._match_id(url)
@ -98,25 +98,25 @@ class KickIE(KickBaseIE):
class KickVODIE(KickBaseIE):
IE_NAME = 'kick:vod'
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/videos/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{
'url': 'https://kick.com/video/e74614f4-5270-4319-90ad-32179f19a45c',
'url': 'https://kick.com/xqc/videos/8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
'md5': '3870f94153e40e7121a6e46c068b70cb',
'info_dict': {
'id': 'e74614f4-5270-4319-90ad-32179f19a45c',
'id': '8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
'ext': 'mp4',
'title': r're:❎ MEGA DRAMA ❎ LIVE ❎ CLICK ❎ ULTIMATE SKILLS .+',
'title': '18+ #ad 🛑LIVE🛑CLICK🛑DRAMA🛑NEWS🛑STUFF🛑REACT🛑GET IN HHERE🛑BOP BOP🛑WEEEE WOOOO🛑',
'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.',
'channel': 'xqc',
'channel_id': '668',
'uploader': 'xQc',
'uploader_id': '676',
'upload_date': '20240724',
'timestamp': 1721796562,
'duration': 18566.0,
'upload_date': '20240909',
'timestamp': 1725919141,
'duration': 10155.0,
'thumbnail': r're:^https?://.*\.jpg',
'view_count': int,
'categories': ['VALORANT'],
'categories': ['Just Chatting'],
'age_limit': 0,
},
'params': {'skip_download': 'm3u8'},

View File

@ -1,86 +1,11 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
format_field,
int_or_none,
parse_iso8601,
unified_strdate,
)
class LnkGoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
_TESTS = [{
'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
'info_dict': {
'id': '10809',
'ext': 'mp4',
'title': "Put'ka: Trys Klausimai",
'upload_date': '20161216',
'description': 'Seniai matytas Putka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
'age_limit': 18,
'duration': 117,
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1481904000,
},
'params': {
'skip_download': True, # HLS download
},
}, {
'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
'info_dict': {
'id': '10467',
'ext': 'mp4',
'title': 'Nėrdas: Kompiuterio Valymas',
'upload_date': '20150113',
'description': 'md5:7352d113a242a808676ff17e69db6a69',
'age_limit': 18,
'duration': 346,
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1421164800,
},
'params': {
'skip_download': True, # HLS download
},
}, {
'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
'only_matching': True,
}]
_AGE_LIMITS = {
'N-7': 7,
'N-14': 14,
'S': 18,
}
_M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'
def _real_extract(self, url):
display_id, video_id = self._match_valid_url(url).groups()
video_info = self._download_json(
'https://lnk.lt/api/main/video-page/{}/{}/false'.format(display_id, video_id or '0'),
display_id)['videoConfig']['videoInfo']
video_id = str(video_info['id'])
title = video_info['title']
prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4'
formats = self._extract_m3u8_formats(
self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''),
video_id, 'mp4', 'm3u8_native')
return {
'id': video_id,
'display_id': display_id,
'title': title,
'formats': formats,
'thumbnail': format_field(video_info, 'posterImage', 'https://lnk.lt/all-images/%s'),
'duration': int_or_none(video_info.get('duration')),
'description': clean_html(video_info.get('htmlDescription')),
'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
'timestamp': parse_iso8601(video_info.get('airDate')),
'view_count': int_or_none(video_info.get('viewsCount')),
}
class LnkIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P<id>\d+)'

View File

@ -109,7 +109,7 @@ class PinterestBaseIE(InfoExtractor):
class PinterestIE(PinterestBaseIE):
_VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?P<id>\d+)'
_VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?:[\w-]+--)?(?P<id>\d+)'
_TESTS = [{
# formats found in data['videos']
'url': 'https://www.pinterest.com/pin/664281013778109217/',
@ -174,6 +174,25 @@ class PinterestIE(PinterestBaseIE):
}, {
'url': 'https://co.pinterest.com/pin/824721750502199491/',
'only_matching': True,
},
{
'url': 'https://pinterest.com/pin/dive-into-serenity-blue-lagoon-pedi-nails-for-a-tranquil-and-refreshing-spa-experience-video-in-2024--2885187256207927',
'info_dict': {
'id': '2885187256207927',
'ext': 'mp4',
'title': 'Dive into Serenity: Blue Lagoon Pedi Nails for a Tranquil and Refreshing Spa Experience! 💙💅',
'description': 'md5:5da41c767d2317e42e49b663b0b2150f',
'uploader': 'Glamour Artistry |Everyday Outfits, Luxury Fashion & Nail Designs',
'uploader_id': '1142999717836434688',
'upload_date': '20240702',
'timestamp': 1719939156,
'duration': 7.967,
'comment_count': int,
'repost_count': int,
'categories': 'count:9',
'tags': ['#BlueLagoonPediNails', '#SpaExperience'],
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
},
}]
def _real_extract(self, url):

View File

@ -8,7 +8,7 @@ from ..utils import js_to_json
class RTPIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/(?:(?:estudoemcasa|palco|zigzag)/)?p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
'md5': 'e736ce0c665e459ddb818546220b4ef8',
@ -19,9 +19,25 @@ class RTPIE(InfoExtractor):
'description': 'As paixões musicais de António Cartaxo e António Macedo',
'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'https://www.rtp.pt/play/zigzag/p13166/e757904/25-curiosidades-25-de-abril',
'md5': '9a81ed53f2b2197cfa7ed455b12f8ade',
'info_dict': {
'id': 'e757904',
'ext': 'mp4',
'title': '25 Curiosidades, 25 de Abril',
'description': 'Estudar ou não estudar - Em cada um dos episódios descobrimos uma curiosidade acerca de como era viver em Portugal antes da revolução do 25 de abr',
'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
'only_matching': True,
}, {
'url': 'https://www.rtp.pt/play/estudoemcasa/p7776/portugues-1-ano',
'only_matching': True,
}, {
'url': 'https://www.rtp.pt/play/palco/p13785/l7nnon',
'only_matching': True,
}]
_RX_OBFUSCATION = re.compile(r'''(?xs)
@ -49,17 +65,17 @@ class RTPIE(InfoExtractor):
f, config = self._search_regex(
r'''(?sx)
var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*
(?:var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*)?
var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
''', webpage,
'player config', group=('f', 'config'))
f = self._parse_json(
f, video_id,
lambda data: self.__unobfuscate(data, video_id=video_id))
config = self._parse_json(
config, video_id,
lambda data: self.__unobfuscate(data, video_id=video_id))
f = config['file'] if not f else self._parse_json(
f, video_id,
lambda data: self.__unobfuscate(data, video_id=video_id))
formats = []
if isinstance(f, dict):

View File

@ -0,0 +1,33 @@
from .common import InfoExtractor
class ScreenRecIE(InfoExtractor):
    # Matches share links whose ID is exactly ten word characters
    _VALID_URL = r'https?://(?:www\.)?screenrec\.com/share/(?P<id>\w{10})'
    _TESTS = [{
        'url': 'https://screenrec.com/share/DasLtbknYo',
        'info_dict': {
            'id': 'DasLtbknYo',
            'ext': 'mp4',
            'title': '02.05.2024_03.01.25_REC',
            'description': 'Recorded with ScreenRec',
            'thumbnail': r're:^https?://.*\.gif$',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Download the share page and build the info dict from its embedded metadata."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The manifest URL is embedded in the page as a quoted `customUrl` value
        m3u8_url = self._search_regex(
            r'customUrl\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 URL', group='url')

        # Prefer the OpenGraph title; fall back to the HTML <title>
        title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_extract_title(webpage)

        info = {
            'id': video_id,
            'title': title,
        }
        info['description'] = self._og_search_description(webpage)
        info['thumbnail'] = self._og_search_thumbnail(webpage)
        info['formats'] = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
        return info

36
yt_dlp/extractor/sen.py Normal file
View File

@ -0,0 +1,36 @@
from .common import InfoExtractor
from ..utils import url_or_none
from ..utils.traversal import traverse_obj
class SenIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?sen\.com/video/(?P<id>[0-9a-f-]+)'
    _TEST = {
        'url': 'https://www.sen.com/video/eef46eb1-4d79-4e28-be9d-bd937767f8c4',
        'md5': 'ff615aca9691053c94f8f10d96cd7884',
        'info_dict': {
            'id': 'eef46eb1-4d79-4e28-be9d-bd937767f8c4',
            'ext': 'mp4',
            'description': 'Florida, 28 Sep 2022',
            'title': 'Hurricane Ian',
            'tags': ['North America', 'Storm', 'Weather'],
        },
    }

    def _real_extract(self, url):
        """Query the public content API and assemble formats plus basic metadata."""
        video_id = self._match_id(url)
        api_data = self._download_json(
            f'https://api.sen.com/content/public/video/{video_id}', video_id)

        # Use the manifest URL advertised by the "player" node when there is one,
        # otherwise fall back to the manifest URL built from the video ID
        manifest_url = traverse_obj(api_data, (
            'data', 'nodes', lambda _, v: v['id'] == 'player', 'video', 'url', {url_or_none}, any))
        if not manifest_url:
            manifest_url = f'https://vod.sen.com/videos/{video_id}/manifest.m3u8'

        formats = self._extract_m3u8_formats(manifest_url, video_id, 'mp4')

        # Title, description and tags are read from the "details" node
        details = traverse_obj(api_data, ('data', 'nodes', lambda _, v: v['id'] == 'details', any, 'content', {
            'title': ('title', 'text', {str}),
            'description': ('descriptions', 0, 'text', {str}),
            'tags': ('badges', ..., 'text', {str}),
        }))

        return {
            'id': video_id,
            'formats': formats,
            **details,
        }

View File

@ -27,7 +27,7 @@ class ServusIE(InfoExtractor):
'info_dict': {
'id': 'AA-28BYCQNH92111',
'ext': 'mp4',
'title': 'Klettersteige in den Alpen',
'title': 'Vie Ferrate - Klettersteige in den Alpen',
'description': 'md5:25e47ddd83a009a0f9789ba18f2850ce',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2823,
@ -38,6 +38,7 @@ class ServusIE(InfoExtractor):
'season_number': 11,
'episode': 'Episode 8 - Vie Ferrate Klettersteige in den Alpen',
'episode_number': 8,
'categories': ['Bergwelten'],
},
'params': {'skip_download': 'm3u8'},
}, {
@ -71,8 +72,11 @@ class ServusIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url).upper()
webpage = self._download_webpage(url, video_id)
next_data = self._search_nextjs_data(webpage, video_id, fatal=False)
video = self._download_json(
'https://api-player.redbull.com/stv/servus-tv?timeZone=Europe/Berlin',
'https://api-player.redbull.com/stv/servus-tv-playnet',
video_id, 'Downloading video JSON', query={'videoId': video_id})
if not video.get('videoUrl'):
self._report_errors(video)
@ -89,7 +93,7 @@ class ServusIE(InfoExtractor):
return {
'id': video_id,
'title': video.get('title'),
'description': self._get_description(video_id) or video.get('description'),
'description': self._get_description(next_data) or video.get('description'),
'thumbnail': video.get('poster'),
'duration': float_or_none(video.get('duration')),
'timestamp': unified_timestamp(video.get('currentSunrise')),
@ -100,16 +104,19 @@ class ServusIE(InfoExtractor):
'episode_number': episode_number,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(next_data, ('props', 'pageProps', 'data', {
'title': ('title', 'rendered', {str}),
'timestamp': ('stv_date', 'raw', {int}),
'duration': ('stv_duration', {float_or_none}),
'categories': ('category_names', ..., {str}),
})),
}
def _get_description(self, video_id):
info = self._download_json(
f'https://backend.servustv.com/wp-json/rbmh/v2/media_asset/aa_id/{video_id}?fieldset=page',
video_id, fatal=False)
return join_nonempty(*traverse_obj(info, (
('stv_short_description', 'stv_long_description'),
{lambda x: unescapeHTML(x.replace('\n\n', '\n'))})), delim='\n\n')
def _get_description(self, next_data):
    """Join the short and long descriptions found in the page's Next.js data.

    Each description has double newlines collapsed to single ones and HTML
    entities unescaped; the non-empty results are joined with a blank line.
    """
    description_path = (
        'props', 'pageProps', 'data',
        ('stv_short_description', 'stv_long_description'), {str},
        {lambda text: text.replace('\n\n', '\n')}, {unescapeHTML})
    paragraphs = traverse_obj(next_data, description_path)
    return join_nonempty(*paragraphs, delim='\n\n')
def _report_errors(self, video):
playability_errors = traverse_obj(video, ('playabilityErrors', ...))

View File

@ -1,33 +1,31 @@
import base64
import datetime as dt
import functools
import itertools
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import int_or_none, traverse_obj, urlencode_postdata, urljoin
from ..utils import int_or_none, traverse_obj, url_or_none, urljoin
class TenPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
_NETRC_MACHINE = '10play'
_TESTS = [{
'url': 'https://10play.com.au/neighbours/web-extras/season-39/nathan-borg-is-the-first-aussie-actor-with-a-cochlear-implant-to-join-neighbours/tpv210128qupwd',
'url': 'https://10play.com.au/neighbours/web-extras/season-41/heres-a-first-look-at-mischa-bartons-neighbours-debut/tpv230911hyxnz',
'info_dict': {
'id': '6226844312001',
'id': '6336940246112',
'ext': 'mp4',
'title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
'alt_title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
'description': 'md5:a02d0199c901c2dd4c796f1e7dd0de43',
'duration': 186,
'season': 'Season 39',
'season_number': 39,
'title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
'alt_title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
'description': 'Neighbours Premieres Monday, September 18 At 4:30pm On 10 And 10 Play And 6:30pm On 10 Peach',
'duration': 74,
'season': 'Season 41',
'season_number': 41,
'series': 'Neighbours',
'thumbnail': r're:https://.*\.jpg',
'uploader': 'Channel 10',
'age_limit': 15,
'timestamp': 1611810000,
'upload_date': '20210128',
'timestamp': 1694386800,
'upload_date': '20230910',
'uploader_id': '2199827728001',
},
'params': {
@ -35,21 +33,30 @@ class TenPlayIE(InfoExtractor):
},
'skip': 'Only available in Australia',
}, {
'url': 'https://10play.com.au/todd-sampsons-body-hack/episodes/season-4/episode-7/tpv200921kvngh',
'url': 'https://10play.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp',
'info_dict': {
'id': '6192880312001',
'id': '9000000000091177',
'ext': 'mp4',
'title': "Todd Sampson's Body Hack - S4 Ep. 2",
'description': 'md5:fa278820ad90f08ea187f9458316ac74',
'title': 'Neighbours - S42 Ep. 9107',
'alt_title': 'Thu 05 Sep',
'description': 'md5:37a1f4271be34b9ee2b533426a5fbaef',
'duration': 1388,
'episode': 'Episode 9107',
'episode_number': 9107,
'season': 'Season 42',
'season_number': 42,
'series': 'Neighbours',
'thumbnail': r're:https://.*\.jpg',
'age_limit': 15,
'timestamp': 1600770600,
'upload_date': '20200922',
'timestamp': 1725517860,
'upload_date': '20240905',
'uploader': 'Channel 10',
'uploader_id': '2199827728001',
},
'params': {
'skip_download': True,
},
'skip': 'Only available in Australia',
}, {
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
'only_matching': True,
@ -66,55 +73,42 @@ class TenPlayIE(InfoExtractor):
'X': 18,
}
def _get_bearer_token(self, video_id):
username, password = self._get_login_info()
if username is None or password is None:
self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.')
_timestamp = dt.datetime.now().strftime('%Y%m%d000000')
_auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii')
data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={
'X-Network-Ten-Auth': _auth_header,
}, data=urlencode_postdata({
'email': username,
'password': password,
}))
return 'Bearer ' + data['jwt']['accessToken']
def _real_extract(self, url):
content_id = self._match_id(url)
data = self._download_json(
'https://10play.com.au/api/v1/videos/' + content_id, content_id)
headers = {}
if data.get('memberGated') is True:
_token = self._get_bearer_token(content_id)
headers = {'Authorization': _token}
_video_url = self._download_json(
data.get('playbackApiEndpoint'), content_id, 'Downloading video JSON',
headers=headers).get('source')
m3u8_url = self._request_webpage(HEADRequest(
_video_url), content_id).url
video_data = self._download_json(
f'https://vod.ten.com.au/api/videos/bcquery?command=find_videos_by_id&video_id={data["altId"]}',
content_id, 'Downloading video JSON')
m3u8_url = self._request_webpage(
HEADRequest(video_data['items'][0]['HLSURL']),
content_id, 'Checking stream URL').url
if '10play-not-in-oz' in m3u8_url:
self.raise_geo_restricted(countries=['AU'])
# Attempt to get a higher quality stream
m3u8_url = m3u8_url.replace(',150,75,55,0000', ',300,150,75,55,0000')
formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
return {
'id': content_id,
'formats': formats,
'subtitles': {'en': [{'url': data.get('captionUrl')}]} if data.get('captionUrl') else None,
'id': data.get('altId') or content_id,
'duration': data.get('duration'),
'title': data.get('subtitle'),
'alt_title': data.get('title'),
'description': data.get('description'),
'age_limit': self._AUS_AGES.get(data.get('classification')),
'series': data.get('tvShow'),
'season_number': int_or_none(data.get('season')),
'episode_number': int_or_none(data.get('episode')),
'timestamp': data.get('published'),
'thumbnail': data.get('imageUrl'),
'subtitles': {'en': [{'url': data['captionUrl']}]} if url_or_none(data.get('captionUrl')) else None,
'uploader': 'Channel 10',
'uploader_id': '2199827728001',
**traverse_obj(data, {
'id': ('altId', {str}),
'duration': ('duration', {int_or_none}),
'title': ('subtitle', {str}),
'alt_title': ('title', {str}),
'description': ('description', {str}),
'age_limit': ('classification', {self._AUS_AGES.get}),
'series': ('tvShow', {str}),
'season_number': ('season', {int_or_none}),
'episode_number': ('episode', {int_or_none}),
'timestamp': ('published', {int_or_none}),
'thumbnail': ('imageUrl', {url_or_none}),
}),
}

View File

@ -1,7 +1,17 @@
import base64
import math
import time
from .common import InfoExtractor
from ..utils import InAdvancePagedList, str_or_none, traverse_obj, try_call
from .videa import VideaIE
from ..utils import (
InAdvancePagedList,
int_or_none,
str_or_none,
traverse_obj,
try_call,
update_url_query,
)
class XimalayaBaseIE(InfoExtractor):
@ -71,23 +81,92 @@ class XimalayaIE(XimalayaBaseIE):
'like_count': int,
},
},
{
# VIP-restricted audio
'url': 'https://www.ximalaya.com/sound/562111701',
'only_matching': True,
},
]
@staticmethod
def _decrypt_filename(file_id, seed):
cgstr = ''
key = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890'
for _ in key:
seed = float(int(211 * seed + 30031) % 65536)
r = int(seed / 65536 * len(key))
cgstr += key[r]
key = key.replace(key[r], '')
parts = file_id.split('*')
filename = ''.join(cgstr[int(part)] for part in parts if part.isdecimal())
if not filename.startswith('/'):
filename = '/' + filename
return filename
@staticmethod
def _decrypt_url_params(encrypted_params):
    """Return ``(sign, token, timestamp)`` from the base64-encoded, RC4-encrypted parameter string."""
    decrypted = VideaIE.rc4(base64.b64decode(encrypted_params), 'xkt3a41psizxrh9l')
    fields = decrypted.split('-')
    # The first '-'-separated field is skipped; the next three are sign, token, timestamp
    sign = fields[1]
    token = fields[2]
    timestamp = fields[3]
    return sign, token, timestamp
def _real_extract(self, url):
scheme = 'https' if url.startswith('https') else 'http'
audio_id = self._match_id(url)
audio_info_file = f'{scheme}://m.ximalaya.com/tracks/{audio_id}.json'
audio_info = self._download_json(
audio_info_file, audio_id,
f'Downloading info json {audio_info_file}', 'Unable to download info file')
f'{scheme}://m.ximalaya.com/tracks/{audio_id}.json', audio_id,
'Downloading info json', 'Unable to download info file')
formats = [{
formats = []
# NOTE: VIP-restricted audio
if audio_info.get('is_paid'):
ts = int(time.time())
vip_info = self._download_json(
f'{scheme}://mpay.ximalaya.com/mobile/track/pay/{audio_id}/{ts}',
audio_id, 'Downloading VIP info json', 'Unable to download VIP info file',
query={'device': 'pc', 'isBackend': 'true', '_': ts})
filename = self._decrypt_filename(vip_info['fileId'], vip_info['seed'])
sign, token, timestamp = self._decrypt_url_params(vip_info['ep'])
vip_url = update_url_query(
f'{vip_info["domain"]}/download/{vip_info["apiVersion"]}{filename}', {
'sign': sign,
'token': token,
'timestamp': timestamp,
'buy_key': vip_info['buyKey'],
'duration': vip_info['duration'],
})
fmt = {
'format_id': 'vip',
'url': vip_url,
'vcodec': 'none',
}
if '_preview_' in vip_url:
self.report_warning(
f'This tracks requires a VIP account. Using a sample instead. {self._login_hint()}')
fmt.update({
'format_note': 'Sample',
'preference': -10,
**traverse_obj(vip_info, {
'filesize': ('sampleLength', {int_or_none}),
'duration': ('sampleDuration', {int_or_none}),
}),
})
else:
fmt.update(traverse_obj(vip_info, {
'filesize': ('totalLength', {int_or_none}),
'duration': ('duration', {int_or_none}),
}))
fmt['abr'] = try_call(lambda: fmt['filesize'] * 8 / fmt['duration'] / 1024)
formats.append(fmt)
formats.extend([{
'format_id': f'{bps}k',
'url': audio_info[k],
'abr': bps,
'vcodec': 'none',
} for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)]
} for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)])
thumbnails = []
for k in audio_info:

View File

@ -3,16 +3,13 @@ from ..utils import (
int_or_none,
str_or_none,
try_get,
update_url_query,
url_or_none,
)
class XinpianchangIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://www\.xinpianchang\.com/(?P<id>[^/]+?)(?:\D|$)'
IE_NAME = 'xinpianchang'
IE_DESC = 'xinpianchang.com'
_VALID_URL = r'https?://(www\.)?xinpianchang\.com/(?P<id>a\d+)'
IE_DESC = '新片场'
_TESTS = [{
'url': 'https://www.xinpianchang.com/a11766551',
'info_dict': {
@ -49,11 +46,11 @@ class XinpianchangIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id=video_id)
domain = self.find_value_with_regex(var='requireNewDomain', webpage=webpage)
vid = self.find_value_with_regex(var='vid', webpage=webpage)
app_key = self.find_value_with_regex(var='modeServerAppKey', webpage=webpage)
api = update_url_query(f'{domain}/mod/api/v2/media/{vid}', {'appKey': app_key})
data = self._download_json(api, video_id=video_id)['data']
video_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['detail']['video']
data = self._download_json(
f'https://mod-api.xinpianchang.com/mod/api/v2/media/{video_data["vid"]}', video_id,
query={'appKey': video_data['appKey']})['data']
formats, subtitles = [], {}
for k, v in data.get('resource').items():
if k in ('dash', 'hls'):
@ -72,6 +69,10 @@ class XinpianchangIE(InfoExtractor):
'width': int_or_none(prog.get('width')),
'height': int_or_none(prog.get('height')),
'ext': 'mp4',
'http_headers': {
# NB: Server returns 403 without the Range header
'Range': 'bytes=0-',
},
} for prog in v if prog.get('url') or []])
return {
@ -87,6 +88,3 @@ class XinpianchangIE(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
}
def find_value_with_regex(self, var, webpage):
return self._search_regex(rf'var\s{var}\s=\s\"(?P<vid>[^\"]+)\"', webpage, name=var)

View File

@ -69,6 +69,8 @@ from ..utils import (
)
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
STREAMING_DATA_PO_TOKEN = '__yt_dlp_po_token'
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
'web': {
@ -79,6 +81,7 @@ INNERTUBE_CLIENTS = {
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
'REQUIRE_PO_TOKEN': True,
},
# Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
'web_safari': {
@ -90,6 +93,7 @@ INNERTUBE_CLIENTS = {
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
'REQUIRE_PO_TOKEN': True,
},
'web_embedded': {
'INNERTUBE_CONTEXT': {
@ -132,6 +136,7 @@ INNERTUBE_CLIENTS = {
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
'REQUIRE_JS_PLAYER': False,
'REQUIRE_PO_TOKEN': True,
},
'android_music': {
'INNERTUBE_CONTEXT': {
@ -146,6 +151,7 @@ INNERTUBE_CLIENTS = {
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
'REQUIRE_JS_PLAYER': False,
'REQUIRE_PO_TOKEN': True,
},
'android_creator': {
'INNERTUBE_CONTEXT': {
@ -160,6 +166,7 @@ INNERTUBE_CLIENTS = {
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
'REQUIRE_JS_PLAYER': False,
'REQUIRE_PO_TOKEN': True,
},
# YouTube Kids videos aren't returned on this client for some reason
'android_vr': {
@ -323,6 +330,7 @@ def build_innertube_clients():
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
ytcfg.setdefault('REQUIRE_PO_TOKEN', False)
ytcfg.setdefault('PLAYER_PARAMS', None)
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
@ -688,31 +696,46 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
'identity token', default=None, fatal=False)
@staticmethod
def _extract_account_syncid(*args):
def _data_sync_id_to_delegated_session_id(self, data_sync_id):
if not data_sync_id:
return
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
channel_syncid, _, user_syncid = data_sync_id.partition('||')
if user_syncid:
return channel_syncid
def _extract_account_syncid(self, *args):
"""
Extract syncId required to download private playlists of secondary channels
Extract current session ID required to download private playlists of secondary channels
@params response and/or ytcfg
"""
for data in args:
# ytcfg includes channel_syncid if on secondary channel
delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
if delegated_sid:
return delegated_sid
sync_ids = (try_get(
data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
lambda x: x['DATASYNC_ID']), str) or '').split('||')
if len(sync_ids) >= 2 and sync_ids[1]:
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
return sync_ids[0]
# ytcfg includes channel_syncid if on secondary channel
if delegated_sid := traverse_obj(args, (..., 'DELEGATED_SESSION_ID', {str}, any)):
return delegated_sid
@staticmethod
def _extract_visitor_data(*args):
data_sync_id = self._extract_data_sync_id(*args)
return self._data_sync_id_to_delegated_session_id(data_sync_id)
def _extract_data_sync_id(self, *args):
    """
    Extract the dataSyncId of the current account.
    Its format is DELEGATED_SESSION_ID||USER_SESSION_ID or USER_SESSION_ID||
    A value given through the "data_sync_id" extractor argument takes precedence.
    @params response and/or ytcfg
    """
    configured_id = self._configuration_arg('data_sync_id', [None], ie_key=YoutubeIE, casesense=True)[0]
    if configured_id:
        return configured_id

    # Look in every given object: ytcfg key first, then the API response context
    candidate_keys = ('DATASYNC_ID', ('responseContext', 'mainAppWebResponseContext', 'datasyncId'))
    return traverse_obj(args, (..., candidate_keys, {str}, any))
def _extract_visitor_data(self, *args):
    """
    Extract visitorData from an API response or ytcfg.
    A value given through the "visitor_data" extractor argument takes precedence.
    visitorData appears to be used to track session state
    """
    configured_value = self._configuration_arg('visitor_data', [None], ie_key=YoutubeIE, casesense=True)[0]
    if configured_value:
        return configured_value

    candidate_keys = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [candidate_keys], expected_type=str)
@ -1334,11 +1357,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
_POTOKEN_EXPERIMENTS = ('51217476', '51217102')
_BROKEN_CLIENTS = {
short_client_name(client): client
for client in ('android', 'android_creator', 'android_music')
}
_DEFAULT_CLIENTS = ('ios', 'web_creator')
_GEO_BYPASS = False
@ -3701,6 +3719,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
**cls._get_checkok_params(),
}
def _get_config_po_token(self, client):
    """Return the PO Token configured for ``client`` via the "po_token" extractor argument.

    Entries have the form "client+po_token". Malformed entries produce a
    warning and are skipped. Returns None when no entry matches.
    """
    for entry in self._configuration_arg('po_token', [], ie_key=YoutubeIE, casesense=True):
        entry_client, separator, token = entry.partition('+')
        if separator:
            if entry_client == client:
                return token
        else:
            self.report_warning(
                f'Invalid po_token configuration format. Expected "client+po_token", got "{entry}"', only_once=True)
    return None
def fetch_po_token(self, client='web', visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
    """Return a PO Token for ``client``, or None when one cannot be obtained.

    A token supplied through the "po_token" extractor argument is preferred;
    otherwise the request is delegated to ``_fetch_po_token``.

    @param client        name of the client the token is for
    @param visitor_data  Visitor Data the token is bound to when logged out
    @param data_sync_id  Data Sync ID the token is bound to when logged in
    @param player_url    player URL; the missing-binding checks only apply when it is set
    @param kwargs        passed through unchanged to ``_fetch_po_token``
    """
    # PO Token is bound to visitor_data / Visitor ID when logged out. Must have visitor_data for it to function.
    if not visitor_data and not self.is_authenticated and player_url:
        self.report_warning(
            f'Unable to fetch PO Token for {client} client: Missing required Visitor Data. '
            'You may need to pass Visitor Data with --extractor-args "youtube:visitor_data=XXX"')
        return None

    config_po_token = self._get_config_po_token(client)
    if config_po_token:
        # PO token is bound to data_sync_id / account Session ID when logged in. However, for the config po_token,
        # if using first channel in an account then we don't need the data_sync_id anymore...
        if not data_sync_id and self.is_authenticated and player_url:
            # The two sentences are separate literals; the trailing space keeps them from running together
            self.report_warning(
                f'Got a PO Token for {client} client, but missing Data Sync ID for account. Formats may not work. '
                'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
        return config_po_token

    # A Data Sync ID is required for external fetching when logged in
    if not data_sync_id and self.is_authenticated and player_url:
        self.report_warning(
            f'Unable to fetch PO Token for {client} client: Missing required Data Sync ID for account. '
            'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
        return None

    return self._fetch_po_token(
        client=client,
        visitor_data=visitor_data,
        data_sync_id=data_sync_id,
        player_url=player_url,
        **kwargs,
    )
def _fetch_po_token(self, client, visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
    """External PO Token fetch stub

    Has no implementation here, so it returns None. fetch_po_token() calls it
    with these keyword arguments when no PO Token is configured for the client.
    """
@staticmethod
def _is_agegated(player_response):
if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
@ -3717,13 +3783,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, visitor_data, data_sync_id, po_token):
headers = self.generate_api_headers(
ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
ytcfg=player_ytcfg,
default_client=client,
visitor_data=visitor_data,
session_index=self._extract_session_index(master_ytcfg, player_ytcfg),
account_syncid=(
self._data_sync_id_to_delegated_session_id(data_sync_id)
or self._extract_account_syncid(master_ytcfg, initial_pr, player_ytcfg)
),
)
yt_query = {
'videoId': video_id,
@ -3734,6 +3804,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if player_params := self._configuration_arg('player_params', [default_pp], casesense=True)[0]:
yt_query['params'] = player_params
if po_token:
yt_query['serviceIntegrityDimensions'] = {'poToken': po_token}
sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
item_id=video_id, ep='player', query=yt_query,
@ -3744,7 +3818,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_requested_clients(self, url, smuggled_data):
requested_clients = []
broken_clients = []
excluded_clients = []
allowed_clients = sorted(
(client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
@ -3758,12 +3831,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
excluded_clients.append(client[1:])
elif client not in allowed_clients:
self.report_warning(f'Skipping unsupported client "{client}"')
elif client in self._BROKEN_CLIENTS.values():
broken_clients.append(client)
else:
requested_clients.append(client)
# Force deprioritization of _BROKEN_CLIENTS for format de-duplication
requested_clients.extend(broken_clients)
if not requested_clients:
requested_clients.extend(self._DEFAULT_CLIENTS)
for excluded_client in excluded_clients:
@ -3788,19 +3857,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return pr_id
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
initial_pr = ignore_initial_response = None
initial_pr = None
if webpage:
if 'web' in clients:
experiments = traverse_obj(master_ytcfg, (
'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentIds', {lambda x: x.split(',')}, ...))
if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
self.report_warning(
'Webpage contains broken formats (poToken experiment detected). Ignoring initial player response')
ignore_initial_response = True
initial_pr = self._search_json(
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
prs = []
deprioritized_prs = []
if initial_pr and not self._invalid_player_response(initial_pr, video_id):
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
@ -3822,14 +3886,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return
tried_iframe_fallback = False
player_url = None
player_url = visitor_data = data_sync_id = None
skipped_clients = {}
while clients:
deprioritize_pr = False
client, base_client, variant = _split_innertube_client(clients.pop())
player_ytcfg = {}
if client == 'web':
player_ytcfg = self._get_default_ytcfg() if ignore_initial_response else master_ytcfg
elif 'configs' not in self._configuration_arg('player_skip'):
player_ytcfg = master_ytcfg if client == 'web' else {}
if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
@ -3842,34 +3905,53 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_url = self._download_player_url(video_id)
tried_iframe_fallback = True
pr = initial_pr if client == 'web' and not ignore_initial_response else None
for retry in self.RetryManager(fatal=False):
try:
pr = pr or self._extract_player_response(
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
player_url if require_js_player else None, initial_pr, smuggled_data)
except ExtractorError as e:
self.report_warning(e)
break
experiments = traverse_obj(pr, (
'responseContext', 'serviceTrackingParams', lambda _, v: v['service'] == 'GFEEDBACK',
'params', lambda _, v: v['key'] == 'e', 'value', {lambda x: x.split(',')}, ...))
if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
pr = None
retry.error = ExtractorError('API returned broken formats (poToken experiment detected)', expected=True)
if not pr:
visitor_data = visitor_data or self._extract_visitor_data(master_ytcfg, initial_pr, player_ytcfg)
data_sync_id = data_sync_id or self._extract_data_sync_id(master_ytcfg, initial_pr, player_ytcfg)
po_token = self.fetch_po_token(
client=client, visitor_data=visitor_data,
data_sync_id=data_sync_id if self.is_authenticated else None,
player_url=player_url if require_js_player else None,
)
require_po_token = self._get_default_ytcfg(client).get('REQUIRE_PO_TOKEN')
if not po_token and require_po_token:
self.report_warning(
f'No PO Token provided for {client} client, '
f'which is required for working {client} formats. '
f'You can manually pass a PO Token for this client with '
f'--extractor-args "youtube:po_token={client}+XXX"',
only_once=True)
deprioritize_pr = True
pr = initial_pr if client == 'web' else None
try:
pr = pr or self._extract_player_response(
client, video_id,
master_ytcfg=player_ytcfg or master_ytcfg,
player_ytcfg=player_ytcfg,
player_url=player_url,
initial_pr=initial_pr,
visitor_data=visitor_data,
data_sync_id=data_sync_id,
po_token=po_token)
except ExtractorError as e:
self.report_warning(e)
continue
if pr_id := self._invalid_player_response(pr, video_id):
skipped_clients[client] = pr_id
elif pr:
# Save client name for introspection later
name = short_client_name(client)
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
sd[STREAMING_DATA_CLIENT_NAME] = name
sd[STREAMING_DATA_CLIENT_NAME] = client
sd[STREAMING_DATA_PO_TOKEN] = po_token
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
f[STREAMING_DATA_CLIENT_NAME] = name
prs.append(pr)
f[STREAMING_DATA_CLIENT_NAME] = client
f[STREAMING_DATA_PO_TOKEN] = po_token
if deprioritize_pr:
deprioritized_prs.append(pr)
else:
prs.append(pr)
# tv_embedded can work around age-gate and age-verification IF the video is embeddable
if self._is_agegated(pr) and variant != 'tv_embedded':
@ -3893,6 +3975,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# _producer, _testsuite, & _vr variants can also work around age-verification
append_client('web_creator', 'mediaconnect')
prs.extend(deprioritized_prs)
if skipped_clients:
self.report_warning(
f'Skipping player responses from {"/".join(skipped_clients)} clients '
@ -4027,13 +4111,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
# _BROKEN_CLIENTS return videoplayback URLs that expire after 30 seconds
# Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
is_broken = client_name in self._BROKEN_CLIENTS
po_token = fmt.get(STREAMING_DATA_PO_TOKEN)
if po_token:
fmt_url = update_url_query(fmt_url, {'pot': po_token})
# Clients that require PO Token return videoplayback URLs that may return 403
is_broken = (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN'))
if is_broken:
self.report_warning(
f'{video_id}: {self._BROKEN_CLIENTS[client_name]} client formats are broken '
'and may yield HTTP Error 403. They will be deprioritized', only_once=True)
f'{video_id}: {client_name} client formats require a PO Token which was not provided. '
'They will be deprioritized as they may yield HTTP Error 403', only_once=True)
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
fps = int_or_none(fmt.get('fps')) or 0
@ -4109,12 +4197,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
skip_manifests.add('dash')
def process_manifest_format(f, proto, client_name, itag):
def process_manifest_format(f, proto, client_name, itag, po_token):
key = (proto, f.get('language'))
if not all_formats and key in itags[itag]:
return False
itags[itag].add(key)
if f.get('source_preference') is None:
f['source_preference'] = -1
# Clients that require PO Token return videoplayback URLs that may return 403
# hls does not currently require PO Token
if (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN')) and proto != 'hls':
self.report_warning(
f'{video_id}: {client_name} client {proto} formats require a PO Token which was not provided. '
'They will be deprioritized as they may yield HTTP Error 403', only_once=True)
f['format_note'] = join_nonempty(f.get('format_note'), 'BROKEN', delim=' ')
f['source_preference'] -= 20
if itag and all_formats:
f['format_id'] = f'{itag}-{proto}'
elif any(p != proto for p, _ in itags[itag]):
@ -4126,9 +4226,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
f['language_preference'] = PREFERRED_LANG_VALUE
if f.get('source_preference') is None:
f['source_preference'] = -1
if itag in ('616', '235'):
f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
f['source_preference'] += 100
@ -4149,23 +4246,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
subtitles = {}
for sd in streaming_data:
client_name = sd.get(STREAMING_DATA_CLIENT_NAME)
po_token = sd.get(STREAMING_DATA_PO_TOKEN)
hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
if hls_manifest_url:
if po_token:
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
fmts, subs = self._extract_m3u8_formats_and_subtitles(
hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
subtitles = self._merge_subtitles(subs, subtitles)
for f in fmts:
if process_manifest_format(f, 'hls', client_name, self._search_regex(
r'/itag/(\d+)', f['url'], 'itag', default=None)):
r'/itag/(\d+)', f['url'], 'itag', default=None), po_token):
yield f
dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
if dash_manifest_url:
if po_token:
dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
for f in formats:
if process_manifest_format(f, 'dash', client_name, f['format_id']):
if process_manifest_format(f, 'dash', client_name, f['format_id'], po_token):
f['filesize'] = int_or_none(self._search_regex(
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
if needs_live_processing:
@ -4987,7 +5088,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _rich_entries(self, rich_grid_renderer):
renderer = traverse_obj(
rich_grid_renderer,
('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}
('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel'), any)) or {}
video_id = renderer.get('videoId')
if video_id:
yield self._extract_video(renderer)
@ -4999,6 +5100,21 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
video_title=self._get_text(renderer, 'title'))
return
# shortsLockupViewModel extraction
entity_id = renderer.get('entityId')
if entity_id:
video_id = traverse_obj(renderer, ('onTap', 'innertubeCommand', 'reelWatchEndpoint', 'videoId', {str}))
if not video_id:
return
yield self.url_result(
f'https://www.youtube.com/shorts/{video_id}',
ie=YoutubeIE, video_id=video_id,
**traverse_obj(renderer, ('overlayMetadata', {
'title': ('primaryText', 'content', {str}),
'view_count': ('secondaryText', 'content', {parse_count}),
})),
thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
return
def _video_entry(self, video_renderer):
video_id = video_renderer.get('videoId')

View File

@ -2919,6 +2919,7 @@ def mimetype2ext(mt, default=NO_DEFAULT):
'audio/webm': 'webm',
'audio/x-matroska': 'mka',
'audio/x-mpegurl': 'm3u',
'aacp': 'aac',
'midi': 'mid',
'ogg': 'ogg',
'wav': 'wav',