Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2024-11-14 21:23:05 +00:00)

Compare commits: 11 commits, 34baa9fdf0...47b8bf207b
Commits compared (SHA1):

* 47b8bf207b
* 4628a3aa75
* 5b4bb715e6
* 1235d333ab
* 18e4940825
* c0b6e5c74d
* 727029c508
* 5c3895fff1
* fd2ad7cb24
* 4a3175fc4c
* 5cf34021f5
README.md

@@ -144,6 +144,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
 * Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
 * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
 * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
+* youtube-dl tries to remove some superfluous punctuation from filenames. While this can sometimes be helpful, it is often undesirable. So yt-dlp tries to keep the fields in the filenames as close to their original values as possible. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
 
 For ease of use, a few more compat options are available:
 * `--compat-options all`: Use all compat options
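The compat options documented in the hunk above can also be supplied through the embedding API. A minimal sketch, assuming the standard `yt_dlp.YoutubeDL` entry point; the `compat_opts` key mirrors `--compat-options` on the command line, and the URL is only illustrative:

```python
import yt_dlp

# Revert to youtube-dl style filename sanitization
# (equivalent to `--compat-options filename-sanitization` on the CLI)
ydl_opts = {
    'compat_opts': ['filename-sanitization'],
    'outtmpl': '%(title)s [%(id)s].%(ext)s',
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```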
@@ -1655,7 +1656,7 @@ Some extractors accept additional arguments which can be passed using `--extract
 The following extractors use this feature:
 
 #### youtube
-* `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
+* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and auto-translated subtitles respectively
 * `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients, and `default` for the default clients.
 * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
 * `include_live_dash`: Include live dash formats even without `--live-from-start` (These formats don't download properly)
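For readers using the Python API rather than the CLI, a minimal sketch of how these extractor arguments are typically passed; the `extractor_args` option mirrors `--extractor-args`, and the chosen values are only illustrative:

```python
import yt_dlp

ydl_opts = {
    'extractor_args': {
        'youtube': {
            # Skip auto-translated subtitles, per the new `skip` value above
            'skip': ['translated_subs'],
            # Explicitly request the documented default clients
            'player_client': ['android', 'web'],
        },
    },
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
    print(info.get('title'))
```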
test/test_utils.py

@@ -160,10 +160,12 @@ class TestUtil(unittest.TestCase):
             sanitize_filename('New World record at 0:12:34'),
             'New World record at 0_12_34')
 
-        self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
+        self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf')
         self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
-        self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf')
+        self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf')
+        self.assertEqual(sanitize_filename('.gasdgf'), '.gasdgf')
         self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
+        self.assertEqual(sanitize_filename('.gasdgf', is_id=False), 'gasdgf')
 
         forbidden = '"\0\\/'
         for fc in forbidden:
yt_dlp/YoutubeDL.py

@@ -87,6 +87,7 @@ from .utils import (
     MaxDownloadsReached,
     merge_headers,
     network_exceptions,
+    NO_DEFAULT,
     number_of_digits,
     orderedSet,
     OUTTMPL_TYPES,
@@ -1150,8 +1151,10 @@ class YoutubeDL(object):
         na = self.params.get('outtmpl_na_placeholder', 'NA')
 
         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
-            return sanitize_filename(str(value), restricted=restricted,
-                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))
+            return sanitize_filename(str(value), restricted=restricted, is_id=(
+                bool(re.search(r'(^|[_.])id(\.|$)', key))
+                if 'filename-sanitization' in self.params.get('compat_opts', [])
+                else NO_DEFAULT))
 
         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
         sanitize = bool(sanitize)
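As an aside, a standalone sketch (function name hypothetical, regex copied from the hunk above) of how output-template field names are classified as IDs: the pattern matches a literal `id` component that is preceded by the start of the key or `_`/`.` and followed by `.` or the end of the key.

```python
import re

def is_id_field(key):
    # Same pattern as in the diff above
    return bool(re.search(r'(^|[_.])id(\.|$)', key))

for key in ('id', 'playlist_id', 'info.id', 'uploader_id', 'title', 'width'):
    print(f'{key}: {is_id_field(key)}')
# id: True, playlist_id: True, info.id: True, uploader_id: True, title: False, width: False
```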
@@ -2456,6 +2459,11 @@ class YoutubeDL(object):
         info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
         if not self.params.get('allow_unplayable_formats'):
             formats = [f for f in formats if not f.get('has_drm')]
+            if info_dict['__has_drm'] and all(
+                    f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
+                self.report_warning(
+                    'This video is DRM protected and only images are available for download. '
+                    'Use --list-formats to see them')
 
         get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
         if not get_from_start:
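A small illustrative sketch (the sample format dicts are invented) of the condition the added lines check: after DRM-protected formats are dropped, warn when everything that remains is a storyboard/image format with neither an audio nor a video codec.

```python
formats = [
    {'format_id': 'sb0', 'acodec': 'none', 'vcodec': 'none'},        # storyboard images
    {'format_id': '18', 'acodec': 'mp4a.40.2', 'vcodec': 'avc1.42001E',
     'has_drm': True},                                               # DRM-protected A/V
]

has_drm = any(f.get('has_drm') for f in formats)
formats = [f for f in formats if not f.get('has_drm')]

if has_drm and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
    print('This video is DRM protected and only images are available for download. '
          'Use --list-formats to see them')
```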
@@ -2628,8 +2636,9 @@ class YoutubeDL(object):
 
         if not formats_to_download:
             if not self.params.get('ignore_no_formats_error'):
-                raise ExtractorError('Requested format is not available', expected=True,
-                                     video_id=info_dict['id'], ie=info_dict['extractor'])
+                raise ExtractorError(
+                    'Requested format is not available. Use --list-formats for a list of available formats',
+                    expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
             self.report_warning('Requested format is not available')
             # Process what we can, even without any available formats.
             formats_to_download = [{}]
yt_dlp/extractor/banbye.py (new file, 153 lines)

@@ -0,0 +1,153 @@
# coding: utf-8
from __future__ import unicode_literals

import math

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse_urlparse,
    compat_parse_qs,
)
from ..utils import (
    format_field,
    InAdvancePagedList,
    traverse_obj,
    unified_timestamp,
)


class BanByeBaseIE(InfoExtractor):
    _API_BASE = 'https://api.banbye.com'
    _CDN_BASE = 'https://cdn.banbye.com'
    _VIDEO_BASE = 'https://banbye.com/watch'

    @staticmethod
    def _extract_playlist_id(url, param='playlist'):
        return compat_parse_qs(
            compat_urllib_parse_urlparse(url).query).get(param, [None])[0]

    def _extract_playlist(self, playlist_id):
        data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id)
        return self.playlist_result([
            self.url_result(f'{self._VIDEO_BASE}/{video_id}', BanByeIE)
            for video_id in data['videoIds']], playlist_id, data.get('name'))


class BanByeIE(BanByeBaseIE):
    _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
        'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
        'info_dict': {
            'id': 'v_ytfmvkVYLE8T',
            'ext': 'mp4',
            'title': 'md5:5ec098f88a0d796f987648de6322ba0f',
            'description': 'md5:4d94836e73396bc18ef1fa0f43e5a63a',
            'uploader': 'wRealu24',
            'channel_id': 'ch_wrealu24',
            'channel_url': 'https://banbye.com/channel/ch_wrealu24',
            'timestamp': 1647604800,
            'upload_date': '20220318',
            'duration': 1931,
            'thumbnail': r're:https?://.*\.webp',
            'tags': 'count:5',
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        'url': 'https://banbye.com/watch/v_2JjQtqjKUE_F?playlistId=p_Ld82N6gBw_OJ',
        'info_dict': {
            'title': 'Krzysztof Karoń',
            'id': 'p_Ld82N6gBw_OJ',
        },
        'playlist_count': 9,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        playlist_id = self._extract_playlist_id(url, 'playlistId')

        if self._yes_playlist(playlist_id, video_id):
            return self._extract_playlist(playlist_id)

        data = self._download_json(f'{self._API_BASE}/videos/{video_id}', video_id)
        thumbnails = [{
            'id': f'{quality}p',
            'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp',
        } for quality in [48, 96, 144, 240, 512, 1080]]
        formats = [{
            'format_id': f'http-{quality}p',
            'quality': quality,
            'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
        } for quality in data['quality']]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': data.get('title'),
            'description': data.get('desc'),
            'uploader': traverse_obj(data, ('channel', 'name')),
            'channel_id': data.get('channelId'),
            'channel_url': format_field(data, 'channelId', 'https://banbye.com/channel/%s'),
            'timestamp': unified_timestamp(data.get('publishedAt')),
            'duration': data.get('duration'),
            'tags': data.get('tags'),
            'formats': formats,
            'thumbnails': thumbnails,
            'like_count': data.get('likes'),
            'dislike_count': data.get('dislikes'),
            'view_count': data.get('views'),
            'comment_count': data.get('commentCount'),
        }


class BanByeChannelIE(BanByeBaseIE):
    _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?channel/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://banbye.com/channel/ch_wrealu24',
        'info_dict': {
            'title': 'wRealu24',
            'id': 'ch_wrealu24',
            'description': 'md5:da54e48416b74dfdde20a04867c0c2f6',
        },
        'playlist_mincount': 791,
    }, {
        'url': 'https://banbye.com/channel/ch_wrealu24?playlist=p_Ld82N6gBw_OJ',
        'info_dict': {
            'title': 'Krzysztof Karoń',
            'id': 'p_Ld82N6gBw_OJ',
        },
        'playlist_count': 9,
    }]
    _PAGE_SIZE = 100

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        playlist_id = self._extract_playlist_id(url)

        if playlist_id:
            return self._extract_playlist(playlist_id)

        def page_func(page_num):
            data = self._download_json(f'{self._API_BASE}/videos', channel_id, query={
                'channelId': channel_id,
                'sort': 'new',
                'limit': self._PAGE_SIZE,
                'offset': page_num * self._PAGE_SIZE,
            }, note=f'Downloading page {page_num+1}')
            return [
                self.url_result(f"{self._VIDEO_BASE}/{video['_id']}", BanByeIE)
                for video in data['items']
            ]

        channel_data = self._download_json(f'{self._API_BASE}/channels/{channel_id}', channel_id)
        entries = InAdvancePagedList(
            page_func,
            math.ceil(channel_data['videoCount'] / self._PAGE_SIZE),
            self._PAGE_SIZE)

        return self.playlist_result(
            entries, channel_id, channel_data.get('name'), channel_data.get('description'))
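Once registered (see the extractors.py hunk below), an extractor like this is exercised through the ordinary yt-dlp entry points. A minimal usage sketch; the URL is the one from the extractor's own test case:

```python
import yt_dlp

with yt_dlp.YoutubeDL() as ydl:
    info = ydl.extract_info('https://banbye.com/watch/v_ytfmvkVYLE8T', download=False)
    print(info['id'], info.get('uploader'), info.get('duration'))
```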
yt_dlp/extractor/extractors.py

@@ -122,6 +122,10 @@ from .awaan import (
 )
 from .azmedien import AZMedienIE
 from .baidu import BaiduVideoIE
+from .banbye import (
+    BanByeIE,
+    BanByeChannelIE,
+)
 from .bandaichannel import BandaiChannelIE
 from .bandcamp import (
     BandcampIE,
@@ -674,6 +678,12 @@ from .iqiyi import (
     IqIE,
     IqAlbumIE
 )
+
+from .itprotv import (
+    ITProTVIE,
+    ITProTVCourseIE
+)
+
 from .itv import (
     ITVIE,
     ITVBTCCIE,
yt_dlp/extractor/go.py

@@ -217,6 +217,7 @@ class GoIE(AdobePassIE):
         title = video_data['title']
 
         formats = []
+        subtitles = {}
         for asset in video_data.get('assets', {}).get('asset', []):
             asset_url = asset.get('value')
             if not asset_url:
@@ -256,8 +257,10 @@ class GoIE(AdobePassIE):
                     error_message = ', '.join([error['message'] for error in errors])
                     raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
                 asset_url += '?' + entitlement['uplynkData']['sessionKey']
-                formats.extend(self._extract_m3u8_formats(
-                    asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                    asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)
+                formats.extend(fmts)
+                self._merge_subtitles(subs, target=subtitles)
             else:
                 f = {
                     'format_id': format_id,
@@ -281,7 +284,6 @@ class GoIE(AdobePassIE):
                 formats.append(f)
         self._sort_formats(formats)
 
-        subtitles = {}
         for cc in video_data.get('closedcaption', {}).get('src', []):
             cc_url = cc.get('value')
             if not cc_url:
yt_dlp/extractor/itprotv.py (new file, 141 lines)

@@ -0,0 +1,141 @@
# coding: utf-8

import re

from .common import InfoExtractor

from ..utils import (
    int_or_none,
    str_or_none,
    traverse_obj,
    urljoin
)


class ITProTVBaseIE(InfoExtractor):
    _ENDPOINTS = {
        'course': 'course?url={}&brand=00002560-0000-3fa9-0000-1d61000035f3',
        'episode': 'brand/00002560-0000-3fa9-0000-1d61000035f3/episode?url={}'
    }

    def _call_api(self, ep, item_id, webpage):
        return self._download_json(
            f'https://api.itpro.tv/api/urza/v3/consumer-web/{self._ENDPOINTS[ep].format(item_id)}',
            item_id, note=f'Fetching {ep} data API',
            headers={'Authorization': f'Bearer {self._fetch_jwt(webpage)}'})[ep]

    def _fetch_jwt(self, webpage):
        return self._search_regex(r'{"passedToken":"([\w-]+\.[\w-]+\.[\w-]+)",', webpage, 'jwt')

    def _check_if_logged_in(self, webpage):
        if re.match(r'{\s*member\s*:\s*null', webpage):
            self.raise_login_required()


class ITProTVIE(ITProTVBaseIE):
    _VALID_URL = r'https://app.itpro.tv/course/(?P<course>[\w-]+)/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://app.itpro.tv/course/guided-tour/introductionitprotv',
        'md5': 'bca4a28c2667fd1a63052e71a94bb88c',
        'info_dict': {
            'id': 'introductionitprotv',
            'ext': 'mp4',
            'title': 'An Introduction to ITProTV 101',
            'thumbnail': 'https://itprotv-image-bucket.s3.amazonaws.com/getting-started/itprotv-101-introduction-PGM.11_39_56_02.Still001.png',
            'description': 'md5:b175c2c3061ce35a4dd33865b2c1da4e',
            'duration': 269,
            'series': 'ITProTV 101',
            'series_id': 'guided-tour',
            'availability': 'needs_auth',
            'chapter': 'ITProTV 101',
            'chapter_number': 1,
            'chapter_id': '5dbb3de426b46c0010b5d1b6'
        },
    },
    {
        'url': 'https://app.itpro.tv/course/beyond-tech/job-interview-tips',
        'md5': '101a299b98c47ccf4c67f9f0951defa8',
        'info_dict': {
            'id': 'job-interview-tips',
            'ext': 'mp4',
            'title': 'Job Interview Tips',
            'thumbnail': 'https://s3.amazonaws.com:443/production-itprotv-thumbnails/2f370bf5-294d-4bbe-ab80-c0b5781630ea.png',
            'description': 'md5:30d8ba483febdf89ec85623aad3c3cb6',
            'duration': 267,
            'series': 'Beyond Tech',
            'series_id': 'beyond-tech',
            'availability': 'needs_auth',
            'chapter': 'Job Development',
            'chapter_number': 2,
            'chapter_id': '5f7c78d424330c000edf04d9'
        },
    }]

    def _real_extract(self, url):
        episode_id, course_name = self._match_valid_url(url).group('id', 'course')
        webpage = self._download_webpage(url, episode_id)
        self._check_if_logged_in(webpage)
        course = self._call_api('course', course_name, webpage)
        episode = self._call_api('episode', episode_id, webpage)

        chapter_number, chapter = next((
            (i, topic) for i, topic in enumerate(course.get('topics') or [], 1)
            if traverse_obj(topic, 'id') == episode.get('topic')), {})

        return {
            'id': episode_id,
            'title': episode.get('title'),
            'description': episode.get('description'),
            'thumbnail': episode.get('thumbnail'),
            'formats': [
                {'url': episode[f'jwVideo{h}Embed'], 'height': h}
                for h in (320, 480, 720, 1080) if episode.get(f'jwVideo{h}Embed')
            ],
            'duration': int_or_none(episode.get('length')),
            'series': course.get('name'),
            'series_id': course.get('url'),
            'chapter': str_or_none(chapter.get('title')),
            'chapter_number': chapter_number,
            'chapter_id': str_or_none(chapter.get('id')),
            'subtitles': {
                'en': [{'ext': 'vtt', 'data': episode['enCaptionData']}]
            } if episode.get('enCaptionData') else None,
        }


class ITProTVCourseIE(ITProTVBaseIE):
    _VALID_URL = r'https?://app.itpro.tv/course/(?P<id>[\w-]+)/?(?:$|[#?])'
    _TESTS = [
        {
            'url': 'https://app.itpro.tv/course/guided-tour',
            'info_dict': {
                'id': 'guided-tour',
                'description': 'md5:b175c2c3061ce35a4dd33865b2c1da4e',
                'title': 'ITProTV 101',
            },
            'playlist_count': 6
        },
        {
            'url': 'https://app.itpro.tv/course/beyond-tech',
            'info_dict': {
                'id': 'beyond-tech',
                'description': 'md5:44cd99855e7f81a15ce1269bd0621fed',
                'title': 'Beyond Tech'
            },
            'playlist_count': 15
        },
    ]

    def _real_extract(self, url):
        course_id = self._match_id(url)
        webpage = self._download_webpage(url, course_id)
        self._check_if_logged_in(webpage)
        course = self._call_api('course', course_id, webpage)

        entries = [self.url_result(
            urljoin(url, f'{course_id}/{episode["url"]}'), ITProTVIE,
            episode['url'], episode.get('title'), url_transparent=True)
            for episode in course['episodes']]

        return self.playlist_result(
            entries, course_id, course.get('name'), course.get('description'))
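A small standalone sketch (illustrative only; the helper name is made up, while the endpoint templates, brand UUID and API base are taken verbatim from the file above) of how the `_ENDPOINTS` templates expand into the URLs that `_call_api` requests:

```python
ENDPOINTS = {
    'course': 'course?url={}&brand=00002560-0000-3fa9-0000-1d61000035f3',
    'episode': 'brand/00002560-0000-3fa9-0000-1d61000035f3/episode?url={}',
}

def api_url(ep, item_id):
    # Mirrors the f-string used by ITProTVBaseIE._call_api
    return f'https://api.itpro.tv/api/urza/v3/consumer-web/{ENDPOINTS[ep].format(item_id)}'

print(api_url('course', 'guided-tour'))
print(api_url('episode', 'introductionitprotv'))
```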
yt_dlp/extractor/viu.py

@@ -88,10 +88,9 @@ class ViuIE(ViuBaseIE):
         #     r'(/hlsc_)[a-z]+(\d+\.m3u8)',
         #     r'\1whe\2', video_data['href'])
         m3u8_url = video_data['href']
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
         self._sort_formats(formats)
 
-        subtitles = {}
         for key, value in video_data.items():
             mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
             if not mobj:
yt_dlp/extractor/youtube.py

@@ -818,12 +818,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
         badges = self._extract_badges(renderer)
         thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
+        navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
+            renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), expected_type=str))
+        url = f'https://www.youtube.com/watch?v={video_id}'
+        if overlay_style == 'SHORTS' or (navigation_url and '/shorts/' in navigation_url):
+            url = f'https://www.youtube.com/shorts/{video_id}'
 
         return {
             '_type': 'url',
             'ie_key': YoutubeIE.ie_key(),
             'id': video_id,
-            'url': f'https://www.youtube.com/watch?v={video_id}',
+            'url': url,
             'title': title,
             'description': description,
             'duration': duration,
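Condensed into a standalone sketch (function name and sample IDs hypothetical, logic copied from the added lines above): a renderer is routed to a /shorts/ URL when either the thumbnail overlay style or the navigation endpoint indicates a Short.

```python
def watch_url(video_id, overlay_style=None, navigation_url=None):
    # overlay_style comes from thumbnailOverlayTimeStatusRenderer.style,
    # navigation_url from the renderer's navigationEndpoint metadata
    if overlay_style == 'SHORTS' or (navigation_url and '/shorts/' in navigation_url):
        return f'https://www.youtube.com/shorts/{video_id}'
    return f'https://www.youtube.com/watch?v={video_id}'

print(watch_url('abc123xyz00'))
print(watch_url('abc123xyz00', navigation_url='/shorts/abc123xyz00'))
```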
@@ -3018,7 +3023,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
 
         for fmt in streaming_formats:
-            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
+            if fmt.get('targetDurationSec'):
                 continue
 
             itag = str_or_none(fmt.get('itag'))
@@ -3100,6 +3105,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'fps': int_or_none(fmt.get('fps')) or None,
                 'height': height,
                 'quality': q(quality),
+                'has_drm': bool(fmt.get('drmFamilies')),
                 'tbr': tbr,
                 'url': fmt_url,
                 'width': int_or_none(fmt.get('width')),
@@ -3473,6 +3479,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             subtitles, automatic_captions = {}, {}
             for lang_code, caption_track in captions.items():
                 base_url = caption_track.get('baseUrl')
+                orig_lang = parse_qs(base_url).get('lang', [None])[-1]
                 if not base_url:
                     continue
                 lang_name = self._get_text(caption_track, 'name', max_runs=1)
@@ -3486,19 +3493,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 for trans_code, trans_name in translation_languages.items():
                     if not trans_code:
                         continue
+                    orig_trans_code = trans_code
                     if caption_track.get('kind') != 'asr':
+                        if 'translated_subs' in self._configuration_arg('skip'):
+                            continue
                         trans_code += f'-{lang_code}'
                         trans_name += format_field(lang_name, template=' from %s')
                     # Add an "-orig" label to the original language so that it can be distinguished.
                     # The subs are returned without "-orig" as well for compatibility
-                    if lang_code == f'a-{trans_code}':
+                    if lang_code == f'a-{orig_trans_code}':
                         process_language(
                             automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                     # Setting tlang=lang returns damaged subtitles.
-                    # Not using lang_code == f'a-{trans_code}' here for future-proofing
-                    orig_lang = parse_qs(base_url).get('lang', [None])[-1]
                     process_language(automatic_captions, base_url, trans_code, trans_name,
-                                     {} if orig_lang == trans_code else {'tlang': trans_code})
+                                     {} if orig_lang == orig_trans_code else {'tlang': trans_code})
             info['automatic_captions'] = automatic_captions
             info['subtitles'] = subtitles
 
yt_dlp/options.py

@@ -338,7 +338,7 @@ def create_parser():
         action='callback', callback=_set_from_options_callback,
         callback_kwargs={
             'allowed_values': {
-                'filename', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
+                'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
                 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
                 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', 'embed-metadata',
                 'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
yt_dlp/postprocessor/ffmpeg.py

@@ -553,9 +553,9 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
 
     @staticmethod
    def _options(target_ext):
+        yield from FFmpegPostProcessor.stream_copy_opts(False)
         if target_ext == 'avi':
-            return ['-c:v', 'libxvid', '-vtag', 'XVID']
+            yield from ('-c:v', 'libxvid', '-vtag', 'XVID')
-        return []
 
     @PostProcessor._restrict_to(images=False)
     def run(self, info):
@@ -1129,6 +1129,8 @@ class FFmpegConcatPP(FFmpegPostProcessor):
         super().__init__(downloader)
 
     def concat_files(self, in_files, out_file):
+        if not self._downloader._ensure_dir_exists(out_file):
+            return
         if len(in_files) == 1:
             if os.path.realpath(in_files[0]) != os.path.realpath(out_file):
                 self.to_screen(f'Moving "{in_files[0]}" to "{out_file}"')
yt_dlp/utils.py

@@ -705,36 +705,40 @@ def timeconvert(timestr):
     return timestamp
 
 
-def sanitize_filename(s, restricted=False, is_id=False):
+def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
     """Sanitizes a string so it could be used as part of a filename.
-    If restricted is set, use a stricter subset of allowed characters.
-    Set is_id if this is not an arbitrary string, but an ID that should be kept
-    if possible.
+    @param restricted  Use a stricter subset of allowed characters
+    @param is_id       Whether this is an ID that should be kept unchanged if possible.
+                       If unset, yt-dlp's new sanitization rules are in effect
     """
+    if s == '':
+        return ''
+
     def replace_insane(char):
         if restricted and char in ACCENT_CHARS:
             return ACCENT_CHARS[char]
         elif not restricted and char == '\n':
-            return ' '
+            return '\0 '
         elif char == '?' or ord(char) < 32 or ord(char) == 127:
             return ''
         elif char == '"':
             return '' if restricted else '\''
         elif char == ':':
-            return '_-' if restricted else ' -'
+            return '\0_\0-' if restricted else '\0 \0-'
         elif char in '\\/|*<>':
-            return '_'
-        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
-            return '_'
-        if restricted and ord(char) > 127:
-            return '_'
+            return '\0_'
+        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
+            return '\0_'
         return char
 
-    if s == '':
-        return ''
-    # Handle timestamps
-    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
+    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
     result = ''.join(map(replace_insane, s))
+    if is_id is NO_DEFAULT:
+        result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result)  # Remove repeated substitute chars
+        STRIP_RE = '(?:\0.|[ _-])*'
+        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
+        result = result.replace('\0', '') or '_'
+
     if not is_id:
         while '__' in result:
             result = result.replace('__', '_')
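To make the behavioral change concrete, a small interpreter-style sketch; the expected outputs are taken from the updated tests in test/test_utils.py above, assuming `yt_dlp.utils` is importable:

```python
from yt_dlp.utils import sanitize_filename

# New default (is_id left unset): leading punctuation is preserved
print(sanitize_filename('--gasdgf'))                # '--gasdgf'
print(sanitize_filename('.gasdgf'))                 # '.gasdgf'

# Explicit is_id=False restores the old youtube-dl style behaviour,
# which is what `--compat-options filename-sanitization` selects
print(sanitize_filename('--gasdgf', is_id=False))   # '_-gasdgf'
print(sanitize_filename('.gasdgf', is_id=False))    # 'gasdgf'

# IDs are always kept as close to the original as possible
print(sanitize_filename('--gasdgf', is_id=True))    # '--gasdgf'
```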