Compare commits

..

No commits in common. "f5ea47488a2c59b2520b4988b7eab4d8830e3077" and "258d88f3011a2226361c0642ff680840d49e8092" have entirely different histories.

12 changed files with 45 additions and 193 deletions

View File

@ -1207,7 +1207,7 @@ The field names themselves (the part inside the parenthesis) can also have some
1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s` 1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (Eg: 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted) 1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing), `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (Eg: 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)
1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC 1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC

View File

@ -895,7 +895,7 @@ class TestUtil(unittest.TestCase):
'dynamic_range': 'HDR10', 'dynamic_range': 'HDR10',
}) })
self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), { self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), {
'vcodec': 'av01.0.12M.10.0.110.09.16.09.0', 'vcodec': 'av01.0.12M.10',
'acodec': 'none', 'acodec': 'none',
'dynamic_range': 'HDR10', 'dynamic_range': 'HDR10',
}) })

View File

@ -86,7 +86,6 @@ from .utils import (
YoutubeDLRedirectHandler, YoutubeDLRedirectHandler,
age_restricted, age_restricted,
args_to_str, args_to_str,
bug_reports_message,
date_from_str, date_from_str,
determine_ext, determine_ext,
determine_protocol, determine_protocol,
@ -319,14 +318,9 @@ class YoutubeDL:
default_search: Prepend this string if an input url is not valid. default_search: Prepend this string if an input url is not valid.
'auto' for elaborate guessing 'auto' for elaborate guessing
encoding: Use this encoding instead of the system-specified. encoding: Use this encoding instead of the system-specified.
extract_flat: Whether to resolve and process url_results further extract_flat: Do not resolve URLs, return the immediate result.
* False: Always process (default) Pass in 'in_playlist' to only show this behavior for
* True: Never process playlist items.
* 'in_playlist': Do not process inside playlist/multi_video
* 'discard': Always process, but don't return the result
from inside playlist/multi_video
* 'discard_in_playlist': Same as "discard", but only for
playlists (not multi_video)
wait_for_video: If given, wait for scheduled streams to become available. wait_for_video: If given, wait for scheduled streams to become available.
The value should be a tuple containing the range The value should be a tuple containing the range
(min_secs, max_secs) to wait between retries (min_secs, max_secs) to wait between retries
@ -1500,7 +1494,6 @@ class YoutubeDL:
def __extract_info(self, url, ie, download, extra_info, process): def __extract_info(self, url, ie, download, extra_info, process):
ie_result = ie.extract(url) ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
return return
if isinstance(ie_result, list): if isinstance(ie_result, list):
# Backwards compatibility: old IE result format # Backwards compatibility: old IE result format
@ -1685,8 +1678,6 @@ class YoutubeDL:
def __process_playlist(self, ie_result, download): def __process_playlist(self, ie_result, download):
"""Process each entry in the playlist""" """Process each entry in the playlist"""
assert ie_result['_type'] in ('playlist', 'multi_video')
title = ie_result.get('title') or ie_result.get('id') or '<Untitled>' title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
self.to_screen(f'[download] Downloading playlist: {title}') self.to_screen(f'[download] Downloading playlist: {title}')
@ -1732,12 +1723,6 @@ class YoutubeDL:
self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos' self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
f'{format_field(ie_result, "playlist_count", " of %s")}') f'{format_field(ie_result, "playlist_count", " of %s")}')
keep_resolved_entries = self.params.get('extract_flat') != 'discard'
if self.params.get('extract_flat') == 'discard_in_playlist':
keep_resolved_entries = ie_result['_type'] != 'playlist'
if keep_resolved_entries:
self.write_debug('The information of all playlist entries will be held in memory')
failures = 0 failures = 0
max_failures = self.params.get('skip_playlist_after_errors') or float('inf') max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
for i, (playlist_index, entry) in enumerate(entries): for i, (playlist_index, entry) in enumerate(entries):
@ -1778,8 +1763,7 @@ class YoutubeDL:
self.report_error( self.report_error(
f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction') f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
break break
if keep_resolved_entries: resolved_entries[i] = (playlist_index, entry_result)
resolved_entries[i] = (playlist_index, entry_result)
# Update with processed data # Update with processed data
ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], []) ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
@ -3539,38 +3523,27 @@ class YoutubeDL:
] for f in formats if f.get('preference') is None or f['preference'] >= -1000] ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
def simplified_codec(f, field):
    """Return a short, display-friendly codec string for one format entry.

    f is a format dict and field is either 'acodec' or 'vcodec'.
    A real codec value is truncated to its first four dot-separated parts;
    a missing/empty value becomes 'unknown'; the literal 'none' is replaced
    by a placeholder that depends on the other codec field.
    """
    assert field in ('acodec', 'vcodec')
    raw = f.get(field, 'unknown')
    if not raw:
        return 'unknown'
    if raw != 'none':
        # Keep at most the first four dot-separated components
        pieces = raw.split('.')
        return '.'.join(pieces[:4])

    # From here on the requested codec is the literal 'none'
    wants_video = field == 'vcodec'
    if wants_video and f.get('acodec') == 'none':
        # Neither audio nor video
        return 'images'
    if field == 'acodec' and f.get('vcodec') == 'none':
        return ''
    if wants_video:
        label = 'audio only'
    else:
        label = 'video only'
    return self._format_out(label, self.Styles.SUPPRESS)
delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True) delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
table = [ table = [
[ [
self._format_out(format_field(f, 'format_id'), self.Styles.ID), self._format_out(format_field(f, 'format_id'), self.Styles.ID),
format_field(f, 'ext'), format_field(f, 'ext'),
format_field(f, func=self.format_resolution, ignore=('audio only', 'images')), format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
format_field(f, 'fps', '\t%d', func=round), format_field(f, 'fps', '\t%d'),
format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
delim, delim,
format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes), format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
format_field(f, 'tbr', '\t%dk', func=round), format_field(f, 'tbr', '\t%dk'),
shorten_protocol_name(f.get('protocol', '')), shorten_protocol_name(f.get('protocol', '')),
delim, delim,
simplified_codec(f, 'vcodec'), format_field(f, 'vcodec', default='unknown').replace(
format_field(f, 'vbr', '\t%dk', func=round), 'none', 'images' if f.get('acodec') == 'none'
simplified_codec(f, 'acodec'), else self._format_out('audio only', self.Styles.SUPPRESS)),
format_field(f, 'abr', '\t%dk', func=round), format_field(f, 'vbr', '\t%dk'),
format_field(f, 'acodec', default='unknown').replace(
'none', '' if f.get('vcodec') == 'none'
else self._format_out('video only', self.Styles.SUPPRESS)),
format_field(f, 'abr', '\t%dk'),
format_field(f, 'asr', '\t%s', func=format_decimal_suffix), format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
join_nonempty( join_nonempty(
self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,

View File

@ -688,21 +688,6 @@ def parse_options(argv=None):
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
)) ))
playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist']
write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson
and opts.allow_playlist_files and opts.outtmpl.get('pl_infojson') != '')
if not any((
opts.extract_flat,
opts.dump_single_json,
opts.forceprint.get('playlist'),
opts.print_to_file.get('playlist'),
write_playlist_infojson,
)):
if not playlist_pps:
opts.extract_flat = 'discard'
elif playlist_pps == [{'key': 'FFmpegConcat', 'only_multi_video': True, 'when': 'playlist'}]:
opts.extract_flat = 'discard_in_playlist'
final_ext = ( final_ext = (
opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS
else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS

View File

@ -450,7 +450,8 @@ class FileDownloader:
raise NotImplementedError('This method must be implemented by subclasses') raise NotImplementedError('This method must be implemented by subclasses')
def _hook_progress(self, status, info_dict): def _hook_progress(self, status, info_dict):
# Ideally we want to make a copy of the dict, but that is too slow if not self._progress_hooks:
return
status['info_dict'] = info_dict status['info_dict'] = info_dict
# youtube-dl passes the same status object to all the hooks. # youtube-dl passes the same status object to all the hooks.
# Some third party scripts seem to be relying on this. # Some third party scripts seem to be relying on this.

View File

@ -206,12 +206,6 @@ class HttpFD(FileDownloader):
except RESPONSE_READ_EXCEPTIONS as err: except RESPONSE_READ_EXCEPTIONS as err:
raise RetryDownload(err) raise RetryDownload(err)
def close_stream():
    """Drop the reference to ctx.stream, closing it unless the target is '-'."""
    if ctx.stream is None:
        return
    # A tmpfilename of '-' is left open on purpose; only other streams are closed
    if ctx.tmpfilename != '-':
        ctx.stream.close()
    ctx.stream = None
def download(): def download():
data_len = ctx.data.info().get('Content-length', None) data_len = ctx.data.info().get('Content-length', None)
@ -245,9 +239,12 @@ class HttpFD(FileDownloader):
before = start # start measuring before = start # start measuring
def retry(e): def retry(e):
close_stream() to_stdout = ctx.tmpfilename == '-'
ctx.resume_len = (byte_counter if ctx.tmpfilename == '-' if ctx.stream is not None:
else os.path.getsize(encodeFilename(ctx.tmpfilename))) if not to_stdout:
ctx.stream.close()
ctx.stream = None
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
raise RetryDownload(e) raise RetryDownload(e)
while True: while True:
@ -385,9 +382,6 @@ class HttpFD(FileDownloader):
continue continue
except SucceedDownload: except SucceedDownload:
return True return True
except: # noqa: E722
close_stream()
raise
self.report_error('giving up after %s retries' % retries) self.report_error('giving up after %s retries' % retries)
return False return False

View File

@ -948,7 +948,6 @@ from .mlb import (
) )
from .mlssoccer import MLSSoccerIE from .mlssoccer import MLSSoccerIE
from .mnet import MnetIE from .mnet import MnetIE
from .mocha import MochaVideoIE
from .moevideo import MoeVideoIE from .moevideo import MoeVideoIE
from .mofosex import ( from .mofosex import (
MofosexIE, MofosexIE,
@ -1671,7 +1670,6 @@ from .svt import (
SVTSeriesIE, SVTSeriesIE,
) )
from .swrmediathek import SWRMediathekIE from .swrmediathek import SWRMediathekIE
from .syvdk import SYVDKIE
from .syfy import SyfyIE from .syfy import SyfyIE
from .sztvhu import SztvHuIE from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE from .tagesschau import TagesschauIE

View File

@ -113,7 +113,7 @@ class CrunchyrollBaseIE(InfoExtractor):
class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE): class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE):
IE_NAME = 'crunchyroll' IE_NAME = 'crunchyroll'
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?!series/|watch/)(?:[^/]+/){1,2}[^/?&]*?)(?P<id>[0-9]+))(?:[/?&]|$)' _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'info_dict': { 'info_dict': {

View File

@ -1,66 +0,0 @@
from .common import InfoExtractor
from ..utils import int_or_none, traverse_obj
class MochaVideoIE(InfoExtractor):
    """Extractor for video pages on video.mocha.com.vn."""

    # The dots of the host name are escaped so they only match a literal '.'
    _VALID_URL = r'https?://video\.mocha\.com\.vn/(?P<video_slug>[\w-]+)'
    _TESTS = [{
        'url': 'http://video.mocha.com.vn/chuyen-meo-gia-su-tu-thong-diep-cuoc-song-v18694039',
        'info_dict': {
            'id': '18694039',
            'title': 'Chuyện mèo giả sư tử | Thông điệp cuộc sống',
            'ext': 'mp4',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'display_id': 'chuyen-meo-gia-su-tu-thong-diep-cuoc-song',
            'thumbnail': 'http://mcvideomd1fr.keeng.net/playnow/images/20220505/ad0a055d-2f69-42ca-b888-4790041fe6bc_640x480.jpg',
            'description': '',
            'duration': 70,
            'timestamp': 1652254203,
            'upload_date': '20220511',
            'comment_count': int,
            'categories': ['Kids']
        }
    }]

    def _real_extract(self, url):
        """Fetch the video detail JSON for the page and build the info dict.

        The slug matched from the URL is used as the download display id; the
        numeric id comes from the API response. Raises (via the indexing
        below) if the response lacks 'data'/'videoDetail'/'id'.
        """
        video_slug = self._match_valid_url(url).group('video_slug')
        json_data = self._download_json(
            'http://apivideo.mocha.com.vn:8081/onMediaBackendBiz/mochavideo/getVideoDetail',
            video_slug, query={'url': url, 'token': ''})['data']['videoDetail']
        video_id = str(json_data['id'])
        # 'list_resolution' entries are handled as dicts carrying a 'video_path';
        # 'original_path' is handled as a plain URL string. Either may be absent.
        video_urls = (json_data.get('list_resolution') or []) + [json_data.get('original_path')]

        formats, subtitles = [], {}
        for video in video_urls:
            if not video:
                # Missing 'original_path' (None) or an empty entry: nothing to extract
                continue
            if isinstance(video, str):
                formats.append({'url': video, 'ext': 'mp4'})
            elif isinstance(video, dict) and video.get('video_path'):
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    video['video_path'], video_id, ext='mp4')
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': json_data.get('slug') or video_slug,
            'title': json_data.get('name'),
            'formats': formats,
            'subtitles': subtitles,
            'description': json_data.get('description'),
            'duration': json_data.get('durationS'),
            'view_count': json_data.get('total_view'),
            'like_count': json_data.get('total_like'),
            'dislike_count': json_data.get('total_unlike'),
            'thumbnail': json_data.get('image_path_thumb'),
            # 'publish_time' is divided by 1000 before being used as the timestamp
            'timestamp': int_or_none(json_data.get('publish_time'), scale=1000),
            'is_live': json_data.get('isLive'),
            # NOTE(review): the '0' path element is a string; this presumably expects
            # 'channels' to be addressable by that key - confirm against a real response
            'channel': traverse_obj(json_data, ('channels', '0', 'name')),
            'channel_id': traverse_obj(json_data, ('channels', '0', 'id')),
            'channel_follower_count': traverse_obj(json_data, ('channels', '0', 'numfollow')),
            'categories': traverse_obj(json_data, ('categories', ..., 'categoryname')),
            'comment_count': json_data.get('total_comment'),
        }

View File

@ -1,33 +0,0 @@
from .common import InfoExtractor
from ..utils import traverse_obj
class SYVDKIE(InfoExtractor):
    """Extractor for episode pages on 24syv.dk."""

    _VALID_URL = r'https?://(?:www\.)?24syv\.dk/episode/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://24syv.dk/episode/isabella-arendt-stiller-op-for-de-konservative-2',
        'md5': '429ce5a423dd4b1e1d0bf3a569558089',
        'info_dict': {
            'id': '12215',
            'display_id': 'isabella-arendt-stiller-op-for-de-konservative-2',
            'ext': 'mp3',
            'title': 'Isabella Arendt stiller op for De Konservative',
            'description': 'md5:f5fa6a431813bf37284f3412ad7c6c06'
        }
    }]

    def _real_extract(self, url):
        """Download the episode page and read its details from the Next.js data."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        nextjs_data = self._search_nextjs_data(page, slug)
        # The first element of 'episodeDetails' holds everything used below
        episode = nextjs_data['props']['pageProps']['episodeDetails'][0]

        result = {
            'id': str(episode['id']),
            'vcodec': 'none',
            'ext': 'mp3',
        }
        result['url'] = episode['details']['enclosure']
        result['display_id'] = slug
        result['title'] = traverse_obj(episode, ('title', 'rendered'))
        result['description'] = traverse_obj(episode, ('details', 'post_title'))
        return result

View File

@ -38,9 +38,8 @@ class ModifyChaptersPP(FFmpegPostProcessor):
if not cuts: if not cuts:
return [], info return [], info
original_duration, info['duration'] = info.get('duration'), info['chapters'][-1]['end_time'] if self._duration_mismatch(real_duration, info.get('duration'), 1):
if self._duration_mismatch(real_duration, original_duration, 1): if not self._duration_mismatch(real_duration, info['chapters'][-1]['end_time']):
if not self._duration_mismatch(real_duration, info['duration']):
self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut') self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut')
return [], info return [], info
if not info.get('__real_download'): if not info.get('__real_download'):

View File

@ -3419,23 +3419,24 @@ def parse_codecs(codecs_str):
str.strip, codecs_str.strip().strip(',').split(',')))) str.strip, codecs_str.strip().strip(',').split(','))))
vcodec, acodec, scodec, hdr = None, None, None, None vcodec, acodec, scodec, hdr = None, None, None, None
for full_codec in split_codecs: for full_codec in split_codecs:
parts = re.sub(r'0+(?=\d)', '', full_codec).split('.') parts = full_codec.split('.')
if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', codec = parts[0].replace('0', '')
'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'): if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
if vcodec: 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
continue if not vcodec:
vcodec = full_codec vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
if parts[0] in ('dvh1', 'dvhe'): if codec in ('dvh1', 'dvhe'):
hdr = 'DV' hdr = 'DV'
elif parts[0] == 'av1' and traverse_obj(parts, 3) == '10': elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
hdr = 'HDR10' hdr = 'HDR10'
elif parts[:2] == ['vp9', '2']: elif full_codec.replace('0', '').startswith('vp9.2'):
hdr = 'HDR10' hdr = 'HDR10'
elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): if not acodec:
acodec = acodec or full_codec acodec = full_codec
elif parts[0] in ('stpp', 'wvtt'): elif codec in ('stpp', 'wvtt',):
scodec = scodec or full_codec if not scodec:
scodec = full_codec
else: else:
write_string(f'WARNING: Unknown codec {full_codec}\n') write_string(f'WARNING: Unknown codec {full_codec}\n')
if vcodec or acodec or scodec: if vcodec or acodec or scodec: