Compare commits

...

10 Commits

Author SHA1 Message Date
pukkandan
f5ea47488a
[cleanup] Minor fixes 2022-07-11 02:24:36 +05:30
pukkandan
134c913cca
Discard info_dict from memory if no longer needed
Closes #1399
2022-07-11 02:14:23 +05:30
pukkandan
56b5b832bf
[extractor/crunchyroll] Improve _VALID_URL
<http://www.crunchyroll.com/series/GR24PVM76/nichijou-my-ordinary-life>
should be handled by Generic

Closes #4322
2022-07-11 01:13:32 +05:30
pukkandan
cb794ee010
Do not allow extractors to return None 2022-07-11 01:13:31 +05:30
pukkandan
6d645b5577
[http] Ensure the file handle is always closed
Closes #4323
2022-07-11 01:13:29 +05:30
pukkandan
563e0bf82a
Fix rounding of integers in format table 2022-07-11 01:10:38 +05:30
pukkandan
d816f61fbf
[utils, cleanup] Refactor parse_codecs 2022-07-11 01:10:38 +05:30
pukkandan
4019bf0525
[ModifyChapters] Modify duration in infodict 2022-07-11 01:10:38 +05:30
HobbyistDev
65ea4cba29
[extractor/mocha] Add extractor (#4213)
Closes https://github.com/yt-dlp/yt-dlp/issues/3752
Authored by: HobbyistDev
2022-07-11 01:02:12 +05:30
Misael Aguayo
17a23f0930
[extractor/syvdk] Add extractor (#4250)
Closes https://github.com/yt-dlp/yt-dlp/issues/4077
Authored by: misaelaguayo
2022-07-11 00:52:30 +05:30
12 changed files with 193 additions and 45 deletions

View File

@@ -1207,7 +1207,7 @@ The field names themselves (the part inside the parenthesis) can also have some
1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s` 1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing), `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (Eg: 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted) 1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (Eg: 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)
1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC 1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC

View File

@@ -895,7 +895,7 @@ class TestUtil(unittest.TestCase):
'dynamic_range': 'HDR10', 'dynamic_range': 'HDR10',
}) })
self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), { self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), {
'vcodec': 'av01.0.12M.10', 'vcodec': 'av01.0.12M.10.0.110.09.16.09.0',
'acodec': 'none', 'acodec': 'none',
'dynamic_range': 'HDR10', 'dynamic_range': 'HDR10',
}) })

View File

@@ -86,6 +86,7 @@ from .utils import (
YoutubeDLRedirectHandler, YoutubeDLRedirectHandler,
age_restricted, age_restricted,
args_to_str, args_to_str,
bug_reports_message,
date_from_str, date_from_str,
determine_ext, determine_ext,
determine_protocol, determine_protocol,
@@ -318,9 +319,14 @@ class YoutubeDL:
default_search: Prepend this string if an input url is not valid. default_search: Prepend this string if an input url is not valid.
'auto' for elaborate guessing 'auto' for elaborate guessing
encoding: Use this encoding instead of the system-specified. encoding: Use this encoding instead of the system-specified.
extract_flat: Do not resolve URLs, return the immediate result. extract_flat: Whether to resolve and process url_results further
Pass in 'in_playlist' to only show this behavior for * False: Always process (default)
playlist items. * True: Never process
* 'in_playlist': Do not process inside playlist/multi_video
* 'discard': Always process, but don't return the result
from inside playlist/multi_video
* 'discard_in_playlist': Same as "discard", but only for
playlists (not multi_video)
wait_for_video: If given, wait for scheduled streams to become available. wait_for_video: If given, wait for scheduled streams to become available.
The value should be a tuple containing the range The value should be a tuple containing the range
(min_secs, max_secs) to wait between retries (min_secs, max_secs) to wait between retries
@@ -1494,6 +1500,7 @@ class YoutubeDL:
def __extract_info(self, url, ie, download, extra_info, process): def __extract_info(self, url, ie, download, extra_info, process):
ie_result = ie.extract(url) ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
return return
if isinstance(ie_result, list): if isinstance(ie_result, list):
# Backwards compatibility: old IE result format # Backwards compatibility: old IE result format
@@ -1678,6 +1685,8 @@ class YoutubeDL:
def __process_playlist(self, ie_result, download): def __process_playlist(self, ie_result, download):
"""Process each entry in the playlist""" """Process each entry in the playlist"""
assert ie_result['_type'] in ('playlist', 'multi_video')
title = ie_result.get('title') or ie_result.get('id') or '<Untitled>' title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
self.to_screen(f'[download] Downloading playlist: {title}') self.to_screen(f'[download] Downloading playlist: {title}')
@@ -1723,6 +1732,12 @@ class YoutubeDL:
self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos' self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
f'{format_field(ie_result, "playlist_count", " of %s")}') f'{format_field(ie_result, "playlist_count", " of %s")}')
keep_resolved_entries = self.params.get('extract_flat') != 'discard'
if self.params.get('extract_flat') == 'discard_in_playlist':
keep_resolved_entries = ie_result['_type'] != 'playlist'
if keep_resolved_entries:
self.write_debug('The information of all playlist entries will be held in memory')
failures = 0 failures = 0
max_failures = self.params.get('skip_playlist_after_errors') or float('inf') max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
for i, (playlist_index, entry) in enumerate(entries): for i, (playlist_index, entry) in enumerate(entries):
@@ -1763,6 +1778,7 @@ class YoutubeDL:
self.report_error( self.report_error(
f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction') f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
break break
if keep_resolved_entries:
resolved_entries[i] = (playlist_index, entry_result) resolved_entries[i] = (playlist_index, entry_result)
# Update with processed data # Update with processed data
@@ -3523,27 +3539,38 @@ class YoutubeDL:
] for f in formats if f.get('preference') is None or f['preference'] >= -1000] ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
def simplified_codec(f, field):
assert field in ('acodec', 'vcodec')
codec = f.get(field, 'unknown')
if not codec:
return 'unknown'
elif codec != 'none':
return '.'.join(codec.split('.')[:4])
if field == 'vcodec' and f.get('acodec') == 'none':
return 'images'
elif field == 'acodec' and f.get('vcodec') == 'none':
return ''
return self._format_out('audio only' if field == 'vcodec' else 'video only',
self.Styles.SUPPRESS)
delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True) delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
table = [ table = [
[ [
self._format_out(format_field(f, 'format_id'), self.Styles.ID), self._format_out(format_field(f, 'format_id'), self.Styles.ID),
format_field(f, 'ext'), format_field(f, 'ext'),
format_field(f, func=self.format_resolution, ignore=('audio only', 'images')), format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
format_field(f, 'fps', '\t%d'), format_field(f, 'fps', '\t%d', func=round),
format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
delim, delim,
format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes), format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
format_field(f, 'tbr', '\t%dk'), format_field(f, 'tbr', '\t%dk', func=round),
shorten_protocol_name(f.get('protocol', '')), shorten_protocol_name(f.get('protocol', '')),
delim, delim,
format_field(f, 'vcodec', default='unknown').replace( simplified_codec(f, 'vcodec'),
'none', 'images' if f.get('acodec') == 'none' format_field(f, 'vbr', '\t%dk', func=round),
else self._format_out('audio only', self.Styles.SUPPRESS)), simplified_codec(f, 'acodec'),
format_field(f, 'vbr', '\t%dk'), format_field(f, 'abr', '\t%dk', func=round),
format_field(f, 'acodec', default='unknown').replace(
'none', '' if f.get('vcodec') == 'none'
else self._format_out('video only', self.Styles.SUPPRESS)),
format_field(f, 'abr', '\t%dk'),
format_field(f, 'asr', '\t%s', func=format_decimal_suffix), format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
join_nonempty( join_nonempty(
self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,

View File

@@ -688,6 +688,21 @@ def parse_options(argv=None):
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
)) ))
playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist']
write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson
and opts.allow_playlist_files and opts.outtmpl.get('pl_infojson') != '')
if not any((
opts.extract_flat,
opts.dump_single_json,
opts.forceprint.get('playlist'),
opts.print_to_file.get('playlist'),
write_playlist_infojson,
)):
if not playlist_pps:
opts.extract_flat = 'discard'
elif playlist_pps == [{'key': 'FFmpegConcat', 'only_multi_video': True, 'when': 'playlist'}]:
opts.extract_flat = 'discard_in_playlist'
final_ext = ( final_ext = (
opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS
else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS

View File

@@ -450,8 +450,7 @@ class FileDownloader:
raise NotImplementedError('This method must be implemented by subclasses') raise NotImplementedError('This method must be implemented by subclasses')
def _hook_progress(self, status, info_dict): def _hook_progress(self, status, info_dict):
if not self._progress_hooks: # Ideally we want to make a copy of the dict, but that is too slow
return
status['info_dict'] = info_dict status['info_dict'] = info_dict
# youtube-dl passes the same status object to all the hooks. # youtube-dl passes the same status object to all the hooks.
# Some third party scripts seems to be relying on this. # Some third party scripts seems to be relying on this.

View File

@@ -206,6 +206,12 @@ class HttpFD(FileDownloader):
except RESPONSE_READ_EXCEPTIONS as err: except RESPONSE_READ_EXCEPTIONS as err:
raise RetryDownload(err) raise RetryDownload(err)
def close_stream():
if ctx.stream is not None:
if not ctx.tmpfilename == '-':
ctx.stream.close()
ctx.stream = None
def download(): def download():
data_len = ctx.data.info().get('Content-length', None) data_len = ctx.data.info().get('Content-length', None)
@@ -239,12 +245,9 @@ class HttpFD(FileDownloader):
before = start # start measuring before = start # start measuring
def retry(e): def retry(e):
to_stdout = ctx.tmpfilename == '-' close_stream()
if ctx.stream is not None: ctx.resume_len = (byte_counter if ctx.tmpfilename == '-'
if not to_stdout: else os.path.getsize(encodeFilename(ctx.tmpfilename)))
ctx.stream.close()
ctx.stream = None
ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
raise RetryDownload(e) raise RetryDownload(e)
while True: while True:
@@ -382,6 +385,9 @@ class HttpFD(FileDownloader):
continue continue
except SucceedDownload: except SucceedDownload:
return True return True
except: # noqa: E722
close_stream()
raise
self.report_error('giving up after %s retries' % retries) self.report_error('giving up after %s retries' % retries)
return False return False

View File

@@ -948,6 +948,7 @@ from .mlb import (
) )
from .mlssoccer import MLSSoccerIE from .mlssoccer import MLSSoccerIE
from .mnet import MnetIE from .mnet import MnetIE
from .mocha import MochaVideoIE
from .moevideo import MoeVideoIE from .moevideo import MoeVideoIE
from .mofosex import ( from .mofosex import (
MofosexIE, MofosexIE,
@@ -1670,6 +1671,7 @@ from .svt import (
SVTSeriesIE, SVTSeriesIE,
) )
from .swrmediathek import SWRMediathekIE from .swrmediathek import SWRMediathekIE
from .syvdk import SYVDKIE
from .syfy import SyfyIE from .syfy import SyfyIE
from .sztvhu import SztvHuIE from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE from .tagesschau import TagesschauIE

View File

@@ -113,7 +113,7 @@ class CrunchyrollBaseIE(InfoExtractor):
class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE): class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE):
IE_NAME = 'crunchyroll' IE_NAME = 'crunchyroll'
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<id>[0-9]+))(?:[/?&]|$)' _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?!series/|watch/)(?:[^/]+/){1,2}[^/?&]*?)(?P<id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'info_dict': { 'info_dict': {

66
yt_dlp/extractor/mocha.py Normal file
View File

@@ -0,0 +1,66 @@
from .common import InfoExtractor
from ..utils import int_or_none, traverse_obj
class MochaVideoIE(InfoExtractor):
_VALID_URL = r'https?://video.mocha.com.vn/(?P<video_slug>[\w-]+)'
_TESTS = [{
'url': 'http://video.mocha.com.vn/chuyen-meo-gia-su-tu-thong-diep-cuoc-song-v18694039',
'info_dict': {
'id': '18694039',
'title': 'Chuyện mèo giả sư tử | Thông điệp cuộc sống',
'ext': 'mp4',
'view_count': int,
'like_count': int,
'dislike_count': int,
'display_id': 'chuyen-meo-gia-su-tu-thong-diep-cuoc-song',
'thumbnail': 'http://mcvideomd1fr.keeng.net/playnow/images/20220505/ad0a055d-2f69-42ca-b888-4790041fe6bc_640x480.jpg',
'description': '',
'duration': 70,
'timestamp': 1652254203,
'upload_date': '20220511',
'comment_count': int,
'categories': ['Kids']
}
}]
def _real_extract(self, url):
video_slug = self._match_valid_url(url).group('video_slug')
json_data = self._download_json(
'http://apivideo.mocha.com.vn:8081/onMediaBackendBiz/mochavideo/getVideoDetail',
video_slug, query={'url': url, 'token': ''})['data']['videoDetail']
video_id = str(json_data['id'])
video_urls = (json_data.get('list_resolution') or []) + [json_data.get('original_path')]
formats, subtitles = [], {}
for video in video_urls:
if isinstance(video, str):
formats.extend([{'url': video, 'ext': 'mp4'}])
else:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
video.get('video_path'), video_id, ext='mp4')
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
self._sort_formats(formats)
return {
'id': video_id,
'display_id': json_data.get('slug') or video_slug,
'title': json_data.get('name'),
'formats': formats,
'subtitles': subtitles,
'description': json_data.get('description'),
'duration': json_data.get('durationS'),
'view_count': json_data.get('total_view'),
'like_count': json_data.get('total_like'),
'dislike_count': json_data.get('total_unlike'),
'thumbnail': json_data.get('image_path_thumb'),
'timestamp': int_or_none(json_data.get('publish_time'), scale=1000),
'is_live': json_data.get('isLive'),
'channel': traverse_obj(json_data, ('channels', '0', 'name')),
'channel_id': traverse_obj(json_data, ('channels', '0', 'id')),
'channel_follower_count': traverse_obj(json_data, ('channels', '0', 'numfollow')),
'categories': traverse_obj(json_data, ('categories', ..., 'categoryname')),
'comment_count': json_data.get('total_comment'),
}

33
yt_dlp/extractor/syvdk.py Normal file
View File

@@ -0,0 +1,33 @@
from .common import InfoExtractor
from ..utils import traverse_obj
class SYVDKIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?24syv\.dk/episode/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://24syv.dk/episode/isabella-arendt-stiller-op-for-de-konservative-2',
'md5': '429ce5a423dd4b1e1d0bf3a569558089',
'info_dict': {
'id': '12215',
'display_id': 'isabella-arendt-stiller-op-for-de-konservative-2',
'ext': 'mp3',
'title': 'Isabella Arendt stiller op for De Konservative',
'description': 'md5:f5fa6a431813bf37284f3412ad7c6c06'
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
info_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['episodeDetails'][0]
return {
'id': str(info_data['id']),
'vcodec': 'none',
'ext': 'mp3',
'url': info_data['details']['enclosure'],
'display_id': video_id,
'title': traverse_obj(info_data, ('title', 'rendered')),
'description': traverse_obj(info_data, ('details', 'post_title')),
}

View File

@@ -38,8 +38,9 @@ class ModifyChaptersPP(FFmpegPostProcessor):
if not cuts: if not cuts:
return [], info return [], info
if self._duration_mismatch(real_duration, info.get('duration'), 1): original_duration, info['duration'] = info.get('duration'), info['chapters'][-1]['end_time']
if not self._duration_mismatch(real_duration, info['chapters'][-1]['end_time']): if self._duration_mismatch(real_duration, original_duration, 1):
if not self._duration_mismatch(real_duration, info['duration']):
self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut') self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut')
return [], info return [], info
if not info.get('__real_download'): if not info.get('__real_download'):

View File

@@ -3419,24 +3419,23 @@ def parse_codecs(codecs_str):
str.strip, codecs_str.strip().strip(',').split(',')))) str.strip, codecs_str.strip().strip(',').split(','))))
vcodec, acodec, scodec, hdr = None, None, None, None vcodec, acodec, scodec, hdr = None, None, None, None
for full_codec in split_codecs: for full_codec in split_codecs:
parts = full_codec.split('.') parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
codec = parts[0].replace('0', '') if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'): 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
if not vcodec: if vcodec:
vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec continue
if codec in ('dvh1', 'dvhe'): vcodec = full_codec
if parts[0] in ('dvh1', 'dvhe'):
hdr = 'DV' hdr = 'DV'
elif codec == 'av1' and len(parts) > 3 and parts[3] == '10': elif parts[0] == 'av1' and traverse_obj(parts, 3) == '10':
hdr = 'HDR10' hdr = 'HDR10'
elif full_codec.replace('0', '').startswith('vp9.2'): elif parts[:2] == ['vp9', '2']:
hdr = 'HDR10' hdr = 'HDR10'
elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac',
if not acodec: 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
acodec = full_codec acodec = acodec or full_codec
elif codec in ('stpp', 'wvtt',): elif parts[0] in ('stpp', 'wvtt'):
if not scodec: scodec = scodec or full_codec
scodec = full_codec
else: else:
write_string(f'WARNING: Unknown codec {full_codec}\n') write_string(f'WARNING: Unknown codec {full_codec}\n')
if vcodec or acodec or scodec: if vcodec or acodec or scodec: