Compare commits

...

8 Commits

Author SHA1 Message Date
Bricio
5625e6073f
[Biqle] Fix extractor (#2731)
Closes #193
Authored by: Bricio
2022-02-18 08:02:14 -08:00
pukkandan
0ad92dfb18
[youtube] De-prioritize potentially damaged formats
Closes #2823
2022-02-18 19:41:37 +05:30
pukkandan
60f3e99592
Tolerate failure to --write-link due to unknown URL
Closes #2724
2022-02-18 18:14:50 +05:30
pukkandan
8d93e69d67
Create necessary directories for --print-to-file
Closes #2721
2022-02-18 18:03:21 +05:30
pukkandan
3aa915400d
Fix -all for --sub-langs
Closes #2703
2022-02-18 18:03:20 +05:30
pukkandan
dcd55f766d
[aria2c] Add --http-accept-gzip=true
Closes #1936, #1236
2022-02-18 18:03:20 +05:30
pukkandan
2e4cacd038
[youtube] Fix intermittent failure of embed-based age-gate bypass
2022-02-18 18:03:13 +05:30
Ronnnny
c15c316b21
[abc] Support 1080p (#2819)
Authored by: Ronnnny
2022-02-18 00:25:47 -08:00
5 changed files with 85 additions and 66 deletions

View File

@ -2663,12 +2663,15 @@ class YoutubeDL(object):
# given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
requested_langs = [] requested_langs = []
for lang_re in self.params.get('subtitleslangs'): for lang_re in self.params.get('subtitleslangs'):
if lang_re == 'all':
requested_langs.extend(all_sub_langs)
continue
discard = lang_re[0] == '-' discard = lang_re[0] == '-'
if discard: if discard:
lang_re = lang_re[1:] lang_re = lang_re[1:]
if lang_re == 'all':
if discard:
requested_langs = []
else:
requested_langs.extend(all_sub_langs)
continue
current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs) current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
if discard: if discard:
for lang in current_langs: for lang in current_langs:
@ -2732,6 +2735,7 @@ class YoutubeDL(object):
filename = self.evaluate_outtmpl(file_tmpl, info_dict) filename = self.evaluate_outtmpl(file_tmpl, info_dict)
tmpl = format_tmpl(tmpl) tmpl = format_tmpl(tmpl)
self.to_screen(f'[info] Writing {tmpl!r} to: {filename}') self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
if self._ensure_dir_exists(filename):
with io.open(filename, 'a', encoding='utf-8') as f: with io.open(filename, 'a', encoding='utf-8') as f:
f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n') f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
@ -2904,9 +2908,11 @@ class YoutubeDL(object):
# Write internet shortcut files # Write internet shortcut files
def _write_link_file(link_type): def _write_link_file(link_type):
if 'webpage_url' not in info_dict: url = try_get(info_dict['webpage_url'], iri_to_uri)
self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information') if not url:
return False self.report_warning(
f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
return True
linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
if not self._ensure_dir_exists(encodeFilename(linkfn)): if not self._ensure_dir_exists(encodeFilename(linkfn)):
return False return False
@ -2917,7 +2923,7 @@ class YoutubeDL(object):
self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
newline='\r\n' if link_type == 'url' else '\n') as linkfile: newline='\r\n' if link_type == 'url' else '\n') as linkfile:
template_vars = {'url': iri_to_uri(info_dict['webpage_url'])} template_vars = {'url': url}
if link_type == 'desktop': if link_type == 'desktop':
template_vars['filename'] = linkfn[:-(len(link_type) + 1)] template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
linkfile.write(LINK_TEMPLATES[link_type] % template_vars) linkfile.write(LINK_TEMPLATES[link_type] % template_vars)

View File

@ -253,7 +253,7 @@ class Aria2cFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c', cmd = [self.exe, '-c',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
'--file-allocation=none', '-x16', '-j16', '-s16'] '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
if 'fragments' in info_dict: if 'fragments' in info_dict:
cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true'] cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
else: else:

View File

@ -213,7 +213,7 @@ class ABCIViewIE(InfoExtractor):
'hdnea': token, 'hdnea': token,
}) })
for sd in ('720', 'sd', 'sd-low'): for sd in ('1080', '720', 'sd', 'sd-low'):
sd_url = try_get( sd_url = try_get(
stream, lambda x: x['streams']['hls'][sd], compat_str) stream, lambda x: x['streams']['hls'][sd], compat_str)
if not sd_url: if not sd_url:

View File

@ -3,27 +3,28 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from .vk import VKIE from .vk import VKIE
from ..compat import ( from ..compat import compat_b64decode
compat_b64decode, from ..utils import (
compat_urllib_parse_unquote, int_or_none,
js_to_json,
traverse_obj,
unified_timestamp,
) )
from ..utils import int_or_none
class BIQLEIE(InfoExtractor): class BIQLEIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)' _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
_TESTS = [{ _TESTS = [{
# Youtube embed 'url': 'https://biqle.ru/watch/-2000421746_85421746',
'url': 'https://biqle.ru/watch/-115995369_456239081', 'md5': 'ae6ef4f04d19ac84e4658046d02c151c',
'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
'info_dict': { 'info_dict': {
'id': '8v4f-avW-VI', 'id': '-2000421746_85421746',
'ext': 'mp4', 'ext': 'mp4',
'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer", 'title': 'Forsaken By Hope Studio Clip',
'description': 'Passe-Partout', 'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн',
'uploader_id': 'mrsimpsonstef3', 'upload_date': '19700101',
'uploader': 'Phanolito', 'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb',
'upload_date': '20120822', 'timestamp': 0,
}, },
}, { }, {
'url': 'http://biqle.org/watch/-44781847_168547604', 'url': 'http://biqle.org/watch/-44781847_168547604',
@ -32,50 +33,59 @@ class BIQLEIE(InfoExtractor):
'id': '-44781847_168547604', 'id': '-44781847_168547604',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ребенок в шоке от автоматической мойки', 'title': 'Ребенок в шоке от автоматической мойки',
'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн',
'timestamp': 1396633454, 'timestamp': 1396633454,
'uploader': 'Dmitry Kotov',
'upload_date': '20140404', 'upload_date': '20140404',
'uploader_id': '47850140', 'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg',
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
embed_url = self._proto_relative_url(self._search_regex(
r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>', title = self._html_search_meta('name', webpage, 'Title', fatal=False)
webpage, 'embed url')) timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
description = self._html_search_meta('description', webpage, 'Description', default=None)
global_embed_url = self._search_regex(
r'<script[^<]+?window.globEmbedUrl\s*=\s*\'((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^\']+)\'',
webpage, 'global Embed url')
hash = self._search_regex(
r'<script id="data-embed-video[^<]+?hash: "([^"]+)"[^<]*</script>', webpage, 'Hash')
embed_url = global_embed_url + hash
if VKIE.suitable(embed_url): if VKIE.suitable(embed_url):
return self.url_result(embed_url, VKIE.ie_key(), video_id) return self.url_result(embed_url, VKIE.ie_key(), video_id)
embed_page = self._download_webpage( embed_page = self._download_webpage(
embed_url, video_id, headers={'Referer': url}) embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url})
video_ext = self._get_cookies(embed_url).get('video_ext')
if video_ext: glob_params = self._parse_json(self._search_regex(
video_ext = compat_urllib_parse_unquote(video_ext.value) r'<script id="globParams">[^<]*window.globParams = ([^;]+);[^<]+</script>',
if not video_ext: embed_page, 'Global Parameters'), video_id, transform_source=js_to_json)
video_ext = compat_b64decode(self._search_regex( host_name = compat_b64decode(glob_params['server'][::-1]).decode()
r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
embed_page, 'video_ext')).decode()
video_id, sig, _, access_token = video_ext.split(':')
item = self._download_json( item = self._download_json(
'https://api.vk.com/method/video.get', video_id, f'https://{host_name}/method/video.get/{video_id}', video_id,
headers={'User-Agent': 'okhttp/3.4.1'}, query={ headers={'Referer': url}, query={
'access_token': access_token, 'token': glob_params['video']['access_token'],
'sig': sig,
'v': 5.44,
'videos': video_id, 'videos': video_id,
'ckey': glob_params['c_key'],
'credentials': glob_params['video']['credentials'],
})['response']['items'][0] })['response']['items'][0]
title = item['title']
formats = [] formats = []
for f_id, f_url in item.get('files', {}).items(): for f_id, f_url in item.get('files', {}).items():
if f_id == 'external': if f_id == 'external':
return self.url_result(f_url) return self.url_result(f_url)
ext, height = f_id.split('_') ext, height = f_id.split('_')
height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height))
if height_extra_key:
formats.append({ formats.append({
'format_id': height + 'p', 'format_id': f'{height}p',
'url': f_url, 'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
'height': int_or_none(height), 'height': int_or_none(height),
'ext': ext, 'ext': ext,
}) })
@ -96,10 +106,9 @@ class BIQLEIE(InfoExtractor):
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'comment_count': int_or_none(item.get('comments')), 'comment_count': int_or_none(item.get('comments')),
'description': item.get('description'), 'description': description,
'duration': int_or_none(item.get('duration')), 'duration': int_or_none(item.get('duration')),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'timestamp': int_or_none(item.get('date')), 'timestamp': timestamp,
'uploader': item.get('owner_id'),
'view_count': int_or_none(item.get('views')), 'view_count': int_or_none(item.get('views')),
} }

View File

@ -225,28 +225,28 @@ INNERTUBE_CLIENTS = {
def build_innertube_clients(): def build_innertube_clients():
third_party = { THIRD_PARTY = {
'embedUrl': 'https://google.com', # Can be any valid URL 'embedUrl': 'https://google.com', # Can be any valid URL
} }
base_clients = ('android', 'web', 'ios', 'mweb') BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')
priority = qualities(base_clients[::-1]) priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8') ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com') ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
ytcfg.setdefault('REQUIRE_JS_PLAYER', True) ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
if client in base_clients: base_client, *variant = client.split('_')
INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg) ytcfg['priority'] = 10 * priority(base_client)
if variant == ['embedded']:
ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
INNERTUBE_CLIENTS[f'{base_client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED' agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
agegate_ytcfg['priority'] -= 1 agegate_ytcfg['priority'] -= 1
elif client.endswith('_embedded'):
ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
ytcfg['priority'] -= 2 ytcfg['priority'] -= 2
else: elif variant:
ytcfg['priority'] -= 3 ytcfg['priority'] -= 3
@ -2936,6 +2936,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres' 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
]) ])
streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[]) streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
approx_duration = max(traverse_obj(streaming_formats, (..., 'approxDurationMs'), expected_type=float_or_none) or [0]) or None
for fmt in streaming_formats: for fmt in streaming_formats:
if fmt.get('targetDurationSec') or fmt.get('drmFamilies'): if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
@ -2995,12 +2996,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
itags[itag] = 'https' itags[itag] = 'https'
stream_ids.append(stream_id) stream_ids.append(stream_id)
tbr = float_or_none( tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
language_preference = ( language_preference = (
10 if audio_track.get('audioIsDefault') and 10 10 if audio_track.get('audioIsDefault') and 10
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10 else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
else -1) else -1)
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000)
dct = { dct = {
'asr': int_or_none(fmt.get('audioSampleRate')), 'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')), 'filesize': int_or_none(fmt.get('contentLength')),
@ -3009,7 +3012,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'%s%s' % (audio_track.get('displayName') or '', '%s%s' % (audio_track.get('displayName') or '',
' (default)' if language_preference > 0 else ''), ' (default)' if language_preference > 0 else ''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
throttled and 'THROTTLED', delim=', '), throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
'source_preference': -10 if throttled else -1, 'source_preference': -10 if throttled else -1,
'fps': int_or_none(fmt.get('fps')) or None, 'fps': int_or_none(fmt.get('fps')) or None,
'height': height, 'height': height,
@ -3020,6 +3023,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'language': join_nonempty(audio_track.get('id', '').split('.')[0], 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
'desc' if language_preference < -1 else ''), 'desc' if language_preference < -1 else ''),
'language_preference': language_preference, 'language_preference': language_preference,
'preference': -10 if is_damaged else None,
} }
mime_mobj = re.match( mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '') r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')