Compare commits

...

8 Commits

Author SHA1 Message Date
Bricio
5625e6073f
[Biqle] Fix extractor (#2731)
Closes #193
Authored by: Bricio
2022-02-18 08:02:14 -08:00
pukkandan
0ad92dfb18
[youtube] De-prioritize potentially damaged formats
Closes #2823
2022-02-18 19:41:37 +05:30
pukkandan
60f3e99592
Tolerate failure to --write-link due to unknown URL
Closes #2724
2022-02-18 18:14:50 +05:30
pukkandan
8d93e69d67
Create necessary directories for --print-to-file
Closes #2721
2022-02-18 18:03:21 +05:30
pukkandan
3aa915400d
Fix -all for --sub-langs
Closes #2703
2022-02-18 18:03:20 +05:30
pukkandan
dcd55f766d
[aria2c] Add --http-accept-gzip=true
Closes #1936, #1236
2022-02-18 18:03:20 +05:30
pukkandan
2e4cacd038
[youtube] Fix intermittent failure of embed-based age-gate bypass 2022-02-18 18:03:13 +05:30
Ronnnny
c15c316b21
[abc] Support 1080p (#2819)
Authored by: Ronnnny
2022-02-18 00:25:47 -08:00
5 changed files with 85 additions and 66 deletions

View File

@ -2663,12 +2663,15 @@ class YoutubeDL(object):
# given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
requested_langs = []
for lang_re in self.params.get('subtitleslangs'):
if lang_re == 'all':
requested_langs.extend(all_sub_langs)
continue
discard = lang_re[0] == '-'
if discard:
lang_re = lang_re[1:]
if lang_re == 'all':
if discard:
requested_langs = []
else:
requested_langs.extend(all_sub_langs)
continue
current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
if discard:
for lang in current_langs:
@ -2732,6 +2735,7 @@ class YoutubeDL(object):
filename = self.evaluate_outtmpl(file_tmpl, info_dict)
tmpl = format_tmpl(tmpl)
self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
if self._ensure_dir_exists(filename):
with io.open(filename, 'a', encoding='utf-8') as f:
f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
@ -2904,9 +2908,11 @@ class YoutubeDL(object):
# Write internet shortcut files
def _write_link_file(link_type):
if 'webpage_url' not in info_dict:
self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
return False
url = try_get(info_dict['webpage_url'], iri_to_uri)
if not url:
self.report_warning(
f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
return True
linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
if not self._ensure_dir_exists(encodeFilename(linkfn)):
return False
@ -2917,7 +2923,7 @@ class YoutubeDL(object):
self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
newline='\r\n' if link_type == 'url' else '\n') as linkfile:
template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
template_vars = {'url': url}
if link_type == 'desktop':
template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
linkfile.write(LINK_TEMPLATES[link_type] % template_vars)

View File

@ -253,7 +253,7 @@ class Aria2cFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
'--file-allocation=none', '-x16', '-j16', '-s16']
'--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
if 'fragments' in info_dict:
cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
else:

View File

@ -213,7 +213,7 @@ class ABCIViewIE(InfoExtractor):
'hdnea': token,
})
for sd in ('720', 'sd', 'sd-low'):
for sd in ('1080', '720', 'sd', 'sd-low'):
sd_url = try_get(
stream, lambda x: x['streams']['hls'][sd], compat_str)
if not sd_url:

View File

@ -3,27 +3,28 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from .vk import VKIE
from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
from ..compat import compat_b64decode
from ..utils import (
int_or_none,
js_to_json,
traverse_obj,
unified_timestamp,
)
from ..utils import int_or_none
class BIQLEIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
_TESTS = [{
# Youtube embed
'url': 'https://biqle.ru/watch/-115995369_456239081',
'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
'url': 'https://biqle.ru/watch/-2000421746_85421746',
'md5': 'ae6ef4f04d19ac84e4658046d02c151c',
'info_dict': {
'id': '8v4f-avW-VI',
'id': '-2000421746_85421746',
'ext': 'mp4',
'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
'description': 'Passe-Partout',
'uploader_id': 'mrsimpsonstef3',
'uploader': 'Phanolito',
'upload_date': '20120822',
'title': 'Forsaken By Hope Studio Clip',
'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн',
'upload_date': '19700101',
'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb',
'timestamp': 0,
},
}, {
'url': 'http://biqle.org/watch/-44781847_168547604',
@ -32,50 +33,59 @@ class BIQLEIE(InfoExtractor):
'id': '-44781847_168547604',
'ext': 'mp4',
'title': 'Ребенок в шоке от автоматической мойки',
'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн',
'timestamp': 1396633454,
'uploader': 'Dmitry Kotov',
'upload_date': '20140404',
'uploader_id': '47850140',
'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embed_url = self._proto_relative_url(self._search_regex(
r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
webpage, 'embed url'))
title = self._html_search_meta('name', webpage, 'Title', fatal=False)
timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
description = self._html_search_meta('description', webpage, 'Description', default=None)
global_embed_url = self._search_regex(
r'<script[^<]+?window.globEmbedUrl\s*=\s*\'((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^\']+)\'',
webpage, 'global Embed url')
hash = self._search_regex(
r'<script id="data-embed-video[^<]+?hash: "([^"]+)"[^<]*</script>', webpage, 'Hash')
embed_url = global_embed_url + hash
if VKIE.suitable(embed_url):
return self.url_result(embed_url, VKIE.ie_key(), video_id)
embed_page = self._download_webpage(
embed_url, video_id, headers={'Referer': url})
video_ext = self._get_cookies(embed_url).get('video_ext')
if video_ext:
video_ext = compat_urllib_parse_unquote(video_ext.value)
if not video_ext:
video_ext = compat_b64decode(self._search_regex(
r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
embed_page, 'video_ext')).decode()
video_id, sig, _, access_token = video_ext.split(':')
embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url})
glob_params = self._parse_json(self._search_regex(
r'<script id="globParams">[^<]*window.globParams = ([^;]+);[^<]+</script>',
embed_page, 'Global Parameters'), video_id, transform_source=js_to_json)
host_name = compat_b64decode(glob_params['server'][::-1]).decode()
item = self._download_json(
'https://api.vk.com/method/video.get', video_id,
headers={'User-Agent': 'okhttp/3.4.1'}, query={
'access_token': access_token,
'sig': sig,
'v': 5.44,
f'https://{host_name}/method/video.get/{video_id}', video_id,
headers={'Referer': url}, query={
'token': glob_params['video']['access_token'],
'videos': video_id,
'ckey': glob_params['c_key'],
'credentials': glob_params['video']['credentials'],
})['response']['items'][0]
title = item['title']
formats = []
for f_id, f_url in item.get('files', {}).items():
if f_id == 'external':
return self.url_result(f_url)
ext, height = f_id.split('_')
height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height))
if height_extra_key:
formats.append({
'format_id': height + 'p',
'url': f_url,
'format_id': f'{height}p',
'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
'height': int_or_none(height),
'ext': ext,
})
@ -96,10 +106,9 @@ class BIQLEIE(InfoExtractor):
'title': title,
'formats': formats,
'comment_count': int_or_none(item.get('comments')),
'description': item.get('description'),
'description': description,
'duration': int_or_none(item.get('duration')),
'thumbnails': thumbnails,
'timestamp': int_or_none(item.get('date')),
'uploader': item.get('owner_id'),
'timestamp': timestamp,
'view_count': int_or_none(item.get('views')),
}

View File

@ -225,28 +225,28 @@ INNERTUBE_CLIENTS = {
def build_innertube_clients():
third_party = {
THIRD_PARTY = {
'embedUrl': 'https://google.com', # Can be any valid URL
}
base_clients = ('android', 'web', 'ios', 'mweb')
priority = qualities(base_clients[::-1])
BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')
priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
if client in base_clients:
INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
base_client, *variant = client.split('_')
ytcfg['priority'] = 10 * priority(base_client)
if variant == ['embedded']:
ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
INNERTUBE_CLIENTS[f'{base_client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
agegate_ytcfg['priority'] -= 1
elif client.endswith('_embedded'):
ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
ytcfg['priority'] -= 2
else:
elif variant:
ytcfg['priority'] -= 3
@ -2936,6 +2936,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
])
streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
approx_duration = max(traverse_obj(streaming_formats, (..., 'approxDurationMs'), expected_type=float_or_none) or [0]) or None
for fmt in streaming_formats:
if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
@ -2995,12 +2996,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
itags[itag] = 'https'
stream_ids.append(stream_id)
tbr = float_or_none(
fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
language_preference = (
10 if audio_track.get('audioIsDefault') and 10
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
else -1)
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000)
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
@ -3009,7 +3012,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'%s%s' % (audio_track.get('displayName') or '',
' (default)' if language_preference > 0 else ''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
throttled and 'THROTTLED', delim=', '),
throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
'source_preference': -10 if throttled else -1,
'fps': int_or_none(fmt.get('fps')) or None,
'height': height,
@ -3020,6 +3023,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
'desc' if language_preference < -1 else ''),
'language_preference': language_preference,
'preference': -10 if is_damaged else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')