Compare commits

...

6 Commits

Author SHA1 Message Date

pukkandan
6db9c4d57d
Ignore format-specific fields in initial pass of --match-filter
Closes #3074
2022-03-25 14:27:09 +05:30

Lesmiscore (Naoya Ozaki)
3cea3edd1a
[utils] WebSocketsWrapper: Allow omitting __enter__ invocation (#3187)
Authored by: Lesmiscore
2022-03-25 17:24:39 +09:00

pukkandan
b1a7cd056a
Treat multiple --match-filters as OR
Closes #3144
2022-03-25 13:33:46 +05:30

pukkandan
28787f16c6
[downloader] Fix invocation of HttpieFD
Closes #3154
2022-03-25 13:00:42 +05:30

zackmark29
1fb707badb
[viu] Fixed extractor (#3136)
Closes #3133
Authored by: zackmark29, pukkandan
2022-03-24 20:23:54 -07:00

pukkandan
a3f2445e29
[postprocessor,cleanup] Create _download_json
2022-03-25 08:45:35 +05:30
10 changed files with 209 additions and 189 deletions

View File

@@ -196,15 +196,7 @@ def expect_dict(self, got_dict, expected_dict):
 def sanitize_got_info_dict(got_dict):
     IGNORED_FIELDS = (
-        # Format keys
-        'url', 'manifest_url', 'format', 'format_id', 'format_note', 'width', 'height', 'resolution',
-        'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'vbr', 'fps', 'vcodec', 'container', 'filesize',
-        'filesize_approx', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'preference',
-        'language', 'language_preference', 'quality', 'source_preference', 'http_headers',
-        'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
-        # RTMP formats
-        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
+        *YoutubeDL._format_fields,

         # Lists
         'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries',

View File

@@ -931,7 +931,7 @@ class TestYoutubeDL(unittest.TestCase):
         res = get_videos()
         self.assertEqual(res, ['1', '2'])

-        def f(v):
+        def f(v, incomplete):
             if v['id'] == '1':
                 return None
             else:
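
The test change above reflects the callable interface this changeset relies on: a match_filter callable is invoked as match_filter(info_dict, incomplete=...). A minimal sketch of such a callable (the duration logic here is illustrative, not part of this changeset):

    import yt_dlp

    def longer_than_a_minute(info, incomplete):
        # During the initial pass, some fields may be missing ("incomplete")
        if incomplete and info.get('duration') is None:
            return None  # None means "do not reject"
        if (info.get('duration') or 0) < 60:
            return 'too short, skipping'  # a string is the skip reason
        return None

    ydl = yt_dlp.YoutubeDL({'match_filter': longer_than_a_minute})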

View File

@@ -513,6 +513,16 @@ class YoutubeDL(object):
         'track_number', 'disc_number', 'release_year',
     ))

+    _format_fields = {
+        # NB: Keep in sync with the docstring of extractor/common.py
+        'url', 'manifest_url', 'ext', 'format', 'format_id', 'format_note',
+        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
+        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
+        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
+        'preference', 'language', 'language_preference', 'quality', 'source_preference',
+        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
+        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
+    }
     _format_selection_exts = {
         'audio': {'m4a', 'mp3', 'ogg', 'aac'},
         'video': {'mp4', 'flv', 'webm', '3gp'},

@@ -2541,7 +2551,7 @@ class YoutubeDL(object):
         info_dict, _ = self.pre_process(info_dict)
-        if self._match_entry(info_dict) is not None:
+        if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
             return info_dict
         self.post_extract(info_dict)
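
With this change, `incomplete` may be a collection of field names rather than just a boolean: during the initial pass, only conditions on the listed format-specific fields pass when the field is missing. A small sketch using match_str from yt_dlp.utils (the mechanism is in the utils.py hunks further down):

    from yt_dlp.utils import match_str

    info = {'id': 'x', 'title': 'test', 'like_count': 5}
    # 'tbr' is declared incomplete, so a condition on the missing field passes
    assert match_str('like_count>?1 & tbr>500', info, incomplete={'tbr'})
    # a missing field that is NOT declared incomplete still fails the filter
    assert not match_str('view_count>10', info, incomplete={'tbr'})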

View File

@@ -13,6 +13,7 @@ from ..compat import (
 )
 from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
 from ..utils import (
+    classproperty,
     cli_option,
     cli_valueless_option,
     cli_bool_option,
@@ -73,17 +74,23 @@ class ExternalFD(FragmentFD):
     def get_basename(cls):
         return cls.__name__[:-2].lower()

+    @classproperty
+    def EXE_NAME(cls):
+        return cls.get_basename()
+
     @property
     def exe(self):
-        return self.get_basename()
+        return self.EXE_NAME

     @classmethod
     def available(cls, path=None):
-        path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
-        if path:
-            cls.exe = path
-            return path
-        return False
+        path = check_executable(
+            cls.EXE_NAME if path in (None, cls.get_basename()) else path,
+            [cls.AVAILABLE_OPT])
+        if not path:
+            return False
+        cls.exe = path
+        return path

     @classmethod
     def supports(cls, info_dict):

@@ -106,7 +113,7 @@ class ExternalFD(FragmentFD):
     def _configuration_args(self, keys=None, *args, **kwargs):
         return _configuration_args(
-            self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(),
+            self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
             keys, *args, **kwargs)

     def _call_downloader(self, tmpfilename, info_dict):
@@ -306,10 +313,7 @@ class Aria2cFD(ExternalFD):
 class HttpieFD(ExternalFD):
     AVAILABLE_OPT = '--version'
-
-    @classmethod
-    def available(cls, path=None):
-        return super().available(path or 'http')
+    EXE_NAME = 'http'

     def _make_cmd(self, tmpfilename, info_dict):
         cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
@@ -510,11 +514,13 @@ class AVconvFD(FFmpegFD):
     pass

-_BY_NAME = dict(
-    (klass.get_basename(), klass)
+_BY_NAME = {
+    klass.get_basename(): klass
     for name, klass in globals().items()
     if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
-)
+}
+
+_BY_EXE = {klass.EXE_NAME: klass for klass in _BY_NAME.values()}

 def list_external_downloaders():
@@ -526,4 +532,4 @@ def get_external_downloader(external_downloader):
        downloader . """
     # Drop .exe extension on Windows
     bn = os.path.splitext(os.path.basename(external_downloader))[0]
-    return _BY_NAME.get(bn)
+    return _BY_NAME.get(bn, _BY_EXE.get(bn))
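
With the `_BY_EXE` fallback, an external downloader can be looked up by its executable name as well as by its yt-dlp basename; this is what fixes the HttpieFD invocation, since its binary is `http` while its basename is `httpie`. A quick sketch (assuming yt-dlp is importable):

    from yt_dlp.downloader.external import get_external_downloader

    # both the basename and the executable name now resolve to the same class
    assert get_external_downloader('httpie') is get_external_downloader('http')
    # names unknown to both mappings still resolve to None
    assert get_external_downloader('no-such-downloader') is None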

View File

@@ -212,7 +212,6 @@ class FC2LiveIE(InfoExtractor):
             'Accept': '*/*',
             'User-Agent': std_headers['User-Agent'],
         })
-        ws.__enter__()

         self.write_debug('[debug] Sending HLS server request')

View File

@@ -1,55 +1,32 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import json
 import re
-import json
+import uuid
+import random
+import urllib.parse

 from .common import InfoExtractor
-from ..compat import (
-    compat_kwargs,
-    compat_str,
-    compat_urlparse,
-    compat_urllib_request,
-)
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     int_or_none,
+    strip_or_none,
     try_get,
     smuggle_url,
     unsmuggle_url,
+    url_or_none,
 )


 class ViuBaseIE(InfoExtractor):
-    def _real_initialize(self):
-        viu_auth_res = self._request_webpage(
-            'https://www.viu.com/api/apps/v2/authenticate', None,
-            'Requesting Viu auth', query={
-                'acct': 'test',
-                'appid': 'viu_desktop',
-                'fmt': 'json',
-                'iid': 'guest',
-                'languageid': 'default',
-                'platform': 'desktop',
-                'userid': 'guest',
-                'useridtype': 'guest',
-                'ver': '1.0'
-            }, headers=self.geo_verification_headers())
-        self._auth_token = viu_auth_res.info()['X-VIU-AUTH']
-
-    def _call_api(self, path, *args, **kwargs):
-        headers = self.geo_verification_headers()
-        headers.update({
-            'X-VIU-AUTH': self._auth_token
-        })
-        headers.update(kwargs.get('headers', {}))
-        kwargs['headers'] = headers
+    def _call_api(self, path, *args, headers={}, **kwargs):
         response = self._download_json(
-            'https://www.viu.com/api/' + path, *args,
-            **compat_kwargs(kwargs))['response']
+            f'https://www.viu.com/api/{path}', *args, **kwargs,
+            headers={**self.geo_verification_headers(), **headers})['response']
         if response.get('status') != 'success':
-            raise ExtractorError('%s said: %s' % (
-                self.IE_NAME, response['message']), expected=True)
+            raise ExtractorError(f'{self.IE_NAME} said: {response["message"]}', expected=True)
         return response
@@ -101,6 +78,7 @@ class ViuIE(ViuBaseIE):
         tdirforwhole = video_data.get('tdirforwhole')
         # #EXT-X-BYTERANGE is not supported by native hls downloader
         # and ffmpeg (#10955)
+        # FIXME: It is supported in yt-dlp
         # hls_file = video_data.get('hlsfile')
         hls_file = video_data.get('jwhlsfile')
         if url_path and tdirforwhole and hls_file:
@@ -227,42 +205,63 @@ class ViuOTTIE(InfoExtractor):
         'zh-cn': 2,
         'en-us': 3,
     }
-    _user_info = None
+    _user_token = None
+    _auth_codes = {}

     def _detect_error(self, response):
-        code = response.get('status', {}).get('code')
-        if code > 0:
+        code = try_get(response, lambda x: x['status']['code'])
+        if code and code > 0:
             message = try_get(response, lambda x: x['status']['message'])
-            raise ExtractorError('%s said: %s (%s)' % (
-                self.IE_NAME, message, code), expected=True)
-        return response['data']
-
-    def _raise_login_required(self):
-        raise ExtractorError(
-            'This video requires login. '
-            'Specify --username and --password or --netrc (machine: %s) '
-            'to provide account credentials.' % self._NETRC_MACHINE,
-            expected=True)
+            raise ExtractorError(f'{self.IE_NAME} said: {message} ({code})', expected=True)
+        return response.get('data') or {}

     def _login(self, country_code, video_id):
-        if not self._user_info:
+        if self._user_token is None:
             username, password = self._get_login_info()
-            if username is None or password is None:
+            if username is None:
                 return

+            headers = {
+                'Authorization': f'Bearer {self._auth_codes[country_code]}',
+                'Content-Type': 'application/json'
+            }
+            data = self._download_json(
+                'https://api-gateway-global.viu.com/api/account/validate',
+                video_id, 'Validating email address', headers=headers,
+                data=json.dumps({
+                    'principal': username,
+                    'provider': 'email'
+                }).encode())
+            if not data.get('exists'):
+                raise ExtractorError('Invalid email address')
+
             data = self._download_json(
-                compat_urllib_request.Request(
-                    'https://www.viu.com/ott/%s/index.php' % country_code, method='POST'),
-                video_id, 'Logging in', errnote=False, fatal=False,
-                query={'r': 'user/login'},
+                'https://api-gateway-global.viu.com/api/auth/login',
+                video_id, 'Logging in', headers=headers,
                 data=json.dumps({
-                    'username': username,
+                    'email': username,
                     'password': password,
-                    'platform_flag_label': 'web',
+                    'provider': 'email',
                 }).encode())
-            self._user_info = self._detect_error(data)['user']
-
-        return self._user_info
+            self._detect_error(data)
+            self._user_token = data.get('identity')
+            # need to update with valid user's token else will throw an error again
+            self._auth_codes[country_code] = data.get('token')
+
+        return self._user_token
+
+    def _get_token(self, country_code, video_id):
+        rand = ''.join(random.choice('0123456789') for _ in range(10))
+        return self._download_json(
+            f'https://api-gateway-global.viu.com/api/auth/token?v={rand}000', video_id,
+            headers={'Content-Type': 'application/json'}, note='Getting bearer token',
+            data=json.dumps({
+                'countryCode': country_code.upper(),
+                'platform': 'browser',
+                'platformFlagLabel': 'web',
+                'language': 'en',
+                'uuid': str(uuid.uuid4()),
+                'carrierId': '0'
+            }).encode('utf-8'))['token']

     def _real_extract(self, url):
         url, idata = unsmuggle_url(url, {})
@@ -279,16 +278,16 @@
             query['area_id'] = area_id

         product_data = self._download_json(
-            'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
+            f'http://www.viu.com/ott/{country_code}/index.php', video_id,
             'Downloading video info', query=query)['data']

         video_data = product_data.get('current_product')
         if not video_data:
-            raise ExtractorError('This video is not available in your region.', expected=True)
+            self.raise_geo_restricted()

         series_id = video_data.get('series_id')
         if self._yes_playlist(series_id, video_id, idata):
-            series = product_data.get('series', {})
+            series = product_data.get('series') or {}
             product = series.get('product')
             if product:
                 entries = []
@@ -296,14 +295,10 @@
                     item_id = entry.get('product_id')
                     if not item_id:
                         continue
-                    item_id = compat_str(item_id)
                     entries.append(self.url_result(
-                        smuggle_url(
-                            'http://www.viu.com/ott/%s/%s/vod/%s/' % (country_code, lang_code, item_id),
-                            {'force_noplaylist': True}),  # prevent infinite recursion
-                        'ViuOTT',
-                        item_id,
-                        entry.get('synopsis', '').strip()))
+                        smuggle_url(f'http://www.viu.com/ott/{country_code}/{lang_code}/vod/{item_id}/',
+                                    {'force_noplaylist': True}),
+                        ViuOTTIE, str(item_id), entry.get('synopsis', '').strip()))

                 return self.playlist_result(entries, series_id, series.get('name'), series.get('description'))
@@ -312,69 +307,65 @@
             'ccs_product_id': video_data['ccs_product_id'],
             'language_flag_id': self._LANGUAGE_FLAG.get(lang_code.lower()) or '3',
         }
-        headers = {
-            'Referer': url,
-            'Origin': url,
-        }
-        try:
-            stream_data = self._download_json(
-                'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
-                video_id, 'Downloading stream info', query=query, headers=headers)
-            stream_data = self._detect_error(stream_data)['stream']
-        except (ExtractorError, KeyError):
-            stream_data = None
-            if video_data.get('user_level', 0) > 0:
-                user = self._login(country_code, video_id)
-                if user:
-                    query['identity'] = user['identity']
-                    stream_data = self._download_json(
-                        'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
-                        video_id, 'Downloading stream info', query=query, headers=headers)
-                    stream_data = self._detect_error(stream_data).get('stream')
-            else:
-                # preview is limited to 3min for non-members
-                # try to bypass the duration limit
-                duration_limit = True
-                query['duration'] = '180'
-                stream_data = self._download_json(
-                    'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
-                    video_id, 'Downloading stream info', query=query, headers=headers)
-                try:
-                    stream_data = self._detect_error(stream_data)['stream']
-                except (ExtractorError, KeyError):  # if still not working, give up
-                    self._raise_login_required()
+
+        def download_playback():
+            stream_data = self._download_json(
+                'https://api-gateway-global.viu.com/api/playback/distribute',
+                video_id=video_id, query=query, fatal=False, note='Downloading stream info',
+                headers={
+                    'Authorization': f'Bearer {self._auth_codes[country_code]}',
+                    'Referer': url,
+                    'Origin': url
+                })
+            return self._detect_error(stream_data).get('stream')
+
+        if not self._auth_codes.get(country_code):
+            self._auth_codes[country_code] = self._get_token(country_code, video_id)
+
+        stream_data = None
+        try:
+            stream_data = download_playback()
+        except (ExtractorError, KeyError):
+            token = self._login(country_code, video_id)
+            if token is not None:
+                query['identity'] = token
+            else:
+                # preview is limited to 3min for non-members. But we can try to bypass it
+                duration_limit, query['duration'] = True, '180'
+            try:
+                stream_data = download_playback()
+            except (ExtractorError, KeyError):
+                if token is not None:
+                    raise
+                self.raise_login_required(method='password')

         if not stream_data:
             raise ExtractorError('Cannot get stream info', expected=True)

-        stream_sizes = stream_data.get('size', {})
         formats = []
-        for vid_format, stream_url in stream_data.get('url', {}).items():
-            height = int_or_none(self._search_regex(
-                r's(\d+)p', vid_format, 'height', default=None))
+        for vid_format, stream_url in (stream_data.get('url') or {}).items():
+            height = int(self._search_regex(r's(\d+)p', vid_format, 'height', default=None))

             # bypass preview duration limit
             if duration_limit:
-                stream_url = compat_urlparse.urlparse(stream_url)
-                query = dict(compat_urlparse.parse_qsl(stream_url.query, keep_blank_values=True))
-                time_duration = int_or_none(video_data.get('time_duration'))
+                stream_url = urllib.parse.urlparse(stream_url)
                 query.update({
-                    'duration': time_duration if time_duration > 0 else '9999999',
+                    'duration': video_data.get('time_duration') or '9999999',
                     'duration_start': '0',
                 })
-                stream_url = stream_url._replace(query=compat_urlparse.urlencode(query)).geturl()
+                stream_url = stream_url._replace(query=urllib.parse.urlencode(dict(
+                    urllib.parse.parse_qsl(stream_url.query, keep_blank_values=True)))).geturl()

             formats.append({
                 'format_id': vid_format,
                 'url': stream_url,
                 'height': height,
                 'ext': 'mp4',
-                'filesize': int_or_none(stream_sizes.get(vid_format))
+                'filesize': try_get(stream_data, lambda x: x['size'][vid_format], int)
             })
         self._sort_formats(formats)

         subtitles = {}
-        for sub in video_data.get('subtitle', []):
+        for sub in video_data.get('subtitle') or []:
             sub_url = sub.get('url')
             if not sub_url:
                 continue
@@ -383,17 +374,16 @@
                 'ext': 'srt',
             })

-        title = video_data['synopsis'].strip()
+        title = strip_or_none(video_data.get('synopsis'))

         return {
             'id': video_id,
             'title': title,
             'description': video_data.get('description'),
-            'series': product_data.get('series', {}).get('name'),
+            'series': try_get(product_data, lambda x: x['series']['name']),
             'episode': title,
             'episode_number': int_or_none(video_data.get('number')),
             'duration': int_or_none(stream_data.get('duration')),
-            'thumbnail': video_data.get('cover_image_url'),
+            'thumbnail': url_or_none(video_data.get('cover_image_url')),
             'formats': formats,
             'subtitles': subtitles,
         }
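
Several of the changes above swap direct indexing for try_get, which swallows lookup errors and can also enforce a result type, returning None on mismatch; e.g. the new 'filesize' line yields None when the API reports the size as a string. An illustrative sketch:

    from yt_dlp.utils import try_get

    stream_data = {'size': {'s1080p': '12345'}}
    # the value exists but is a str, so the int type-check makes this None
    assert try_get(stream_data, lambda x: x['size']['s1080p'], int) is None
    # missing keys are swallowed instead of raising KeyError
    assert try_get(stream_data, lambda x: x['size']['s720p']) is None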

View File

@@ -465,19 +465,18 @@ def create_parser():
         metavar='COUNT', dest='max_views', default=None, type=int,
         help=optparse.SUPPRESS_HELP)
     selection.add_option(
-        '--match-filter',
-        metavar='FILTER', dest='match_filter', default=None,
+        '--match-filters',
+        metavar='FILTER', dest='match_filter', action='append',
         help=(
             'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a '
             'number or a string using the operators defined in "Filtering formats". '
-            'You can also simply specify a field to match if the field is present '
-            'and "!field" to check if the field is not present. In addition, '
-            'Python style regular expression matching can be done using "~=", '
-            'and multiple filters can be checked with "&". '
-            'Use a "\\" to escape "&" or quotes if needed. Eg: --match-filter '
-            '"!is_live & like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
-            'matches only videos that are not live, has a like count more than 100 '
-            '(or the like field is not available), and also has a description '
+            'You can also simply specify a field to match if the field is present, '
+            'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
+            'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
+            'the filter matches if atleast one of the conditions are met. Eg: --match-filter '
+            '!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
+            'matches only videos that are not live OR those that have a like count more than 100 '
+            '(or the like field is not available) and also has a description '
             'that contains the phrase "cats & dogs" (ignoring case)'))
     selection.add_option(
         '--no-match-filter',

View File

@@ -1,13 +1,18 @@
 from __future__ import unicode_literals

 import functools
+import itertools
+import json
 import os
+import time
+import urllib.error

-from ..compat import compat_str
 from ..utils import (
     _configuration_args,
     encodeFilename,
+    network_exceptions,
     PostProcessingError,
+    sanitized_Request,
     write_string,
 )
@@ -63,7 +68,7 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
     @classmethod
     def pp_key(cls):
         name = cls.__name__[:-2]
-        return compat_str(name[6:]) if name[:6].lower() == 'ffmpeg' else name
+        return name[6:] if name[:6].lower() == 'ffmpeg' else name

     def to_screen(self, text, prefix=True, *args, **kwargs):
         tag = '[%s] ' % self.PP_NAME if prefix else ''
@@ -180,6 +185,28 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
             progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s',
             progress_dict))

+    def _download_json(self, url, *, expected_http_errors=(404,)):
+        # While this is not an extractor, it behaves similar to one and
+        # so obey extractor_retries and sleep_interval_requests
+        max_retries = self.get_param('extractor_retries', 3)
+        sleep_interval = self.get_param('sleep_interval_requests') or 0
+
+        self.write_debug(f'{self.PP_NAME} query: {url}')
+        for retries in itertools.count():
+            try:
+                rsp = self._downloader.urlopen(sanitized_Request(url))
+                return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
+            except network_exceptions as e:
+                if isinstance(e, urllib.error.HTTPError) and e.code in expected_http_errors:
+                    return None
+                if retries < max_retries:
+                    self.report_warning(f'{e}. Retrying...')
+                    if sleep_interval > 0:
+                        self.to_screen(f'Sleeping {sleep_interval} seconds ...')
+                        time.sleep(sleep_interval)
+                    continue
+                raise PostProcessingError(f'Unable to communicate with {self.PP_NAME} API: {e}')
+

 class AudioConversionError(PostProcessingError):
     pass
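
The new PostProcessor._download_json generalizes what SponsorBlockPP previously did privately (see the next file): retries obey extractor_retries and sleep_interval_requests, and any HTTP status in expected_http_errors yields None instead of raising. A hypothetical subclass using it (the class and endpoint here are illustrative):

    from yt_dlp.postprocessor.common import PostProcessor

    class ExampleMetadataPP(PostProcessor):
        def run(self, info):
            # a 404 from the API returns None rather than raising
            data = self._download_json(
                f'https://api.example.com/videos/{info["id"]}')  # hypothetical endpoint
            if data:
                info['example_metadata'] = data
            return [], info  # nothing to delete, possibly-updated info dict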

View File

@@ -1,12 +1,9 @@
 from hashlib import sha256
-import itertools
 import json
 import re
-import time

 from .ffmpeg import FFmpegPostProcessor
-from ..compat import compat_urllib_parse_urlencode, compat_HTTPError
-from ..utils import PostProcessingError, network_exceptions, sanitized_Request
+from ..compat import compat_urllib_parse_urlencode


 class SponsorBlockPP(FFmpegPostProcessor):
@@ -94,28 +91,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
             'categories': json.dumps(self._categories),
             'actionTypes': json.dumps(['skip', 'poi'])
         })
-        self.write_debug(f'SponsorBlock query: {url}')
-        for d in self._get_json(url):
+        for d in self._download_json(url) or []:
             if d['videoID'] == video_id:
                 return d['segments']
         return []
-
-    def _get_json(self, url):
-        # While this is not an extractor, it behaves similar to one and
-        # so obey extractor_retries and sleep_interval_requests
-        max_retries = self.get_param('extractor_retries', 3)
-        sleep_interval = self.get_param('sleep_interval_requests') or 0
-        for retries in itertools.count():
-            try:
-                rsp = self._downloader.urlopen(sanitized_Request(url))
-                return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
-            except network_exceptions as e:
-                if isinstance(e, compat_HTTPError) and e.code == 404:
-                    return []
-                if retries < max_retries:
-                    self.report_warning(f'{e}. Retrying...')
-                    if sleep_interval > 0:
-                        self.to_screen(f'Sleeping {sleep_interval} seconds ...')
-                        time.sleep(sleep_interval)
-                    continue
-                raise PostProcessingError(f'Unable to communicate with SponsorBlock API: {e}')

View File

@@ -3545,6 +3545,11 @@ def _match_one(filter_part, dct, incomplete):
         '=': operator.eq,
     }

+    if isinstance(incomplete, bool):
+        is_incomplete = lambda _: incomplete
+    else:
+        is_incomplete = lambda k: k in incomplete
+
     operator_rex = re.compile(r'''(?x)\s*
         (?P<key>[a-z_]+)
         \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
@@ -3583,7 +3588,7 @@ def _match_one(filter_part, dct, incomplete):
         if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
             raise ValueError('Operator %s only supports string values!' % m['op'])
         if actual_value is None:
-            return incomplete or m['none_inclusive']
+            return is_incomplete(m['key']) or m['none_inclusive']
         return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)

     UNARY_OPERATORS = {
@@ -3598,7 +3603,7 @@ def _match_one(filter_part, dct, incomplete):
     if m:
         op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
-        if incomplete and actual_value is None:
+        if is_incomplete(m.group('key')) and actual_value is None:
             return True
         return op(actual_value)
@@ -3606,24 +3611,29 @@ def _match_one(filter_part, dct, incomplete):

 def match_str(filter_str, dct, incomplete=False):
-    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
-    When incomplete, all conditions passes on missing fields
+    """ Filter a dictionary with a simple string syntax.
+    @returns           Whether the filter passes
+    @param incomplete  Set of keys that is expected to be missing from dct.
+                       Can be True/False to indicate all/none of the keys may be missing.
+                       All conditions on incomplete keys pass if the key is missing
     """
     return all(
         _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
         for filter_part in re.split(r'(?<!\\)&', filter_str))


-def match_filter_func(filter_str):
-    if filter_str is None:
+def match_filter_func(filters):
+    if not filters:
         return None
+    filters = variadic(filters)

     def _match_func(info_dict, *args, **kwargs):
-        if match_str(filter_str, info_dict, *args, **kwargs):
+        if any(match_str(f, info_dict, *args, **kwargs) for f in filters):
             return None
         else:
-            video_title = info_dict.get('title', info_dict.get('id', 'video'))
-            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
+            video_title = info_dict.get('title') or info_dict.get('id') or 'video'
+            filter_str = ') | ('.join(map(str.strip, filters))
+            return f'{video_title} does not pass filter ({filter_str}), skipping ..'
     return _match_func
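
Combined with the options.py change, this is what makes repeated --match-filters act as OR: each filter string is still an AND of its own conditions, but the video passes if any one filter matches. A short sketch:

    from yt_dlp.utils import match_filter_func

    f = match_filter_func(['!is_live', 'like_count>?100'])
    assert f({'id': 'a', 'is_live': False}) is None                    # first filter matches
    assert f({'id': 'b', 'is_live': True, 'like_count': 500}) is None  # second filter matches
    # fails both filters, so a "does not pass filter" message is returned
    assert f({'id': 'c', 'is_live': True, 'like_count': 5}) is not None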
@@ -5434,15 +5444,18 @@ class Config:

 class WebSocketsWrapper():
     """Wraps websockets module to use in non-async scopes"""
+    pool = None

-    def __init__(self, url, headers=None):
+    def __init__(self, url, headers=None, connect=True):
         self.loop = asyncio.events.new_event_loop()
         self.conn = compat_websockets.connect(
             url, extra_headers=headers, ping_interval=None,
             close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
+        if connect:
+            self.__enter__()
         atexit.register(self.__exit__, None, None, None)

     def __enter__(self):
-        self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
+        if not self.pool:
+            self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
         return self

     def send(self, *args):
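
Since __enter__ is now idempotent and the constructor connects by default, call sites like fc2.py no longer need the explicit ws.__enter__(). A sketch of the two now-equivalent usage patterns (the URL is a placeholder and the websockets package is assumed to be installed):

    from yt_dlp.utils import WebSocketsWrapper

    # connects immediately (connect=True is the default)
    ws = WebSocketsWrapper('wss://example.com/socket', headers={'Origin': 'https://example.com'})
    ws.send('ping')

    # defers the connection until __enter__; entering twice is now harmless
    with WebSocketsWrapper('wss://example.com/socket', connect=False) as ws2:
        ws2.send('ping')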
@@ -5502,3 +5515,11 @@ has_websockets = bool(compat_websockets)
 def merge_headers(*dicts):
     """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
     return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
+
+
+class classproperty:
+    def __init__(self, f):
+        self.f = f
+
+    def __get__(self, _, cls):
+        return self.f(cls)
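
classproperty is a minimal non-data descriptor: the wrapped function is called with the class itself, so subclasses can either inherit the computed default or shadow it with a plain attribute, which is exactly how HttpieFD overrides EXE_NAME in the downloader diff above. A usage sketch (the class names here are illustrative):

    class ExternalTool:
        @classproperty
        def EXE_NAME(cls):
            return cls.__name__.lower()

    class Httpie(ExternalTool):
        EXE_NAME = 'http'  # a plain attribute shadows the computed default

    assert ExternalTool.EXE_NAME == 'externaltool'
    assert Httpie.EXE_NAME == 'http'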