Compare commits

...

3 Commits

Author SHA1 Message Date
pukkandan
f0c9fb9682
[utils] Popen: Refactor to use contextmanager
Fixes https://github.com/yt-dlp/yt-dlp/issues/3531#issuecomment-1156223597
2022-06-16 06:23:50 +05:30
pukkandan
560738f34d
[extractor] Import _ALL_CLASSES lazily
This significantly speeds up `import yt_dlp` in the absence of `lazy_extractors`
2022-06-16 06:23:50 +05:30
pukkandan
99d10bf607
[cleanup, extractor] Rename extractors.py to _extractors.py
This should be considered part of the next commit,
but is separated so that `git` can detect the renaming better
2022-06-16 06:23:49 +05:30
15 changed files with 2328 additions and 2334 deletions

View File

@ -53,7 +53,7 @@ def get_all_ies():
if os.path.exists(PLUGINS_DIRNAME):
os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
try:
from yt_dlp.extractor import _ALL_CLASSES
from yt_dlp.extractor.extractors import _ALL_CLASSES
finally:
if os.path.exists(BLOCKED_DIRNAME):
os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME)

View File

@ -38,8 +38,6 @@ from .compat import (
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import _LAZY_LOADER
from .extractor import _PLUGIN_CLASSES as plugin_extractors
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
@ -3659,6 +3657,10 @@ class YoutubeDL:
if not self.params.get('verbose'):
return
# These imports can be slow. So import them only as needed
from .extractor.extractors import _LAZY_LOADER
from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
def get_encoding(stream):
ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
if not supports_terminal_sequences(stream):
@ -3703,14 +3705,12 @@ class YoutubeDL:
if source == 'source':
try:
sp = Popen(
stdout, _, _ = Popen.run(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
out, err = sp.communicate_or_kill()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
write_debug('Git HEAD: %s' % out)
text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if re.fullmatch('[0-9a-f]+', stdout.strip()):
write_debug(f'Git HEAD: {stdout.strip()}')
except Exception:
with contextlib.suppress(Exception):
sys.exc_clear()

View File

@ -12,7 +12,7 @@ import sys
from .compat import compat_getpass, compat_shlex_quote
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader import FileDownloader
from .extractor import GenericIE, list_extractor_classes
from .extractor import list_extractor_classes
from .extractor.adobepass import MSO_INFO
from .extractor.common import InfoExtractor
from .options import parseOpts
@ -79,6 +79,10 @@ def get_urls(urls, batchfile, verbose):
def print_extractor_information(opts, urls):
# Importing GenericIE is currently slow since it imports other extractors
# TODO: Move this back to module level after generalization of embed detection
from .extractor.generic import GenericIE
out = ''
if opts.list_extractors:
urls = dict.fromkeys(urls, False)

View File

@ -33,7 +33,7 @@ def _is_package(module):
def passthrough_module(parent, child, *, callback=lambda _: None):
parent_module = importlib.import_module(parent)
child_module = importlib.import_module(child, parent)
child_module = None # Import child module only as needed
class PassthroughModule(types.ModuleType):
def __getattr__(self, attr):
@ -41,6 +41,9 @@ def passthrough_module(parent, child, *, callback=lambda _: None):
with contextlib.suppress(ImportError):
return importlib.import_module(f'.{attr}', parent)
nonlocal child_module
child_module = child_module or importlib.import_module(child, parent)
ret = _NO_ATTRIBUTE
with contextlib.suppress(AttributeError):
ret = getattr(child_module, attr)

View File

@ -709,21 +709,19 @@ def _get_kwallet_network_wallet(logger):
"""
default_wallet = 'kdewallet'
try:
proc = Popen([
stdout, _, returncode = Popen.run([
'dbus-send', '--session', '--print-reply=literal',
'--dest=org.kde.kwalletd5',
'/modules/kwalletd5',
'org.kde.KWallet.networkWallet'
], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
stdout, stderr = proc.communicate_or_kill()
if proc.returncode != 0:
if returncode:
logger.warning('failed to read NetworkWallet')
return default_wallet
else:
network_wallet = stdout.decode().strip()
logger.debug(f'NetworkWallet = "{network_wallet}"')
return network_wallet
logger.debug(f'NetworkWallet = "{stdout.strip()}"')
return stdout.strip()
except Exception as e:
logger.warning(f'exception while obtaining NetworkWallet: {e}')
return default_wallet
@ -741,17 +739,16 @@ def _get_kwallet_password(browser_keyring_name, logger):
network_wallet = _get_kwallet_network_wallet(logger)
try:
proc = Popen([
stdout, _, returncode = Popen.run([
'kwallet-query',
'--read-password', f'{browser_keyring_name} Safe Storage',
'--folder', f'{browser_keyring_name} Keys',
network_wallet
], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
stdout, stderr = proc.communicate_or_kill()
if proc.returncode != 0:
logger.error(f'kwallet-query failed with return code {proc.returncode}. Please consult '
'the kwallet-query man page for details')
if returncode:
logger.error(f'kwallet-query failed with return code {returncode}. '
'Please consult the kwallet-query man page for details')
return b''
else:
if stdout.lower().startswith(b'failed to read'):
@ -766,9 +763,7 @@ def _get_kwallet_password(browser_keyring_name, logger):
return b''
else:
logger.debug('password found')
if stdout[-1:] == b'\n':
stdout = stdout[:-1]
return stdout
return stdout.rstrip(b'\n')
except Exception as e:
logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
return b''
@ -815,17 +810,13 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
def _get_mac_keyring_password(browser_keyring_name, logger):
logger.debug('using find-generic-password to obtain password from OSX keychain')
try:
proc = Popen(
stdout, _, _ = Popen.run(
['security', 'find-generic-password',
'-w', # write password to stdout
'-a', browser_keyring_name, # match 'account'
'-s', f'{browser_keyring_name} Safe Storage'], # match 'service'
stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
stdout, stderr = proc.communicate_or_kill()
if stdout[-1:] == b'\n':
stdout = stdout[:-1]
return stdout
return stdout.rstrip(b'\n')
except Exception as e:
logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
return None

View File

@ -34,6 +34,7 @@ class Features(enum.Enum):
class ExternalFD(FragmentFD):
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
SUPPORTED_FEATURES = ()
_CAPTURE_STDERR = True
def real_download(self, filename, info_dict):
self.report_destination(filename)
@ -128,24 +129,25 @@ class ExternalFD(FragmentFD):
self._debug_cmd(cmd)
if 'fragments' not in info_dict:
p = Popen(cmd, stderr=subprocess.PIPE)
_, stderr = p.communicate_or_kill()
if p.returncode != 0:
self.to_stderr(stderr.decode('utf-8', 'replace'))
return p.returncode
_, stderr, returncode = Popen.run(
cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
if returncode and stderr:
self.to_stderr(stderr)
return returncode
fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
count = 0
while count <= fragment_retries:
p = Popen(cmd, stderr=subprocess.PIPE)
_, stderr = p.communicate_or_kill()
if p.returncode == 0:
_, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE)
if not returncode:
break
# TODO: Decide whether to retry based on error code
# https://aria2.github.io/manual/en/html/aria2c.html#exit-status
self.to_stderr(stderr.decode('utf-8', 'replace'))
if stderr:
self.to_stderr(stderr)
count += 1
if count <= fragment_retries:
self.to_screen(
@ -180,6 +182,7 @@ class ExternalFD(FragmentFD):
class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V'
_CAPTURE_STDERR = False # curl writes the progress to stderr
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
@ -204,16 +207,6 @@ class CurlFD(ExternalFD):
cmd += ['--', info_dict['url']]
return cmd
def _call_downloader(self, tmpfilename, info_dict):
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
self._debug_cmd(cmd)
# curl writes the progress to stderr so don't capture it.
p = Popen(cmd)
p.communicate_or_kill()
return p.returncode
class AxelFD(ExternalFD):
AVAILABLE_OPT = '-V'
@ -500,24 +493,23 @@ class FFmpegFD(ExternalFD):
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
self._debug_cmd(args)
proc = Popen(args, stdin=subprocess.PIPE, env=env)
if url in ('-', 'pipe:'):
self.on_process_started(proc, proc.stdin)
try:
retval = proc.wait()
except BaseException as e:
# subprocess.run would send the SIGKILL signal to ffmpeg and the
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
# produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
proc.communicate_or_kill(b'q')
else:
proc.kill()
proc.wait()
raise
return retval
with Popen(args, stdin=subprocess.PIPE, env=env) as proc:
if url in ('-', 'pipe:'):
self.on_process_started(proc, proc.stdin)
try:
retval = proc.wait()
except BaseException as e:
# subprocess.run would send the SIGKILL signal to ffmpeg and the
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
# produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
proc.communicate_or_kill(b'q')
else:
proc.kill(timeout=None)
raise
return retval
class AVconvFD(FFmpegFD):

View File

@ -92,8 +92,7 @@ class RtmpFD(FileDownloader):
self.to_screen('')
return proc.wait()
except BaseException: # Including KeyboardInterrupt
proc.kill()
proc.wait()
proc.kill(timeout=None)
raise
url = info_dict['url']

View File

@ -1,32 +1,15 @@
import contextlib
import os
from ..compat.compat_utils import passthrough_module
from ..utils import load_plugins
_LAZY_LOADER = False
if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
with contextlib.suppress(ImportError):
from .lazy_extractors import * # noqa: F403
from .lazy_extractors import _ALL_CLASSES
_LAZY_LOADER = True
if not _LAZY_LOADER:
from .extractors import * # noqa: F403
_ALL_CLASSES = [ # noqa: F811
klass
for name, klass in globals().items()
if name.endswith('IE') and name != 'GenericIE'
]
_ALL_CLASSES.append(GenericIE) # noqa: F405
_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
passthrough_module(__name__, '.extractors')
del passthrough_module
def gen_extractor_classes():
    """Return the list of supported extractor classes.

    The order is significant: the first extractor that matches a URL
    is the one that handles it.
    """
    # Imported inside the function so that the extractor list is only
    # loaded when it is actually requested
    from . import extractors
    return extractors._ALL_CLASSES
@ -39,10 +22,12 @@ def gen_extractors():
def list_extractor_classes(age_limit=None):
"""Return a list of extractors that are suitable for the given age, sorted by extractor name"""
from .generic import GenericIE
yield from sorted(filter(
lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, # noqa: F405
lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,
gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
yield GenericIE # noqa: F405
yield GenericIE
def list_extractors(age_limit=None):
@ -52,4 +37,6 @@ def list_extractors(age_limit=None):
def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name"""
return globals()[ie_name + 'IE']
from . import extractors
return getattr(extractors, f'{ie_name}IE')

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,6 @@ from ..utils import (
ExtractorError,
Popen,
check_executable,
encodeArgument,
get_exe_version,
is_outdated_version,
)
@ -213,16 +212,14 @@ class PhantomJSwrapper:
else:
self.extractor.to_screen(f'{video_id}: {note2}')
p = Popen(
stdout, stderr, returncode = Popen.run(
[self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name],
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = p.communicate_or_kill()
if p.returncode != 0:
raise ExtractorError(
'Executing JS failed\n:' + encodeArgument(err))
text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if returncode:
raise ExtractorError(f'Executing JS failed\n:{stderr}')
with open(self._TMP_FILES['html'].name, 'rb') as f:
html = f.read().decode('utf-8')
self._load_cookies()
return (html, encodeArgument(out))
return (html, stdout)

View File

@ -157,14 +157,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
self._report_run('atomicparsley', filename)
self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd))
p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate_or_kill()
if p.returncode != 0:
msg = stderr.decode('utf-8', 'replace').strip()
self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {msg}')
stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if returncode:
self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {stderr.strip()}')
# for formats that don't support thumbnails (like 3gp) AtomicParsley
# won't create the temporary file
if b'No changes' in stdout:
if 'No changes' in stdout:
self.report_warning('The file format doesn\'t support embedding a thumbnail')
success = False

View File

@ -239,14 +239,12 @@ class FFmpegPostProcessor(PostProcessor):
encodeArgument('-i')]
cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
self.write_debug(f'{self.basename} command line: {shell_quote(cmd)}')
handle = Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout_data, stderr_data = handle.communicate_or_kill()
expected_ret = 0 if self.probe_available else 1
if handle.wait() != expected_ret:
stdout, stderr, returncode = Popen.run(cmd, text=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if returncode != (0 if self.probe_available else 1):
return None
except OSError:
return None
output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore')
output = stdout if self.probe_available else stderr
if self.probe_available:
audio_codec = None
for line in output.split('\n'):
@ -280,11 +278,10 @@ class FFmpegPostProcessor(PostProcessor):
]
cmd += opts
cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
self.write_debug('ffprobe command line: %s' % shell_quote(cmd))
p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, stderr = p.communicate()
return json.loads(stdout.decode('utf-8', 'replace'))
cmd.append(self._ffmpeg_filename_argument(path))
self.write_debug(f'ffprobe command line: {shell_quote(cmd)}')
stdout, _, _ = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
return json.loads(stdout)
def get_stream_number(self, path, keys, value):
streams = self.get_metadata_object(path)['streams']
@ -346,16 +343,13 @@ class FFmpegPostProcessor(PostProcessor):
for i, (path, opts) in enumerate(path_opts) if path)
self.write_debug('ffmpeg command line: %s' % shell_quote(cmd))
p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, stderr = p.communicate_or_kill()
if p.returncode not in variadic(expected_retcodes):
stderr = stderr.decode('utf-8', 'replace').strip()
self.write_debug(stderr)
raise FFmpegPostProcessorError(stderr.split('\n')[-1])
stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
if returncode not in variadic(expected_retcodes):
raise FFmpegPostProcessorError(stderr.strip().splitlines()[-1])
for out_path, _ in output_path_opts:
if out_path:
self.try_utime(out_path, oldest_mtime, oldest_mtime)
return stderr.decode('utf-8', 'replace')
return stderr
def run_ffmpeg(self, path, out_path, opts, **kwargs):
return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs)

View File

@ -84,17 +84,15 @@ class SponSkrubPP(PostProcessor):
cmd = [encodeArgument(i) for i in cmd]
self.write_debug('sponskrub command line: %s' % shell_quote(cmd))
pipe = None if self.get_param('verbose') else subprocess.PIPE
p = Popen(cmd, stdout=pipe)
stdout = p.communicate_or_kill()[0]
stdout, _, returncode = Popen.run(cmd, text=True, stdout=None if self.get_param('verbose') else subprocess.PIPE)
if p.returncode == 0:
if not returncode:
os.replace(temp_filename, filename)
self.to_screen('Sponsor sections have been %s' % ('removed' if self.cutout else 'marked'))
elif p.returncode == 3:
elif returncode == 3:
self.to_screen('No segments in the SponsorBlock database')
else:
msg = stdout.decode('utf-8', 'replace').strip() if stdout else ''
msg = msg.split('\n')[0 if msg.lower().startswith('unrecognised') else -1]
raise PostProcessingError(msg if msg else 'sponskrub failed with error code %s' % p.returncode)
raise PostProcessingError(
stdout.strip().splitlines()[0 if stdout.strip().lower().startswith('unrecognised') else -1]
or f'sponskrub failed with error code {returncode}')
return [], information

View File

@ -841,17 +841,31 @@ class Popen(subprocess.Popen):
else:
_startupinfo = None
def __init__(self, *args, **kwargs):
def __init__(self, *args, text=False, **kwargs):
if text is True:
kwargs['universal_newlines'] = True # For 3.6 compatibility
kwargs.setdefault('encoding', 'utf-8')
kwargs.setdefault('errors', 'replace')
super().__init__(*args, **kwargs, startupinfo=self._startupinfo)
def communicate_or_kill(self, *args, **kwargs):
try:
return self.communicate(*args, **kwargs)
except BaseException: # Including KeyboardInterrupt
self.kill()
self.wait()
self.kill(timeout=None)
raise
def kill(self, *, timeout=0):
    """Kill the process and optionally wait for it to exit.

    @param timeout  0 (the default) returns right after the kill request.
                    Any other value, including None, is forwarded to wait().
    """
    super().kill()
    if timeout == 0:
        return
    self.wait(timeout=timeout)
@classmethod
def run(cls, *args, **kwargs):
    """Run a process to completion and collect its output.

    All arguments are passed to the class constructor unchanged.
    Returns a (stdout, stderr, returncode) tuple; a stream that produced
    nothing (or was not captured) is reported as an empty string.
    """
    with cls(*args, **kwargs) as proc:
        out, err = proc.communicate_or_kill()
        exit_code = proc.returncode
    # NOTE(review): the fallback is always the str '' - even when the pipes
    # were opened in binary mode. Callers that expect bytes should confirm
    # they can cope with this
    return out or '', err or '', exit_code
def get_subprocess_encoding():
if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
@ -2556,7 +2570,7 @@ def check_executable(exe, args=[]):
""" Checks if the given binary is installed somewhere in PATH, and returns its name.
args can be a list of arguments for a short output (like -version) """
try:
Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except OSError:
return False
return exe
@ -2569,14 +2583,11 @@ def _get_exe_version_output(exe, args, *, to_screen=None):
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
# SIGTTOU if yt-dlp is run in the background.
# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
out, _ = Popen(
[encodeArgument(exe)] + args, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
except OSError:
return False
if isinstance(out, bytes): # Python 2.x
out = out.decode('ascii', 'ignore')
return out
return stdout
def detect_exe_version(output, version_re=None, unrecognized='present'):
@ -4796,14 +4807,13 @@ def write_xattr(path, key, value):
value = value.decode()
try:
p = Popen(
_, stderr, returncode = Popen.run(
[exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
except OSError as e:
raise XAttrMetadataError(e.errno, e.strerror)
stderr = p.communicate_or_kill()[1].decode('utf-8', 'replace')
if p.returncode:
raise XAttrMetadataError(p.returncode, stderr)
if returncode:
raise XAttrMetadataError(returncode, stderr)
def random_birthday(year_field, month_field, day_field):
@ -5146,10 +5156,8 @@ def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.pytho
if get_windows_version() < (10, 0, 10586):
return
global WINDOWS_VT_MODE
startupinfo = subprocess.STARTUPINFO()
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
try:
subprocess.Popen('', shell=True, startupinfo=startupinfo).wait()
Popen.run('', shell=True)
except Exception:
return