Expand and escape environment variables correctly in outtmpl

Fixes: https://www.reddit.com/r/youtubedl/comments/otfmq3/ytdlp_same_parameters_different_results
This commit is contained in:
pukkandan 2021-07-29 05:19:26 +05:30
parent c0bc527bca
commit 901130bbcf
5 changed files with 60 additions and 37 deletions

View File

@ -13,7 +13,7 @@ import copy
from test.helper import FakeYDL, assertRegexpMatches from test.helper import FakeYDL, assertRegexpMatches
from yt_dlp import YoutubeDL from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_str, compat_urllib_error from yt_dlp.compat import compat_os_name, compat_setenv, compat_str, compat_urllib_error
from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor import YoutubeIE
from yt_dlp.extractor.common import InfoExtractor from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.postprocessor.common import PostProcessor from yt_dlp.postprocessor.common import PostProcessor
@ -663,7 +663,7 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(ydl.validate_outtmpl(tmpl), None) self.assertEqual(ydl.validate_outtmpl(tmpl), None)
outtmpl, tmpl_dict = ydl.prepare_outtmpl(tmpl, info or self.outtmpl_info) outtmpl, tmpl_dict = ydl.prepare_outtmpl(tmpl, info or self.outtmpl_info)
out = outtmpl % tmpl_dict out = ydl.escape_outtmpl(outtmpl) % tmpl_dict
fname = ydl.prepare_filename(info or self.outtmpl_info) fname = ydl.prepare_filename(info or self.outtmpl_info)
if callable(expected): if callable(expected):
@ -685,9 +685,15 @@ class TestYoutubeDL(unittest.TestCase):
test('%(autonumber)s', '001', autonumber_size=3) test('%(autonumber)s', '001', autonumber_size=3)
# Escaping % # Escaping %
test('%', '%')
test('%%', '%') test('%%', '%')
test('%%%%', '%%') test('%%%%', '%%')
test('%s', '%s')
test('%%%s', '%%s')
test('%d', '%d')
test('%abc%', '%abc%')
test('%%(width)06d.%(ext)s', '%(width)06d.mp4') test('%%(width)06d.%(ext)s', '%(width)06d.mp4')
test('%%%(height)s', '%1080')
test('%(width)06d.%(ext)s', 'NA.mp4') test('%(width)06d.%(ext)s', 'NA.mp4')
test('%(width)06d.%%(ext)s', 'NA.%(ext)s') test('%(width)06d.%%(ext)s', 'NA.%(ext)s')
test('%%(width)06d.%(ext)s', '%(width)06d.mp4') test('%%(width)06d.%(ext)s', '%(width)06d.mp4')
@ -702,12 +708,9 @@ class TestYoutubeDL(unittest.TestCase):
test('%(id)s', ('ab:cd', 'ab -cd'), info={'id': 'ab:cd'}) test('%(id)s', ('ab:cd', 'ab -cd'), info={'id': 'ab:cd'})
# Invalid templates # Invalid templates
self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%'), ValueError))
self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError)) self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError))
test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none') test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none')
test('%()s', 'NA') test('%()s', 'NA')
test('%s', '%s')
test('%d', '%d')
# NA placeholder # NA placeholder
NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s' NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s'
@ -741,6 +744,7 @@ class TestYoutubeDL(unittest.TestCase):
# Internal formatting # Internal formatting
FORMATS = self.outtmpl_info['formats'] FORMATS = self.outtmpl_info['formats']
test('%(timestamp-1000>%H-%M-%S)s', '11-43-20') test('%(timestamp-1000>%H-%M-%S)s', '11-43-20')
test('%(title|%)s %(title|%%)s', '% %%')
test('%(id+1-height+3)05d', '00158') test('%(id+1-height+3)05d', '00158')
test('%(width+100)05d', 'NA') test('%(width+100)05d', 'NA')
test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % str(FORMATS[0]).replace(':', ' -'))) test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % str(FORMATS[0]).replace(':', ' -')))
@ -759,6 +763,11 @@ class TestYoutubeDL(unittest.TestCase):
# test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme
# test('%(foo|)s', ('', '_')) # fixme # test('%(foo|)s', ('', '_')) # fixme
# Environment variable expansion for prepare_filename
compat_setenv('__yt_dlp_var', 'expanded')
envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var'
test(envvar, (envvar, 'expanded'))
# Path expansion and escaping # Path expansion and escaping
test('Hello %(title1)s', 'Hello $PATH') test('Hello %(title1)s', 'Hello $PATH')
test('Hello %(title2)s', 'Hello %PATH%') test('Hello %(title2)s', 'Hello %PATH%')

View File

@ -65,7 +65,8 @@ from .utils import (
float_or_none, float_or_none,
format_bytes, format_bytes,
format_field, format_field,
STR_FORMAT_RE, STR_FORMAT_RE_TMPL,
STR_FORMAT_TYPES,
formatSeconds, formatSeconds,
GeoRestrictedError, GeoRestrictedError,
HEADRequest, HEADRequest,
@ -845,20 +846,40 @@ class YoutubeDL(object):
return sanitize_path(path, force=self.params.get('windowsfilenames')) return sanitize_path(path, force=self.params.get('windowsfilenames'))
@staticmethod @staticmethod
def validate_outtmpl(tmpl): def _outtmpl_expandpath(outtmpl):
# expand_path translates '%%' into '%' and '$$' into '$'
# correspondingly that is not what we want since we need to keep
# '%%' intact for template dict substitution step. Working around
# with boundary-alike separator hack.
sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
# outtmpl should be expand_path'ed before template dict substitution
# because meta fields may contain env variables we don't want to
# be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
# title "Hello $PATH", we don't want `$PATH` to be expanded.
return expand_path(outtmpl).replace(sep, '')
@staticmethod
def escape_outtmpl(outtmpl):
''' Escape any remaining strings like %s, %abc% etc. '''
return re.sub(
STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
outtmpl)
@classmethod
def validate_outtmpl(cls, outtmpl):
''' @return None or Exception object ''' ''' @return None or Exception object '''
outtmpl = cls.escape_outtmpl(cls._outtmpl_expandpath(outtmpl))
try: try:
re.sub( outtmpl % collections.defaultdict(int)
STR_FORMAT_RE.format(''),
lambda mobj: ('%' if not mobj.group('has_key') else '') + mobj.group(0),
tmpl
) % collections.defaultdict(int)
return None return None
except ValueError as err: except ValueError as err:
return err return err
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
""" Make the template and info_dict suitable for substitution (outtmpl % info_dict)""" """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
info_dict = dict(info_dict) info_dict = dict(info_dict)
na = self.params.get('outtmpl_na_placeholder', 'NA') na = self.params.get('outtmpl_na_placeholder', 'NA')
@ -879,7 +900,7 @@ class YoutubeDL(object):
} }
TMPL_DICT = {} TMPL_DICT = {}
EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*')) EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}]'))
MATH_FUNCTIONS = { MATH_FUNCTIONS = {
'+': float.__add__, '+': float.__add__,
'-': float.__sub__, '-': float.__sub__,
@ -938,10 +959,11 @@ class YoutubeDL(object):
def create_key(outer_mobj): def create_key(outer_mobj):
if not outer_mobj.group('has_key'): if not outer_mobj.group('has_key'):
return '%{}'.format(outer_mobj.group(0)) return f'%{outer_mobj.group(0)}'
prefix = outer_mobj.group('prefix')
key = outer_mobj.group('key') key = outer_mobj.group('key')
fmt = outer_mobj.group('format') original_fmt = fmt = outer_mobj.group('format')
mobj = re.match(INTERNAL_FORMAT_RE, key) mobj = re.match(INTERNAL_FORMAT_RE, key)
if mobj is None: if mobj is None:
value, default, mobj = None, na, {'fields': ''} value, default, mobj = None, na, {'fields': ''}
@ -965,6 +987,7 @@ class YoutubeDL(object):
value = float_or_none(value) value = float_or_none(value)
if value is None: if value is None:
value, fmt = default, 's' value, fmt = default, 's'
if sanitize: if sanitize:
if fmt[-1] == 'r': if fmt[-1] == 'r':
# If value is an object, sanitize might convert it to a string # If value is an object, sanitize might convert it to a string
@ -972,9 +995,10 @@ class YoutubeDL(object):
value, fmt = repr(value), '%ss' % fmt[:-1] value, fmt = repr(value), '%ss' % fmt[:-1]
if fmt[-1] in 'csr': if fmt[-1] in 'csr':
value = sanitize(mobj['fields'].split('.')[-1], value) value = sanitize(mobj['fields'].split('.')[-1], value)
key += '\0%s' % fmt
key = '%s\0%s' % (key.replace('%', '%\0'), original_fmt)
TMPL_DICT[key] = value TMPL_DICT[key] = value
return '%({key}){fmt}'.format(key=key, fmt=fmt) return f'{prefix}%({key}){fmt}'
return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
@ -986,19 +1010,8 @@ class YoutubeDL(object):
is_id=(k == 'id' or k.endswith('_id'))) is_id=(k == 'id' or k.endswith('_id')))
outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']) outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize) outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
# expand_path translates '%%' into '%' and '$$' into '$' filename = outtmpl % template_dict
# correspondingly that is not what we want since we need to keep
# '%%' intact for template dict substitution step. Working around
# with boundary-alike separator hack.
sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
# outtmpl should be expand_path'ed before template dict substitution
# because meta fields may contain env variables we don't want to
# be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
# title "Hello $PATH", we don't want `$PATH` to be expanded.
filename = expand_path(outtmpl).replace(sep, '') % template_dict
force_ext = OUTTMPL_TYPES.get(tmpl_type) force_ext = OUTTMPL_TYPES.get(tmpl_type)
if force_ext is not None: if force_ext is not None:
@ -2344,7 +2357,7 @@ class YoutubeDL(object):
if re.match(r'\w+$', tmpl): if re.match(r'\w+$', tmpl):
tmpl = '%({})s'.format(tmpl) tmpl = '%({})s'.format(tmpl)
tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict) tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
self.to_stdout(tmpl % info_copy) self.to_stdout(self.escape_outtmpl(tmpl) % info_copy)
print_mandatory('title') print_mandatory('title')
print_mandatory('id') print_mandatory('id')

View File

@ -23,7 +23,7 @@ class ExecAfterDownloadPP(PostProcessor):
def parse_cmd(self, cmd, info): def parse_cmd(self, cmd, info):
tmpl, tmpl_dict = self._downloader.prepare_outtmpl(cmd, info) tmpl, tmpl_dict = self._downloader.prepare_outtmpl(cmd, info)
if tmpl_dict: # if there are no replacements, tmpl_dict = {} if tmpl_dict: # if there are no replacements, tmpl_dict = {}
return tmpl % tmpl_dict return self._downloader.escape_outtmpl(tmpl) % tmpl_dict
# If no replacements are found, replace {} for backard compatibility # If no replacements are found, replace {} for backard compatibility
if '{}' not in cmd: if '{}' not in cmd:

View File

@ -55,7 +55,7 @@ class MetadataFromFieldPP(PostProcessor):
def run(self, info): def run(self, info):
for dictn in self._data: for dictn in self._data:
tmpl, tmpl_dict = self._downloader.prepare_outtmpl(dictn['tmpl'], info) tmpl, tmpl_dict = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
data_to_parse = tmpl % tmpl_dict data_to_parse = self._downloader.escape_outtmpl(tmpl) % tmpl_dict
self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], dictn['tmpl'])) self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], dictn['tmpl']))
match = re.search(dictn['regex'], data_to_parse) match = re.search(dictn['regex'], data_to_parse)
if match is None: if match is None:

View File

@ -4438,8 +4438,8 @@ OUTTMPL_TYPES = {
# As of [1] format syntax is: # As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
STR_FORMAT_RE = r'''(?x) STR_FORMAT_RE_TMPL = r'''(?x)
(?<!%) (?<!%)(?P<prefix>(?:%%)*)
% %
(?P<has_key>\((?P<key>{0})\))? # mapping key (?P<has_key>\((?P<key>{0})\))? # mapping key
(?P<format> (?P<format>
@ -4447,10 +4447,11 @@ STR_FORMAT_RE = r'''(?x)
(?:\d+)? # minimum field width (optional) (?:\d+)? # minimum field width (optional)
(?:\.\d+)? # precision (optional) (?:\.\d+)? # precision (optional)
[hlL]? # length modifier (optional) [hlL]? # length modifier (optional)
[diouxXeEfFgGcrs] # conversion type {1} # conversion type
) )
''' '''
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
def limit_length(s, length): def limit_length(s, length):
""" Add ellipses to overly long strings """ """ Add ellipses to overly long strings """