Compare commits

...

3 Commits

Author SHA1 Message Date
Bricio
2068a60318
[generic] Set rss guid as video id (#2741)
Closes #2424
Authored by: Bricio
2022-02-11 15:32:58 -08:00
Lukas Fink
1ce9a3cb49
Add regex operator and quoting to format filters (#2698)
Closes #2681 
Authored by: lukasfink1
2022-02-11 13:35:34 -08:00
pukkandan
d49f8db39f
[utils] Validate DateRange input
Closes #2641
2022-02-12 02:46:05 +05:30
4 changed files with 40 additions and 13 deletions

View File

@ -1399,7 +1399,7 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
- `asr`: Audio sampling rate in Hertz
- `fps`: Frame rate
Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields:
Filtering also works for the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains), `~=` (matches regex) and the following string meta fields:
- `ext`: File extension
- `acodec`: Name of the audio codec in use
@ -1409,7 +1409,7 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends
- `format_id`: A short description of the format
- `language`: Language code
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`.
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
@ -1552,8 +1552,9 @@ $ yt-dlp -S "proto"
# Download the best video with h264 codec, or the best video if there is no such video
$ yt-dlp -f "(bv*[vcodec^=avc1]+ba) / (bv*+ba/b)"
# Download the best video with either h264 or h265 codec,
# or the best video if there is no such video
$ yt-dlp -f "(bv*[vcodec~='^((he|a)vc|h26[45])']+ba) / (bv*+ba/b)"
# Download the best video with best codec no better than h264,
# or the best video with worst codec if there is no such video

View File

@ -1842,15 +1842,21 @@ class YoutubeDL(object):
'^=': lambda attr, value: attr.startswith(value),
'$=': lambda attr, value: attr.endswith(value),
'*=': lambda attr, value: value in attr,
'~=': lambda attr, value: value.search(attr) is not None
}
str_operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-zA-Z0-9._-]+)\s*
(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?P<value>[a-zA-Z0-9._-]+)\s*
(?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
(?P<quote>["'])?
(?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
(?(quote)(?P=quote))\s*
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
m = str_operator_rex.fullmatch(filter_spec)
if m:
comparison_value = m.group('value')
if m.group('op') == '~=':
comparison_value = re.compile(m.group('value'))
else:
comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
str_op = STR_OPERATORS[m.group('op')]
if m.group('negation'):
op = lambda attr, value: not str_op(attr, value)

View File

@ -213,7 +213,7 @@ class GenericIE(InfoExtractor):
{
'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
'info_dict': {
'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
'id': 'https://phihag.de/2014/youtube-dl/rss2.xml',
'title': 'Zero Punctuation',
'description': 're:.*groundbreaking video review series.*'
},
@ -258,6 +258,9 @@ class GenericIE(InfoExtractor):
'episode_number': 1,
'season_number': 1,
'age_limit': 0,
'season': 'Season 1',
'direct': True,
'episode': 'Episode 1',
},
}],
'params': {
@ -274,6 +277,16 @@ class GenericIE(InfoExtractor):
},
'playlist_mincount': 100,
},
# RSS feed with guid
{
'url': 'https://www.omnycontent.com/d/playlist/a7b4f8fe-59d9-4afc-a79a-a90101378abf/bf2c1d80-3656-4449-9d00-a903004e8f84/efbff746-e7c1-463a-9d80-a903004e8f8f/podcast.rss',
'info_dict': {
'id': 'https://www.omnycontent.com/d/playlist/a7b4f8fe-59d9-4afc-a79a-a90101378abf/bf2c1d80-3656-4449-9d00-a903004e8f84/efbff746-e7c1-463a-9d80-a903004e8f8f/podcast.rss',
'description': 'md5:be809a44b63b0c56fb485caf68685520',
'title': 'The Little Red Podcast',
},
'playlist_mincount': 76,
},
# SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
{
'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
@ -2518,6 +2531,9 @@ class GenericIE(InfoExtractor):
if not next_url:
continue
if it.find('guid').text is not None:
next_url = smuggle_url(next_url, {'force_videoid': it.find('guid').text})
def itunes(key):
return xpath_text(
it, xpath_with_ns('./itunes:%s' % key, NS_MAP),

View File

@ -1832,7 +1832,7 @@ def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
"""
Return a datetime object from a string in the format YYYYMMDD or
(now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
(now|today|yesterday|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
format: string date format used to return datetime object from
precision: round the time portion of a datetime object.
@ -1871,13 +1871,17 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
return datetime_round(datetime.datetime.strptime(date_str, format), precision)
def date_from_str(date_str, format='%Y%m%d', strict=False):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|yesterday|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    If "strict", only YYYYMMDD or (now|today)[+-][0-9](day|week|month|year)(s)?
    is allowed. The [+-] offset part is optional, so a bare "now" or "today"
    is accepted as well.

    format: string date format used to return datetime object from

    Raises ValueError if "strict" is set and date_str is not in an allowed form.
    """
    # The offset group is optional: requiring it made plain "today"/"now"
    # fail strict validation even though they are valid relative dates.
    # NOTE(review): the \d{8} alternative assumes the default '%Y%m%d' layout
    # and does not consult `format` -- confirm no strict caller passes another one.
    if strict and not re.fullmatch(
            r'\d{8}|(now|today)([+-]\d+(day|week|month|year)(s)?)?', date_str):
        raise ValueError(f'Invalid date format {date_str}')
    # .date() keeps only the calendar date of the parsed value.
    return datetime_from_str(date_str, precision='microsecond', format=format).date()
@ -1924,11 +1928,11 @@ class DateRange(object):
def __init__(self, start=None, end=None):
"""start and end must be strings in the format accepted by date"""
if start is not None:
self.start = date_from_str(start)
self.start = date_from_str(start, strict=True)
else:
self.start = datetime.datetime.min.date()
if end is not None:
self.end = date_from_str(end)
self.end = date_from_str(end, strict=True)
else:
self.end = datetime.datetime.max.date()
if self.start > self.end: