Compare commits

..

5 Commits

Author SHA1 Message Date
coletdjnz
c7dcf0b31e
[extractor/youtube] Add androidSdkVersion parameter to Android Innertube clients
Required to prevent YouTube returning a bad player response in some cases.

See: https://github.com/yt-dlp/yt-dlp/pull/4593, https://github.com/TeamNewPipe/NewPipe/issues/8713, https://github.com/iv-org/invidious/issues/3230, https://github.com/Tyrrrz/YoutubeExplode/issues/647

Authored by: coletdjnz
2022-08-08 12:03:10 +12:00
Djeson
298d9c0e89
[extractor/ninegag] Extract uploader (#4597)
Closes #4587
Authored by: DjesonPV
2022-08-08 01:51:53 +05:30
pukkandan
a416623436
[extractor/youtube] Extract more format info
2022-08-08 01:47:07 +05:30
pukkandan
b8ed0f15d4
[extractor] Add field audio_channels
2022-08-08 01:35:36 +05:30
pukkandan
22b22b7d5c
[extractor/WASDTV:record] Fix _VALID_URL
2022-08-07 21:48:40 +05:30
6 changed files with 65 additions and 22 deletions

View File

@ -1276,6 +1276,7 @@ The available fields are:
- `vbr` (numeric): Average video bitrate in KBit/s - `vbr` (numeric): Average video bitrate in KBit/s
- `fps` (numeric): Frame rate - `fps` (numeric): Frame rate
- `dynamic_range` (string): The dynamic range of the video - `dynamic_range` (string): The dynamic range of the video
- `audio_channels` (numeric): The number of audio channels
- `stretched_ratio` (float): `width:height` of the video's pixels, if not square - `stretched_ratio` (float): `width:height` of the video's pixels, if not square
- `vcodec` (string): Name of the video codec in use - `vcodec` (string): Name of the video codec in use
- `container` (string): Name of the container format - `container` (string): Name of the container format
@ -1529,6 +1530,7 @@ The available fields are:
- `res`: Video resolution, calculated as the smallest dimension. - `res`: Video resolution, calculated as the smallest dimension.
- `fps`: Framerate of video - `fps`: Framerate of video
- `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`) - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`)
- `channels`: The number of audio channels
- `tbr`: Total average bitrate in KBit/s - `tbr`: Total average bitrate in KBit/s
- `vbr`: Average video bitrate in KBit/s - `vbr`: Average video bitrate in KBit/s
- `abr`: Average audio bitrate in KBit/s - `abr`: Average audio bitrate in KBit/s

View File

@ -527,7 +527,8 @@ class YoutubeDL:
""" """
_NUMERIC_FIELDS = { _NUMERIC_FIELDS = {
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', 'width', 'height', 'asr', 'audio_channels', 'fps',
'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
'timestamp', 'release_timestamp', 'timestamp', 'release_timestamp',
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
'average_rating', 'comment_count', 'age_limit', 'average_rating', 'comment_count', 'age_limit',
@ -539,7 +540,7 @@ class YoutubeDL:
_format_fields = { _format_fields = {
# NB: Keep in sync with the docstring of extractor/common.py # NB: Keep in sync with the docstring of extractor/common.py
'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note', 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
'preference', 'language', 'language_preference', 'quality', 'source_preference', 'preference', 'language', 'language_preference', 'quality', 'source_preference',
@ -2129,6 +2130,7 @@ class YoutubeDL:
'acodec': the_only_audio.get('acodec'), 'acodec': the_only_audio.get('acodec'),
'abr': the_only_audio.get('abr'), 'abr': the_only_audio.get('abr'),
'asr': the_only_audio.get('asr'), 'asr': the_only_audio.get('asr'),
'audio_channels': the_only_audio.get('audio_channels')
}) })
return new_dict return new_dict
@ -3569,6 +3571,7 @@ class YoutubeDL:
format_field(f, func=self.format_resolution, ignore=('audio only', 'images')), format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
format_field(f, 'fps', '\t%d', func=round), format_field(f, 'fps', '\t%d', func=round),
format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
format_field(f, 'audio_channels', '\t%s'),
delim, delim,
format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes), format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
format_field(f, 'tbr', '\t%dk', func=round), format_field(f, 'tbr', '\t%dk', func=round),
@ -3588,7 +3591,7 @@ class YoutubeDL:
delim=' '), delim=' '),
] for f in formats if f.get('preference') is None or f['preference'] >= -1000] ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
header_line = self._list_format_headers( header_line = self._list_format_headers(
'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO', 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO') delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
return render_table( return render_table(

View File

@ -154,6 +154,7 @@ class InfoExtractor:
* abr Average audio bitrate in KBit/s * abr Average audio bitrate in KBit/s
* acodec Name of the audio codec in use * acodec Name of the audio codec in use
* asr Audio sampling rate in Hertz * asr Audio sampling rate in Hertz
* audio_channels Number of audio channels
* vbr Average video bitrate in KBit/s * vbr Average video bitrate in KBit/s
* fps Frame rate * fps Frame rate
* vcodec Name of the video codec in use * vcodec Name of the video codec in use
@ -1668,7 +1669,7 @@ class InfoExtractor:
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$' regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality', default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
'res', 'fps', 'hdr:12', 'codec:vp9.2', 'size', 'br', 'asr', 'res', 'fps', 'hdr:12', 'channels', 'codec:vp9.2', 'size', 'br', 'asr',
'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr', ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
'height', 'width', 'proto', 'vext', 'abr', 'aext', 'height', 'width', 'proto', 'vext', 'abr', 'aext',
@ -1704,6 +1705,7 @@ class InfoExtractor:
'height': {'convert': 'float_none'}, 'height': {'convert': 'float_none'},
'width': {'convert': 'float_none'}, 'width': {'convert': 'float_none'},
'fps': {'convert': 'float_none'}, 'fps': {'convert': 'float_none'},
'channels': {'convert': 'float_none', 'field': 'audio_channels'},
'tbr': {'convert': 'float_none'}, 'tbr': {'convert': 'float_none'},
'vbr': {'convert': 'float_none'}, 'vbr': {'convert': 'float_none'},
'abr': {'convert': 'float_none'}, 'abr': {'convert': 'float_none'},
@ -1717,13 +1719,14 @@ class InfoExtractor:
'res': {'type': 'multiple', 'field': ('height', 'width'), 'res': {'type': 'multiple', 'field': ('height', 'width'),
'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))}, 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
# For compatibility with youtube-dl # Actual field names
'format_id': {'type': 'alias', 'field': 'id'}, 'format_id': {'type': 'alias', 'field': 'id'},
'preference': {'type': 'alias', 'field': 'ie_pref'}, 'preference': {'type': 'alias', 'field': 'ie_pref'},
'language_preference': {'type': 'alias', 'field': 'lang'}, 'language_preference': {'type': 'alias', 'field': 'lang'},
'source_preference': {'type': 'alias', 'field': 'source'}, 'source_preference': {'type': 'alias', 'field': 'source'},
'protocol': {'type': 'alias', 'field': 'proto'}, 'protocol': {'type': 'alias', 'field': 'proto'},
'filesize_approx': {'type': 'alias', 'field': 'fs_approx'}, 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
'audio_channels': {'type': 'alias', 'field': 'channels'},
# Deprecated # Deprecated
'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True}, 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},

View File

@ -3,7 +3,7 @@ from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
int_or_none, int_or_none,
try_get, traverse_obj,
unescapeHTML, unescapeHTML,
url_or_none, url_or_none,
) )
@ -11,18 +11,20 @@ from ..utils import (
class NineGagIE(InfoExtractor): class NineGagIE(InfoExtractor):
IE_NAME = '9gag' IE_NAME = '9gag'
IE_DESC = '9GAG'
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)' _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://9gag.com/gag/ae5Ag7B', 'url': 'https://9gag.com/gag/ae5Ag7B',
'info_dict': { 'info_dict': {
'id': 'ae5Ag7B', 'id': 'ae5Ag7B',
'ext': 'mp4', 'ext': 'webm',
'title': 'Capybara Agility Training', 'title': 'Capybara Agility Training',
'upload_date': '20191108', 'upload_date': '20191108',
'timestamp': 1573237208, 'timestamp': 1573237208,
'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ae5Ag7B_460s.jpg',
'categories': ['Awesome'], 'categories': ['Awesome'],
'tags': ['Weimaraner', 'American Pit Bull Terrier'], 'tags': ['Awesome'],
'duration': 44, 'duration': 44,
'like_count': int, 'like_count': int,
'dislike_count': int, 'dislike_count': int,
@ -32,6 +34,26 @@ class NineGagIE(InfoExtractor):
# HTML escaped title # HTML escaped title
'url': 'https://9gag.com/gag/av5nvyb', 'url': 'https://9gag.com/gag/av5nvyb',
'only_matching': True, 'only_matching': True,
}, {
# Non Anonymous Uploader
'url': 'https://9gag.com/gag/ajgp66G',
'info_dict': {
'id': 'ajgp66G',
'ext': 'webm',
'title': 'Master Shifu! Or Splinter! You decide:',
'upload_date': '20220806',
'timestamp': 1659803411,
'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ajgp66G_460s.jpg',
'categories': ['Funny'],
'tags': ['Funny'],
'duration': 26,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'uploader': 'Peter Klaus',
'uploader_id': 'peterklaus12',
'uploader_url': 'https://9gag.com/u/peterklaus12',
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -46,8 +68,6 @@ class NineGagIE(InfoExtractor):
'The given url does not contain a video', 'The given url does not contain a video',
expected=True) expected=True)
title = unescapeHTML(post['title'])
duration = None duration = None
formats = [] formats = []
thumbnails = [] thumbnails = []
@ -98,7 +118,7 @@ class NineGagIE(InfoExtractor):
formats.append(common) formats.append(common)
self._sort_formats(formats) self._sort_formats(formats)
section = try_get(post, lambda x: x['postSection']['name']) section = traverse_obj(post, ('postSection', 'name'))
tags = None tags = None
post_tags = post.get('tags') post_tags = post.get('tags')
@ -110,18 +130,19 @@ class NineGagIE(InfoExtractor):
continue continue
tags.append(tag_key) tags.append(tag_key)
get_count = lambda x: int_or_none(post.get(x + 'Count'))
return { return {
'id': post_id, 'id': post_id,
'title': title, 'title': unescapeHTML(post.get('title')),
'timestamp': int_or_none(post.get('creationTs')), 'timestamp': int_or_none(post.get('creationTs')),
'duration': duration, 'duration': duration,
'uploader': traverse_obj(post, ('creator', 'fullName')),
'uploader_id': traverse_obj(post, ('creator', 'username')),
'uploader_url': url_or_none(traverse_obj(post, ('creator', 'profileUrl'))),
'formats': formats, 'formats': formats,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'like_count': get_count('upVote'), 'like_count': int_or_none(post.get('upVoteCount')),
'dislike_count': get_count('downVote'), 'dislike_count': int_or_none(post.get('downVoteCount')),
'comment_count': get_count('comments'), 'comment_count': int_or_none(post.get('commentsCount')),
'age_limit': 18 if post.get('nsfw') == 1 else None, 'age_limit': 18 if post.get('nsfw') == 1 else None,
'categories': [section] if section else None, 'categories': [section] if section else None,
'tags': tags, 'tags': tags,

View File

@ -95,7 +95,7 @@ class WASDTVStreamIE(WASDTVBaseIE):
class WASDTVRecordIE(WASDTVBaseIE): class WASDTVRecordIE(WASDTVBaseIE):
IE_NAME = 'wasdtv:record' IE_NAME = 'wasdtv:record'
_VALID_URL = r'https?://wasd\.tv/[^/#?]+/videos\?record=(?P<id>\d+)$' _VALID_URL = r'https?://wasd\.tv/[^/#?]+(?:/videos)?\?record=(?P<id>\d+)$'
_TESTS = [{ _TESTS = [{
'url': 'https://wasd.tv/spacemita/videos?record=907755', 'url': 'https://wasd.tv/spacemita/videos?record=907755',
'md5': 'c9899dd85be4cc997816ff9f9ca516ce', 'md5': 'c9899dd85be4cc997816ff9f9ca516ce',
@ -110,6 +110,9 @@ class WASDTVRecordIE(WASDTVBaseIE):
'is_live': False, 'is_live': False,
'view_count': int, 'view_count': int,
}, },
}, {
'url': 'https://wasd.tv/spacemita?record=907755',
'only_matching': True,
}] }]
def _get_container(self, url): def _get_container(self, url):

View File

@ -109,7 +109,8 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'ANDROID', 'clientName': 'ANDROID',
'clientVersion': '17.28.34', 'clientVersion': '17.29.34',
'androidSdkVersion': 30
} }
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 3, 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
@ -120,7 +121,8 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': { 'INNERTUBE_CONTEXT': {
'client': { 'client': {
'clientName': 'ANDROID_EMBEDDED_PLAYER', 'clientName': 'ANDROID_EMBEDDED_PLAYER',
'clientVersion': '17.28.34', 'clientVersion': '17.29.34',
'androidSdkVersion': 30
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 55, 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
@ -132,6 +134,7 @@ INNERTUBE_CLIENTS = {
'client': { 'client': {
'clientName': 'ANDROID_MUSIC', 'clientName': 'ANDROID_MUSIC',
'clientVersion': '5.16.51', 'clientVersion': '5.16.51',
'androidSdkVersion': 30
} }
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 21, 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
@ -143,6 +146,7 @@ INNERTUBE_CLIENTS = {
'client': { 'client': {
'clientName': 'ANDROID_CREATOR', 'clientName': 'ANDROID_CREATOR',
'clientVersion': '22.28.100', 'clientVersion': '22.28.100',
'androidSdkVersion': 30
}, },
}, },
'INNERTUBE_CONTEXT_CLIENT_NAME': 14, 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
@ -2254,6 +2258,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'tags': [], 'tags': [],
'uploader_url': 'http://www.youtube.com/user/nao20010128nao', 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
} }
}, {
'note': '6 channel audio',
'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
'only_matching': True,
} }
] ]
@ -3138,7 +3146,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId')) pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
if pr_video_id and pr_video_id != video_id: if pr_video_id and pr_video_id != video_id:
self.report_warning( self.report_warning(
f'{client} client returned a player response for "{pr_video_id}" instead of "{video_id}"' + bug_reports_message()) f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
else: else:
prs.append(pr) prs.append(pr)
@ -3253,10 +3261,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'%s%s' % (audio_track.get('displayName') or '', '%s%s' % (audio_track.get('displayName') or '',
' (default)' if language_preference > 0 else ''), ' (default)' if language_preference > 0 else ''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '), throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372 # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
'source_preference': -10 if throttled else -5 if itag == '22' else -1, 'source_preference': -10 if throttled else -5 if itag == '22' else -1,
'fps': int_or_none(fmt.get('fps')) or None, 'fps': int_or_none(fmt.get('fps')) or None,
'audio_channels': fmt.get('audioChannels'),
'height': height, 'height': height,
'quality': q(quality), 'quality': q(quality),
'has_drm': bool(fmt.get('drmFamilies')), 'has_drm': bool(fmt.get('drmFamilies')),
@ -3577,7 +3588,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
formats.extend(self._extract_storyboard(player_responses, duration)) formats.extend(self._extract_storyboard(player_responses, duration))
# source_preference is lower for throttled/potentially damaged formats # source_preference is lower for throttled/potentially damaged formats
self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto')) self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'channels', 'source', 'codec:vp9.2', 'lang', 'proto'))
info = { info = {
'id': video_id, 'id': video_id,