[ie/nfl] Fix extractors (#11409)

Authored by: bashonly
This commit is contained in:
bashonly 2024-11-03 23:53:26 +00:00 committed by GitHub
parent d135823137
commit 838f4385de
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 88 additions and 110 deletions

View File

@ -33,24 +33,6 @@ class AnvatoIE(InfoExtractor):
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
_TESTS = [{ _TESTS = [{
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
'md5': '921919dab3cd0b849ff3d624831ae3e2',
'info_dict': {
'id': '899441',
'ext': 'mp4',
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
'description': 'md5:85e05a3cc163f8c344340f220521136d',
'upload_date': '20201215',
'timestamp': 1608009755,
'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'NFL',
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
'Player Highlights', 'Cleveland Browns', 'league'],
'duration': 157,
'categories': ['Entertainment', 'Game', 'Highlights'],
},
}, {
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/ # from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455', 'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
'md5': '837718bcfb3a7778d022f857f7a9b19e', 'md5': '837718bcfb3a7778d022f857f7a9b19e',
@ -241,31 +223,6 @@ class AnvatoIE(InfoExtractor):
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582', 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
} }
def _generate_nfl_token(self, anvack, mcp_id):
reroute = self._download_json(
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
headers={'X-Domain-Id': 100}, note='Fetching token info')
token_type = reroute.get('token_type') or 'Bearer'
auth_token = f'{token_type} {reroute["access_token"]}'
response = self._download_json(
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
'query': '''{
viewer {
mediaToken(anvack: "%s", id: %s) {
token
}
}
}''' % (anvack, mcp_id), # noqa: UP031
}).encode(), headers={
'Authorization': auth_token,
'Content-Type': 'application/json',
}, note='Fetching NFL API token')
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
_TOKEN_GENERATORS = {
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
}
def _server_time(self, access_key, video_id): def _server_time(self, access_key, video_id):
return int_or_none(traverse_obj(self._download_json( return int_or_none(traverse_obj(self._download_json(
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key}, f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
@ -290,8 +247,6 @@ class AnvatoIE(InfoExtractor):
} }
if extracted_token is not None: if extracted_token is not None:
api['anvstk2'] = extracted_token api['anvstk2'] = extracted_token
elif self._TOKEN_GENERATORS.get(access_key) is not None:
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
elif self._ANVACK_TABLE.get(access_key) is not None: elif self._ANVACK_TABLE.get(access_key) is not None:
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}') api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
else: else:

View File

@ -11,9 +11,12 @@ from ..utils import (
clean_html, clean_html,
determine_ext, determine_ext,
get_element_by_class, get_element_by_class,
traverse_obj, int_or_none,
make_archive_id,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
from ..utils.traversal import traverse_obj
class NFLBaseIE(InfoExtractor): class NFLBaseIE(InfoExtractor):
@ -75,22 +78,15 @@ class NFLBaseIE(InfoExtractor):
'osVersion': '10.0', 'osVersion': '10.0',
}, separators=(',', ':')).encode()).decode(), }, separators=(',', ':')).encode()).decode(),
'networkType': 'other', 'networkType': 'other',
'nflClaimGroupsToAdd': [], 'peacockUUID': 'undefined',
'nflClaimGroupsToRemove': [],
} }
_ACCOUNT_INFO = {} _ACCOUNT_INFO = {}
_API_KEY = None _API_KEY = '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
_TOKEN = None _TOKEN = None
_TOKEN_EXPIRY = 0 _TOKEN_EXPIRY = 0
def _get_account_info(self, url, slug): def _get_account_info(self):
if not self._API_KEY:
webpage = self._download_webpage(url, slug, fatal=False) or ''
self._API_KEY = self._search_regex(
r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key',
fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
cookies = self._get_cookies('https://auth-id.nfl.com/') cookies = self._get_cookies('https://auth-id.nfl.com/')
login_token = traverse_obj(cookies, ( login_token = traverse_obj(cookies, (
(f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False) (f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
@ -103,7 +99,7 @@ class NFLBaseIE(InfoExtractor):
'or else try using --cookies-from-browser instead', expected=True) 'or else try using --cookies-from-browser instead', expected=True)
account = self._download_json( account = self._download_json(
'https://auth-id.nfl.com/accounts.getAccountInfo', slug, 'https://auth-id.nfl.com/accounts.getAccountInfo', None,
note='Downloading account info', data=urlencode_postdata({ note='Downloading account info', data=urlencode_postdata({
'include': 'profile,data', 'include': 'profile,data',
'lang': 'en', 'lang': 'en',
@ -111,7 +107,7 @@ class NFLBaseIE(InfoExtractor):
'sdk': 'js_latest', 'sdk': 'js_latest',
'login_token': login_token, 'login_token': login_token,
'authMode': 'cookie', 'authMode': 'cookie',
'pageURL': url, 'pageURL': 'https://www.nfl.com/',
'sdkBuild': traverse_obj(cookies, ( 'sdkBuild': traverse_obj(cookies, (
'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'), 'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
'format': 'json', 'format': 'json',
@ -126,53 +122,76 @@ class NFLBaseIE(InfoExtractor):
if len(self._ACCOUNT_INFO) != 3: if len(self._ACCOUNT_INFO) != 3:
raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True) raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
def _get_auth_token(self, url, slug): def _get_auth_token(self):
if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30): if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
return return
if not self._ACCOUNT_INFO:
self._get_account_info(url, slug)
token = self._download_json( token = self._download_json(
'https://api.nfl.com/identity/v3/token%s' % ( 'https://api.nfl.com/identity/v3/token%s' % (
'/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''), '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
slug, headers={'Content-Type': 'application/json'}, note='Downloading access token', None, headers={'Content-Type': 'application/json'}, note='Downloading access token',
data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode()) data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
self._TOKEN = token['accessToken'] self._TOKEN = token['accessToken']
self._TOKEN_EXPIRY = token['expiresIn'] self._TOKEN_EXPIRY = token['expiresIn']
self._ACCOUNT_INFO['refreshToken'] = token['refreshToken'] self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
def _extract_video(self, mcp_id, is_live=False):
self._get_auth_token()
data = self._download_json(
f'https://api.nfl.com/play/v1/asset/{mcp_id}', mcp_id, headers={
'Authorization': f'Bearer {self._TOKEN}',
'Accept': 'application/json',
'Content-Type': 'application/json',
}, data=json.dumps({'init': True, 'live': is_live}, separators=(',', ':')).encode())
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
data['accessUrl'], mcp_id, 'mp4', m3u8_id='hls')
return {
'id': mcp_id,
'formats': formats,
'subtitles': subtitles,
'is_live': is_live,
'_old_archive_ids': [make_archive_id(AnvatoIE, mcp_id)],
**traverse_obj(data, ('metadata', {
'title': ('event', ('def_title', 'friendlyName'), {str}, any),
'description': ('event', 'def_description', {str}),
'duration': ('event', 'duration', {int_or_none}),
'thumbnails': ('thumbnails', ..., 'url', {'url': {url_or_none}}),
})),
}
def _parse_video_config(self, video_config, display_id): def _parse_video_config(self, video_config, display_id):
video_config = self._parse_json(video_config, display_id) video_config = self._parse_json(video_config, display_id)
is_live = traverse_obj(video_config, ('live', {bool})) or False
item = video_config['playlist'][0] item = video_config['playlist'][0]
mcp_id = item.get('mcpID') if mcp_id := item.get('mcpID'):
if mcp_id: return self._extract_video(mcp_id, is_live=is_live)
info = self.url_result(f'{self._ANVATO_PREFIX}{mcp_id}', AnvatoIE, mcp_id)
else: info = {'id': item.get('id') or item['entityId']}
media_id = item.get('id') or item['entityId']
title = item.get('title')
item_url = item['url'] item_url = item['url']
info = {'id': media_id}
ext = determine_ext(item_url) ext = determine_ext(item_url)
if ext == 'm3u8': if ext == 'm3u8':
info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4') info['formats'] = self._extract_m3u8_formats(item_url, info['id'], 'mp4')
else: else:
info['url'] = item_url info['url'] = item_url
if item.get('audio') is True: if item.get('audio') is True:
info['vcodec'] = 'none' info['vcodec'] = 'none'
is_live = video_config.get('live') is True
thumbnails = None thumbnails = None
image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage')) if image_url := traverse_obj(item, 'imageSrc', 'posterImage', expected_type=url_or_none):
if image_url:
thumbnails = [{ thumbnails = [{
'url': image_url, 'url': image_url,
'ext': determine_ext(image_url, 'jpg'), 'ext': determine_ext(image_url, 'jpg'),
}] }]
info.update({ info.update({
'title': title, **traverse_obj(item, {
'title': ('title', {str}),
'description': ('description', {clean_html}),
}),
'is_live': is_live, 'is_live': is_live,
'description': clean_html(item.get('description')),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
}) })
return info return info
@ -188,24 +207,20 @@ class NFLIE(NFLBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14", 'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
'description': 'md5:85e05a3cc163f8c344340f220521136d', 'description': 'md5:85e05a3cc163f8c344340f220521136d',
'upload_date': '20201215', 'thumbnail': r're:https?://.+\.jpg',
'timestamp': 1608009755,
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'NFL',
'tags': 'count:6',
'duration': 157, 'duration': 157,
'categories': 'count:3', '_old_archive_ids': ['anvato 899441'],
}, },
}, { }, {
'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown', 'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
'md5': '6886b32c24b463038c760ceb55a34566', 'md5': '92a517f05bd3eb50fe50244bc621aec8',
'info_dict': { 'info_dict': {
'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99', 'id': '8b7c3625-a461-4751-8db4-85f536f2bbd0',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown', 'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
'description': 'md5:12ada8ee70e6762658c30e223e095075', 'description': 'md5:12ada8ee70e6762658c30e223e095075',
'thumbnail': 'https://static.clubs.nfl.com/image/private/t_editorial_landscape_12_desktop/v1571153441/chiefs/rfljejccnyhhkpkfq855',
}, },
'skip': 'HTTP Error 404: Not Found',
}, { }, {
'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14', 'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
'only_matching': True, 'only_matching': True,
@ -236,13 +251,16 @@ class NFLArticleIE(NFLBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
entries = []
def entries():
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage): for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
entries.append(self._parse_video_config(video_config, display_id)) yield self._parse_video_config(video_config, display_id)
title = clean_html(get_element_by_class( title = clean_html(get_element_by_class(
'nfl-c-article__title', webpage)) or self._html_search_meta( 'nfl-c-article__title', webpage)) or self._html_search_meta(
['og:title', 'twitter:title'], webpage) ['og:title', 'twitter:title'], webpage)
return self.playlist_result(entries, display_id, title)
return self.playlist_result(entries(), display_id, title)
class NFLPlusReplayIE(NFLBaseIE): class NFLPlusReplayIE(NFLBaseIE):
@ -307,6 +325,9 @@ class NFLPlusReplayIE(NFLBaseIE):
'all_22': 'All-22', 'all_22': 'All-22',
} }
def _real_initialize(self):
self._get_account_info()
def _real_extract(self, url): def _real_extract(self, url):
slug, video_id = self._match_valid_url(url).group('slug', 'id') slug, video_id = self._match_valid_url(url).group('slug', 'id')
requested_types = self._configuration_arg('type', ['all']) requested_types = self._configuration_arg('type', ['all'])
@ -315,7 +336,7 @@ class NFLPlusReplayIE(NFLBaseIE):
requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types)) requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))
if not video_id: if not video_id:
self._get_auth_token(url, slug) self._get_auth_token()
headers = {'Authorization': f'Bearer {self._TOKEN}'} headers = {'Authorization': f'Bearer {self._TOKEN}'}
game_id = self._download_json( game_id = self._download_json(
f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug, f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
@ -328,14 +349,13 @@ class NFLPlusReplayIE(NFLBaseIE):
'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False) 'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)
if video_id: if video_id:
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) return self._extract_video(video_id)
def entries(): def entries():
for replay in traverse_obj( for replay in traverse_obj(
replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types), replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types),
): ):
video_id = replay['mcpPlaybackId'] yield self._extract_video(replay['mcpPlaybackId'])
yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
return self.playlist_result(entries(), slug) return self.playlist_result(entries(), slug)
@ -362,12 +382,15 @@ class NFLPlusEpisodeIE(NFLBaseIE):
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}] }]
def _real_initialize(self):
self._get_account_info()
def _real_extract(self, url): def _real_extract(self, url):
slug = self._match_id(url) slug = self._match_id(url)
self._get_auth_token(url, slug) self._get_auth_token()
video_id = self._download_json( video_id = self._download_json(
f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={ f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
'Authorization': f'Bearer {self._TOKEN}', 'Authorization': f'Bearer {self._TOKEN}',
})['mcpPlaybackId'] })['mcpPlaybackId']
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) return self._extract_video(video_id)