From 838f4385de8300a4dd4e7ffbbf0e5b7b85fb52c2 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 3 Nov 2024 23:53:26 +0000 Subject: [PATCH] [ie/nfl] Fix extractors (#11409) Authored by: bashonly --- yt_dlp/extractor/anvato.py | 45 ----------- yt_dlp/extractor/nfl.py | 153 +++++++++++++++++++++---------------- 2 files changed, 88 insertions(+), 110 deletions(-) diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py index bf3d60b5e..ba1d7df37 100644 --- a/yt_dlp/extractor/anvato.py +++ b/yt_dlp/extractor/anvato.py @@ -33,24 +33,6 @@ class AnvatoIE(InfoExtractor): _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js _TESTS = [{ - # from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14 - 'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441', - 'md5': '921919dab3cd0b849ff3d624831ae3e2', - 'info_dict': { - 'id': '899441', - 'ext': 'mp4', - 'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14', - 'description': 'md5:85e05a3cc163f8c344340f220521136d', - 'upload_date': '20201215', - 'timestamp': 1608009755, - 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 'NFL', - 'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights', - 'Player Highlights', 'Cleveland Browns', 'league'], - 'duration': 157, - 'categories': ['Entertainment', 'Game', 'Highlights'], - }, - }, { # from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/ 'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455', 'md5': '837718bcfb3a7778d022f857f7a9b19e', @@ -241,31 +223,6 @@ class AnvatoIE(InfoExtractor): 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582', } - def _generate_nfl_token(self, anvack, mcp_id): - reroute = self._download_json( - 'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials', - headers={'X-Domain-Id': 100}, note='Fetching token info') - token_type = reroute.get('token_type') or 'Bearer' - auth_token = f'{token_type} {reroute["access_token"]}' - response = self._download_json( - 'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({ - 'query': '''{ - viewer { - mediaToken(anvack: "%s", id: %s) { - token - } - } -}''' % (anvack, mcp_id), # noqa: UP031 - }).encode(), headers={ - 'Authorization': auth_token, - 'Content-Type': 'application/json', - }, note='Fetching NFL API token') - return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token')) - - _TOKEN_GENERATORS = { - 'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token, - } - def _server_time(self, access_key, video_id): return int_or_none(traverse_obj(self._download_json( f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key}, @@ -290,8 +247,6 @@ class AnvatoIE(InfoExtractor): } if extracted_token is not None: api['anvstk2'] = extracted_token - elif self._TOKEN_GENERATORS.get(access_key) is not None: - api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id) elif self._ANVACK_TABLE.get(access_key) is not None: api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}') else: diff --git a/yt_dlp/extractor/nfl.py b/yt_dlp/extractor/nfl.py index c537c1c47..59213a44b 100644 --- a/yt_dlp/extractor/nfl.py +++ b/yt_dlp/extractor/nfl.py @@ -11,9 +11,12 @@ from ..utils import ( clean_html, determine_ext, get_element_by_class, - traverse_obj, + int_or_none, + make_archive_id, + url_or_none, urlencode_postdata, ) +from ..utils.traversal import traverse_obj class NFLBaseIE(InfoExtractor): @@ -75,22 +78,15 @@ class NFLBaseIE(InfoExtractor): 'osVersion': '10.0', }, separators=(',', ':')).encode()).decode(), 'networkType': 'other', - 'nflClaimGroupsToAdd': [], - 'nflClaimGroupsToRemove': [], + 'peacockUUID': 'undefined', } _ACCOUNT_INFO = {} - _API_KEY = None + _API_KEY = '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f' _TOKEN = None _TOKEN_EXPIRY = 0 - def _get_account_info(self, url, slug): - if not self._API_KEY: - webpage = self._download_webpage(url, slug, fatal=False) or '' - self._API_KEY = self._search_regex( - r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key', - fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f' - + def _get_account_info(self): cookies = self._get_cookies('https://auth-id.nfl.com/') login_token = traverse_obj(cookies, ( (f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False) @@ -103,7 +99,7 @@ class NFLBaseIE(InfoExtractor): 'or else try using --cookies-from-browser instead', expected=True) account = self._download_json( - 'https://auth-id.nfl.com/accounts.getAccountInfo', slug, + 'https://auth-id.nfl.com/accounts.getAccountInfo', None, note='Downloading account info', data=urlencode_postdata({ 'include': 'profile,data', 'lang': 'en', @@ -111,7 +107,7 @@ class NFLBaseIE(InfoExtractor): 'sdk': 'js_latest', 'login_token': login_token, 'authMode': 'cookie', - 'pageURL': url, + 'pageURL': 'https://www.nfl.com/', 'sdkBuild': traverse_obj(cookies, ( 'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'), 'format': 'json', @@ -126,55 +122,78 @@ class NFLBaseIE(InfoExtractor): if len(self._ACCOUNT_INFO) != 3: raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True) - def _get_auth_token(self, url, slug): + def _get_auth_token(self): if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30): return - if not self._ACCOUNT_INFO: - self._get_account_info(url, slug) - token = self._download_json( 'https://api.nfl.com/identity/v3/token%s' % ( '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''), - slug, headers={'Content-Type': 'application/json'}, note='Downloading access token', + None, headers={'Content-Type': 'application/json'}, note='Downloading access token', data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode()) self._TOKEN = token['accessToken'] self._TOKEN_EXPIRY = token['expiresIn'] self._ACCOUNT_INFO['refreshToken'] = token['refreshToken'] + def _extract_video(self, mcp_id, is_live=False): + self._get_auth_token() + data = self._download_json( + f'https://api.nfl.com/play/v1/asset/{mcp_id}', mcp_id, headers={ + 'Authorization': f'Bearer {self._TOKEN}', + 'Accept': 'application/json', + 'Content-Type': 'application/json', + }, data=json.dumps({'init': True, 'live': is_live}, separators=(',', ':')).encode()) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + data['accessUrl'], mcp_id, 'mp4', m3u8_id='hls') + + return { + 'id': mcp_id, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': is_live, + '_old_archive_ids': [make_archive_id(AnvatoIE, mcp_id)], + **traverse_obj(data, ('metadata', { + 'title': ('event', ('def_title', 'friendlyName'), {str}, any), + 'description': ('event', 'def_description', {str}), + 'duration': ('event', 'duration', {int_or_none}), + 'thumbnails': ('thumbnails', ..., 'url', {'url': {url_or_none}}), + })), + } + def _parse_video_config(self, video_config, display_id): video_config = self._parse_json(video_config, display_id) + is_live = traverse_obj(video_config, ('live', {bool})) or False item = video_config['playlist'][0] - mcp_id = item.get('mcpID') - if mcp_id: - info = self.url_result(f'{self._ANVATO_PREFIX}{mcp_id}', AnvatoIE, mcp_id) + if mcp_id := item.get('mcpID'): + return self._extract_video(mcp_id, is_live=is_live) + + info = {'id': item.get('id') or item['entityId']} + + item_url = item['url'] + ext = determine_ext(item_url) + if ext == 'm3u8': + info['formats'] = self._extract_m3u8_formats(item_url, info['id'], 'mp4') else: - media_id = item.get('id') or item['entityId'] - title = item.get('title') - item_url = item['url'] - info = {'id': media_id} - ext = determine_ext(item_url) - if ext == 'm3u8': - info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4') - else: - info['url'] = item_url - if item.get('audio') is True: - info['vcodec'] = 'none' - is_live = video_config.get('live') is True - thumbnails = None - image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage')) - if image_url: - thumbnails = [{ - 'url': image_url, - 'ext': determine_ext(image_url, 'jpg'), - }] - info.update({ - 'title': title, - 'is_live': is_live, - 'description': clean_html(item.get('description')), - 'thumbnails': thumbnails, - }) + info['url'] = item_url + if item.get('audio') is True: + info['vcodec'] = 'none' + + thumbnails = None + if image_url := traverse_obj(item, 'imageSrc', 'posterImage', expected_type=url_or_none): + thumbnails = [{ + 'url': image_url, + 'ext': determine_ext(image_url, 'jpg'), + }] + + info.update({ + **traverse_obj(item, { + 'title': ('title', {str}), + 'description': ('description', {clean_html}), + }), + 'is_live': is_live, + 'thumbnails': thumbnails, + }) return info @@ -188,24 +207,20 @@ class NFLIE(NFLBaseIE): 'ext': 'mp4', 'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14", 'description': 'md5:85e05a3cc163f8c344340f220521136d', - 'upload_date': '20201215', - 'timestamp': 1608009755, - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'NFL', - 'tags': 'count:6', + 'thumbnail': r're:https?://.+\.jpg', 'duration': 157, - 'categories': 'count:3', + '_old_archive_ids': ['anvato 899441'], }, }, { 'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown', - 'md5': '6886b32c24b463038c760ceb55a34566', + 'md5': '92a517f05bd3eb50fe50244bc621aec8', 'info_dict': { - 'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99', + 'id': '8b7c3625-a461-4751-8db4-85f536f2bbd0', 'ext': 'mp3', 'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown', 'description': 'md5:12ada8ee70e6762658c30e223e095075', + 'thumbnail': 'https://static.clubs.nfl.com/image/private/t_editorial_landscape_12_desktop/v1571153441/chiefs/rfljejccnyhhkpkfq855', }, - 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14', 'only_matching': True, @@ -236,13 +251,16 @@ class NFLArticleIE(NFLBaseIE): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - entries = [] - for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage): - entries.append(self._parse_video_config(video_config, display_id)) + + def entries(): + for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage): + yield self._parse_video_config(video_config, display_id) + title = clean_html(get_element_by_class( 'nfl-c-article__title', webpage)) or self._html_search_meta( ['og:title', 'twitter:title'], webpage) - return self.playlist_result(entries, display_id, title) + + return self.playlist_result(entries(), display_id, title) class NFLPlusReplayIE(NFLBaseIE): @@ -307,6 +325,9 @@ class NFLPlusReplayIE(NFLBaseIE): 'all_22': 'All-22', } + def _real_initialize(self): + self._get_account_info() + def _real_extract(self, url): slug, video_id = self._match_valid_url(url).group('slug', 'id') requested_types = self._configuration_arg('type', ['all']) @@ -315,7 +336,7 @@ class NFLPlusReplayIE(NFLBaseIE): requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types)) if not video_id: - self._get_auth_token(url, slug) + self._get_auth_token() headers = {'Authorization': f'Bearer {self._TOKEN}'} game_id = self._download_json( f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug, @@ -328,14 +349,13 @@ class NFLPlusReplayIE(NFLBaseIE): 'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False) if video_id: - return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + return self._extract_video(video_id) def entries(): for replay in traverse_obj( replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types), ): - video_id = replay['mcpPlaybackId'] - yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + yield self._extract_video(replay['mcpPlaybackId']) return self.playlist_result(entries(), slug) @@ -362,12 +382,15 @@ class NFLPlusEpisodeIE(NFLBaseIE): 'params': {'skip_download': 'm3u8'}, }] + def _real_initialize(self): + self._get_account_info() + def _real_extract(self, url): slug = self._match_id(url) - self._get_auth_token(url, slug) + self._get_auth_token() video_id = self._download_json( f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={ 'Authorization': f'Bearer {self._TOKEN}', })['mcpPlaybackId'] - return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + return self._extract_video(video_id)