# yt-dlp/yt_dlp/extractor/nfl.py

import base64
import json
import re
import time
import uuid
from .anvato import AnvatoIE
from .common import InfoExtractor
from ..utils import (
ExtractorError,
# (blame-view timestamp, not code) 2021-01-01 12:26:37 +00:00
clean_html,
determine_ext,
get_element_by_class,
int_or_none,
make_archive_id,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
# (blame-view timestamp, not code) 2021-01-01 12:26:37 +00:00
class NFLBaseIE(InfoExtractor):
    """Shared building blocks for the NFL extractors.

    Holds the host-matching URL prefix, the credentials used to obtain an
    API access token, and the helpers that turn an MCP id or an embedded
    "video config" JSON blob into an info dict.
    """

    # Verbose-mode (?x) regex prefix: whitespace inside the literal is ignored,
    # so the layout below is purely cosmetic. Subclasses append their own path
    # suffix to it.
    _VALID_URL_BASE = r'''(?x)
                    https?://
                        (?P<host>
                            (?:www\.)?
                            (?:
                                (?:
                                    nfl|
                                    buffalobills|
                                    miamidolphins|
                                    patriots|
                                    newyorkjets|
                                    baltimoreravens|
                                    bengals|
                                    clevelandbrowns|
                                    steelers|
                                    houstontexans|
                                    colts|
                                    jaguars|
                                    (?:titansonline|tennesseetitans)|
                                    denverbroncos|
                                    (?:kc)?chiefs|
                                    raiders|
                                    chargers|
                                    dallascowboys|
                                    giants|
                                    philadelphiaeagles|
                                    (?:redskins|washingtonfootball)|
                                    chicagobears|
                                    detroitlions|
                                    packers|
                                    vikings|
                                    atlantafalcons|
                                    panthers|
                                    neworleanssaints|
                                    buccaneers|
                                    azcardinals|
                                    (?:stlouis|the)rams|
                                    49ers|
                                    seahawks
                                )\.com|
                                .+?\.clubs\.nfl\.com
                            )
                        )/
                    '''
    # Captures the JSON object inside a <script> tag whose id contains
    # "video-config-<uuid>".
    _VIDEO_CONFIG_REGEX = r'<script[^>]+id="[^"]*video-config-[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}[^"]*"[^>]*>\s*({.+});?\s*</script>'
    _ANVATO_PREFIX = 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:'

    # Client/device payload merged into every token request body.
    # `deviceId` is generated once, when this class body is executed.
    _CLIENT_DATA = {
        'clientKey': '4cFUW6DmwJpzT9L7LrG3qRAcABG5s04g',
        'clientSecret': 'CZuvCL49d9OwfGsR',
        'deviceId': str(uuid.uuid4()),
        'deviceInfo': base64.b64encode(json.dumps({
            'model': 'desktop',
            'version': 'Chrome',
            'osName': 'Windows',
            'osVersion': '10.0',
        }, separators=(',', ':')).encode()).decode(),
        'networkType': 'other',
        'peacockUUID': 'undefined',
    }
    # NOTE(review): class-level dict. `_get_account_info` rebinds it on the
    # instance, but `_get_auth_token` writes `refreshToken` into it in place,
    # so without a prior `_get_account_info` call that key lands on this
    # shared dict - confirm this sharing is intended.
    _ACCOUNT_INFO = {}
    _API_KEY = '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
    _TOKEN = None
    _TOKEN_EXPIRY = 0

    def _get_account_info(self):
        """Populate `self._ACCOUNT_INFO` from the auth-id.nfl.com cookies.

        Raises a login-required error when no `glt_*` login-token cookie is
        present, and `ExtractorError` when the `ucid` cookie is missing or the
        account response lacks any of the three expected fields.
        """
        cookies = self._get_cookies('https://auth-id.nfl.com/')
        # Prefer the cookie named after our API key, else any `glt_`-prefixed one
        login_token = traverse_obj(cookies, (
            (f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
        if not login_token:
            self.raise_login_required()
        if 'ucid' not in cookies:
            raise ExtractorError(
                'Required cookies for the auth-id.nfl.com domain were not found among passed cookies. '
                'If using --cookies, these cookies must be exported along with .nfl.com cookies, '
                'or else try using --cookies-from-browser instead', expected=True)

        account = self._download_json(
            'https://auth-id.nfl.com/accounts.getAccountInfo', None,
            note='Downloading account info', data=urlencode_postdata({
                'include': 'profile,data',
                'lang': 'en',
                'APIKey': self._API_KEY,
                'sdk': 'js_latest',
                'login_token': login_token,
                'authMode': 'cookie',
                'pageURL': 'https://www.nfl.com/',
                # Leading part of the `gig_canary_ver` cookie value, with a fixed fallback
                'sdkBuild': traverse_obj(cookies, (
                    'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
                'format': 'json',
            }), headers={'Content-Type': 'application/x-www-form-urlencoded'})

        self._ACCOUNT_INFO = traverse_obj(account, {
            'signatureTimestamp': 'signatureTimestamp',
            'uid': 'UID',
            'uidSignature': 'UIDSignature',
        })

        # All three fields must have been found in the response
        if len(self._ACCOUNT_INFO) != 3:
            raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)

    def _get_auth_token(self):
        """Ensure `self._TOKEN` holds an access token, fetching one if needed.

        Does nothing when a token is already stored and `_TOKEN_EXPIRY` lies
        more than 30 seconds ahead of the current time. Otherwise requests a
        token (via the `/refresh` endpoint when a refresh token is stored) and
        records the access token, its expiry and the new refresh token.
        """
        if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
            return

        token = self._download_json(
            'https://api.nfl.com/identity/v3/token%s' % (
                '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
            None, headers={'Content-Type': 'application/json'}, note='Downloading access token',
            data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())

        self._TOKEN = token['accessToken']
        # NOTE(review): stored as-is and compared directly against time.time()
        # above, so `expiresIn` is presumably an absolute epoch timestamp - confirm
        self._TOKEN_EXPIRY = token['expiresIn']
        self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']

    def _extract_video(self, mcp_id, is_live=False):
        """Return the info dict for the asset identified by `mcp_id`.

        Requests the asset from the play API with a bearer token, extracts
        HLS formats and subtitles from the returned `accessUrl`, and merges in
        the title, description, duration and thumbnails found under
        `metadata` in the response.
        """
        self._get_auth_token()
        data = self._download_json(
            f'https://api.nfl.com/play/v1/asset/{mcp_id}', mcp_id, headers={
                'Authorization': f'Bearer {self._TOKEN}',
                'Accept': 'application/json',
                'Content-Type': 'application/json',
            }, data=json.dumps({'init': True, 'live': is_live}, separators=(',', ':')).encode())
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            data['accessUrl'], mcp_id, 'mp4', m3u8_id='hls')

        return {
            'id': mcp_id,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': is_live,
            '_old_archive_ids': [make_archive_id(AnvatoIE, mcp_id)],
            **traverse_obj(data, ('metadata', {
                'title': ('event', ('def_title', 'friendlyName'), {str}, any),
                'description': ('event', 'def_description', {str}),
                'duration': ('event', 'duration', {int_or_none}),
                'thumbnails': ('thumbnails', ..., 'url', {'url': {url_or_none}}),
            })),
        }

    def _parse_video_config(self, video_config, display_id):
        """Build an info dict from a "video config" JSON string.

        Only the first playlist item is used. When it carries an `mcpID` the
        work is delegated to `_extract_video`; otherwise the item's own `url`
        is used, either as an HLS manifest or as a direct media URL.
        """
        video_config = self._parse_json(video_config, display_id)
        is_live = traverse_obj(video_config, ('live', {bool})) or False
        item = video_config['playlist'][0]
        if mcp_id := item.get('mcpID'):
            return self._extract_video(mcp_id, is_live=is_live)

        info = {'id': item.get('id') or item['entityId']}

        item_url = item['url']
        ext = determine_ext(item_url)
        if ext == 'm3u8':
            info['formats'] = self._extract_m3u8_formats(item_url, info['id'], 'mp4')
        else:
            info['url'] = item_url
            # Direct URLs flagged as audio carry no video stream
            if item.get('audio') is True:
                info['vcodec'] = 'none'

        thumbnails = None
        if image_url := traverse_obj(item, 'imageSrc', 'posterImage', expected_type=url_or_none):
            thumbnails = [{
                'url': image_url,
                'ext': determine_ext(image_url, 'jpg'),
            }]

        info.update({
            **traverse_obj(item, {
                'title': ('title', {str}),
                'description': ('description', {clean_html}),
            }),
            'is_live': is_live,
            'thumbnails': thumbnails,
        })

        return info


class NFLIE(NFLBaseIE):
    """Extractor for single video/audio pages on nfl.com and the club sites."""

    IE_NAME = 'nfl.com'
    _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'(?:videos?|listen|audio)/(?P<id>[^/#?&]+)'
    _TESTS = [{
        'url': 'https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14',
        'info_dict': {
            'id': '899441',
            'ext': 'mp4',
            'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
            'description': 'md5:85e05a3cc163f8c344340f220521136d',
            'thumbnail': r're:https?://.+\.jpg',
            'duration': 157,
            '_old_archive_ids': ['anvato 899441'],
        },
    }, {
        'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
        'md5': '92a517f05bd3eb50fe50244bc621aec8',
        'info_dict': {
            'id': '8b7c3625-a461-4751-8db4-85f536f2bbd0',
            'ext': 'mp3',
            'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
            'description': 'md5:12ada8ee70e6762658c30e223e095075',
            'thumbnail': 'https://static.clubs.nfl.com/image/private/t_editorial_landscape_12_desktop/v1571153441/chiefs/rfljejccnyhhkpkfq855',
        },
    }, {
        'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
        'only_matching': True,
    }, {
        'url': 'https://www.raiders.com/audio/instant-reactions-raiders-week-14-loss-to-indianapolis-colts-espn-jason-fitz',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page and parse the first embedded video config found in it."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        return self._parse_video_config(self._search_regex(
            self._VIDEO_CONFIG_REGEX, webpage, 'video config'), display_id)


class NFLArticleIE(NFLBaseIE):
    """Extractor for news articles; yields every video config embedded in the page as a playlist."""

    IE_NAME = 'nfl.com:article'
    _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'news/(?P<id>[^/#?&]+)'
    _TEST = {
        'url': 'https://www.buffalobills.com/news/the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
        'info_dict': {
            'id': 'the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
            'title': "'The only thing we've earned is the noise' | Bills coaches discuss handling rising expectations",
        },
        'playlist_count': 4,
    }

    def _real_extract(self, url):
        """Return a playlist with one entry per video config found in the article page."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        def entries():
            # Generator: each config is only parsed when the playlist is consumed
            for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
                yield self._parse_video_config(video_config, display_id)

        # Prefer the article headline element; fall back to the social meta tags
        title = clean_html(get_element_by_class(
            'nfl-c-article__title', webpage)) or self._html_search_meta(
            ['og:title', 'twitter:title'], webpage)

        return self.playlist_result(entries(), display_id, title)


class NFLPlusReplayIE(NFLBaseIE):
    """Extractor for NFL+ game replay pages (`/plus/games/<slug>[/<id>]`)."""

    IE_NAME = 'nfl.com:plus:replay'
    _VALID_URL = r'https?://(?:www\.)?nfl\.com/plus/games/(?P<slug>[\w-]+)(?:/(?P<id>\d+))?'
    _TESTS = [{
        'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1/1572108',
        'info_dict': {
            'id': '1572108',
            'ext': 'mp4',
            'title': 'New York Giants at Minnesota Vikings',
            'description': 'New York Giants play the Minnesota Vikings at U.S. Bank Stadium on January 15, 2023',
            'uploader': 'NFL',
            'upload_date': '20230116',
            'timestamp': 1673864520,
            'duration': 7157,
            'categories': ['Game Highlights'],
            'tags': ['Minnesota Vikings', 'New York Giants', 'Minnesota Vikings vs. New York Giants'],
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'Subscription required',
        'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1',
        'playlist_count': 4,
        'info_dict': {
            'id': 'giants-at-vikings-2022-post-1',
        },
    }, {
        'note': 'Subscription required',
        'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4',
        'playlist_count': 2,
        'info_dict': {
            'id': 'giants-at-patriots-2011-pre-4',
        },
    }, {
        'note': 'Subscription required',
        'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4',
        'info_dict': {
            'id': '950701',
            'ext': 'mp4',
            'title': 'Giants @ Patriots',
            'description': 'Giants at Patriots on September 01, 2011',
            'uploader': 'NFL',
            'upload_date': '20210724',
            'timestamp': 1627085874,
            'duration': 1532,
            'categories': ['Game Highlights'],
            'tags': ['play-by-play'],
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {
            'skip_download': 'm3u8',
            'extractor_args': {'nflplusreplay': {'type': ['condensed_game']}},
        },
    }]

    # Maps the `type` extractor-argument values to the labels that are compared
    # against each replay item's `subType` below.
    _REPLAY_TYPES = {
        'full_game': 'Full Game',
        'full_game_spanish': 'Full Game - Spanish',
        'condensed_game': 'Condensed Game',
        'all_22': 'All-22',
    }

    def _real_initialize(self):
        """Load the account info from cookies before any extraction."""
        self._get_account_info()

    def _real_extract(self, url):
        """Return a single replay, or a playlist of the requested replay types.

        When the URL carries a numeric id it is extracted directly. Otherwise
        the game's replays are looked up by slug; if exactly one type was
        requested and a matching replay exists it is returned on its own,
        else all matching replays are returned as a playlist.
        """
        slug, video_id = self._match_valid_url(url).group('slug', 'id')
        requested_types = self._configuration_arg('type', ['all'])
        if 'all' in requested_types:
            requested_types = list(self._REPLAY_TYPES)
        # Translate the requested keys into their `_REPLAY_TYPES` labels
        requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))

        if not video_id:
            self._get_auth_token()
            headers = {'Authorization': f'Bearer {self._TOKEN}'}
            game_id = self._download_json(
                f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
                'Downloading game ID', query={'withExternalIds': 'true'}, headers=headers)['id']
            replays = self._download_json(
                'https://api.nfl.com/content/v1/videos/replays', slug, 'Downloading replays JSON',
                query={'gameId': game_id}, headers=headers)
            if len(requested_types) == 1:
                video_id = traverse_obj(replays, (
                    'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)

        # Must stay at function level: `replays` below is only defined when
        # the lookup branch above ran, i.e. when no id came from the URL.
        if video_id:
            return self._extract_video(video_id)

        def entries():
            for replay in traverse_obj(
                replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types),
            ):
                yield self._extract_video(replay['mcpPlaybackId'])

        return self.playlist_result(entries(), slug)


class NFLPlusEpisodeIE(NFLBaseIE):
    """Extractor for NFL+ episode pages (`/plus/episodes/<slug>`)."""

    IE_NAME = 'nfl.com:plus:episode'
    _VALID_URL = r'https?://(?:www\.)?nfl\.com/plus/episodes/(?P<id>[\w-]+)'
    _TESTS = [{
        'note': 'Subscription required',
        'url': 'https://www.nfl.com/plus/episodes/kurt-s-qb-insider-conference-championships',
        'info_dict': {
            'id': '1576832',
            'ext': 'mp4',
            'title': 'Conference Championships',
            'description': 'md5:944f7fab56f7a37430bf8473f5473857',
            'uploader': 'NFL',
            'upload_date': '20230127',
            'timestamp': 1674782760,
            'duration': 730,
            'categories': ['Analysis'],
            'tags': ['Cincinnati Bengals at Kansas City Chiefs (2022-POST-3)'],
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_initialize(self):
        """Load the account info from cookies before any extraction."""
        self._get_account_info()

    def _real_extract(self, url):
        """Resolve the episode slug to its `mcpPlaybackId` and extract that video."""
        slug = self._match_id(url)
        self._get_auth_token()
        video_id = self._download_json(
            f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
                'Authorization': f'Bearer {self._TOKEN}',
            })['mcpPlaybackId']

        return self._extract_video(video_id)