From 92790da2bb64de67cdc8ec9d8cc459e631feff03 Mon Sep 17 00:00:00 2001
From: nyuszika7h <nyuszika7h@gmail.com>
Date: Wed, 15 Sep 2021 03:45:10 +0200
Subject: [PATCH] [radlive] Add new extractor (#870)

Closes #312
Authored by: nyuszika7h
---
 yt_dlp/extractor/extractors.py |   5 +
 yt_dlp/extractor/radlive.py    | 179 +++++++++++++++++++++++++++++++++
 2 files changed, 184 insertions(+)
 create mode 100644 yt_dlp/extractor/radlive.py
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index ecbb879770..bb1e21a07a 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1123,6 +1123,11 @@ from .radiode import RadioDeIE
 from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
 from .radiofrance import RadioFranceIE
+from .radlive import (
+    RadLiveIE,
+    RadLiveChannelIE,
+    RadLiveSeasonIE,
+)
 from .rai import (
     RaiPlayIE,
     RaiPlayLiveIE,
diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py
new file mode 100644
index 0000000000..2de7ab04a9
--- /dev/null
+++ b/yt_dlp/extractor/radlive.py
@@ -0,0 +1,179 @@
+import json
+
+from ..utils import ExtractorError, traverse_obj, try_get, unified_timestamp
+from .common import InfoExtractor
+
+
+class RadLiveIE(InfoExtractor):
+    """Extractor for single rad.live videos (``feature`` and ``episode`` pages)."""
+    IE_NAME = 'radlive'
+    _VALID_URL = r'https?://(?:www\.)?rad\.live/content/(?P<content_type>feature|episode)/(?P<id>[a-f0-9-]+)'
+    _TESTS = [{
+        'url': 'https://rad.live/content/feature/dc5acfbc-761b-4bec-9564-df999905116a',
+        'md5': '6219d5d31d52de87d21c9cf5b7cb27ff',
+        'info_dict': {
+            'id': 'dc5acfbc-761b-4bec-9564-df999905116a',
+            'ext': 'mp4',
+            'title': 'Deathpact - Digital Mirage 2 [Full Set]',
+            'language': 'en',
+            'thumbnail': 'https://static.12core.net/cb65ae077a079c68380e38f387fbc438.png',
+            'description': '',
+            'release_timestamp': 1600185600.0,
+            'channel': 'Proximity',
+            'channel_id': '9ce6dd01-70a4-4d59-afb6-d01f807cd009',
+            'channel_url': 'https://rad.live/content/channel/9ce6dd01-70a4-4d59-afb6-d01f807cd009',
+        }
+    }, {
+        'url': 'https://rad.live/content/episode/bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf',
+        'md5': '40b2175f347592125d93e9a344080125',
+        'info_dict': {
+            'id': 'bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf',
+            'ext': 'mp4',
+            'title': 'E01: Bad Jokes 1',
+            'language': 'en',
+            'thumbnail': 'https://lsp.littlstar.com/channels/WHISTLE/BAD_JOKES/SEASON_1/BAD_JOKES_101/poster.jpg',
+            'description': 'Bad Jokes - Champions, Adam Pally, Super Troopers, Team Edge and 2Hype',
+            'release_timestamp': None,
+            'channel': None,
+            'channel_id': None,
+            'channel_url': None,
+            'episode': 'E01: Bad Jokes 1',
+            'episode_number': 1,
+            'episode_id': '336',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict for one video; raises ExtractorError when the page carries no data for it."""
+        content_type, video_id = self._match_valid_url(url).groups()
+        webpage = self._download_webpage(url, video_id)
+
+        content_info = json.loads(self._search_regex(
+            r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>',
+            webpage, 'video info', group='json'))['props']['pageProps']['initialContentData']
+        # .get(): an absent key must reach the ExtractorError below instead of raising KeyError
+        video_info = content_info.get(content_type)
+        if not video_info:
+            raise ExtractorError('Unable to extract video info, make sure the URL is valid')
+
+        formats = self._extract_m3u8_formats(video_info['assets']['videos'][0]['url'], video_id)
+        self._sort_formats(formats)
+
+        data = video_info.get('structured_data') or {}
+        release_date = unified_timestamp(traverse_obj(data, ('releasedEvent', 'startDate')))
+        channel = next(iter(content_info.get('channels') or []), {})
+        # The channel id is the last ':'-separated segment of the channel's "lrn" string
+        channel_id = (channel.get('lrn') or '').split(':')[-1] or None
+
+        result = {
+            'id': video_id,
+            'title': video_info['title'],
+            'formats': formats,
+            'language': traverse_obj(data, ('potentialAction', 'target', 'inLanguage')),
+            'thumbnail': traverse_obj(data, ('image', 'contentUrl')),
+            'description': data.get('description'),
+            'release_timestamp': release_date,
+            'channel': channel.get('name'),
+            'channel_id': channel_id,
+            'channel_url': f'https://rad.live/content/channel/{channel_id}' if channel_id else None,
+        }
+        if content_type == 'episode':
+            result.update({
+                # TODO: Get season number when downloading single episode
+                'episode': video_info.get('title'),
+                'episode_number': video_info.get('number'),
+                'episode_id': video_info.get('id'),
+            })
+
+        return result
+
+
+class RadLiveSeasonIE(RadLiveIE):
+    """Playlist extractor for a rad.live season page; each episode is delegated to RadLiveIE."""
+    IE_NAME = 'radlive:season'
+    _VALID_URL = r'https?://(?:www\.)?rad\.live/content/season/(?P<id>[a-f0-9-]+)'
+    _TESTS = [{
+        'url': 'https://rad.live/content/season/08a290f7-c9ef-4e22-9105-c255995a2e75',
+        'md5': '40b2175f347592125d93e9a344080125',
+        'info_dict': {
+            'id': '08a290f7-c9ef-4e22-9105-c255995a2e75',
+            'title': 'Bad Jokes - Season 1',
+        },
+        'playlist_mincount': 5,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if RadLiveIE.suitable(url) else super().suitable(url)
+
+    def _real_extract(self, url):
+        season_id = self._match_id(url)
+        webpage = self._download_webpage(url, season_id)
+        content_info = json.loads(self._search_regex(
+            r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>',
+            webpage, 'video info', group='json'))['props']['pageProps']['initialContentData']
+        video_info = content_info.get('season')
+        if not video_info:
+            raise ExtractorError('Unable to extract video info, make sure the URL is valid')
+        entries = [{
+            '_type': 'url_transparent',
+            'id': episode['structured_data']['url'].split('/')[-1],
+            'url': episode['structured_data']['url'],
+            'series': try_get(content_info, lambda x: x['series']['title']),
+            'season': video_info['title'],
+            'season_number': video_info.get('number'),
+            'season_id': video_info.get('id'),
+            'ie_key': RadLiveIE.ie_key(),
+        } for episode in video_info['episodes']]
+        return self.playlist_result(entries, season_id, video_info.get('title'))
+
+
+class RadLiveChannelIE(RadLiveIE):
+    """Playlist extractor that lists a rad.live channel's features through a GraphQL query."""
+    IE_NAME = 'radlive:channel'
+    _VALID_URL = r'https?://(?:www\.)?rad\.live/content/channel/(?P<id>[a-f0-9-]+)'
+    _TESTS = [{
+        'url': 'https://rad.live/content/channel/5c4d8df4-6fa0-413c-81e3-873479b49274',
+        'md5': '625156a08b7f2b0b849f234e664457ac',
+        'info_dict': {
+            'id': '5c4d8df4-6fa0-413c-81e3-873479b49274',
+            'title': 'Whistle Sports',
+        },
+        'playlist_mincount': 7,
+    }]
+
+    _QUERY = '''
+query WebChannelListing ($lrn: ID!) {
+  channel (id:$lrn) {
+    name
+    features {
+      structured_data
+    }
+  }
+}'''
+
+    @classmethod
+    def suitable(cls, url):
+        return not RadLiveIE.suitable(url) and super().suitable(url)
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        payload = json.dumps({
+            'query': self._QUERY,
+            'variables': {'lrn': f'lrn:12core:media:content:channel:{channel_id}'}
+        }).encode('utf-8')
+        response = self._download_json(
+            'https://content.mhq.12core.net/graphql', channel_id,
+            headers={'Content-Type': 'application/json'}, data=payload)
+
+        channel = traverse_obj(response, ('data', 'channel'))
+        if not channel:
+            raise ExtractorError('Unable to extract video info, make sure the URL is valid')
+
+        entries = [{
+            '_type': 'url_transparent',
+            'url': feature['structured_data']['url'],
+            'ie_key': RadLiveIE.ie_key(),
+        } for feature in channel['features']]
+
+        return self.playlist_result(entries, channel_id, channel.get('name'))