From cf87ca678ca13213e0af0f0bcc4f5719b7a1305e Mon Sep 17 00:00:00 2001 From: Nicolas Dato Date: Sat, 9 Dec 2023 11:40:32 -0300 Subject: [PATCH] [rudovideo] split it into RudoVideoIE and RudoVideoLiveIE --- yt_dlp/extractor/_extractors.py | 5 +- yt_dlp/extractor/rudovideo.py | 96 +++++++++++++++++++-------------- 2 files changed, 60 insertions(+), 41 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e1bcd7e2e8..4d3d83f8f7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1643,7 +1643,10 @@ from .rumble import ( RumbleIE, RumbleChannelIE, ) -from .rudovideo import RudoVideoLiveIE +from .rudovideo import ( + RudoVideoLiveIE, + RudoVideoIE, +) from .rutube import ( RutubeIE, RutubeChannelIE, diff --git a/yt_dlp/extractor/rudovideo.py b/yt_dlp/extractor/rudovideo.py index a443f8a62a..4511c19afe 100644 --- a/yt_dlp/extractor/rudovideo.py +++ b/yt_dlp/extractor/rudovideo.py @@ -2,9 +2,33 @@ from .common import InfoExtractor from ..utils import ExtractorError, traverse_obj, js_to_json, update_url_query -class RudoVideoLiveIE(InfoExtractor): - _VALID_URL = r'https?://rudo\.video/(?Plive|vod|podcast)/(?P[^/?]+)' - _EMBED_REGEX = [r']+src=[\'"](?P(?:https?:)//rudo\.video/(?:live|vod|podcast)/[^\'"]+)'] +class RudoVideoBaseIE(InfoExtractor): + def get_title(self, webpage): + return self._search_regex(r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)', webpage, 'title', default=None) or self._og_search_title(webpage) + + def get_thumbnail(self, webpage): + return self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)', webpage, 'thumbnail', default=None) or self._og_search_thumbnail(webpage) + + def get_creator(self, webpage): + return self._search_regex(r'var\s+videoAuthor\s*=\s*[\'"]([^?\'"]+)', webpage, "videoAuthor", default=None) + + def get_stream_url(self, webpage, video_id): + stream_url = self._search_regex(r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'streamUrl', default=None) or self._search_regex(r']+src=[\'"]([^\'"]+)', webpage, 'sourceUrl', default=None) + youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube.com[^\'"]+)', webpage, 'youtubeUrl', default=None) + if stream_url is None: + if youtube_url is None: + raise ExtractorError('Unable to extract stream url') + return self.url_result(youtube_url, display_id=video_id) + return stream_url + + def check_geo_restricted(self, webpage): + if 'Streaming is not available in your area.' in webpage: + self.raise_geo_restricted() + + +class RudoVideoIE(RudoVideoBaseIE): + _VALID_URL = r'https?://rudo\.video/(?:vod|podcast)/(?P[^/?]+)' + _EMBED_REGEX = [r']+src=[\'"](?P(?:https?:)//rudo\.video/(?:vod|podcast)/[^\'"]+)'] _TESTS = [{ 'url': 'https://rudo.video/podcast/cz2wrUy8l0o', 'md5': '28ed82b477708dc5e12e072da2449221', @@ -33,7 +57,29 @@ class RudoVideoLiveIE(InfoExtractor): 'ext': 'mp4', 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', }, - }, { + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + self.check_geo_restricted(webpage) + + stream_url = self.get_stream_url(webpage, video_id) + + return { + 'id': video_id, + 'title': self.get_title(webpage), + 'formats': self._extract_m3u8_formats(stream_url, video_id, live=True), + 'creator': self.get_creator(webpage), + 'thumbnail': self.get_thumbnail(webpage), + } + + +class RudoVideoLiveIE(RudoVideoBaseIE): + _VALID_URL = r'https?://rudo\.video/live/(?P[^/?]+)' + _EMBED_REGEX = [r']+src=[\'"](?P(?:https?:)//rudo\.video/live/[^\'"]+)'] + _TESTS = [{ 'url': 'https://rudo.video/live/bbtv', 'info_dict': { 'id': 'bbtv', @@ -61,43 +107,13 @@ class RudoVideoLiveIE(InfoExtractor): 'skip': 'Geo-restricted to Chile', }] - def get_title(self, webpage): - title = self._search_regex(r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)', webpage, 'title', default=None) - if title is None: - title = self._search_regex(r']+property=[\'"]og:title[\'"]\s+content=[\'"]([^\'"]+)', webpage, 'title', fatal=False) - return title - - def get_thumbnail(self, webpage): - thumbnail = self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)', webpage, 'thumbnail', default=None) - if thumbnail is None: - thumbnail = self._search_regex(r']+property=[\'"]og:image[\'"]\s+content=[\'"]([^\'"]+)', webpage, 'thumbnail', default=None) - return thumbnail - def _real_extract(self, url): video_id = self._match_id(url) - type = self._match_valid_url(url).group('type') webpage = self._download_webpage(url, video_id) - if 'Streaming is not available in your area.' in webpage: - self.raise_geo_restricted() - - stream_url = self._search_regex(r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'streamUrl', default=None) - source_url = self._search_regex(r']+src=[\'"]([^\'"]+)', webpage, 'sourceUrl', default=None) - youtube_url = self._search_regex(r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube.com[^\'"]+)', webpage, 'youtubeUrl', default=None) - if stream_url is None: - if source_url is not None: - stream_url = source_url - elif youtube_url is not None: - return self.url_result(youtube_url, display_id=video_id) - else: - raise ExtractorError('Unable to extract stream url') - - title = self.get_title(webpage) - thumbnail = self.get_thumbnail(webpage) - is_live = None - if type == 'live': - is_live = True + self.check_geo_restricted(webpage) + stream_url = self.get_stream_url(webpage, video_id) token_array = self._search_json(r'