From cfce954490299482216b19d3808a9155a36a126f Mon Sep 17 00:00:00 2001 From: Nicolas Dato Date: Sat, 2 Dec 2023 18:05:32 -0300 Subject: [PATCH] [rudovideo] improvements to rudovideo extractor: * implementing PR suggestions * removing c13cl extractor in favor of RudoVideo with _EMBED_REGEX * supporting VODs and Podcasts from rudo.video * supporting embedded YouTube * adding tests --- yt_dlp/extractor/_extractors.py | 3 +- yt_dlp/extractor/c13cl.py | 13 ---- yt_dlp/extractor/rudovideo.py | 121 +++++++++++++++++++++++++++----- 3 files changed, 104 insertions(+), 33 deletions(-) delete mode 100644 yt_dlp/extractor/c13cl.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 5ad10594c8..e1bcd7e2e8 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -278,7 +278,6 @@ from .businessinsider import BusinessInsiderIE from .bundesliga import BundesligaIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE -from .c13cl import C13ClIE from .c56 import C56IE from .cableav import CableAVIE from .callin import CallinIE @@ -1644,7 +1643,7 @@ from .rumble import ( RumbleIE, RumbleChannelIE, ) -from .rudovideo import RudoVideoIE +from .rudovideo import RudoVideoLiveIE from .rutube import ( RutubeIE, RutubeChannelIE, diff --git a/yt_dlp/extractor/c13cl.py b/yt_dlp/extractor/c13cl.py deleted file mode 100644 index a52070fd85..0000000000 --- a/yt_dlp/extractor/c13cl.py +++ /dev/null @@ -1,13 +0,0 @@ -from .common import InfoExtractor - - -class C13ClIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?13\.cl/en-vivo' - - def _real_extract(self, url): - display_id = 'C13' - webpage = self._download_webpage(url, display_id) - - stream_url = self._search_regex(r']+>[^/]+)' +class RudoVideoLiveIE(InfoExtractor): + _VALID_URL = r'https?://rudo\.video/(?Plive|vod|podcast)/(?P[^/?]+)' + _EMBED_REGEX = [r']+src=[\'"](?P(?:https?:)//rudo\.video/(?:live|vod|podcast)/[^\'"]+)'] + _TESTS = [{ + 'url': 
'https://rudo.video/podcast/cz2wrUy8l0o', + 'md5': '28ed82b477708dc5e12e072da2449221', + 'info_dict': { + 'id': 'cz2wrUy8l0o', + 'title': 'Diego Cabot', + 'ext': 'mp4', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/podcast/bQkt07', + 'md5': '36b22a9863de0f47f00fc7532a32a898', + 'info_dict': { + 'id': 'bQkt07', + 'title': 'Tubular Bells', + 'ext': 'mp4', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/vod/bN5AaJ', + 'md5': '01324a329227e2591530ecb4f555c881', + 'info_dict': { + 'id': 'bN5AaJ', + 'title': 'Ucrania 19.03', + 'creator': 'La Tercera', + 'ext': 'mp4', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/live/bbtv', + 'info_dict': { + 'id': 'bbtv', + 'ext': 'mp4', + 'creator': 'BioBioTV', + 'live_status': 'is_live', + 'title': r're:^LIVE BBTV\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}$', + 'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$', + }, + }, { + 'url': 'https://rudo.video/live/c13', + 'info_dict': { + 'id': 'c13', + 'title': 'CANAL13', + 'ext': 'mp4', + }, + 'skip': 'Geo-restricted to Chile', + }, { + 'url': 'https://rudo.video/live/t13-13cl', + 'info_dict': { + 'id': 't13-13cl', + 'title': 'T13', + 'ext': 'mp4', + }, + 'skip': 'Geo-restricted to Chile', + }] + + def get_title(self, webpage): + title = self._search_regex(r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)', webpage, 'title', default=None) + if title is None: + title = self._search_regex(r']+property=[\'"]og:title[\'"]\s+content=[\'"]([^\'"]+)', webpage, 'title', fatal=False) + return title + + def get_thumbnail(self, webpage): + thumbnail = self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)', webpage, 'thumbnail', default=None) + if thumbnail is None: + thumbnail = self._search_regex(r']+property=[\'"]og:image[\'"]\s+content=[\'"]([^\'"]+)', webpage, 'thumbnail', default=None) + return thumbnail def _real_extract(self, url): video_id = self._match_id(url) + type = 
self._match_valid_url(url).group('type') webpage = self._download_webpage(url, video_id) - if "Streaming is not available in your area." in webpage: + if 'Streaming is not available in your area.' in webpage: self.raise_geo_restricted() - stream_url = self._search_regex(r'var\s+streamURL\s*=\s*\'([^?\']+)', webpage, "streamUrl") - token_array_string = self._search_regex(r'