From c5f01bf7d4b9426c87c3f8248de23934a56579e0 Mon Sep 17 00:00:00 2001 From: "Amir Y. Perehodnik" Date: Mon, 18 Dec 2023 17:52:43 +0200 Subject: [PATCH] [ie/Maariv] Add extractor (#8331) Authored by: amir16yp --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/maariv.py | 62 +++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 yt_dlp/extractor/maariv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 94369ca66..b3c411394 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -991,6 +991,7 @@ from .lynda import ( LyndaIE, LyndaCourseIE ) +from .maariv import MaarivIE from .magellantv import MagellanTVIE from .magentamusik360 import MagentaMusik360IE from .mailru import ( diff --git a/yt_dlp/extractor/maariv.py b/yt_dlp/extractor/maariv.py new file mode 100644 index 000000000..425a8b3b4 --- /dev/null +++ b/yt_dlp/extractor/maariv.py @@ -0,0 +1,62 @@ +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_resolution, + unified_timestamp, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class MaarivIE(InfoExtractor): + IE_NAME = 'maariv.co.il' + _VALID_URL = r'https?://player\.maariv\.co\.il/public/player\.html\?(?:[^#]+&)?media=(?P\d+)' + _EMBED_REGEX = [rf']+\bsrc=[\'"](?P{_VALID_URL})'] + _TESTS = [{ + 'url': 'https://player.maariv.co.il/public/player.html?player=maariv-desktop&media=3611585', + 'info_dict': { + 'id': '3611585', + 'duration': 75, + 'ext': 'mp4', + 'upload_date': '20231009', + 'title': 'מבצע חרבות ברזל', + 'timestamp': 1696851301, + }, + }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.maariv.co.il/news/law/Article-1044008', + 'info_dict': { + 'id': '3611585', + 'duration': 75, + 'ext': 'mp4', + 'upload_date': '20231009', + 'title': 'מבצע חרבות ברזל', + 'timestamp': 1696851301, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + data = self._download_json( + f'https://dal.walla.co.il/media/{video_id}?origin=player.maariv.co.il', video_id)['data'] + + formats = [] + if hls_url := traverse_obj(data, ('video', 'url', {url_or_none})): + formats.extend(self._extract_m3u8_formats(hls_url, video_id, m3u8_id='hls', fatal=False)) + + for http_format in traverse_obj(data, ('video', 'stream_urls', ..., 'stream_url', {url_or_none})): + formats.append({ + 'url': http_format, + 'format_id': 'http', + **parse_resolution(http_format), + }) + + return { + 'id': video_id, + **traverse_obj(data, { + 'title': 'title', + 'duration': ('video', 'duration', {int_or_none}), + 'timestamp': ('upload_date', {unified_timestamp}), + }), + 'formats': formats, + }