From a8769f672b58135dc681b87dd0bdd8073c847bf0 Mon Sep 17 00:00:00 2001
From: grqx_wsl <173253225+grqx@users.noreply.github.com>
Date: Sun, 13 Oct 2024 12:46:03 +1300
Subject: [PATCH 01/26] [ie/boomplay] add extractors
---
yt_dlp/extractor/_extractors.py | 7 +
yt_dlp/extractor/boomplay.py | 283 ++++++++++++++++++++++++++++++++
2 files changed, 290 insertions(+)
create mode 100644 yt_dlp/extractor/boomplay.py
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 4b1f4c316d..1abca1ed93 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -280,6 +280,13 @@ from .blogger import BloggerIE
from .bloomberg import BloombergIE
from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE
+from .boomplay import (
+ BoomPlayEpisodeIE,
+ BoomPlayMusicIE,
+ BoomPlayPlaylistIE,
+ BoomPlayPodcastIE,
+ BoomPlayVideoIE,
+)
from .boosty import BoostyIE
from .bostonglobe import BostonGlobeIE
from .box import BoxIE
diff --git a/yt_dlp/extractor/boomplay.py b/yt_dlp/extractor/boomplay.py
new file mode 100644
index 0000000000..dba8a1c9ca
--- /dev/null
+++ b/yt_dlp/extractor/boomplay.py
@@ -0,0 +1,283 @@
+import base64
+import functools
+import json
+import re
+
+from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt_bytes, aes_cbc_encrypt_bytes, unpad_pkcs7
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ get_element_by_attribute,
+ get_element_by_class,
+ get_elements_by_attribute,
+ int_or_none,
+ merge_dicts,
+ parse_duration,
+ strip_or_none,
+ unified_strdate,
+ url_or_none,
+ urlencode_postdata,
+)
+from ..utils.traversal import traverse_obj
+
+
+class BoomPlayBaseIE(InfoExtractor):
+    # Calculated from constant values; see lhx.AESUtils.encrypt and public.js
+    # Note that the real key/IV differ from `lhx.AESUtils.key`/`lhx.AESUtils.iv`
+ _KEY = b'boomplayVr3xopAM'
+ _IV = b'boomplay8xIsKTn9'
+
+    def _get_playurl(self, item_id, item_type):
+        """Return the decrypted play URL of an item; raise ExtractorError if the API returns no `source`."""
+        param = base64.b64encode(aes_cbc_encrypt_bytes(json.dumps({
+            'itemID': item_id,
+            'itemType': item_type,
+        }).encode(), self._KEY, self._IV)).decode()
+        resp = self._download_json(
+            'https://www.boomplay.com/getResourceAddr', item_id,
+            note='Downloading play URL', errnote='Failed to download play URL',
+            data=urlencode_postdata({'param': param}),
+            headers={'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
+        if not (source := resp.get('source')):
+            # Fail cleanly even when no `code` is set; b64decode(None) would otherwise raise TypeError
+            fallback = 'Please solve the captcha' if resp.get('code') else 'No play URL in response'
+            raise ExtractorError(resp.get('desc') or fallback)
+        return unpad_pkcs7(aes_cbc_decrypt_bytes(base64.b64decode(source), self._KEY, self._IV)).decode()
+
+    def _extract_formats(self, _id, item_type='MUSIC', **kwargs):
+        """Return a one-element format list for the item, or call raise_no_formats if no valid URL is found."""
+        play_url = url_or_none(self._get_playurl(_id, item_type))
+        if not play_url:
+            self.raise_no_formats('No formats found')
+            return None
+        # Only MUSIC items are explicitly marked as having no video codec
+        vcodec = 'none' if item_type == 'MUSIC' else None
+        headers = {
+            'Origin': 'https://www.boomplay.com',
+            'Referer': 'https://www.boomplay.com',
+            'X-Boomplay-Ref': 'Boomplay_WEBV1',
+        }
+        # kwargs are unpacked last so caller-supplied fields take precedence over the defaults
+        return [{'format_id': '0', 'vcodec': vcodec, 'url': play_url, 'http_headers': headers, **kwargs}]
+
+ def _extract_page_metadata(self, webpage, _id):
+ metadata_div = get_element_by_attribute(
+ 'class', r'[^\'"]*(?<=[\'"\s])summary(?=[\'"\s])[^\'"]*', webpage,
+ tag='div', escape_value=False) or ''
+ metadata_entries = re.findall(r'(?s)(?P