From 1a256e5d562fca110902f8a6e8bec255565c1a4b Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Tue, 13 Feb 2024 22:57:05 +0300
Subject: [PATCH 01/21] [PromoDJ] Add extractors

---
 yt_dlp/extractor/_extractors.py |  13 +
 yt_dlp/extractor/promodj.py     | 493 ++++++++++++++++++++++++++++++++
 2 files changed, 506 insertions(+)
 create mode 100644 yt_dlp/extractor/promodj.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index e7dd34c77..f35eab137 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1529,6 +1529,19 @@ from .prankcast import PrankCastIE, PrankCastPostIE
 from .premiershiprugby import PremiershipRugbyIE
 from .presstv import PressTVIE
 from .projectveritas import ProjectVeritasIE
+from .promodj import (
+    PromoDJPageIE,
+    PromoDJUserIE,
+    PromoDJUserMediaIE,
+    PromoDJUserPagesIE,
+    PromoDJUserPageIE,
+    PromoDJBlogPageIE,
+    PromoDJPlaylistIE,
+    PromoDJIE,
+    PromoDJEmbedIE,
+    PromoDJShortIE,
+    PromoDJRadioIE,
+)
 from .prosiebensat1 import ProSiebenSat1IE
 from .prx import (
     PRXStoryIE,
diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
new file mode 100644
index 000000000..da9db44ad
--- /dev/null
+++ b/yt_dlp/extractor/promodj.py
@@ -0,0 +1,493 @@
+import datetime
+import functools
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+    OnDemandPagedList,
+    clean_html,
+    dict_get,
+    extract_attributes,
+    float_or_none,
+    get_element_by_class,
+    get_elements_by_class,
+    int_or_none,
+    parse_duration,
+    str_or_none,
+    traverse_obj,
+    urlencode_postdata,
+    url_or_none,
+)
+
+# promodj.com
+
+# Playlist types:
+# /:login/:media_type - default
+# /:login/groups/:id/:slug - user defined (groups). Can contain audios and/or videos
+
+# A single media by default is attached to default playlist
+# But it can be reattached to a user playlist (group), and no longer appears in the default one
+
+# User pages
+# /:login - all non-empty playlists
+# /:login/music - all non-empty playlists with at least one audio (shows 10 audios per playlist max)
+# /:login/video - all non-empty playlists with at least one video (shows 10 videos per playlist max)
+# /:login/pages - a list of user pages
+# /:login/:page_name - a single user page
+# /:login/blog - a list of blog posts
+# /:login/blog/:id/:slug - a single blog post
+
+# If default playlist is empty, it redirects to the user's page
+# Pages and blog posts can contain: audios, videos, youtube videos
+
+# Tracks and remixes can be paid. See /shop page
+
+
+class PromoDJBaseIE(InfoExtractor):
+    _MEDIA_TYPES = [
+        'tracks',
+        'remixes',
+        'mixes',
+        'promos',
+        'lives',
+        'podcasts',
+        'radioshows',
+        'tools',
+        'realtones',  # doesn't appear on the site menu but still exists
+        'acapellas',  # redirects to /tools, creates default playlist
+        'samples',    # redirects to /tools, doesn't create default playlist
+        'videos',
+    ]
+    _PAGES = ['featured', 'shop', *_MEDIA_TYPES]
+
+    _BASE_URL_RE = r'https?://(?:www\.)?promodj\.com'
+    _MEDIA_TYPES_RE = '|'.join(_MEDIA_TYPES)
+    _NOT_PAGE_RE = '|'.join(['radio', *_PAGES])
+    _LOGIN_RE = rf'(?:(?!{_NOT_PAGE_RE}).)[\w-]+'
+
+    def _set_url_page(self, url, page):
+        parsed_url = urllib.parse.urlparse(url)
+        qs = urllib.parse.parse_qs(parsed_url.query)
+        qs['page'] = page
+        return parsed_url._replace(query=urllib.parse.urlencode(qs, doseq=True)).geturl()
+
+    def _fetch_page(self, url, parsed_media_types, playlist_id, page):
+        page_url = self._set_url_page(url, page + 1)
+        html = self._download_webpage(page_url, f'{playlist_id}-page-{page + 1}')
+        current_page = int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1')
+        if current_page != page + 1:
+            return
+
+        tracks_dump_html = get_element_by_class('tracks_dump', html)
+        for item_html in get_elements_by_class('player_standard', tracks_dump_html):
+            if 'music' in parsed_media_types:
+                a = get_element_by_class('title', item_html)
+            if 'video' in parsed_media_types and not a:
+                a = get_element_by_class('h5videoplayer_promodj_video__title', item_html)
+            if not a:
+                continue
+            if url := traverse_obj(extract_attributes(a), ('href', {url_or_none})):
+                yield self.url_result(url, PromoDJIE)
+
+    def _parse_playlist_links(self, html):
+        PLAYLISTS_RE = r'<a class=\"files_group_title\" href=\"([^\"]+)\">'
+        DEFAULT_VIDEO_PLAYLIST_RE = r'<h5><a href=\"https://promodj\.com/([\w-]+)/video\">Видео</a></h5>'
+
+        playlist_links = []
+
+        for playlist_url in re.findall(PLAYLISTS_RE, html):
+            playlist_links.append(playlist_url)
+
+        login = self._search_regex(
+            DEFAULT_VIDEO_PLAYLIST_RE, html, 'video playlist url', None)
+        if login:
+            playlist_links.append(f'https://promodj.com/{login}/videos')
+
+        return playlist_links
+
+    def _get_playlist_page_size(self, url):
+        is_default_playlist = '/groups/' not in url
+        return 30 if is_default_playlist else 20
+
+    def _fetch_media_data(self, ids, video_id):
+        data = {}
+        for i, id in enumerate(ids):
+            data[f'multi[{i}][method]'] = 'players/config'
+            data[f'multi[{i}][params][kind]'] = 'standalone.big'
+            data[f'multi[{i}][params][fileID]'] = id
+        return self._download_json(
+            'https://promodj.com/api/multi.json', video_id, data=urlencode_postdata(data),
+            headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+
+class PromoDJPageIE(PromoDJBaseIE):
+    _PAGES_RE = '|'.join(PromoDJBaseIE._PAGES)
+
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<id>{_PAGES_RE})'
+    _TESTS = [{
+        'url': 'https://promodj.com/featured',
+        'only_matching': True,
+    }, {
+        # second page
+        'url': 'https://promodj.com/featured/rap?download=1&page=2',
+        'only_matching': True,
+    }, {
+        # filtered
+        'url': 'https://promodj.com/remixes?top=1',
+        'only_matching': True,
+    }, {
+        # with genre
+        'url': 'https://promodj.com/tracks/hip_hop',
+        'only_matching': True,
+    }, {
+        # with search
+        'url': 'https://promodj.com/mixes?kind=mixes&styleID=&searchfor=dance',
+        'only_matching': True,
+    }, {
+        # no download button
+        'url': 'https://promodj.com/shop',
+        'only_matching': True,
+    }]
+
+    _PAGE_SIZE = 20
+
+    def _real_extract(self, url):
+        page_type = self._match_id(url)
+        return self.playlist_result(
+            OnDemandPagedList(
+                functools.partial(self._fetch_page, url, ['music', 'video'], page_type),
+                self._PAGE_SIZE),
+            playlist_id=page_type)
+
+
+class PromoDJUserIE(PromoDJBaseIE):
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})$'
+    _TESTS = [{
+        'url': 'https://promodj.com/djperetse',
+        'only_matching': True,
+    }, {
+        'url': 'https://promodj.com/dj-trojan',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        login = self._match_valid_url(url).group('login')
+        html = self._download_webpage(url, login)
+
+        def entries():
+            for playlist_url in self._parse_playlist_links(html):
+                yield self.url_result(playlist_url, PromoDJPlaylistIE)
+
+        return self.playlist_result(entries(), playlist_id=login)
+
+
+class PromoDJUserMediaIE(PromoDJBaseIE):
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>music|video)$'
+    _TESTS = [{
+        'url': 'https://promodj.com/feel/music',
+        'only_matching': True,
+    }, {
+        'url': 'https://promodj.com/djmikis/video',
+        'only_matching': True,
+    }, {
+        # a user without any videos
+        'url': 'https://promodj.com/worobyev/video',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        login, type = self._match_valid_url(url).groups()
+        page_id = f'{login}-{type}'
+        html = self._download_webpage(url, page_id)
+
+        def entries():
+            for playlist_url in self._parse_playlist_links(html):
+                # TODO: parse only music or videos
+                yield self.url_result(playlist_url, PromoDJPlaylistIE)
+
+        return self.playlist_result(entries(), playlist_id=page_id)
+
+
+class PromoDJUserPagesIE(PromoDJBaseIE):
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>(pages|blog))$'
+    _TESTS = [{
+        'url': 'https://promodj.com/djperetse/pages',
+        'only_matching': True,
+    }, {
+        'url': 'https://promodj.com/golub/blog',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        login, type = self._match_valid_url(url).groups()
+
+
+class PromoDJUserPageIE(PromoDJBaseIE):
+    _USER_PAGES = [
+        'pages',
+        'music',
+        'video',
+        'foto',
+        'avisha',
+        'blog',
+        'feedback',
+        'contact',
+        *PromoDJBaseIE._MEDIA_TYPES,
+    ]
+    _NOT_USER_PAGE_RE = '|'.join(_USER_PAGES)
+    _USER_PAGE_RE = rf'(?:(?!{_NOT_USER_PAGE_RE}).)[\w-]+'
+
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<slug>{_USER_PAGE_RE})$'
+    _TESTS = [{
+        'url': 'https://promodj.com/djperetse/MaxMixes',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        login, slug = self._match_valid_url(url).groups()
+
+
+class PromoDJBlogPageIE(PromoDJBaseIE):
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/blog/(?P<id>\d+)(?:/(?P<slug>\w+))?'
+    _TESTS = [{
+        # with small and big audio players and youtube video
+        'url': 'https://promodj.com/golub/blog/1163895/DJ_Andrey_Golubev_To_Depeche_Mode_with_love_part_9_special_dj_edits_mix',
+        'only_matching': True,
+    }, {
+        # with audio and video
+        'url': 'https://promodj.com/svetmusic/blog/1101958/SVET_I_Like_It_Extra_Sound_Recordings',
+        'only_matching': True,
+    }, {
+        # without any media
+        'url': 'https://promodj.com/svetmusic/blog/915878/DJ_SVET_pobeditel_konkursa_Burn_City_Sound',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        login, id, slug = self._match_valid_url(url).groups()
+
+
+class PromoDJPlaylistIE(PromoDJBaseIE):
+    _VALID_URL = [
+        rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>{PromoDJBaseIE._MEDIA_TYPES_RE})$',
+        rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>groups)/(?P<id>\d+)(?:/(?P<slug>\w+))?',
+    ]
+    _TESTS = [{
+        # default playlist: tracks (audio)
+        'url': 'https://promodj.com/gluk/tracks',
+        'only_matching': True,
+    }, {
+        # default playlist: video
+        'url': 'https://promodj.com/djperetse/videos',
+        'only_matching': True,
+    }, {
+        # user playlist: audio
+        'url': 'https://promodj.com/fonarev/groups/608158/Digital_Emotions_Night',
+        'only_matching': True,
+    }, {
+        # two pages
+        'url': 'https://promodj.com/lavrov/groups/677132/VINYL',
+        'only_matching': True,
+    }, {
+        # user playlist: video
+        'url': 'https://promodj.com/deeplecture/groups/672782/LAROCCA_TV',
+        'only_matching': True,
+    }, {
+        # user playlist: audio and video
+        'url': 'https://promodj.com/djperetse/groups/637358/Russkie_treki',
+        'only_matching': True,
+    }, {
+        # 900+ items
+        'url': 'https://promodj.com/fonarev/groups/17350/Digital_Emotions_Podcast',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        match = self._match_valid_url(url)
+        login = match.group('login')
+        type = match.group('type')
+        playlist_id = f'{login}-{type}' if len(match.groups()) == 2 else f'{login}-{type}-{match.group("id")}'
+        page_size = self._get_playlist_page_size(url)
+
+        entries = OnDemandPagedList(
+            functools.partial(self._fetch_page, url, ['music', 'video'], playlist_id),
+            page_size)
+        return self.playlist_result(entries, playlist_id=playlist_id)
+
+
+class PromoDJIE(PromoDJBaseIE):
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/{PromoDJBaseIE._LOGIN_RE}/(?P<type>{PromoDJBaseIE._MEDIA_TYPES_RE})/(?P<id>\d+)(?:/\w+)?',
+    _TESTS = [{
+        'url': 'https://promodj.com/antonpavlovsky/remixes/6259208/David_Usher_Black_Black_Heart_Anton_Pavlovsky_Cover',
+        'only_matching': True,
+    }, {
+        'url': 'https://promodj.com/j-factory/samples/7560171/Amedici_BW1_Intro',
+        'only_matching': True,
+    }, {
+        # no download links in html
+        'url': 'https://promodj.com/gluk/tracks/4713922/DJ_Glyuk_Folk_ing_DJ_Steven_Smile_Remix_2005',
+        'only_matching': True,
+    }, {
+        # no player
+        'url': 'https://promodj.com/gluk/tracks/420310/IMpulse_Zakat',
+        'only_matching': True,
+    }, {
+        # without slug
+        'url': 'https://promodj.com/djlykov/tracks/7551590',
+        'only_matching': True,
+    }, {
+        # lossless
+        'url': 'https://promodj.com/modi-glu/tracks/6081339/Modi_Glyu_Anabel',
+        'only_matching': True,
+    }, {
+        # paid audio
+        'url': 'https://promodj.com/boyko/tracks/1435682/Dj_Boyko_Katy_Queen_Nad_Oblakami',
+        'only_matching': True,
+    }, {
+        'url': 'https://promodj.com/sergeyfedotov306/videos/7457627/V_Matrice_Sboy',
+        'only_matching': True,
+    }, {
+        'url': 'https://promodj.com/djperetse/videos/5868236/Fatalist_Project_feat_DJ_Peretse_Den_pobedi_Videoklip',
+        'only_matching': True,
+    }]
+
+    _IS_PAID_RE = r'<b>Цена:</b>'
+    # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит
+    _FORMATS_RE = r'<a\s+href=\"(?P<url>[^\"]+\.(?:mp3|mp4|wav))\">\s*(?P<format>MP3|MP4|WAV), (?P<bitrate>\d+) Кбит\s*</a>'
+    _VIEW_COUNT_RE = r'<b>(?:Прослушиваний|Просмотров):</b>\s*(\d+)'
+    # examples: 0:21, 1:07, 74:38
+    _DURATION_RE = r'<b>Продолжительность:</b>\s*(\d{1,}:\d{2})'
+    # examples: 818.4 Кб, 12.9 Мб, 4 Гб, 1.76 Гб
+    _SIZE_RE = r'<b>Размер:</b>\s*(?P<size>\d{1,3}(?:\.\d{1,2})?)\s*(?P<unit>Кб|Мб|Гб)'
+    # examples: сегодня 2:55, вчера 23:17, 1 июня 2016 3:46
+    _TIMESTAMP_RE = r'<b>Публикация:</b>\s*(?P<day>вчера|сегодня|\d{1,2})(?: (?P<month>[а-я]+) (?P<year>\d{4}))?\s*(?P<hours>\d{1,2}):(?P<minutes>\d{2})'
+    _TAGS_RE = r'<span\s+class=\"styles\">([^\n]+)</span>'
+
+    def _parse_ru_date(self, raw_date):
+        RU_MONTHS = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря']
+        day, month, year, hours, minutes = raw_date
+        if day == 'сегодня':
+            d = datetime.date.today()
+            day = d.day
+            month = d.month
+            year = d.year
+        elif day == 'вчера':
+            d = datetime.date.today() - datetime.timedelta(days=1)
+            day = d.day
+            month = d.month
+            year = d.year
+        else:
+            day = int(day)
+            month = RU_MONTHS.index(month) + 1
+            year = int(year)
+        return datetime.datetime(year, month, day, int(hours), int(minutes)).timestamp()
+
+    def _parse_ru_size(self, raw_size):
+        RU_SIZE_UNITS = ['Б', 'Кб', 'Мб', 'Гб']
+        size, size_unit = raw_size
+        return int(float(size) * pow(1024, RU_SIZE_UNITS.index(size_unit)))
+
+    def _parse_media(self, html, id):
+        meta_html = get_element_by_class('clearfix', get_element_by_class('dj_bblock', html))
+
+        is_paid = re.search(self._IS_PAID_RE, meta_html)
+        formats_from_html = re.findall(self._FORMATS_RE, meta_html)
+
+        if is_paid or len(formats_from_html) == 0:
+            media_data_raw = self._search_regex(
+                r'({\"no_preroll\":false,\"seekAny\":true,\"sources\":[^\n]+)\);', html, 'media data')
+            media_data = self._parse_json(media_data_raw, id)
+            formats = [{
+                'url': source.get('URL'),
+                'size': int_or_none(source.get('size')),
+            } for source in traverse_obj(media_data, ('sources')) if url_or_none(source.get('URL'))]
+        else:
+            formats = [{
+                'url': url,
+                'format': format.lower(),
+                'tbr': int(bitrate),
+            } for url, format, bitrate in formats_from_html if url_or_none(url)]
+            # size field describes best quality. best quality always comes first
+            formats[0]['size'] = self._parse_ru_size(re.findall(self._SIZE_RE, meta_html)[0])
+
+        return {
+            'id': id,
+            'title': clean_html(get_element_by_class('file_title', html)),
+            'formats': formats,
+            'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count')),
+            'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')),
+            'timestamp': self._parse_ru_date(re.findall(self._TIMESTAMP_RE, meta_html)[0]),
+            'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '),
+        }
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        html = self._download_webpage(url, id)
+        return self._parse_media(html, id)
+
+
+class PromoDJEmbedIE(PromoDJBaseIE):
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/embed/(?P<id>\d+)/(?P<type>cover|big)'
+    _TESTS = [{
+        'url': 'https://promodj.com/embed/7555440/cover',
+        'only_matching': True,
+    }, {
+        'url': 'https://promodj.com/embed/7540163/big',
+        'only_matching': True,
+    }, {
+        # video (can be only big)
+        'url': 'https://promodj.com/embed/3922099/big',
+        'only_matching': True,
+    }]
+
+    def _get_full_url(self, media_data, id):
+        if media_data.get('video'):
+            video_config = self._parse_json(media_data['config'], id)
+            video = traverse_obj(video_config, ('playlist', 'item', 0))
+            return traverse_obj(video, ('title', '@ico_url'))
+        else:
+            return media_data.get('titleURL')
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        url = self._get_full_url(self._fetch_media_data([id], id)[0], id)
+        return self.url_result(url, PromoDJIE, id)
+
+
+class PromoDJShortIE(PromoDJBaseIE):
+    _VALID_URL = r'https://pdj.cc/(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://pdj.cc/fv8VD',
+        'only_matching': True,
+    }]
+
+    _PAGE_URL_REGEX = r'<meta property="og:url"\s*content="(?P<url>[^"]+)"'
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        html = self._download_webpage(url, id)
+        url = re.findall(self._PAGE_URL_REGEX, html)[0]
+        return self.url_result(url, PromoDJIE, id)
+
+
+class PromoDJRadioIE(PromoDJBaseIE):
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/radio#(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://promodj.com/radio#dubstep',
+        'only_matching': True,
+    }, {
+        'url': 'https://promodj.com/radio#oldschool',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        return {
+            'id': id,
+            'formats': [{
+                'url': f'https://radio.promodj.com/{id}-192',
+                'abr': 192,
+            }],
+            'is_live': True,
+        }

From 3416c1a0e88c599ac053946ae22a385381409158 Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Wed, 14 Feb 2024 00:46:01 +0300
Subject: [PATCH 02/21] [PromoDJ] Add user pages and blogs extractors

---
 yt_dlp/extractor/promodj.py | 66 ++++++++++++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index da9db44ad..e9820b60d 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -4,17 +4,16 @@ import re
 import urllib.parse
 
 from .common import InfoExtractor
+from .youtube import YoutubeIE
 from ..utils import (
     OnDemandPagedList,
     clean_html,
-    dict_get,
     extract_attributes,
-    float_or_none,
+    ExtractorError,
     get_element_by_class,
     get_elements_by_class,
     int_or_none,
     parse_duration,
-    str_or_none,
     traverse_obj,
     urlencode_postdata,
     url_or_none,
@@ -106,6 +105,14 @@ class PromoDJBaseIE(InfoExtractor):
 
         return playlist_links
 
+    def _parse_page_content(self, html):
+        for id in re.findall(r'CORE\.Player\(\'[^\']+\', \'(?:standalone|cover)\.big\', (\d+),', html):
+            yield self.url_result(f'https://promodj.com/embed/{id}/big', PromoDJEmbedIE, id)
+
+        for iframe_url in re.findall(r'<iframe[^>]+src=\"([^\"]+)\"', html):
+            if YoutubeIE.suitable(iframe_url):
+                yield self.url_result(iframe_url, YoutubeIE)
+
     def _get_playlist_page_size(self, url):
         is_default_playlist = '/groups/' not in url
         return 30 if is_default_playlist else 20
@@ -210,7 +217,7 @@ class PromoDJUserMediaIE(PromoDJBaseIE):
 
 
 class PromoDJUserPagesIE(PromoDJBaseIE):
-    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>(pages|blog))$'
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>pages|blog)$'
     _TESTS = [{
         'url': 'https://promodj.com/djperetse/pages',
         'only_matching': True,
@@ -219,8 +226,38 @@ class PromoDJUserPagesIE(PromoDJBaseIE):
         'only_matching': True,
     }]
 
+    _PAGE_SIZE = 10
+
+    def _parse_pages(self, url, playlist_id):
+        html = self._download_webpage(url, playlist_id)
+        content_html = get_element_by_class('dj_universal', get_element_by_class('dj_bblock', html))
+        print(re.findall(r'<a href=\"([^\"]+)\">([^<]+)</a>', content_html))
+        for page_url, page_title in re.findall(r'<a href=\"([^\"]+)\">([^<]+)</a>', content_html):
+            yield self.url_result(page_url, PromoDJUserPageIE, video_title=page_title)
+
+    def _fetch_blog_page(self, url, playlist_id, page):
+        page_url = self._set_url_page(url, page + 1)
+        html = self._download_webpage(page_url, f'{playlist_id}-page-{page + 1}')
+        current_page = int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1')
+        if current_page != page + 1:
+            return
+
+        for a in get_elements_by_class('post_title', html):
+            if not a:
+                continue
+            if url := traverse_obj(extract_attributes(a), ('href', {url_or_none})):
+                yield self.url_result(url, PromoDJBlogPageIE)
+
     def _real_extract(self, url):
         login, type = self._match_valid_url(url).groups()
+        playlist_id = f'{login}-{type}'
+        if type == 'pages':
+            entries = self._parse_pages(url, playlist_id)
+        elif type == 'blog':
+            entries = OnDemandPagedList(
+                functools.partial(self._fetch_blog_page, url, playlist_id),
+                self._PAGE_SIZE)
+        return self.playlist_result(entries, playlist_id)
 
 
 class PromoDJUserPageIE(PromoDJBaseIE):
@@ -246,6 +283,11 @@ class PromoDJUserPageIE(PromoDJBaseIE):
 
     def _real_extract(self, url):
         login, slug = self._match_valid_url(url).groups()
+        page_id = f'{login}-{slug}'
+        html = self._download_webpage(url, page_id)
+        content_html = get_element_by_class('perfect', html)
+        return self.playlist_result(
+            self._parse_page_content(content_html), playlist_id=page_id)
 
 
 class PromoDJBlogPageIE(PromoDJBaseIE):
@@ -266,6 +308,11 @@ class PromoDJBlogPageIE(PromoDJBaseIE):
 
     def _real_extract(self, url):
         login, id, slug = self._match_valid_url(url).groups()
+        page_id = f'{login}-blog-{id}-{slug}'
+        html = self._download_webpage(url, page_id)
+        content_html = get_element_by_class('post_body', html)
+        return self.playlist_result(
+            self._parse_page_content(content_html), playlist_id=page_id)
 
 
 class PromoDJPlaylistIE(PromoDJBaseIE):
@@ -439,9 +486,20 @@ class PromoDJEmbedIE(PromoDJBaseIE):
         # video (can be only big)
         'url': 'https://promodj.com/embed/3922099/big',
         'only_matching': True,
+    }, {
+        # blocked
+        'url': 'https://promodj.com/embed/5586967/big',
+        'only_matching': True,
+    }, {
+        # deleted
+        'url': 'https://promodj.com/embed/5606804/big',
+        'only_matching': True,
     }]
 
     def _get_full_url(self, media_data, id):
+        if player_error := media_data.get('player_error'):
+            raise ExtractorError(player_error, expected=True)
+
         if media_data.get('video'):
             video_config = self._parse_json(media_data['config'], id)
             video = traverse_obj(video_config, ('playlist', 'item', 0))

From ca0be3f1c1e3d452e690e37263b734e788f3438c Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Wed, 14 Feb 2024 04:36:08 +0300
Subject: [PATCH 03/21] [PromoDJ] Improve extractors

---
 yt_dlp/extractor/_extractors.py |   2 +
 yt_dlp/extractor/promodj.py     | 110 +++++++++++++++++++++-----------
 2 files changed, 76 insertions(+), 36 deletions(-)

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index f35eab137..59873f479 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1537,6 +1537,8 @@ from .promodj import (
     PromoDJUserPageIE,
     PromoDJBlogPageIE,
     PromoDJPlaylistIE,
+    PromoDJMusicPlaylistIE,
+    PromoDJVideoPlaylistIE,
     PromoDJIE,
     PromoDJEmbedIE,
     PromoDJShortIE,
diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index e9820b60d..f58be2893 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -11,7 +11,7 @@ from ..utils import (
     extract_attributes,
     ExtractorError,
     get_element_by_class,
-    get_elements_by_class,
+    get_elements_html_by_class,
     int_or_none,
     parse_duration,
     traverse_obj,
@@ -63,7 +63,7 @@ class PromoDJBaseIE(InfoExtractor):
     _BASE_URL_RE = r'https?://(?:www\.)?promodj\.com'
     _MEDIA_TYPES_RE = '|'.join(_MEDIA_TYPES)
     _NOT_PAGE_RE = '|'.join(['radio', *_PAGES])
-    _LOGIN_RE = rf'(?:(?!{_NOT_PAGE_RE}).)[\w-]+'
+    _LOGIN_RE = rf'(?:(?!{_NOT_PAGE_RE}).)[\w.-]+'
 
     def _set_url_page(self, url, page):
         parsed_url = urllib.parse.urlparse(url)
@@ -71,27 +71,25 @@ class PromoDJBaseIE(InfoExtractor):
         qs['page'] = page
         return parsed_url._replace(query=urllib.parse.urlencode(qs, doseq=True)).geturl()
 
-    def _fetch_page(self, url, parsed_media_types, playlist_id, page):
+    def _fetch_page(self, url, media_types, playlist_id, page):
         page_url = self._set_url_page(url, page + 1)
         html = self._download_webpage(page_url, f'{playlist_id}-page-{page + 1}')
         current_page = int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1')
         if current_page != page + 1:
             return
 
-        tracks_dump_html = get_element_by_class('tracks_dump', html)
-        for item_html in get_elements_by_class('player_standard', tracks_dump_html):
-            if 'music' in parsed_media_types:
-                a = get_element_by_class('title', item_html)
-            if 'video' in parsed_media_types and not a:
-                a = get_element_by_class('h5videoplayer_promodj_video__title', item_html)
-            if not a:
+        for a in get_elements_html_by_class('player_standard_tool__play', html):
+            url = traverse_obj(extract_attributes(a), ('href', {url_or_none}))
+            if not url:
                 continue
-            if url := traverse_obj(extract_attributes(a), ('href', {url_or_none})):
+            url = url.replace('?play=1', '')
+            is_video = '/videos/' in url
+            if is_video and 'video' in media_types or not is_video and 'music' in media_types:
                 yield self.url_result(url, PromoDJIE)
 
     def _parse_playlist_links(self, html):
         PLAYLISTS_RE = r'<a class=\"files_group_title\" href=\"([^\"]+)\">'
-        DEFAULT_VIDEO_PLAYLIST_RE = r'<h5><a href=\"https://promodj\.com/([\w-]+)/video\">Видео</a></h5>'
+        DEFAULT_VIDEO_PLAYLIST_RE = r'<h5><a href=\"https://promodj\.com/([\w.-]+)/video\">Видео</a></h5>'
 
         playlist_links = []
 
@@ -210,8 +208,8 @@ class PromoDJUserMediaIE(PromoDJBaseIE):
 
         def entries():
             for playlist_url in self._parse_playlist_links(html):
-                # TODO: parse only music or videos
-                yield self.url_result(playlist_url, PromoDJPlaylistIE)
+                ie = PromoDJMusicPlaylistIE if type == 'music' else PromoDJVideoPlaylistIE
+                yield self.url_result(playlist_url, ie)
 
         return self.playlist_result(entries(), playlist_id=page_id)
 
@@ -231,7 +229,6 @@ class PromoDJUserPagesIE(PromoDJBaseIE):
     def _parse_pages(self, url, playlist_id):
         html = self._download_webpage(url, playlist_id)
         content_html = get_element_by_class('dj_universal', get_element_by_class('dj_bblock', html))
-        print(re.findall(r'<a href=\"([^\"]+)\">([^<]+)</a>', content_html))
         for page_url, page_title in re.findall(r'<a href=\"([^\"]+)\">([^<]+)</a>', content_html):
             yield self.url_result(page_url, PromoDJUserPageIE, video_title=page_title)
 
@@ -242,9 +239,7 @@ class PromoDJUserPagesIE(PromoDJBaseIE):
         if current_page != page + 1:
             return
 
-        for a in get_elements_by_class('post_title', html):
-            if not a:
-                continue
+        for a in get_elements_html_by_class('post_title_moderated', html):
             if url := traverse_obj(extract_attributes(a), ('href', {url_or_none})):
                 yield self.url_result(url, PromoDJBlogPageIE)
 
@@ -350,6 +345,8 @@ class PromoDJPlaylistIE(PromoDJBaseIE):
         'only_matching': True,
     }]
 
+    _MEDIA_TYPES = ['music', 'video']
+
     def _real_extract(self, url):
         match = self._match_valid_url(url)
         login = match.group('login')
@@ -358,11 +355,19 @@ class PromoDJPlaylistIE(PromoDJBaseIE):
         page_size = self._get_playlist_page_size(url)
 
         entries = OnDemandPagedList(
-            functools.partial(self._fetch_page, url, ['music', 'video'], playlist_id),
+            functools.partial(self._fetch_page, url, self._MEDIA_TYPES, playlist_id),
             page_size)
         return self.playlist_result(entries, playlist_id=playlist_id)
 
 
+class PromoDJMusicPlaylistIE(PromoDJPlaylistIE):
+    _MEDIA_TYPES = ['music']
+
+
+class PromoDJVideoPlaylistIE(PromoDJPlaylistIE):
+    _MEDIA_TYPES = ['video']
+
+
 class PromoDJIE(PromoDJBaseIE):
     _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/{PromoDJBaseIE._LOGIN_RE}/(?P<type>{PromoDJBaseIE._MEDIA_TYPES_RE})/(?P<id>\d+)(?:/\w+)?',
     _TESTS = [{
@@ -372,9 +377,13 @@ class PromoDJIE(PromoDJBaseIE):
         'url': 'https://promodj.com/j-factory/samples/7560171/Amedici_BW1_Intro',
         'only_matching': True,
     }, {
-        # no download links in html
+        # music: no download links in html
         'url': 'https://promodj.com/gluk/tracks/4713922/DJ_Glyuk_Folk_ing_DJ_Steven_Smile_Remix_2005',
         'only_matching': True,
+    }, {
+        # video: no download link in html
+        'url': 'https://promodj.com/psywanderer/videos/7559147/Chu_de_sa',
+        'only_matching': True,
     }, {
         # no player
         'url': 'https://promodj.com/gluk/tracks/420310/IMpulse_Zakat',
@@ -397,20 +406,37 @@ class PromoDJIE(PromoDJBaseIE):
     }, {
         'url': 'https://promodj.com/djperetse/videos/5868236/Fatalist_Project_feat_DJ_Peretse_Den_pobedi_Videoklip',
         'only_matching': True,
+    }, {
+        # avi
+        'url': 'https://promodj.com/djmikis/videos/5311597/Mikis_Live_SDJ_Show',
+        'only_matching': True,
+    }, {
+        # asf
+        'url': 'https://promodj.com/gigsiphonic/videos/7559341/Gigsiphonic_PODCAST_309_Extended_video_version',
+        'only_matching': True,
+    }, {
+        # not valid html
+        'url': 'https://promodj.com/martin.sehnal/videos/7555841/Martin_Sehnal_CII_33_Plus_CII_32_Clothes_on_the_peg_2_020_2_024_02_01th',
+        'only_matching': True,
     }]
 
     _IS_PAID_RE = r'<b>Цена:</b>'
-    # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит
-    _FORMATS_RE = r'<a\s+href=\"(?P<url>[^\"]+\.(?:mp3|mp4|wav))\">\s*(?P<format>MP3|MP4|WAV), (?P<bitrate>\d+) Кбит\s*</a>'
+    # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит
+    _FORMATS_RE = r'<a\s+href=\"(?P<url>[^\"]+)\">\s*(?P<format>\w+), (?P<bitrate>\d+) Кбит\s*</a>'
     _VIEW_COUNT_RE = r'<b>(?:Прослушиваний|Просмотров):</b>\s*(\d+)'
-    # examples: 0:21, 1:07, 74:38
-    _DURATION_RE = r'<b>Продолжительность:</b>\s*(\d{1,}:\d{2})'
-    # examples: 818.4 Кб, 12.9 Мб, 4 Гб, 1.76 Гб
-    _SIZE_RE = r'<b>Размер:</b>\s*(?P<size>\d{1,3}(?:\.\d{1,2})?)\s*(?P<unit>Кб|Мб|Гб)'
-    # examples: сегодня 2:55, вчера 23:17, 1 июня 2016 3:46
+    # examples: 0:21 | 1:07 | 74:38
+    _DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})'
+    # examples: 818.4 Кб | 12.9 Мб | 4 Гб | 1.76 Гб | 1001.5 Мб
+    _SIZE_RE = r'<b>Размер:</b>\s*(?P<size>\d+(?:\.\d+)?)\s*(?P<unit>Кб|Мб|Гб)'
+    # examples: сегодня 2:55 | вчера 23:17 | 1 июня 2016 3:46
     _TIMESTAMP_RE = r'<b>Публикация:</b>\s*(?P<day>вчера|сегодня|\d{1,2})(?: (?P<month>[а-я]+) (?P<year>\d{4}))?\s*(?P<hours>\d{1,2}):(?P<minutes>\d{2})'
     _TAGS_RE = r'<span\s+class=\"styles\">([^\n]+)</span>'
 
+    # https://regex101.com/r/2ZkUmW/1
+    _MUSIC_DATA_REGEX = r'({\"no_preroll\":false,\"seekAny\":true,\"sources\":[^\n]+)\);'
+    # https://regex101.com/r/b9utBf/1
+    _VIDEO_DATA_REGEX = r'({\"video\":true,\"config\":[^\n]+)\);'
+
     def _parse_ru_date(self, raw_date):
         RU_MONTHS = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря']
         day, month, year, hours, minutes = raw_date
@@ -435,20 +461,32 @@ class PromoDJIE(PromoDJBaseIE):
         size, size_unit = raw_size
         return int(float(size) * pow(1024, RU_SIZE_UNITS.index(size_unit)))
 
-    def _parse_media(self, html, id):
-        meta_html = get_element_by_class('clearfix', get_element_by_class('dj_bblock', html))
+    def _parse_media(self, html, id, type):
+        # html can be invalid
+        try:
+            meta_html = get_elements_html_by_class('dj_universal', html)[1]
+        except Exception:
+            meta_html = html
 
-        is_paid = re.search(self._IS_PAID_RE, meta_html)
         formats_from_html = re.findall(self._FORMATS_RE, meta_html)
+        has_formats = len(formats_from_html) != 0
+        is_paid = re.search(self._IS_PAID_RE, meta_html)
 
-        if is_paid or len(formats_from_html) == 0:
-            media_data_raw = self._search_regex(
-                r'({\"no_preroll\":false,\"seekAny\":true,\"sources\":[^\n]+)\);', html, 'media data')
+        if not has_formats and is_paid:
+            media_data_raw = self._search_regex(self._MUSIC_DATA_REGEX, html, 'media data')
             media_data = self._parse_json(media_data_raw, id)
             formats = [{
                 'url': source.get('URL'),
                 'size': int_or_none(source.get('size')),
             } for source in traverse_obj(media_data, ('sources')) if url_or_none(source.get('URL'))]
+        elif not has_formats and type == 'videos':
+            media_data_raw = self._search_regex(self._VIDEO_DATA_REGEX, html, 'media data')
+            media_data = self._parse_json(media_data_raw, id)
+            video_config = self._parse_json(media_data['config'], id)
+            video = traverse_obj(video_config, ('playlist', 'item', 0))
+            formats = [{
+                'url': traverse_obj(video, ('play', '@url', {url_or_none})),
+            }]
         else:
             formats = [{
                 'url': url,
@@ -462,16 +500,16 @@ class PromoDJIE(PromoDJBaseIE):
             'id': id,
             'title': clean_html(get_element_by_class('file_title', html)),
             'formats': formats,
-            'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count')),
+            'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count', default=None)),
             'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')),
             'timestamp': self._parse_ru_date(re.findall(self._TIMESTAMP_RE, meta_html)[0]),
             'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '),
         }
 
     def _real_extract(self, url):
-        id = self._match_id(url)
+        type, id = self._match_valid_url(url).groups()
         html = self._download_webpage(url, id)
-        return self._parse_media(html, id)
+        return self._parse_media(html, id, type)
 
 
 class PromoDJEmbedIE(PromoDJBaseIE):

From 13f116fce62ddab5a77ba5ba2d5c935e1acdb201 Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Wed, 14 Feb 2024 04:40:18 +0300
Subject: [PATCH 04/21] [PromoDJ] Rename media_types to allowed_media_cats

---
 yt_dlp/extractor/promodj.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index f58be2893..ccfa50d05 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -71,7 +71,7 @@ class PromoDJBaseIE(InfoExtractor):
         qs['page'] = page
         return parsed_url._replace(query=urllib.parse.urlencode(qs, doseq=True)).geturl()
 
-    def _fetch_page(self, url, media_types, playlist_id, page):
+    def _fetch_page(self, url, allowed_media_cats, playlist_id, page):
         page_url = self._set_url_page(url, page + 1)
         html = self._download_webpage(page_url, f'{playlist_id}-page-{page + 1}')
         current_page = int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1')
@@ -84,7 +84,7 @@ class PromoDJBaseIE(InfoExtractor):
                 continue
             url = url.replace('?play=1', '')
             is_video = '/videos/' in url
-            if is_video and 'video' in media_types or not is_video and 'music' in media_types:
+            if is_video and 'video' in allowed_media_cats or not is_video and 'music' in allowed_media_cats:
                 yield self.url_result(url, PromoDJIE)
 
     def _parse_playlist_links(self, html):
@@ -345,7 +345,7 @@ class PromoDJPlaylistIE(PromoDJBaseIE):
         'only_matching': True,
     }]
 
-    _MEDIA_TYPES = ['music', 'video']
+    _ALLOWED_MEDIA_CATS = ['music', 'video']
 
     def _real_extract(self, url):
         match = self._match_valid_url(url)
@@ -355,17 +355,17 @@ class PromoDJPlaylistIE(PromoDJBaseIE):
         page_size = self._get_playlist_page_size(url)
 
         entries = OnDemandPagedList(
-            functools.partial(self._fetch_page, url, self._MEDIA_TYPES, playlist_id),
+            functools.partial(self._fetch_page, url, self._ALLOWED_MEDIA_CATS, playlist_id),
             page_size)
         return self.playlist_result(entries, playlist_id=playlist_id)
 
 
 class PromoDJMusicPlaylistIE(PromoDJPlaylistIE):
-    _MEDIA_TYPES = ['music']
+    _ALLOWED_MEDIA_CATS = ['music']
 
 
 class PromoDJVideoPlaylistIE(PromoDJPlaylistIE):
-    _MEDIA_TYPES = ['video']
+    _ALLOWED_MEDIA_CATS = ['video']
 
 
 class PromoDJIE(PromoDJBaseIE):

From a634e7c5d1b288b145d8acaeb947a64790f368e7 Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Wed, 14 Feb 2024 14:00:03 +0300
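Subject: [PATCH 05/21] [PromoDJ] Add _get_current_page, reorder format checks

- Move the pagination check into PromoDJBaseIE._get_current_page and
  use it in _fetch_page and _fetch_blogs_page
- Rename _fetch_blog_page to _fetch_blogs_page
- _parse_media: check the video player data first; fall back to the
  music player data when the HTML has no formats or the track is paid
  (previously: only when there were no formats and the track was paid)
- PromoDJShortIE: use _og_search_url instead of a custom og:url regex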

---
 yt_dlp/extractor/promodj.py | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index ccfa50d05..b63113c30 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -74,8 +74,7 @@ class PromoDJBaseIE(InfoExtractor):
     def _fetch_page(self, url, allowed_media_cats, playlist_id, page):
         page_url = self._set_url_page(url, page + 1)
         html = self._download_webpage(page_url, f'{playlist_id}-page-{page + 1}')
-        current_page = int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1')
-        if current_page != page + 1:
+        if self._get_current_page(html) != page + 1:
             return
 
         for a in get_elements_html_by_class('player_standard_tool__play', html):
@@ -115,6 +114,9 @@ class PromoDJBaseIE(InfoExtractor):
         is_default_playlist = '/groups/' not in url
         return 30 if is_default_playlist else 20
 
+    def _get_current_page(self, html):
+        return int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1')
+
     def _fetch_media_data(self, ids, video_id):
         data = {}
         for i, id in enumerate(ids):
@@ -232,11 +234,10 @@ class PromoDJUserPagesIE(PromoDJBaseIE):
         for page_url, page_title in re.findall(r'<a href=\"([^\"]+)\">([^<]+)</a>', content_html):
             yield self.url_result(page_url, PromoDJUserPageIE, video_title=page_title)
 
-    def _fetch_blog_page(self, url, playlist_id, page):
+    def _fetch_blogs_page(self, url, playlist_id, page):
         page_url = self._set_url_page(url, page + 1)
         html = self._download_webpage(page_url, f'{playlist_id}-page-{page + 1}')
-        current_page = int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1')
-        if current_page != page + 1:
+        if self._get_current_page(html) != page + 1:
             return
 
         for a in get_elements_html_by_class('post_title_moderated', html):
@@ -250,7 +251,7 @@ class PromoDJUserPagesIE(PromoDJBaseIE):
             entries = self._parse_pages(url, playlist_id)
         elif type == 'blog':
             entries = OnDemandPagedList(
-                functools.partial(self._fetch_blog_page, url, playlist_id),
+                functools.partial(self._fetch_blogs_page, url, playlist_id),
                 self._PAGE_SIZE)
         return self.playlist_result(entries, playlist_id)
 
@@ -472,14 +473,7 @@ class PromoDJIE(PromoDJBaseIE):
         has_formats = len(formats_from_html) != 0
         is_paid = re.search(self._IS_PAID_RE, meta_html)
 
-        if not has_formats and is_paid:
-            media_data_raw = self._search_regex(self._MUSIC_DATA_REGEX, html, 'media data')
-            media_data = self._parse_json(media_data_raw, id)
-            formats = [{
-                'url': source.get('URL'),
-                'size': int_or_none(source.get('size')),
-            } for source in traverse_obj(media_data, ('sources')) if url_or_none(source.get('URL'))]
-        elif not has_formats and type == 'videos':
+        if not has_formats and type == 'videos':
             media_data_raw = self._search_regex(self._VIDEO_DATA_REGEX, html, 'media data')
             media_data = self._parse_json(media_data_raw, id)
             video_config = self._parse_json(media_data['config'], id)
@@ -487,6 +481,13 @@ class PromoDJIE(PromoDJBaseIE):
             formats = [{
                 'url': traverse_obj(video, ('play', '@url', {url_or_none})),
             }]
+        elif not has_formats or is_paid:
+            media_data_raw = self._search_regex(self._MUSIC_DATA_REGEX, html, 'media data')
+            media_data = self._parse_json(media_data_raw, id)
+            formats = [{
+                'url': source.get('URL'),
+                'size': int_or_none(source.get('size')),
+            } for source in traverse_obj(media_data, ('sources')) if url_or_none(source.get('URL'))]
         else:
             formats = [{
                 'url': url,
@@ -558,13 +559,10 @@ class PromoDJShortIE(PromoDJBaseIE):
         'only_matching': True,
     }]
 
-    _PAGE_URL_REGEX = r'<meta property="og:url"\s*content="(?P<url>[^"]+)"'
-
     def _real_extract(self, url):
         id = self._match_id(url)
         html = self._download_webpage(url, id)
-        url = re.findall(self._PAGE_URL_REGEX, html)[0]
-        return self.url_result(url, PromoDJIE, id)
+        return self.url_result(self._og_search_url(html), PromoDJIE, id)
 
 
 class PromoDJRadioIE(PromoDJBaseIE):

From 85b739eba7f4d667c3de941d6477ccb697ce212f Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Wed, 14 Feb 2024 16:23:17 +0300
Subject: [PATCH 06/21] [PromoDJ] Update media info extraction logic

---
 yt_dlp/extractor/promodj.py | 139 ++++++++++++++++++++++++------------
 1 file changed, 93 insertions(+), 46 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index b63113c30..937fa56c9 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -8,12 +8,17 @@ from .youtube import YoutubeIE
 from ..utils import (
     OnDemandPagedList,
     clean_html,
+    dict_get,
     extract_attributes,
     ExtractorError,
+    float_or_none,
     get_element_by_class,
     get_elements_html_by_class,
     int_or_none,
+    js_to_json,
+    merge_dicts,
     parse_duration,
+    str_or_none,
     traverse_obj,
     urlencode_postdata,
     url_or_none,
@@ -127,6 +132,44 @@ class PromoDJBaseIE(InfoExtractor):
             'https://promodj.com/api/multi.json', video_id, data=urlencode_postdata(data),
             headers={'Content-Type': 'application/x-www-form-urlencoded'})
 
+    def _parse_media_data(self, media_data, id):
+        if player_error := media_data.get('player_error'):
+            raise ExtractorError(player_error, expected=True)
+
+        if media_data.get('video'):
+            video = traverse_obj(
+                self._parse_json(media_data['config'], id), ('playlist', 'item', 0))
+            formats = [{
+                'url': traverse_obj(video, ('play', '@url', {url_or_none})),
+                **traverse_obj(media_data, {
+                    'width': ('width', {int_or_none}),
+                    'height': ('height', {int_or_none}),
+                })
+            }]
+            return {
+                'id': id,
+                'formats': formats,
+                **traverse_obj(video, {
+                    'title': ('title', 'line', 1, 0, '$', {str_or_none}),
+                    'webpage_url': ('title', '@ico_url', {url_or_none}),
+                    'duration': ('play', '@duration', {int_or_none}),
+                    'thumbnail': ('background', '@url', {url_or_none}),
+                    'channel': ('title', 'line', 0, 0, '$', {str_or_none}),
+                    'channel_url': ('title', 'line', 0, 0, '@url', {url_or_none}),
+                })
+            }
+
+        formats = [traverse_obj(source, {
+            'url': ('URL', {url_or_none}),
+            'size': ('size', {int_or_none}),
+        }) for source in traverse_obj(media_data, ('sources'))]
+        return {
+            'id': id,
+            'title': clean_html(dict_get(media_data, ('title_html', 'title'))),
+            'formats': formats,
+            'webpage_url': traverse_obj(media_data, ('titleURL', {url_or_none}))
+        }
+
 
 class PromoDJPageIE(PromoDJBaseIE):
     _PAGES_RE = '|'.join(PromoDJBaseIE._PAGES)
@@ -394,13 +437,21 @@ class PromoDJIE(PromoDJBaseIE):
         'url': 'https://promodj.com/djlykov/tracks/7551590',
         'only_matching': True,
     }, {
-        # lossless
+        # lossless wav
         'url': 'https://promodj.com/modi-glu/tracks/6081339/Modi_Glyu_Anabel',
         'only_matching': True,
     }, {
-        # paid audio
+        # lossless flac
+        'url': 'https://promodj.com/sashaorbeat/mixes/7422493/Sasha_Orbeat_Pure_Love_3',
+        'only_matching': True,
+    }, {
+        # paid lossless
         'url': 'https://promodj.com/boyko/tracks/1435682/Dj_Boyko_Katy_Queen_Nad_Oblakami',
         'only_matching': True,
+    }, {
+        # paid lossy
+        'url': 'https://promodj.com/tesla/tracks/342938/Library_Of_Bugs',
+        'only_matching': True,
     }, {
         'url': 'https://promodj.com/sergeyfedotov306/videos/7457627/V_Matrice_Sboy',
         'only_matching': True,
@@ -422,8 +473,9 @@ class PromoDJIE(PromoDJBaseIE):
     }]
 
     _IS_PAID_RE = r'<b>Цена:</b>'
-    # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит
-    _FORMATS_RE = r'<a\s+href=\"(?P<url>[^\"]+)\">\s*(?P<format>\w+), (?P<bitrate>\d+) Кбит\s*</a>'
+    # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит
+    # https://regex101.com/r/2AuaxB/1
+    _FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*(?P<format>\w+), (?P<bitrate>\d+) Кбит'
     _VIEW_COUNT_RE = r'<b>(?:Прослушиваний|Просмотров):</b>\s*(\d+)'
     # examples: 0:21 | 1:07 | 74:38
     _DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})'
@@ -463,49 +515,50 @@ class PromoDJIE(PromoDJBaseIE):
         return int(float(size) * pow(1024, RU_SIZE_UNITS.index(size_unit)))
 
     def _parse_media(self, html, id, type):
+        # videos always have one format
+        # audios can have one or two formats
+
+        # always returns only one format
+        # if audio has two formats, returns only lossy
+        media_data = self._search_json(
+            '', html, 'media data', id,
+            contains_pattern=self._VIDEO_DATA_REGEX if type == 'videos' else self._MUSIC_DATA_REGEX,
+            transform_source=js_to_json)
+        metadata = self._parse_media_data(media_data, id)
+
         # html can be invalid
         try:
             meta_html = get_elements_html_by_class('dj_universal', html)[1]
         except Exception:
             meta_html = html
 
+        # returns one or two formats but sometimes without download links
+        # best quality always comes first
         formats_from_html = re.findall(self._FORMATS_RE, meta_html)
-        has_formats = len(formats_from_html) != 0
         is_paid = re.search(self._IS_PAID_RE, meta_html)
+        bitrate_key = 'tbr' if type == 'videos' else 'abr'
+        for i, match in enumerate(formats_from_html):
+            url, _, bitrate = match
+            is_last = i == len(formats_from_html) - 1
+            if is_last:
+                metadata['formats'][0][bitrate_key] = int(bitrate)
+            elif url_or_none(url) and not is_paid:
+                metadata['formats'].append({
+                    'url': url,
+                    bitrate_key: int(bitrate),
+                })
 
-        if not has_formats and type == 'videos':
-            media_data_raw = self._search_regex(self._VIDEO_DATA_REGEX, html, 'media data')
-            media_data = self._parse_json(media_data_raw, id)
-            video_config = self._parse_json(media_data['config'], id)
-            video = traverse_obj(video_config, ('playlist', 'item', 0))
-            formats = [{
-                'url': traverse_obj(video, ('play', '@url', {url_or_none})),
-            }]
-        elif not has_formats or is_paid:
-            media_data_raw = self._search_regex(self._MUSIC_DATA_REGEX, html, 'media data')
-            media_data = self._parse_json(media_data_raw, id)
-            formats = [{
-                'url': source.get('URL'),
-                'size': int_or_none(source.get('size')),
-            } for source in traverse_obj(media_data, ('sources')) if url_or_none(source.get('URL'))]
-        else:
-            formats = [{
-                'url': url,
-                'format': format.lower(),
-                'tbr': int(bitrate),
-            } for url, format, bitrate in formats_from_html if url_or_none(url)]
-            # size field describes best quality. best quality always comes first
-            formats[0]['size'] = self._parse_ru_size(re.findall(self._SIZE_RE, meta_html)[0])
+        # size field describes best quality
+        size = self._parse_ru_size(re.search(self._SIZE_RE, meta_html).groups())
+        metadata['formats'][-1]['size'] = size
 
-        return {
-            'id': id,
+        return merge_dicts(metadata, {
             'title': clean_html(get_element_by_class('file_title', html)),
-            'formats': formats,
             'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count', default=None)),
             'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')),
             'timestamp': self._parse_ru_date(re.findall(self._TIMESTAMP_RE, meta_html)[0]),
             'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '),
-        }
+        })
 
     def _real_extract(self, url):
         type, id = self._match_valid_url(url).groups()
@@ -535,21 +588,11 @@ class PromoDJEmbedIE(PromoDJBaseIE):
         'only_matching': True,
     }]
 
-    def _get_full_url(self, media_data, id):
-        if player_error := media_data.get('player_error'):
-            raise ExtractorError(player_error, expected=True)
-
-        if media_data.get('video'):
-            video_config = self._parse_json(media_data['config'], id)
-            video = traverse_obj(video_config, ('playlist', 'item', 0))
-            return traverse_obj(video, ('title', '@ico_url'))
-        else:
-            return media_data.get('titleURL')
-
     def _real_extract(self, url):
         id = self._match_id(url)
-        url = self._get_full_url(self._fetch_media_data([id], id)[0], id)
-        return self.url_result(url, PromoDJIE, id)
+        metadata = self._parse_media_data(
+            self._fetch_media_data([id], id)[0], id)
+        return self.url_result(metadata['webpage_url'], PromoDJIE, id)
 
 
 class PromoDJShortIE(PromoDJBaseIE):
@@ -562,7 +605,11 @@ class PromoDJShortIE(PromoDJBaseIE):
     def _real_extract(self, url):
         id = self._match_id(url)
         html = self._download_webpage(url, id)
-        return self.url_result(self._og_search_url(html), PromoDJIE, id)
+        try:
+            url = self._og_search_url(html)
+        except Exception:
+            raise ExtractorError('Unable to extract full URL')
+        return self.url_result(url, PromoDJIE, id)
 
 
 class PromoDJRadioIE(PromoDJBaseIE):

From 5b148c0f796fa2d2d0971574235250e9ebe61f1b Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Wed, 14 Feb 2024 17:55:59 +0300
Subject: [PATCH 07/21] [PromoDJ] Improve video formats, update tests

---
 yt_dlp/extractor/promodj.py | 259 +++++++++++++++++++++++++++++-------
 1 file changed, 212 insertions(+), 47 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index 937fa56c9..9a79399fa 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -11,7 +11,6 @@ from ..utils import (
     dict_get,
     extract_attributes,
     ExtractorError,
-    float_or_none,
     get_element_by_class,
     get_elements_html_by_class,
     int_or_none,
@@ -126,7 +125,7 @@ class PromoDJBaseIE(InfoExtractor):
         data = {}
         for i, id in enumerate(ids):
             data[f'multi[{i}][method]'] = 'players/config'
-            data[f'multi[{i}][params][kind]'] = 'standalone.big'
+            data[f'multi[{i}][params][kind]'] = 'cover.big'
             data[f'multi[{i}][params][fileID]'] = id
         return self._download_json(
             'https://promodj.com/api/multi.json', video_id, data=urlencode_postdata(data),
@@ -140,7 +139,7 @@ class PromoDJBaseIE(InfoExtractor):
             video = traverse_obj(
                 self._parse_json(media_data['config'], id), ('playlist', 'item', 0))
             formats = [{
-                'url': traverse_obj(video, ('play', '@url', {url_or_none})),
+                'url': traverse_obj(video, ('play', '@url')).replace('?returnurl=1', ''),
                 **traverse_obj(media_data, {
                     'width': ('width', {int_or_none}),
                     'height': ('height', {int_or_none}),
@@ -163,10 +162,12 @@ class PromoDJBaseIE(InfoExtractor):
             'url': ('URL', {url_or_none}),
             'size': ('size', {int_or_none}),
         }) for source in traverse_obj(media_data, ('sources'))]
+        thumbnails = [{'url': url} for url in traverse_obj(media_data, ('coverURL', ('600', '1200', '2000'))) if url_or_none]
         return {
             'id': id,
             'title': clean_html(dict_get(media_data, ('title_html', 'title'))),
             'formats': formats,
+            'thumbnails': thumbnails,
             'webpage_url': traverse_obj(media_data, ('titleURL', {url_or_none}))
         }
 
@@ -416,60 +417,216 @@ class PromoDJIE(PromoDJBaseIE):
     _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/{PromoDJBaseIE._LOGIN_RE}/(?P<type>{PromoDJBaseIE._MEDIA_TYPES_RE})/(?P<id>\d+)(?:/\w+)?',
     _TESTS = [{
         'url': 'https://promodj.com/antonpavlovsky/remixes/6259208/David_Usher_Black_Black_Heart_Anton_Pavlovsky_Cover',
-        'only_matching': True,
+        'info_dict': {
+            'id': '6259208',
+            'ext': 'mp3',
+            'title': 'David Usher - Black Black Heart (Anton Pavlovsky Cover)',
+            'tags': ['Lounge', 'Deep House'],
+            'upload_date': '20170323',
+            'timestamp': 1490258400.0,
+            'duration': 173.0,
+            'size': 7654604,
+            'view_count': int,
+        },
     }, {
         'url': 'https://promodj.com/j-factory/samples/7560171/Amedici_BW1_Intro',
-        'only_matching': True,
+        'info_dict': {
+            'id': '7560171',
+            'ext': 'mp3',
+            'title': 'Amedici - BW1 - Intro',
+            'tags': ['Multitrack master', 'Fx'],
+            'upload_date': '20240212',
+            'timestamp': 1707748800.0,
+            'duration': 21.0,
+            'size': 838041,
+            'view_count': int,
+        },
     }, {
         # music: no download links in html
         'url': 'https://promodj.com/gluk/tracks/4713922/DJ_Glyuk_Folk_ing_DJ_Steven_Smile_Remix_2005',
-        'only_matching': True,
+        'info_dict': {
+            'id': '4713922',
+            'ext': 'mp3',
+            'title': 'DJ Глюк - Folk\'ing [DJ Steven Smile Remix] (2005)',
+            'tags': ['Pumping House', 'Hard House'],
+            'upload_date': '20140404',
+            'timestamp': 1396605480.0,
+            'duration': 299.0,
+            'size': 12058624,
+            'view_count': int,
+        },
     }, {
         # video: no download link in html
         'url': 'https://promodj.com/psywanderer/videos/7559147/Chu_de_sa',
-        'only_matching': True,
+        'info_dict': {
+            'id': '7559147',
+            'ext': 'mp4',
+            'title': 'Чу де са',
+            'tags': ['Jazz-Rap', 'Jazzstep'],
+            'thumbnail': r're:^https?://',
+            'upload_date': '20240210',
+            'timestamp': 1707533820.0,
+            'duration': 388720,
+            'view_count': int,
+            'channel': 'PsyWanderer',
+            'channel_url': 'https://promodj.com/psywanderer',
+        },
     }, {
-        # no player
+        # no player (external link)
         'url': 'https://promodj.com/gluk/tracks/420310/IMpulse_Zakat',
-        'only_matching': True,
+        'info_dict': {
+            'id': '420310',
+            'ext': 'mp3',
+            'title': 'IMpulse - Закат',
+            'tags': ['House', 'Electro House'],
+            'thumbnail': r're:^https?://',
+            'upload_date': '20081024',
+            'timestamp': 1224846120.0,
+            'duration': 133.0,
+            'size': 1048576,
+            'view_count': int,
+        },
+        'params': {
+            'skip_download': 'Link is broken',
+        },
     }, {
         # without slug
         'url': 'https://promodj.com/djlykov/tracks/7551590',
-        'only_matching': True,
+        'info_dict': {
+            'id': '7551590',
+            'ext': 'mp3',
+            'title': 'Lykov - Benjamin (Radio Edit) [MOUSE-P]',
+            'tags': ['Dance Pop', 'Eurodance'],
+            'upload_date': '20240122',
+            'timestamp': 1705919280.0,
+            'duration': 233.0,
+            'size': 9332326,
+            'view_count': int,
+        },
     }, {
         # lossless wav
         'url': 'https://promodj.com/modi-glu/tracks/6081339/Modi_Glyu_Anabel',
-        'only_matching': True,
+        'info_dict': {
+            'id': '6081339',
+            'ext': 'wav',
+            'title': 'Моди Глю " Анабель"',
+            'tags': ['Chillout', 'Downtempo'],
+            'upload_date': '20161029',
+            'timestamp': 1477767780.0,
+            'duration': 236.0,
+            'size': 42257612,
+            'view_count': int,
+        },
     }, {
         # lossless flac
         'url': 'https://promodj.com/sashaorbeat/mixes/7422493/Sasha_Orbeat_Pure_Love_3',
-        'only_matching': True,
+        'info_dict': {
+            'id': '7422493',
+            'ext': 'flac',
+            'title': 'Sasha Orbeat — Pure Love 3',
+            'tags': ['Lo-Fi', 'Downtempo'],
+            'upload_date': '20230213',
+            'timestamp': 1676306160.0,
+            'duration': 3631.0,
+            'size': 685139558,
+            'view_count': int,
+        },
     }, {
         # paid lossless
         'url': 'https://promodj.com/boyko/tracks/1435682/Dj_Boyko_Katy_Queen_Nad_Oblakami',
-        'only_matching': True,
+        'info_dict': {
+            'id': '1435682',
+            'ext': 'mp3',
+            'title': 'Dj Boyko & Katy Queen - Над Облаками',
+            'tags': ['House', 'Trance'],
+            'upload_date': '20100404',
+            'timestamp': 1270376700.0,
+            'duration': 321.0,
+            'size': 56623104,
+            'view_count': int,
+        },
     }, {
         # paid lossy
         'url': 'https://promodj.com/tesla/tracks/342938/Library_Of_Bugs',
-        'only_matching': True,
-    }, {
-        'url': 'https://promodj.com/sergeyfedotov306/videos/7457627/V_Matrice_Sboy',
-        'only_matching': True,
+        'info_dict': {
+            'id': '342938',
+            'ext': 'mp3',
+            'title': 'Library Of Bugs',
+            'tags': ['Minimal Techno', 'Tech House'],
+            'upload_date': '20080827',
+            'timestamp': 1219841220.0,
+            'duration': 64.0,
+            'size': 2097152,
+            'view_count': int,
+        },
     }, {
+        # mp4
         'url': 'https://promodj.com/djperetse/videos/5868236/Fatalist_Project_feat_DJ_Peretse_Den_pobedi_Videoklip',
-        'only_matching': True,
+        'info_dict': {
+            'id': '5868236',
+            'ext': 'mp4',
+            'title': 'Fatalist Project feat. DJ Peretse - День победы (Видеоклип)',
+            'tags': ['House', 'Progressive House'],
+            'thumbnail': r're:^https?://',
+            'upload_date': '20160505',
+            'timestamp': 1462419720.0,
+            'duration': 265045,
+            'size': 165465292,
+            'view_count': int,
+            'channel': 'DJ Peretse',
+            'channel_url': 'https://promodj.com/djperetse',
+        },
     }, {
         # avi
         'url': 'https://promodj.com/djmikis/videos/5311597/Mikis_Live_SDJ_Show',
-        'only_matching': True,
+        'info_dict': {
+            'id': '5311597',
+            'ext': 'avi',
+            'title': 'Mikis Live @ SDJ Show',
+            'tags': ['Club House'],
+            'thumbnail': r're:^https?://',
+            'upload_date': '20150409',
+            'timestamp': 1428579840.0,
+            'duration': 1716240,
+            'size': 371195904,
+            'view_count': int,
+            'channel': 'MIKIS',
+            'channel_url': 'https://promodj.com/djmikis',
+        },
     }, {
         # asf
         'url': 'https://promodj.com/gigsiphonic/videos/7559341/Gigsiphonic_PODCAST_309_Extended_video_version',
-        'only_matching': True,
+        'info_dict': {
+            'id': '7559341',
+            'ext': 'asf',
+            'title': 'Gigsiphonic - PODCAST 309 (Extended video version)',
+            'tags': ['Synthwave', 'Synth-Pop'],
+            'thumbnail': r're:^https?://',
+            'upload_date': '20240210',
+            'timestamp': 1707580080.0,
+            'duration': 4309200,
+            'size': 3715146711,
+            'view_count': int,
+            'channel': 'Gigsiphonic',
+            'channel_url': 'https://promodj.com/gigsiphonic',
+        },
     }, {
         # not valid html
         'url': 'https://promodj.com/martin.sehnal/videos/7555841/Martin_Sehnal_CII_33_Plus_CII_32_Clothes_on_the_peg_2_020_2_024_02_01th',
-        'only_matching': True,
+        'info_dict': {
+            'id': '7555841',
+            'ext': 'avi',
+            'title': 'Martin Sehnal - CII 33 ( Plus CII 32 ) Clothes on the peg 2 020 ( 2 024 02. 01th ) )',
+            'tags': ['Easy Listening', 'Drum & Bass'],
+            'thumbnail': r're:^https?://',
+            'upload_date': '20240201',
+            'timestamp': 1706827560.0,
+            'duration': 30000,
+            'size': 2340757176,
+            'view_count': int,
+            'channel_url': 'https://promodj.com/martin.sehnal',
+            'channel': 'Martin Sehnal',
+        },
     }]
 
     _IS_PAID_RE = r'<b>Цена:</b>'
@@ -514,16 +671,19 @@ class PromoDJIE(PromoDJBaseIE):
         size, size_unit = raw_size
         return int(float(size) * pow(1024, RU_SIZE_UNITS.index(size_unit)))
 
-    def _parse_media(self, html, id, type):
-        # videos always have one format
-        # audios can have one or two formats
+    # music: always have lossy format (mp3), sometimes have lossless (wav or flac) format
+    # video: sometimes have source format (mp4, avi, asf), always have converted for web format (mp4)
+    def _real_extract(self, url):
+        type, id = self._match_valid_url(url).groups()
+        html = self._download_webpage(url, id)
 
-        # always returns only one format
-        # if audio has two formats, returns only lossy
+        # always returns only one format: lossy mp3 for music or converted mp4 for video
         media_data = self._search_json(
             '', html, 'media data', id,
             contains_pattern=self._VIDEO_DATA_REGEX if type == 'videos' else self._MUSIC_DATA_REGEX,
-            transform_source=js_to_json)
+            transform_source=js_to_json, fatal=False, default=None)
+        if not media_data:
+            media_data = self._fetch_media_data([id], id)[0]
         metadata = self._parse_media_data(media_data, id)
 
         # html can be invalid
@@ -532,25 +692,35 @@ class PromoDJIE(PromoDJBaseIE):
         except Exception:
             meta_html = html
 
-        # returns one or two formats but sometimes without download links
-        # best quality always comes first
+        # music: lossy format or lossless and lossy formats
+        # video: source format
+        # download links can be missing
+        # best quality format always comes first
         formats_from_html = re.findall(self._FORMATS_RE, meta_html)
         is_paid = re.search(self._IS_PAID_RE, meta_html)
-        bitrate_key = 'tbr' if type == 'videos' else 'abr'
-        for i, match in enumerate(formats_from_html):
-            url, _, bitrate = match
-            is_last = i == len(formats_from_html) - 1
-            if is_last:
-                metadata['formats'][0][bitrate_key] = int(bitrate)
-            elif url_or_none(url) and not is_paid:
-                metadata['formats'].append({
-                    'url': url,
-                    bitrate_key: int(bitrate),
-                })
-
         # size field describes best quality
         size = self._parse_ru_size(re.search(self._SIZE_RE, meta_html).groups())
-        metadata['formats'][-1]['size'] = size
+        if type == 'videos':
+            for url, _, bitrate in formats_from_html:
+                if url_or_none(url):
+                    metadata['formats'].append({
+                        'url': url,
+                        'tbr': int(bitrate),
+                        'size': size,
+                        'quality': 1,
+                    })
+        else:
+            for i, match in enumerate(formats_from_html):
+                url, _, bitrate = match
+                is_last = i == len(formats_from_html) - 1
+                if is_last:
+                    metadata['formats'][0]['abr'] = int(bitrate)
+                elif url_or_none(url) and not is_paid:
+                    metadata['formats'].append({
+                        'url': url,
+                        'abr': int(bitrate),
+                    })
+            metadata['formats'][-1]['size'] = size
 
         return merge_dicts(metadata, {
             'title': clean_html(get_element_by_class('file_title', html)),
@@ -560,11 +730,6 @@ class PromoDJIE(PromoDJBaseIE):
             'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '),
         })
 
-    def _real_extract(self, url):
-        type, id = self._match_valid_url(url).groups()
-        html = self._download_webpage(url, id)
-        return self._parse_media(html, id, type)
-
 
 class PromoDJEmbedIE(PromoDJBaseIE):
     _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/embed/(?P<id>\d+)/(?P<type>cover|big)'

From 0c8466572fff01e9ec87fe4189d64d351314f8b9 Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Thu, 15 Feb 2024 00:53:12 +0300
Subject: [PATCH 08/21] [PromoDJ] Add more tests

---
 yt_dlp/extractor/promodj.py | 306 +++++++++++++++++++++++++++++-------
 1 file changed, 246 insertions(+), 60 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index 9a79399fa..fecf30589 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -81,11 +81,11 @@ class PromoDJBaseIE(InfoExtractor):
         if self._get_current_page(html) != page + 1:
             return
 
-        for a in get_elements_html_by_class('player_standard_tool__play', html):
+        for a in get_elements_html_by_class('player_standard_tool__comments', html):
             url = traverse_obj(extract_attributes(a), ('href', {url_or_none}))
             if not url:
                 continue
-            url = url.replace('?play=1', '')
+            url = url.replace('#comments', '')
             is_video = '/videos/' in url
             if is_video and 'video' in allowed_media_cats or not is_video and 'music' in allowed_media_cats:
                 yield self.url_result(url, PromoDJIE)
@@ -139,6 +139,7 @@ class PromoDJBaseIE(InfoExtractor):
             video = traverse_obj(
                 self._parse_json(media_data['config'], id), ('playlist', 'item', 0))
             formats = [{
+                'format_id': 'web',
                 'url': traverse_obj(video, ('play', '@url')).replace('?returnurl=1', ''),
                 **traverse_obj(media_data, {
                     'width': ('width', {int_or_none}),
@@ -162,7 +163,9 @@ class PromoDJBaseIE(InfoExtractor):
             'url': ('URL', {url_or_none}),
             'size': ('size', {int_or_none}),
         }) for source in traverse_obj(media_data, ('sources'))]
-        thumbnails = [{'url': url} for url in traverse_obj(media_data, ('coverURL', ('600', '1200', '2000'))) if url_or_none]
+        thumbnails = [{
+            'url': url,
+        } for url in traverse_obj(media_data, ('coverURL', ('600', '1200', '2000'))) if url_or_none(url)]
         return {
             'id': id,
             'title': clean_html(dict_get(media_data, ('title_html', 'title'))),
@@ -178,11 +181,13 @@ class PromoDJPageIE(PromoDJBaseIE):
     _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<id>{_PAGES_RE})'
     _TESTS = [{
         'url': 'https://promodj.com/featured',
-        'only_matching': True,
-    }, {
-        # second page
-        'url': 'https://promodj.com/featured/rap?download=1&page=2',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'featured',
+        },
+        'playlist_count': 40,
+        'params': {
+            'playlistend': 40,
+        },
     }, {
         # filtered
         'url': 'https://promodj.com/remixes?top=1',
@@ -196,9 +201,25 @@ class PromoDJPageIE(PromoDJBaseIE):
         'url': 'https://promodj.com/mixes?kind=mixes&styleID=&searchfor=dance',
         'only_matching': True,
     }, {
-        # no download button
+        # shop
         'url': 'https://promodj.com/shop',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'shop',
+        },
+        'playlist_count': 20,
+        'params': {
+            'playlistend': 20,
+        },
+    }, {
+        # videos
+        'url': 'https://promodj.com/videos',
+        'info_dict': {
+            'id': 'videos',
+        },
+        'playlist_count': 20,
+        'params': {
+            'playlistend': 20,
+        },
     }]
 
     _PAGE_SIZE = 20
@@ -215,11 +236,25 @@ class PromoDJPageIE(PromoDJBaseIE):
 class PromoDJUserIE(PromoDJBaseIE):
     _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})$'
     _TESTS = [{
-        'url': 'https://promodj.com/djperetse',
-        'only_matching': True,
-    }, {
         'url': 'https://promodj.com/dj-trojan',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'dj-trojan',
+        },
+        'playlist_mincount': 89,
+    }, {
+        # with default video playlist
+        'url': 'https://promodj.com/djperetse',
+        'info_dict': {
+            'id': 'djperetse',
+        },
+        'playlist_mincount': 15,
+    }, {
+        # without any playlists
+        'url': 'https://promodj.com/slim96',
+        'info_dict': {
+            'id': 'slim96',
+        },
+        'playlist_count': 0,
     }]
 
     def _real_extract(self, url):
@@ -236,15 +271,31 @@ class PromoDJUserIE(PromoDJBaseIE):
 class PromoDJUserMediaIE(PromoDJBaseIE):
     _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>music|video)$'
     _TESTS = [{
-        'url': 'https://promodj.com/feel/music',
-        'only_matching': True,
+        'url': 'https://promodj.com/worobyev/music',
+        'info_dict': {
+            'id': 'worobyev-music',
+        },
+        'playlist_mincount': 11,
     }, {
-        'url': 'https://promodj.com/djmikis/video',
-        'only_matching': True,
+        # no music
+        'url': 'https://promodj.com/xsev71/music',
+        'info_dict': {
+            'id': 'xsev71-music',
+        },
+        'playlist_count': 0,
     }, {
-        # a user without any videos
+        'url': 'https://promodj.com/cosmonaut/video',
+        'info_dict': {
+            'id': 'cosmonaut-video',
+        },
+        'playlist_mincount': 2,
+    }, {
+        # no video
         'url': 'https://promodj.com/worobyev/video',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'worobyev-video',
+        },
+        'playlist_count': 0,
     }]
 
     def _real_extract(self, url):
@@ -264,19 +315,40 @@ class PromoDJUserPagesIE(PromoDJBaseIE):
     _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>pages|blog)$'
     _TESTS = [{
         'url': 'https://promodj.com/djperetse/pages',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'djperetse-pages',
+        },
+        'playlist_count': 10,
     }, {
-        'url': 'https://promodj.com/golub/blog',
-        'only_matching': True,
+        # no pages
+        'url': 'https://promodj.com/djlosev/pages',
+        'info_dict': {
+            'id': 'djlosev-pages',
+        },
+        'playlist_count': 0,
+    }, {
+        'url': 'https://promodj.com/ivanroudyk/blog',
+        'info_dict': {
+            'id': 'ivanroudyk-blog',
+        },
+        'playlist_mincount': 37,
+    }, {
+        # no blog
+        'url': 'https://promodj.com/worobyev/blog',
+        'info_dict': {
+            'id': 'worobyev-blog',
+        },
+        'playlist_count': 0,
     }]
 
     _PAGE_SIZE = 10
 
     def _parse_pages(self, url, playlist_id):
         html = self._download_webpage(url, playlist_id)
-        content_html = get_element_by_class('dj_universal', get_element_by_class('dj_bblock', html))
-        for page_url, page_title in re.findall(r'<a href=\"([^\"]+)\">([^<]+)</a>', content_html):
-            yield self.url_result(page_url, PromoDJUserPageIE, video_title=page_title)
+        content_html = get_element_by_class('dj_content ', html)
+        if pages_html := get_element_by_class('dj_universal', content_html):
+            for page_url, page_title in re.findall(r'<a href=\"([^\"]+)\">([^<]+)</a>', pages_html):
+                yield self.url_result(page_url, PromoDJUserPageIE, video_title=page_title)
 
     def _fetch_blogs_page(self, url, playlist_id, page):
         page_url = self._set_url_page(url, page + 1)
@@ -318,7 +390,10 @@ class PromoDJUserPageIE(PromoDJBaseIE):
     _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<slug>{_USER_PAGE_RE})$'
     _TESTS = [{
         'url': 'https://promodj.com/djperetse/MaxMixes',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'djperetse-MaxMixes',
+        },
+        'playlist_count': 5,
     }]
 
     def _real_extract(self, url):
@@ -331,24 +406,40 @@ class PromoDJUserPageIE(PromoDJBaseIE):
 
 
 class PromoDJBlogPageIE(PromoDJBaseIE):
-    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/blog/(?P<id>\d+)(?:/(?P<slug>\w+))?'
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/blog/(?P<id>\d+)(?:/\w+)?'
     _TESTS = [{
         # with small and big audio players and youtube video
         'url': 'https://promodj.com/golub/blog/1163895/DJ_Andrey_Golubev_To_Depeche_Mode_with_love_part_9_special_dj_edits_mix',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'golub-blog-1163895',
+        },
+        'playlist_count': 13,
     }, {
         # with audio and video
         'url': 'https://promodj.com/svetmusic/blog/1101958/SVET_I_Like_It_Extra_Sound_Recordings',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'svetmusic-blog-1101958',
+        },
+        'playlist_count': 5,
     }, {
         # without any media
         'url': 'https://promodj.com/svetmusic/blog/915878/DJ_SVET_pobeditel_konkursa_Burn_City_Sound',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'svetmusic-blog-915878',
+        },
+        'playlist_count': 0,
+    }, {
+        # with deleted and blocked music
+        'url': 'https://promodj.com/djperetse/blog/1048739/DJ_Peretse_i_Coca_Cola_obyavlyayut_MEGAMIX_BATTLE_2015',
+        'info_dict': {
+            'id': 'djperetse-blog-1048739',
+        },
+        'playlist_count': 29,
     }]
 
     def _real_extract(self, url):
-        login, id, slug = self._match_valid_url(url).groups()
-        page_id = f'{login}-blog-{id}-{slug}'
+        login, id = self._match_valid_url(url).groups()
+        page_id = f'{login}-blog-{id}'
         html = self._download_webpage(url, page_id)
         content_html = get_element_by_class('post_body', html)
         return self.playlist_result(
@@ -361,29 +452,57 @@ class PromoDJPlaylistIE(PromoDJBaseIE):
         rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>groups)/(?P<id>\d+)(?:/(?P<slug>\w+))?',
     ]
     _TESTS = [{
-        # default playlist: tracks (audio)
+        # default playlist: music (with songs without player)
         'url': 'https://promodj.com/gluk/tracks',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'gluk-tracks',
+        },
+        'playlist_mincount': 29,
+    }, {
+        # default playlist: with pagination
+        'url': 'https://promodj.com/gluk/mixes',
+        'info_dict': {
+            'id': 'gluk-mixes',
+        },
+        'playlist_count': 60,
+        'params': {
+            'playlistend': 60,
+        },
     }, {
         # default playlist: video
         'url': 'https://promodj.com/djperetse/videos',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'djperetse-videos',
+        },
+        'playlist_mincount': 6,
     }, {
         # user playlist: audio
         'url': 'https://promodj.com/fonarev/groups/608158/Digital_Emotions_Night',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'fonarev-groups-608158',
+        },
+        'playlist_mincount': 9,
     }, {
-        # two pages
+        # user playlist: with pagination
         'url': 'https://promodj.com/lavrov/groups/677132/VINYL',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'lavrov-groups-677132',
+        },
+        'playlist_mincount': 33,
     }, {
         # user playlist: video
         'url': 'https://promodj.com/deeplecture/groups/672782/LAROCCA_TV',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'deeplecture-groups-672782',
+        },
+        'playlist_mincount': 4,
     }, {
         # user playlist: audio and video
         'url': 'https://promodj.com/djperetse/groups/637358/Russkie_treki',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'djperetse-groups-637358',
+        },
+        'playlist_mincount': 17,
     }, {
         # 900+ items
         'url': 'https://promodj.com/fonarev/groups/17350/Digital_Emotions_Podcast',
@@ -406,10 +525,12 @@ class PromoDJPlaylistIE(PromoDJBaseIE):
 
 
 class PromoDJMusicPlaylistIE(PromoDJPlaylistIE):
+    _VALID_URL = []
     _ALLOWED_MEDIA_CATS = ['music']
 
 
 class PromoDJVideoPlaylistIE(PromoDJPlaylistIE):
+    _VALID_URL = []
     _ALLOWED_MEDIA_CATS = ['video']
 
 
@@ -429,18 +550,17 @@ class PromoDJIE(PromoDJBaseIE):
             'view_count': int,
         },
     }, {
+        # samples type
         'url': 'https://promodj.com/j-factory/samples/7560171/Amedici_BW1_Intro',
-        'info_dict': {
-            'id': '7560171',
-            'ext': 'mp3',
-            'title': 'Amedici - BW1 - Intro',
-            'tags': ['Multitrack master', 'Fx'],
-            'upload_date': '20240212',
-            'timestamp': 1707748800.0,
-            'duration': 21.0,
-            'size': 838041,
-            'view_count': int,
-        },
+        'only_matching': True,
+    }, {
+        # acapellas type
+        'url': 'https://promodj.com/cosmonaut/acapellas/200970/Kosmonavt_golosovoe_ID',
+        'only_matching': True,
+    }, {
+        # realtones type
+        'url': 'https://promodj.com/plashstringer/realtones/965489/bomba_bomba',
+        'only_matching': True,
     }, {
         # music: no download links in html
         'url': 'https://promodj.com/gluk/tracks/4713922/DJ_Glyuk_Folk_ing_DJ_Steven_Smile_Remix_2005',
@@ -489,6 +609,10 @@ class PromoDJIE(PromoDJBaseIE):
         'params': {
             'skip_download': 'Link is broken',
         },
+    }, {
+        # no player (the link from html is broken but the link from API is ok)
+        'url': 'https://promodj.com/scratchin/remixes/374580/Katya_First_Perestala_DJ_Ivan_Scratchin_Mix',
+        'only_matching': True,
     }, {
         # without slug
         'url': 'https://promodj.com/djlykov/tracks/7551590',
@@ -632,7 +756,7 @@ class PromoDJIE(PromoDJBaseIE):
     _IS_PAID_RE = r'<b>Цена:</b>'
     # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит
     # https://regex101.com/r/2AuaxB/1
-    _FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*(?P<format>\w+), (?P<bitrate>\d+) Кбит'
+    _FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*\w+, (?P<bitrate>\d+) Кбит'
     _VIEW_COUNT_RE = r'<b>(?:Прослушиваний|Просмотров):</b>\s*(\d+)'
     # examples: 0:21 | 1:07 | 74:38
     _DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})'
@@ -701,9 +825,10 @@ class PromoDJIE(PromoDJBaseIE):
         # size field describes best quality
         size = self._parse_ru_size(re.search(self._SIZE_RE, meta_html).groups())
         if type == 'videos':
-            for url, _, bitrate in formats_from_html:
+            for url, bitrate in formats_from_html:
                 if url_or_none(url):
                     metadata['formats'].append({
+                        'format_id': 'source',
                         'url': url,
                         'tbr': int(bitrate),
                         'size': size,
@@ -711,7 +836,7 @@ class PromoDJIE(PromoDJBaseIE):
                     })
         else:
             for i, match in enumerate(formats_from_html):
-                url, _, bitrate = match
+                url, bitrate = match
                 is_last = i == len(formats_from_html) - 1
                 if is_last:
                     metadata['formats'][0]['abr'] = int(bitrate)
@@ -726,7 +851,7 @@ class PromoDJIE(PromoDJBaseIE):
             'title': clean_html(get_element_by_class('file_title', html)),
             'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count', default=None)),
             'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')),
-            'timestamp': self._parse_ru_date(re.findall(self._TIMESTAMP_RE, meta_html)[0]),
+            'timestamp': self._parse_ru_date(re.search(self._TIMESTAMP_RE, meta_html).groups()),
             'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '),
         })
 
@@ -735,14 +860,47 @@ class PromoDJEmbedIE(PromoDJBaseIE):
     _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/embed/(?P<id>\d+)/(?P<type>cover|big)'
     _TESTS = [{
         'url': 'https://promodj.com/embed/7555440/cover',
-        'only_matching': True,
+        'info_dict': {
+            'id': '7555440',
+            'ext': 'mp3',
+            'title': 'Kolya Funk - Exclusive Mix (February 2024)',
+            'tags': ['House', 'Indie Dance'],
+            'upload_date': '20240131',
+            'timestamp': 1706738400.0,
+            'duration': 3697.0,
+            'size': 148478361,
+            'view_count': int,
+        },
     }, {
         'url': 'https://promodj.com/embed/7540163/big',
-        'only_matching': True,
+        'info_dict': {
+            'id': '7540163',
+            'ext': 'mp3',
+            'title': 'Khalif - Amore (Akif Pro Remix)',
+            'tags': ['Deep House', 'Slap House'],
+            'upload_date': '20231224',
+            'timestamp': 1703418600.0,
+            'duration': 157.0,
+            'size': 8178892,
+            'view_count': int,
+        },
     }, {
         # video (can be only big)
         'url': 'https://promodj.com/embed/3922099/big',
-        'only_matching': True,
+        'info_dict': {
+            'id': '3922099',
+            'ext': 'mp4',
+            'title': 'Will I Am & Britney Spears - Scream & Shout (DJ Nejtrino & DJ Stranger Remix) Video Full HD',
+            'tags': ['Club House', 'Vocal House'],
+            'thumbnail': r're:^https?://',
+            'upload_date': '20130211',
+            'timestamp': 1360583760.0,
+            'duration': 234560,
+            'size': 309644492,
+            'view_count': int,
+            'channel_url': 'https://promodj.com/dj-stranger',
+            'channel': 'DJ Stranger',
+        },
     }, {
         # blocked
         'url': 'https://promodj.com/embed/5586967/big',
@@ -763,8 +921,36 @@ class PromoDJEmbedIE(PromoDJBaseIE):
 class PromoDJShortIE(PromoDJBaseIE):
     _VALID_URL = r'https://pdj.cc/(?P<id>\w+)'
     _TESTS = [{
+        # music
         'url': 'https://pdj.cc/fv8VD',
-        'only_matching': True,
+        'info_dict': {
+            'id': '7422493',
+            'ext': 'flac',
+            'title': 'Sasha Orbeat — Pure Love 3',
+            'tags': ['Lo-Fi', 'Downtempo'],
+            'upload_date': '20230213',
+            'timestamp': 1676306160.0,
+            'duration': 3631.0,
+            'size': 685139558,
+            'view_count': int,
+        },
+    }, {
+        # video
+        'url': 'https://pdj.cc/fvcpX',
+        'info_dict': {
+            'id': '7435905',
+            'ext': 'mp4',
+            'title': 'JULIA - DEBRI FM (guest mix 18.03.23)',
+            'tags': ['Drum & Bass'],
+            'thumbnail': r're:^https?://',
+            'upload_date': '20230321',
+            'timestamp': 1679441100.0,
+            'duration': 2329640,
+            'size': 2952790016,
+            'view_count': int,
+            'channel': 'JULIA',
+            'channel_url': 'https://promodj.com/julia-breaks',
+        },
     }]
 
     def _real_extract(self, url):

From c820715205c9b8999e38c75c324a84eaff8eab84 Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Thu, 15 Feb 2024 11:53:12 +0300
Subject: [PATCH 09/21] [PromoDJ] Fix parse date and size functions

---
 yt_dlp/extractor/promodj.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index fecf30589..974cad7e2 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -761,7 +761,7 @@ class PromoDJIE(PromoDJBaseIE):
     # examples: 0:21 | 1:07 | 74:38
     _DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})'
     # examples: 818.4 Кб | 12.9 Мб | 4 Гб | 1.76 Гб | 1001.5 Мб
-    _SIZE_RE = r'<b>Размер:</b>\s*(?P<size>\d+(?:\.\d+)?)\s*(?P<unit>Кб|Мб|Гб)'
+    _SIZE_RE = r'<b>Размер:</b>\s*(?P<size>\d+(?:\.\d+)?)\s*(?P<unit>Б|Кб|Мб|Гб|Тб)'
     # examples: сегодня 2:55 | вчера 23:17 | 1 июня 2016 3:46
     _TIMESTAMP_RE = r'<b>Публикация:</b>\s*(?P<day>вчера|сегодня|\d{1,2})(?: (?P<month>[а-я]+) (?P<year>\d{4}))?\s*(?P<hours>\d{1,2}):(?P<minutes>\d{2})'
     _TAGS_RE = r'<span\s+class=\"styles\">([^\n]+)</span>'
@@ -771,9 +771,8 @@ class PromoDJIE(PromoDJBaseIE):
     # https://regex101.com/r/b9utBf/1
     _VIDEO_DATA_REGEX = r'({\"video\":true,\"config\":[^\n]+)\);'
 
-    def _parse_ru_date(self, raw_date):
+    def _parse_ru_date(self, day, month, year, hours, minutes):
         RU_MONTHS = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря']
-        day, month, year, hours, minutes = raw_date
         if day == 'сегодня':
             d = datetime.date.today()
             day = d.day
@@ -790,10 +789,9 @@ class PromoDJIE(PromoDJBaseIE):
             year = int(year)
         return datetime.datetime(year, month, day, int(hours), int(minutes)).timestamp()
 
-    def _parse_ru_size(self, raw_size):
-        RU_SIZE_UNITS = ['Б', 'Кб', 'Мб', 'Гб']
-        size, size_unit = raw_size
-        return int(float(size) * pow(1024, RU_SIZE_UNITS.index(size_unit)))
+    def _parse_ru_size(self, size, unit):
+        RU_SIZE_UNITS = ['Б', 'Кб', 'Мб', 'Гб', 'Тб']
+        return int(float(size) * pow(1024, RU_SIZE_UNITS.index(unit)))
 
     # music: always have lossy format (mp3), sometimes have lossless (wav or flac) format
     # video: sometimes have source format (mp4, avi, asf), always have converted for web format (mp4)
@@ -823,7 +821,7 @@ class PromoDJIE(PromoDJBaseIE):
         formats_from_html = re.findall(self._FORMATS_RE, meta_html)
         is_paid = re.search(self._IS_PAID_RE, meta_html)
         # size field describes best quality
-        size = self._parse_ru_size(re.search(self._SIZE_RE, meta_html).groups())
+        size = self._parse_ru_size(*re.search(self._SIZE_RE, meta_html).groups())
         if type == 'videos':
             for url, bitrate in formats_from_html:
                 if url_or_none(url):
@@ -851,7 +849,7 @@ class PromoDJIE(PromoDJBaseIE):
             'title': clean_html(get_element_by_class('file_title', html)),
             'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count', default=None)),
             'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')),
-            'timestamp': self._parse_ru_date(re.search(self._TIMESTAMP_RE, meta_html).groups()),
+            'timestamp': self._parse_ru_date(*re.search(self._TIMESTAMP_RE, meta_html).groups()),
             'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '),
         })
 

From c837d90e126e7e8b2a6f03221a6163d5f3e7a03d Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Thu, 15 Feb 2024 12:21:53 +0300
Subject: [PATCH 10/21] [PromoDJ] Add support for user's best media playlist

---
 yt_dlp/extractor/promodj.py | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index 974cad7e2..8d5a14c49 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -114,10 +114,6 @@ class PromoDJBaseIE(InfoExtractor):
             if YoutubeIE.suitable(iframe_url):
                 yield self.url_result(iframe_url, YoutubeIE)
 
-    def _get_playlist_page_size(self, url):
-        is_default_playlist = '/groups/' not in url
-        return 30 if is_default_playlist else 20
-
     def _get_current_page(self, html):
         return int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1')
 
@@ -382,6 +378,7 @@ class PromoDJUserPageIE(PromoDJBaseIE):
         'blog',
         'feedback',
         'contact',
+        'uenno',
         *PromoDJBaseIE._MEDIA_TYPES,
     ]
     _NOT_USER_PAGE_RE = '|'.join(_USER_PAGES)
@@ -447,8 +444,11 @@ class PromoDJBlogPageIE(PromoDJBaseIE):
 
 
 class PromoDJPlaylistIE(PromoDJBaseIE):
+    _PLAYLIST_TYPES = ['uenno', *PromoDJBaseIE._MEDIA_TYPES]
+    _PLAYLIST_TYPES_RE = '|'.join(_PLAYLIST_TYPES)
+
     _VALID_URL = [
-        rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>{PromoDJBaseIE._MEDIA_TYPES_RE})$',
+        rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>{_PLAYLIST_TYPES_RE})$',
         rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>groups)/(?P<id>\d+)(?:/(?P<slug>\w+))?',
     ]
     _TESTS = [{
@@ -507,20 +507,36 @@ class PromoDJPlaylistIE(PromoDJBaseIE):
         # 900+ items
         'url': 'https://promodj.com/fonarev/groups/17350/Digital_Emotions_Podcast',
         'only_matching': True,
+    }, {
+        # user's best music and video
+        'url': 'https://promodj.com/djbaribyn/uenno',
+        'info_dict': {
+            'id': 'djbaribyn-uenno',
+        },
+        'playlist_count': 15,
+        'params': {
+            'playlistend': 15,
+        }
     }]
 
     _ALLOWED_MEDIA_CATS = ['music', 'video']
 
+    def _get_page_size(self, url):
+        if '/uenno' in url:
+            return 15
+        if '/groups/' in url:
+            return 30
+        return 20
+
     def _real_extract(self, url):
         match = self._match_valid_url(url)
         login = match.group('login')
         type = match.group('type')
         playlist_id = f'{login}-{type}' if len(match.groups()) == 2 else f'{login}-{type}-{match.group("id")}'
-        page_size = self._get_playlist_page_size(url)
 
         entries = OnDemandPagedList(
             functools.partial(self._fetch_page, url, self._ALLOWED_MEDIA_CATS, playlist_id),
-            page_size)
+            self._get_page_size(url))
         return self.playlist_result(entries, playlist_id=playlist_id)
 
 

From e6f3e6de0e152640771055618f46a4aa609ca7e1 Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Thu, 15 Feb 2024 12:34:50 +0300
Subject: [PATCH 11/21] [PromoDJ] Fix paid music metadata

---
 yt_dlp/extractor/promodj.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index 8d5a14c49..4f29a4347 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -769,7 +769,6 @@ class PromoDJIE(PromoDJBaseIE):
         },
     }]
 
-    _IS_PAID_RE = r'<b>Цена:</b>'
     # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит
     # https://regex101.com/r/2AuaxB/1
     _FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*\w+, (?P<bitrate>\d+) Кбит'
@@ -835,7 +834,7 @@ class PromoDJIE(PromoDJBaseIE):
         # download links can be missing
         # best quality format always comes first
         formats_from_html = re.findall(self._FORMATS_RE, meta_html)
-        is_paid = re.search(self._IS_PAID_RE, meta_html)
+        is_paid = '<b>Цена:</b>' in meta_html
         # size field describes best quality
         size = self._parse_ru_size(*re.search(self._SIZE_RE, meta_html).groups())
         if type == 'videos':
@@ -848,13 +847,13 @@ class PromoDJIE(PromoDJBaseIE):
                         'size': size,
                         'quality': 1,
                     })
-        else:
+        elif not is_paid:
             for i, match in enumerate(formats_from_html):
                 url, bitrate = match
                 is_last = i == len(formats_from_html) - 1
                 if is_last:
                     metadata['formats'][0]['abr'] = int(bitrate)
-                elif url_or_none(url) and not is_paid:
+                elif url_or_none(url):
                     metadata['formats'].append({
                         'url': url,
                         'abr': int(bitrate),

From 7e96492ba0dd902261c3e308a6c6d8d129c80af8 Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Thu, 15 Feb 2024 12:39:46 +0300
Subject: [PATCH 12/21] [PromoDJ] Fix page size for playlists

---
 yt_dlp/extractor/promodj.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index 4f29a4347..cdf33741f 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -521,10 +521,10 @@ class PromoDJPlaylistIE(PromoDJBaseIE):
 
     _ALLOWED_MEDIA_CATS = ['music', 'video']
 
-    def _get_page_size(self, url):
-        if '/uenno' in url:
+    def _get_page_size(self, type):
+        if type == 'uenno':
             return 15
-        if '/groups/' in url:
+        if type == 'groups':
             return 30
         return 20
 
@@ -536,7 +536,7 @@ class PromoDJPlaylistIE(PromoDJBaseIE):
 
         entries = OnDemandPagedList(
             functools.partial(self._fetch_page, url, self._ALLOWED_MEDIA_CATS, playlist_id),
-            self._get_page_size(url))
+            self._get_page_size(type))
         return self.playlist_result(entries, playlist_id=playlist_id)
 
 

From 1b3c186424df4f3acfa89cf9685768d5d150e088 Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Thu, 15 Feb 2024 12:41:29 +0300
Subject: [PATCH 13/21] [PromoDJ] Fix page size for playlists

---
 yt_dlp/extractor/promodj.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index cdf33741f..dae880352 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -525,8 +525,8 @@ class PromoDJPlaylistIE(PromoDJBaseIE):
         if type == 'uenno':
             return 15
         if type == 'groups':
-            return 30
-        return 20
+            return 20
+        return 30
 
     def _real_extract(self, url):
         match = self._match_valid_url(url)

From 99dec4d6ed065ba8da0ac2a7533d72f1b8759b6b Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Thu, 15 Feb 2024 13:48:13 +0300
Subject: [PATCH 14/21] [PromoDJ] Add music format ids

---
 yt_dlp/extractor/promodj.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index dae880352..0c61b039f 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -155,10 +155,11 @@ class PromoDJBaseIE(InfoExtractor):
                 })
             }
 
-        formats = [traverse_obj(source, {
-            'url': ('URL', {url_or_none}),
-            'size': ('size', {int_or_none}),
-        }) for source in traverse_obj(media_data, ('sources'))]
+        formats = [{
+            'format_id': 'lossy',
+            'url': traverse_obj(source, ('URL', {url_or_none})),
+            'size': traverse_obj(source, ('size', {int_or_none})),
+        } for source in traverse_obj(media_data, ('sources'))]
         thumbnails = [{
             'url': url,
         } for url in traverse_obj(media_data, ('coverURL', ('600', '1200', '2000'))) if url_or_none(url)]
@@ -855,6 +856,7 @@ class PromoDJIE(PromoDJBaseIE):
                     metadata['formats'][0]['abr'] = int(bitrate)
                 elif url_or_none(url):
                     metadata['formats'].append({
+                        'format_id': 'lossless',
                         'url': url,
                         'abr': int(bitrate),
                     })

From 900bc5f708199d3c4bf5190d156942d686e935ed Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Thu, 15 Feb 2024 21:32:19 +0300
Subject: [PATCH 15/21] [PromoDJ] Refactor fetch_media_data and regexes

---
 yt_dlp/extractor/promodj.py | 60 ++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index 0c61b039f..e629efb01 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -65,9 +65,8 @@ class PromoDJBaseIE(InfoExtractor):
     _PAGES = ['featured', 'shop', *_MEDIA_TYPES]
 
     _BASE_URL_RE = r'https?://(?:www\.)?promodj\.com'
-    _MEDIA_TYPES_RE = '|'.join(_MEDIA_TYPES)
-    _NOT_PAGE_RE = '|'.join(['radio', *_PAGES])
-    _LOGIN_RE = rf'(?:(?!{_NOT_PAGE_RE}).)[\w.-]+'
+    _NOT_LOGIN_LIST = '|'.join(['radio', *_PAGES])
+    _LOGIN_RE = rf'(?:(?!{_NOT_LOGIN_LIST}).)[\w.-]+'
 
     def _set_url_page(self, url, page):
         parsed_url = urllib.parse.urlparse(url)
@@ -117,15 +116,15 @@ class PromoDJBaseIE(InfoExtractor):
     def _get_current_page(self, html):
         return int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1')
 
-    def _fetch_media_data(self, ids, video_id):
-        data = {}
-        for i, id in enumerate(ids):
-            data[f'multi[{i}][method]'] = 'players/config'
-            data[f'multi[{i}][params][kind]'] = 'cover.big'
-            data[f'multi[{i}][params][fileID]'] = id
+    def _fetch_media_data(self, id):
+        data = {
+            'multi[0][method]': 'players/config',
+            'multi[0][params][kind]': 'cover.big',
+            'multi[0][params][fileID]': id,
+        }
         return self._download_json(
-            'https://promodj.com/api/multi.json', video_id, data=urlencode_postdata(data),
-            headers={'Content-Type': 'application/x-www-form-urlencoded'})
+            'https://promodj.com/api/multi.json', id, data=urlencode_postdata(data),
+            headers={'Content-Type': 'application/x-www-form-urlencoded'})[0]
 
     def _parse_media_data(self, media_data, id):
         if player_error := media_data.get('player_error'):
@@ -173,9 +172,9 @@ class PromoDJBaseIE(InfoExtractor):
 
 
 class PromoDJPageIE(PromoDJBaseIE):
-    _PAGES_RE = '|'.join(PromoDJBaseIE._PAGES)
+    _PAGES_LIST = '|'.join(PromoDJBaseIE._PAGES)
 
-    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<id>{_PAGES_RE})'
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<id>{_PAGES_LIST})'
     _TESTS = [{
         'url': 'https://promodj.com/featured',
         'info_dict': {
@@ -370,7 +369,7 @@ class PromoDJUserPagesIE(PromoDJBaseIE):
 
 
 class PromoDJUserPageIE(PromoDJBaseIE):
-    _USER_PAGES = [
+    _USER_PATHS = [
         'pages',
         'music',
         'video',
@@ -382,8 +381,8 @@ class PromoDJUserPageIE(PromoDJBaseIE):
         'uenno',
         *PromoDJBaseIE._MEDIA_TYPES,
     ]
-    _NOT_USER_PAGE_RE = '|'.join(_USER_PAGES)
-    _USER_PAGE_RE = rf'(?:(?!{_NOT_USER_PAGE_RE}).)[\w-]+'
+    _NOT_USER_PAGE_LIST = '|'.join(_USER_PATHS)
+    _USER_PAGE_RE = rf'(?:(?!{_NOT_USER_PAGE_LIST}).)[\w-]+'
 
     _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<slug>{_USER_PAGE_RE})$'
     _TESTS = [{
@@ -445,12 +444,11 @@ class PromoDJBlogPageIE(PromoDJBaseIE):
 
 
 class PromoDJPlaylistIE(PromoDJBaseIE):
-    _PLAYLIST_TYPES = ['uenno', *PromoDJBaseIE._MEDIA_TYPES]
-    _PLAYLIST_TYPES_RE = '|'.join(_PLAYLIST_TYPES)
+    _PLAYLIST_TYPES_LIST = '|'.join(['uenno', *PromoDJBaseIE._MEDIA_TYPES])
 
     _VALID_URL = [
-        rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>{_PLAYLIST_TYPES_RE})$',
-        rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>groups)/(?P<id>\d+)(?:/(?P<slug>\w+))?',
+        rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>{_PLAYLIST_TYPES_LIST})$',
+        rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<type>groups)/(?P<id>\d+)(?:/\w+)?',
     ]
     _TESTS = [{
         # default playlist: music (with songs without player)
@@ -552,7 +550,9 @@ class PromoDJVideoPlaylistIE(PromoDJPlaylistIE):
 
 
 class PromoDJIE(PromoDJBaseIE):
-    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/{PromoDJBaseIE._LOGIN_RE}/(?P<type>{PromoDJBaseIE._MEDIA_TYPES_RE})/(?P<id>\d+)(?:/\w+)?',
+    _MEDIA_TYPES_LIST = '|'.join(PromoDJBaseIE._MEDIA_TYPES)
+
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/{PromoDJBaseIE._LOGIN_RE}/(?P<type>{_MEDIA_TYPES_LIST})/(?P<id>\d+)(?:/\w+)?'  # plain str; a trailing comma here would make it a 1-tuple
     _TESTS = [{
         'url': 'https://promodj.com/antonpavlovsky/remixes/6259208/David_Usher_Black_Black_Heart_Anton_Pavlovsky_Cover',
         'info_dict': {
@@ -683,7 +683,7 @@ class PromoDJIE(PromoDJBaseIE):
             'upload_date': '20100404',
             'timestamp': 1270376700.0,
             'duration': 321.0,
-            'size': 56623104,
+            'size': 5128821,
             'view_count': int,
         },
     }, {
@@ -697,7 +697,7 @@ class PromoDJIE(PromoDJBaseIE):
             'upload_date': '20080827',
             'timestamp': 1219841220.0,
             'duration': 64.0,
-            'size': 2097152,
+            'size': 1014431,
             'view_count': int,
         },
     }, {
@@ -783,9 +783,9 @@ class PromoDJIE(PromoDJBaseIE):
     _TAGS_RE = r'<span\s+class=\"styles\">([^\n]+)</span>'
 
     # https://regex101.com/r/2ZkUmW/1
-    _MUSIC_DATA_REGEX = r'({\"no_preroll\":false,\"seekAny\":true,\"sources\":[^\n]+)\);'
+    _MUSIC_DATA_RE = r'({\"no_preroll\":false,\"seekAny\":true,\"sources\":[^\n]+)\);'
     # https://regex101.com/r/b9utBf/1
-    _VIDEO_DATA_REGEX = r'({\"video\":true,\"config\":[^\n]+)\);'
+    _VIDEO_DATA_RE = r'({\"video\":true,\"config\":[^\n]+)\);'
 
     def _parse_ru_date(self, day, month, year, hours, minutes):
         RU_MONTHS = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря']
@@ -818,10 +818,10 @@ class PromoDJIE(PromoDJBaseIE):
         # always returns only one format: lossy mp3 for music or converted mp4 for video
         media_data = self._search_json(
             '', html, 'media data', id,
-            contains_pattern=self._VIDEO_DATA_REGEX if type == 'videos' else self._MUSIC_DATA_REGEX,
+            contains_pattern=self._VIDEO_DATA_RE if type == 'videos' else self._MUSIC_DATA_RE,
             transform_source=js_to_json, fatal=False, default=None)
         if not media_data:
-            media_data = self._fetch_media_data([id], id)[0]
+            media_data = self._fetch_media_data(id)
         metadata = self._parse_media_data(media_data, id)
 
         # html can be invalid
@@ -872,7 +872,7 @@ class PromoDJIE(PromoDJBaseIE):
 
 
 class PromoDJEmbedIE(PromoDJBaseIE):
-    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/embed/(?P<id>\d+)/(?P<type>cover|big)'
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/embed/(?P<id>\d+)/(?:cover|big)'
     _TESTS = [{
         'url': 'https://promodj.com/embed/7555440/cover',
         'info_dict': {
@@ -929,12 +929,12 @@ class PromoDJEmbedIE(PromoDJBaseIE):
     def _real_extract(self, url):
         id = self._match_id(url)
         metadata = self._parse_media_data(
-            self._fetch_media_data([id], id)[0], id)
+            self._fetch_media_data(id), id)
         return self.url_result(metadata['webpage_url'], PromoDJIE, id)
 
 
 class PromoDJShortIE(PromoDJBaseIE):
-    _VALID_URL = r'https://pdj.cc/(?P<id>\w+)'
+    _VALID_URL = r'https://(?:www\.)?pdj\.cc/(?P<id>\w+)'  # optional "www." prefix; dots escaped so they only match a literal "."
     _TESTS = [{
         # music
         'url': 'https://pdj.cc/fv8VD',

From ed61b73bcd3b9f14746d4f6be203ed2da5727b2b Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Thu, 15 Feb 2024 21:50:13 +0300
Subject: [PATCH 16/21] [PromoDJ] Remove player's width and height

---
 yt_dlp/extractor/promodj.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index e629efb01..4fdcad85a 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -136,10 +136,6 @@ class PromoDJBaseIE(InfoExtractor):
             formats = [{
                 'format_id': 'web',
                 'url': traverse_obj(video, ('play', '@url')).replace('?returnurl=1', ''),
-                **traverse_obj(media_data, {
-                    'width': ('width', {int_or_none}),
-                    'height': ('height', {int_or_none}),
-                })
             }]
             return {
                 'id': id,

From 345d01a175f0609c175a2141f7d552d919fae05e Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Fri, 16 Feb 2024 23:22:57 +0300
Subject: [PATCH 17/21] [PromoDJ] Fix negative lookahead check

---
 yt_dlp/extractor/promodj.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index 4fdcad85a..da1beb8b2 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -66,7 +66,7 @@ class PromoDJBaseIE(InfoExtractor):
 
     _BASE_URL_RE = r'https?://(?:www\.)?promodj\.com'
     _NOT_LOGIN_LIST = '|'.join(['radio', *_PAGES])
-    _LOGIN_RE = rf'(?:(?!{_NOT_LOGIN_LIST}).)[\w.-]+'
+    _LOGIN_RE = rf'(?!{_NOT_LOGIN_LIST})[\w.-]+'
 
     def _set_url_page(self, url, page):
         parsed_url = urllib.parse.urlparse(url)
@@ -378,7 +378,7 @@ class PromoDJUserPageIE(PromoDJBaseIE):
         *PromoDJBaseIE._MEDIA_TYPES,
     ]
     _NOT_USER_PAGE_LIST = '|'.join(_USER_PATHS)
-    _USER_PAGE_RE = rf'(?:(?!{_NOT_USER_PAGE_LIST}).)[\w-]+'
+    _USER_PAGE_RE = rf'(?!{_NOT_USER_PAGE_LIST})[\w-]+'
 
     _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<slug>{_USER_PAGE_RE})$'
     _TESTS = [{

From e32ba3fc218d15ff59c63d95e3c6210184199c20 Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Sat, 17 Feb 2024 04:16:05 +0300
Subject: [PATCH 18/21] [PromoDJ] Fix login regex

---
 yt_dlp/extractor/promodj.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index da1beb8b2..f42d6ed2d 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -65,8 +65,8 @@ class PromoDJBaseIE(InfoExtractor):
     _PAGES = ['featured', 'shop', *_MEDIA_TYPES]
 
     _BASE_URL_RE = r'https?://(?:www\.)?promodj\.com'
-    _NOT_LOGIN_LIST = '|'.join(['radio', *_PAGES])
-    _LOGIN_RE = rf'(?!{_NOT_LOGIN_LIST})[\w.-]+'
+    _NOT_LOGIN_LIST = '|'.join(['radio', 'embed', *_PAGES])
+    _LOGIN_RE = rf'(?!(?:{_NOT_LOGIN_LIST})(?:/|$))[\w.-]+'
 
     def _set_url_page(self, url, page):
         parsed_url = urllib.parse.urlparse(url)
@@ -247,6 +247,10 @@ class PromoDJUserIE(PromoDJBaseIE):
             'id': 'slim96',
         },
         'playlist_count': 0,
+    }, {
+        # login starts with page name
+        'url': 'https://promodj.com/radio.remix',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -288,6 +292,10 @@ class PromoDJUserMediaIE(PromoDJBaseIE):
             'id': 'worobyev-video',
         },
         'playlist_count': 0,
+    }, {
+        # login starts with page name
+        'url': 'https://promodj.com/radio.remix/music',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -378,15 +386,18 @@ class PromoDJUserPageIE(PromoDJBaseIE):
         *PromoDJBaseIE._MEDIA_TYPES,
     ]
     _NOT_USER_PAGE_LIST = '|'.join(_USER_PATHS)
-    _USER_PAGE_RE = rf'(?!{_NOT_USER_PAGE_LIST})[\w-]+'
 
-    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<slug>{_USER_PAGE_RE})$'
+    _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P<login>{PromoDJBaseIE._LOGIN_RE})/(?P<slug>(?!(?:{_NOT_USER_PAGE_LIST})$)[\w-]+$)'
     _TESTS = [{
         'url': 'https://promodj.com/djperetse/MaxMixes',
         'info_dict': {
             'id': 'djperetse-MaxMixes',
         },
         'playlist_count': 5,
+    }, {
+        # user page starts with media type (not a real link)
+        'url': 'https://promodj.com/djperetse/remixes-best',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):

From 49ac5d31a38dc4cffe46b907544cdff619255e48 Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Sat, 17 Feb 2024 05:06:06 +0300
Subject: [PATCH 19/21] [PromoDJ] Update radio extractor and add tests

---
 yt_dlp/extractor/promodj.py | 47 ++++++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 6 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index f42d6ed2d..8600b93d1 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -6,16 +6,17 @@ import urllib.parse
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from ..utils import (
-    OnDemandPagedList,
     clean_html,
     dict_get,
     extract_attributes,
     ExtractorError,
     get_element_by_class,
+    get_element_html_by_id,
     get_elements_html_by_class,
     int_or_none,
     js_to_json,
     merge_dicts,
+    OnDemandPagedList,
     parse_duration,
     str_or_none,
     traverse_obj,
@@ -989,19 +990,53 @@ class PromoDJRadioIE(PromoDJBaseIE):
     _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/radio#(?P<id>\w+)'
     _TESTS = [{
         'url': 'https://promodj.com/radio#dubstep',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'dubstep',
+            'ext': 'mp3',
+            'title': r're:^Dubstep ',
+            'description': 'Всё лучше под дабстеп',
+            'thumbnail': r're:^https?://',
+            'live_status': 'is_live',
+        },
     }, {
         'url': 'https://promodj.com/radio#oldschool',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'oldschool',
+            'ext': 'mp3',
+            'title': r're:^Old-School ',
+            'description': 'То самое доброе, старое, вечное',
+            'thumbnail': r're:^https?://',
+            'live_status': 'is_live',
+        },
     }]
 
     def _real_extract(self, url):
-        id = self._match_id(url)
+        slug = self._match_id(url)
+        html = self._download_webpage(url, slug)
+        radio_span = get_element_html_by_id(f'radio_{slug}', html)
+        if not radio_span:
+            raise ExtractorError('Radio channel is offline or not exists', expected=True)
+        id = self._search_regex(r'amba="radio:(\d+)"', radio_span, 'id')
+        tooltip_html = self._download_webpage(
+            f'https://promodj.com/ajax/tooltip.html?wtf=radio:{id}', slug,
+            note='Downloading tooltip webpage')
+        title = clean_html(self._search_regex(
+            r'<h1[^>]*><b>([^<]+)</b></h1>', tooltip_html, 'title', default=None))
+        description = clean_html(self._search_regex(
+            r'<div>([^<]+)</div>', tooltip_html, 'description', default=None))
+        thumbnail = self._search_regex(
+            rf'#radio_{slug}:after {{ background-image: url\(([^)]+)\); }}',
+            html, 'thumbnail', default=None)
+
         return {
-            'id': id,
+            'id': slug,
+            'title': title,
+            'description': description,
+            'thumbnail': url_or_none(thumbnail),
             'formats': [{
-                'url': f'https://radio.promodj.com/{id}-192',
+                'url': f'https://radio.promodj.com/{slug}-192',
                 'abr': 192,
+                'ext': 'mp3',
             }],
             'is_live': True,
         }

From 2416fddcfbd3dd22caa78fc2cf7018d82a7d2efc Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Sat, 17 Feb 2024 05:24:49 +0300
Subject: [PATCH 20/21] [PromoDJ] Add codecs

---
 yt_dlp/extractor/promodj.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index 8600b93d1..235ff0187 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -155,6 +155,8 @@ class PromoDJBaseIE(InfoExtractor):
             'format_id': 'lossy',
             'url': traverse_obj(source, ('URL', {url_or_none})),
             'size': traverse_obj(source, ('size', {int_or_none})),
+            'acodec': 'mp3',
+            'vcodec': 'none',
         } for source in traverse_obj(media_data, ('sources'))]
         thumbnails = [{
             'url': url,
@@ -780,7 +782,7 @@ class PromoDJIE(PromoDJBaseIE):
 
     # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит
     # https://regex101.com/r/2AuaxB/1
-    _FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*\w+, (?P<bitrate>\d+) Кбит'
+    _FORMATS_RE = r'(?:<a\s+href=\"(?P<url>[^\"]+)\">)?\s*(?P<format>\w+), (?P<bitrate>\d+) Кбит'
     _VIEW_COUNT_RE = r'<b>(?:Прослушиваний|Просмотров):</b>\s*(\d+)'
     # examples: 0:21 | 1:07 | 74:38
     _DURATION_RE = r'<b>Продолжительность:</b>\s*(\d+:\d{2})'
@@ -847,18 +849,19 @@ class PromoDJIE(PromoDJBaseIE):
         # size field describes best quality
         size = self._parse_ru_size(*re.search(self._SIZE_RE, meta_html).groups())
         if type == 'videos':
-            for url, bitrate in formats_from_html:
+            for url, format, bitrate in formats_from_html:
                 if url_or_none(url):
                     metadata['formats'].append({
                         'format_id': 'source',
                         'url': url,
                         'tbr': int(bitrate),
                         'size': size,
+                        'container': format.lower(),
                         'quality': 1,
                     })
         elif not is_paid:
             for i, match in enumerate(formats_from_html):
-                url, bitrate = match
+                url, format, bitrate = match
                 is_last = i == len(formats_from_html) - 1
                 if is_last:
                     metadata['formats'][0]['abr'] = int(bitrate)
@@ -867,6 +870,8 @@ class PromoDJIE(PromoDJBaseIE):
                         'format_id': 'lossless',
                         'url': url,
                         'abr': int(bitrate),
+                        'acodec': format.lower(),
+                        'vcodec': 'none',
                     })
             metadata['formats'][-1]['size'] = size
 
@@ -1037,6 +1042,8 @@ class PromoDJRadioIE(PromoDJBaseIE):
                 'url': f'https://radio.promodj.com/{slug}-192',
                 'abr': 192,
                 'ext': 'mp3',
+                'acodec': 'mp3',
+                'vcodec': 'none',
             }],
             'is_live': True,
         }

From 107bed866fc9017373607256aa8ee37fabea6555 Mon Sep 17 00:00:00 2001
From: DmitryScaletta <dmitryscaletta@mail.ru>
Date: Tue, 27 Feb 2024 02:32:49 +0300
Subject: [PATCH 21/21] [PromoDJ] Sort imports

---
 yt_dlp/extractor/promodj.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py
index 235ff0187..494652123 100644
--- a/yt_dlp/extractor/promodj.py
+++ b/yt_dlp/extractor/promodj.py
@@ -6,17 +6,17 @@ import urllib.parse
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from ..utils import (
+    ExtractorError,
+    OnDemandPagedList,
     clean_html,
     dict_get,
     extract_attributes,
-    ExtractorError,
     get_element_by_class,
     get_element_html_by_id,
     get_elements_html_by_class,
     int_or_none,
     js_to_json,
     merge_dicts,
-    OnDemandPagedList,
     parse_duration,
     str_or_none,
     traverse_obj,