From 1a256e5d562fca110902f8a6e8bec255565c1a4b Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Tue, 13 Feb 2024 22:57:05 +0300 Subject: [PATCH 01/21] [PromoDJ] Add extractors --- yt_dlp/extractor/_extractors.py | 13 + yt_dlp/extractor/promodj.py | 493 ++++++++++++++++++++++++++++++++ 2 files changed, 506 insertions(+) create mode 100644 yt_dlp/extractor/promodj.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e7dd34c77..f35eab137 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1529,6 +1529,19 @@ from .prankcast import PrankCastIE, PrankCastPostIE from .premiershiprugby import PremiershipRugbyIE from .presstv import PressTVIE from .projectveritas import ProjectVeritasIE +from .promodj import ( + PromoDJPageIE, + PromoDJUserIE, + PromoDJUserMediaIE, + PromoDJUserPagesIE, + PromoDJUserPageIE, + PromoDJBlogPageIE, + PromoDJPlaylistIE, + PromoDJIE, + PromoDJEmbedIE, + PromoDJShortIE, + PromoDJRadioIE, +) from .prosiebensat1 import ProSiebenSat1IE from .prx import ( PRXStoryIE, diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py new file mode 100644 index 000000000..da9db44ad --- /dev/null +++ b/yt_dlp/extractor/promodj.py @@ -0,0 +1,493 @@ +import datetime +import functools +import re +import urllib.parse + +from .common import InfoExtractor +from ..utils import ( + OnDemandPagedList, + clean_html, + dict_get, + extract_attributes, + float_or_none, + get_element_by_class, + get_elements_by_class, + int_or_none, + parse_duration, + str_or_none, + traverse_obj, + urlencode_postdata, + url_or_none, +) + +# promodj.com + +# Playlist types: +# /:login/:media_type - default +# /:login/groups/:id/:slug - user defined (groups). Can contain audios and/or videos + +# A single media by default is attached to default playlist +# But it can be reattached to a user playlist (group), and no longer appears in the default one + +# User pages +# /:login - all non-empty playlists +# /:login/music - all non-empty playlists with at least one audio (shows 10 audios per playlist max) +# /:login/video - all non-empty playlists with at least one video (shows 10 videos per playlist max) +# /:login/pages - a list of user pages +# /:login/:page_name - a single user page +# /:login/blog - a list of blog posts +# /:login/blog/:id/:slug - a single blog post + +# If default playlist is empty, it redirects to the user's page +# Pages and blog posts can contain: audios, videos, youtube videos + +# Tracks and remixes can be paid. See /shop page + + +class PromoDJBaseIE(InfoExtractor): + _MEDIA_TYPES = [ + 'tracks', + 'remixes', + 'mixes', + 'promos', + 'lives', + 'podcasts', + 'radioshows', + 'tools', + 'realtones', # doesn't appear on the site menu but still exists + 'acapellas', # redirects to /tools, creates default playlist + 'samples', # redirects to /tools, doesn't create default playlist + 'videos', + ] + _PAGES = ['featured', 'shop', *_MEDIA_TYPES] + + _BASE_URL_RE = r'https?://(?:www\.)?promodj\.com' + _MEDIA_TYPES_RE = '|'.join(_MEDIA_TYPES) + _NOT_PAGE_RE = '|'.join(['radio', *_PAGES]) + _LOGIN_RE = rf'(?:(?!{_NOT_PAGE_RE}).)[\w-]+' + + def _set_url_page(self, url, page): + parsed_url = urllib.parse.urlparse(url) + qs = urllib.parse.parse_qs(parsed_url.query) + qs['page'] = page + return parsed_url._replace(query=urllib.parse.urlencode(qs, doseq=True)).geturl() + + def _fetch_page(self, url, parsed_media_types, playlist_id, page): + page_url = self._set_url_page(url, page + 1) + html = self._download_webpage(page_url, f'{playlist_id}-page-{page + 1}') + current_page = int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1') + if current_page != page + 1: + return + + tracks_dump_html = get_element_by_class('tracks_dump', html) + for item_html in get_elements_by_class('player_standard', tracks_dump_html): + if 'music' in parsed_media_types: + a = get_element_by_class('title', item_html) + if 'video' in parsed_media_types and not a: + a = get_element_by_class('h5videoplayer_promodj_video__title', item_html) + if not a: + continue + if url := traverse_obj(extract_attributes(a), ('href', {url_or_none})): + yield self.url_result(url, PromoDJIE) + + def _parse_playlist_links(self, html): + PLAYLISTS_RE = r'' + DEFAULT_VIDEO_PLAYLIST_RE = r'
Видео
' + + playlist_links = [] + + for playlist_url in re.findall(PLAYLISTS_RE, html): + playlist_links.append(playlist_url) + + login = self._search_regex( + DEFAULT_VIDEO_PLAYLIST_RE, html, 'video playlist url', None) + if login: + playlist_links.append(f'https://promodj.com/{login}/videos') + + return playlist_links + + def _get_playlist_page_size(self, url): + is_default_playlist = '/groups/' not in url + return 30 if is_default_playlist else 20 + + def _fetch_media_data(self, ids, video_id): + data = {} + for i, id in enumerate(ids): + data[f'multi[{i}][method]'] = 'players/config' + data[f'multi[{i}][params][kind]'] = 'standalone.big' + data[f'multi[{i}][params][fileID]'] = id + return self._download_json( + 'https://promodj.com/api/multi.json', video_id, data=urlencode_postdata(data), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) + + +class PromoDJPageIE(PromoDJBaseIE): + _PAGES_RE = '|'.join(PromoDJBaseIE._PAGES) + + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{_PAGES_RE})' + _TESTS = [{ + 'url': 'https://promodj.com/featured', + 'only_matching': True, + }, { + # second page + 'url': 'https://promodj.com/featured/rap?download=1&page=2', + 'only_matching': True, + }, { + # filtered + 'url': 'https://promodj.com/remixes?top=1', + 'only_matching': True, + }, { + # with genre + 'url': 'https://promodj.com/tracks/hip_hop', + 'only_matching': True, + }, { + # with search + 'url': 'https://promodj.com/mixes?kind=mixes&styleID=&searchfor=dance', + 'only_matching': True, + }, { + # no download button + 'url': 'https://promodj.com/shop', + 'only_matching': True, + }] + + _PAGE_SIZE = 20 + + def _real_extract(self, url): + page_type = self._match_id(url) + return self.playlist_result( + OnDemandPagedList( + functools.partial(self._fetch_page, url, ['music', 'video'], page_type), + self._PAGE_SIZE), + playlist_id=page_type) + + +class PromoDJUserIE(PromoDJBaseIE): + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})$' + _TESTS = [{ + 'url': 'https://promodj.com/djperetse', + 'only_matching': True, + }, { + 'url': 'https://promodj.com/dj-trojan', + 'only_matching': True, + }] + + def _real_extract(self, url): + login = self._match_valid_url(url).group('login') + html = self._download_webpage(url, login) + + def entries(): + for playlist_url in self._parse_playlist_links(html): + yield self.url_result(playlist_url, PromoDJPlaylistIE) + + return self.playlist_result(entries(), playlist_id=login) + + +class PromoDJUserMediaIE(PromoDJBaseIE): + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?Pmusic|video)$' + _TESTS = [{ + 'url': 'https://promodj.com/feel/music', + 'only_matching': True, + }, { + 'url': 'https://promodj.com/djmikis/video', + 'only_matching': True, + }, { + # a user without any videos + 'url': 'https://promodj.com/worobyev/video', + 'only_matching': True, + }] + + def _real_extract(self, url): + login, type = self._match_valid_url(url).groups() + page_id = f'{login}-{type}' + html = self._download_webpage(url, page_id) + + def entries(): + for playlist_url in self._parse_playlist_links(html): + # TODO: parse only music or videos + yield self.url_result(playlist_url, PromoDJPlaylistIE) + + return self.playlist_result(entries(), playlist_id=page_id) + + +class PromoDJUserPagesIE(PromoDJBaseIE): + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?P(pages|blog))$' + _TESTS = [{ + 'url': 'https://promodj.com/djperetse/pages', + 'only_matching': True, + }, { + 'url': 'https://promodj.com/golub/blog', + 'only_matching': True, + }] + + def _real_extract(self, url): + login, type = self._match_valid_url(url).groups() + + +class PromoDJUserPageIE(PromoDJBaseIE): + _USER_PAGES = [ + 'pages', + 'music', + 'video', + 'foto', + 'avisha', + 'blog', + 'feedback', + 'contact', + *PromoDJBaseIE._MEDIA_TYPES, + ] + _NOT_USER_PAGE_RE = '|'.join(_USER_PAGES) + _USER_PAGE_RE = rf'(?:(?!{_NOT_USER_PAGE_RE}).)[\w-]+' + + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?P{_USER_PAGE_RE})$' + _TESTS = [{ + 'url': 'https://promodj.com/djperetse/MaxMixes', + 'only_matching': True, + }] + + def _real_extract(self, url): + login, slug = self._match_valid_url(url).groups() + + +class PromoDJBlogPageIE(PromoDJBaseIE): + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/blog/(?P\d+)(?:/(?P\w+))?' + _TESTS = [{ + # with small and big audio players and youtube video + 'url': 'https://promodj.com/golub/blog/1163895/DJ_Andrey_Golubev_To_Depeche_Mode_with_love_part_9_special_dj_edits_mix', + 'only_matching': True, + }, { + # with audio and video + 'url': 'https://promodj.com/svetmusic/blog/1101958/SVET_I_Like_It_Extra_Sound_Recordings', + 'only_matching': True, + }, { + # without any media + 'url': 'https://promodj.com/svetmusic/blog/915878/DJ_SVET_pobeditel_konkursa_Burn_City_Sound', + 'only_matching': True, + }] + + def _real_extract(self, url): + login, id, slug = self._match_valid_url(url).groups() + + +class PromoDJPlaylistIE(PromoDJBaseIE): + _VALID_URL = [ + rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?P{PromoDJBaseIE._MEDIA_TYPES_RE})$', + rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?Pgroups)/(?P\d+)(?:/(?P\w+))?', + ] + _TESTS = [{ + # default playlist: tracks (audio) + 'url': 'https://promodj.com/gluk/tracks', + 'only_matching': True, + }, { + # default playlist: video + 'url': 'https://promodj.com/djperetse/videos', + 'only_matching': True, + }, { + # user playlist: audio + 'url': 'https://promodj.com/fonarev/groups/608158/Digital_Emotions_Night', + 'only_matching': True, + }, { + # two pages + 'url': 'https://promodj.com/lavrov/groups/677132/VINYL', + 'only_matching': True, + }, { + # user playlist: video + 'url': 'https://promodj.com/deeplecture/groups/672782/LAROCCA_TV', + 'only_matching': True, + }, { + # user playlist: audio and video + 'url': 'https://promodj.com/djperetse/groups/637358/Russkie_treki', + 'only_matching': True, + }, { + # 900+ items + 'url': 'https://promodj.com/fonarev/groups/17350/Digital_Emotions_Podcast', + 'only_matching': True, + }] + + def _real_extract(self, url): + match = self._match_valid_url(url) + login = match.group('login') + type = match.group('type') + playlist_id = f'{login}-{type}' if len(match.groups()) == 2 else f'{login}-{type}-{match.group("id")}' + page_size = self._get_playlist_page_size(url) + + entries = OnDemandPagedList( + functools.partial(self._fetch_page, url, ['music', 'video'], playlist_id), + page_size) + return self.playlist_result(entries, playlist_id=playlist_id) + + +class PromoDJIE(PromoDJBaseIE): + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/{PromoDJBaseIE._LOGIN_RE}/(?P{PromoDJBaseIE._MEDIA_TYPES_RE})/(?P\d+)(?:/\w+)?', + _TESTS = [{ + 'url': 'https://promodj.com/antonpavlovsky/remixes/6259208/David_Usher_Black_Black_Heart_Anton_Pavlovsky_Cover', + 'only_matching': True, + }, { + 'url': 'https://promodj.com/j-factory/samples/7560171/Amedici_BW1_Intro', + 'only_matching': True, + }, { + # no download links in html + 'url': 'https://promodj.com/gluk/tracks/4713922/DJ_Glyuk_Folk_ing_DJ_Steven_Smile_Remix_2005', + 'only_matching': True, + }, { + # no player + 'url': 'https://promodj.com/gluk/tracks/420310/IMpulse_Zakat', + 'only_matching': True, + }, { + # without slug + 'url': 'https://promodj.com/djlykov/tracks/7551590', + 'only_matching': True, + }, { + # lossless + 'url': 'https://promodj.com/modi-glu/tracks/6081339/Modi_Glyu_Anabel', + 'only_matching': True, + }, { + # paid audio + 'url': 'https://promodj.com/boyko/tracks/1435682/Dj_Boyko_Katy_Queen_Nad_Oblakami', + 'only_matching': True, + }, { + 'url': 'https://promodj.com/sergeyfedotov306/videos/7457627/V_Matrice_Sboy', + 'only_matching': True, + }, { + 'url': 'https://promodj.com/djperetse/videos/5868236/Fatalist_Project_feat_DJ_Peretse_Den_pobedi_Videoklip', + 'only_matching': True, + }] + + _IS_PAID_RE = r'Цена:' + # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит + _FORMATS_RE = r'[^\"]+\.(?:mp3|mp4|wav))\">\s*(?PMP3|MP4|WAV), (?P\d+) Кбит\s*' + _VIEW_COUNT_RE = r'(?:Прослушиваний|Просмотров):\s*(\d+)' + # examples: 0:21, 1:07, 74:38 + _DURATION_RE = r'Продолжительность:\s*(\d{1,}:\d{2})' + # examples: 818.4 Кб, 12.9 Мб, 4 Гб, 1.76 Гб + _SIZE_RE = r'Размер:\s*(?P\d{1,3}(?:\.\d{1,2})?)\s*(?PКб|Мб|Гб)' + # examples: сегодня 2:55, вчера 23:17, 1 июня 2016 3:46 + _TIMESTAMP_RE = r'Публикация:\s*(?Pвчера|сегодня|\d{1,2})(?: (?P[а-я]+) (?P\d{4}))?\s*(?P\d{1,2}):(?P\d{2})' + _TAGS_RE = r'([^\n]+)' + + def _parse_ru_date(self, raw_date): + RU_MONTHS = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря'] + day, month, year, hours, minutes = raw_date + if day == 'сегодня': + d = datetime.date.today() + day = d.day + month = d.month + year = d.year + elif day == 'вчера': + d = datetime.date.today() - datetime.timedelta(days=1) + day = d.day + month = d.month + year = d.year + else: + day = int(day) + month = RU_MONTHS.index(month) + 1 + year = int(year) + return datetime.datetime(year, month, day, int(hours), int(minutes)).timestamp() + + def _parse_ru_size(self, raw_size): + RU_SIZE_UNITS = ['Б', 'Кб', 'Мб', 'Гб'] + size, size_unit = raw_size + return int(float(size) * pow(1024, RU_SIZE_UNITS.index(size_unit))) + + def _parse_media(self, html, id): + meta_html = get_element_by_class('clearfix', get_element_by_class('dj_bblock', html)) + + is_paid = re.search(self._IS_PAID_RE, meta_html) + formats_from_html = re.findall(self._FORMATS_RE, meta_html) + + if is_paid or len(formats_from_html) == 0: + media_data_raw = self._search_regex( + r'({\"no_preroll\":false,\"seekAny\":true,\"sources\":[^\n]+)\);', html, 'media data') + media_data = self._parse_json(media_data_raw, id) + formats = [{ + 'url': source.get('URL'), + 'size': int_or_none(source.get('size')), + } for source in traverse_obj(media_data, ('sources')) if url_or_none(source.get('URL'))] + else: + formats = [{ + 'url': url, + 'format': format.lower(), + 'tbr': int(bitrate), + } for url, format, bitrate in formats_from_html if url_or_none(url)] + # size field describes best quality. best quality always comes first + formats[0]['size'] = self._parse_ru_size(re.findall(self._SIZE_RE, meta_html)[0]) + + return { + 'id': id, + 'title': clean_html(get_element_by_class('file_title', html)), + 'formats': formats, + 'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count')), + 'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')), + 'timestamp': self._parse_ru_date(re.findall(self._TIMESTAMP_RE, meta_html)[0]), + 'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '), + } + + def _real_extract(self, url): + id = self._match_id(url) + html = self._download_webpage(url, id) + return self._parse_media(html, id) + + +class PromoDJEmbedIE(PromoDJBaseIE): + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/embed/(?P\d+)/(?Pcover|big)' + _TESTS = [{ + 'url': 'https://promodj.com/embed/7555440/cover', + 'only_matching': True, + }, { + 'url': 'https://promodj.com/embed/7540163/big', + 'only_matching': True, + }, { + # video (can be only big) + 'url': 'https://promodj.com/embed/3922099/big', + 'only_matching': True, + }] + + def _get_full_url(self, media_data, id): + if media_data.get('video'): + video_config = self._parse_json(media_data['config'], id) + video = traverse_obj(video_config, ('playlist', 'item', 0)) + return traverse_obj(video, ('title', '@ico_url')) + else: + return media_data.get('titleURL') + + def _real_extract(self, url): + id = self._match_id(url) + url = self._get_full_url(self._fetch_media_data([id], id)[0], id) + return self.url_result(url, PromoDJIE, id) + + +class PromoDJShortIE(PromoDJBaseIE): + _VALID_URL = r'https://pdj.cc/(?P\w+)' + _TESTS = [{ + 'url': 'https://pdj.cc/fv8VD', + 'only_matching': True, + }] + + _PAGE_URL_REGEX = r'\w+)' + _TESTS = [{ + 'url': 'https://promodj.com/radio#dubstep', + 'only_matching': True, + }, { + 'url': 'https://promodj.com/radio#oldschool', + 'only_matching': True, + }] + + def _real_extract(self, url): + id = self._match_id(url) + return { + 'id': id, + 'formats': [{ + 'url': f'https://radio.promodj.com/{id}-192', + 'abr': 192, + }], + 'is_live': True, + } From 3416c1a0e88c599ac053946ae22a385381409158 Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Wed, 14 Feb 2024 00:46:01 +0300 Subject: [PATCH 02/21] [PromoDJ] Add user pages and blogs extractors --- yt_dlp/extractor/promodj.py | 66 ++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index da9db44ad..e9820b60d 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -4,17 +4,16 @@ import re import urllib.parse from .common import InfoExtractor +from .youtube import YoutubeIE from ..utils import ( OnDemandPagedList, clean_html, - dict_get, extract_attributes, - float_or_none, + ExtractorError, get_element_by_class, get_elements_by_class, int_or_none, parse_duration, - str_or_none, traverse_obj, urlencode_postdata, url_or_none, @@ -106,6 +105,14 @@ class PromoDJBaseIE(InfoExtractor): return playlist_links + def _parse_page_content(self, html): + for id in re.findall(r'CORE\.Player\(\'[^\']+\', \'(?:standalone|cover)\.big\', (\d+),', html): + yield self.url_result(f'https://promodj.com/embed/{id}/big', PromoDJEmbedIE, id) + + for iframe_url in re.findall(r']+src=\"([^\"]+)\"', html): + if YoutubeIE.suitable(iframe_url): + yield self.url_result(iframe_url, YoutubeIE) + def _get_playlist_page_size(self, url): is_default_playlist = '/groups/' not in url return 30 if is_default_playlist else 20 @@ -210,7 +217,7 @@ class PromoDJUserMediaIE(PromoDJBaseIE): class PromoDJUserPagesIE(PromoDJBaseIE): - _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?P(pages|blog))$' + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?Ppages|blog)$' _TESTS = [{ 'url': 'https://promodj.com/djperetse/pages', 'only_matching': True, @@ -219,8 +226,38 @@ class PromoDJUserPagesIE(PromoDJBaseIE): 'only_matching': True, }] + _PAGE_SIZE = 10 + + def _parse_pages(self, url, playlist_id): + html = self._download_webpage(url, playlist_id) + content_html = get_element_by_class('dj_universal', get_element_by_class('dj_bblock', html)) + print(re.findall(r'([^<]+)', content_html)) + for page_url, page_title in re.findall(r'([^<]+)', content_html): + yield self.url_result(page_url, PromoDJUserPageIE, video_title=page_title) + + def _fetch_blog_page(self, url, playlist_id, page): + page_url = self._set_url_page(url, page + 1) + html = self._download_webpage(page_url, f'{playlist_id}-page-{page + 1}') + current_page = int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1') + if current_page != page + 1: + return + + for a in get_elements_by_class('post_title', html): + if not a: + continue + if url := traverse_obj(extract_attributes(a), ('href', {url_or_none})): + yield self.url_result(url, PromoDJBlogPageIE) + def _real_extract(self, url): login, type = self._match_valid_url(url).groups() + playlist_id = f'{login}-{type}' + if type == 'pages': + entries = self._parse_pages(url, playlist_id) + elif type == 'blog': + entries = OnDemandPagedList( + functools.partial(self._fetch_blog_page, url, playlist_id), + self._PAGE_SIZE) + return self.playlist_result(entries, playlist_id) class PromoDJUserPageIE(PromoDJBaseIE): @@ -246,6 +283,11 @@ class PromoDJUserPageIE(PromoDJBaseIE): def _real_extract(self, url): login, slug = self._match_valid_url(url).groups() + page_id = f'{login}-{slug}' + html = self._download_webpage(url, page_id) + content_html = get_element_by_class('perfect', html) + return self.playlist_result( + self._parse_page_content(content_html), playlist_id=page_id) class PromoDJBlogPageIE(PromoDJBaseIE): @@ -266,6 +308,11 @@ class PromoDJBlogPageIE(PromoDJBaseIE): def _real_extract(self, url): login, id, slug = self._match_valid_url(url).groups() + page_id = f'{login}-blog-{id}-{slug}' + html = self._download_webpage(url, page_id) + content_html = get_element_by_class('post_body', html) + return self.playlist_result( + self._parse_page_content(content_html), playlist_id=page_id) class PromoDJPlaylistIE(PromoDJBaseIE): @@ -439,9 +486,20 @@ class PromoDJEmbedIE(PromoDJBaseIE): # video (can be only big) 'url': 'https://promodj.com/embed/3922099/big', 'only_matching': True, + }, { + # blocked + 'url': 'https://promodj.com/embed/5586967/big', + 'only_matching': True, + }, { + # deleted + 'url': 'https://promodj.com/embed/5606804/big', + 'only_matching': True, }] def _get_full_url(self, media_data, id): + if player_error := media_data.get('player_error'): + raise ExtractorError(player_error, expected=True) + if media_data.get('video'): video_config = self._parse_json(media_data['config'], id) video = traverse_obj(video_config, ('playlist', 'item', 0)) From ca0be3f1c1e3d452e690e37263b734e788f3438c Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Wed, 14 Feb 2024 04:36:08 +0300 Subject: [PATCH 03/21] [PromoDJ] Improve extractors --- yt_dlp/extractor/_extractors.py | 2 + yt_dlp/extractor/promodj.py | 110 +++++++++++++++++++++----------- 2 files changed, 76 insertions(+), 36 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index f35eab137..59873f479 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1537,6 +1537,8 @@ from .promodj import ( PromoDJUserPageIE, PromoDJBlogPageIE, PromoDJPlaylistIE, + PromoDJMusicPlaylistIE, + PromoDJVideoPlaylistIE, PromoDJIE, PromoDJEmbedIE, PromoDJShortIE, diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index e9820b60d..f58be2893 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -11,7 +11,7 @@ from ..utils import ( extract_attributes, ExtractorError, get_element_by_class, - get_elements_by_class, + get_elements_html_by_class, int_or_none, parse_duration, traverse_obj, @@ -63,7 +63,7 @@ class PromoDJBaseIE(InfoExtractor): _BASE_URL_RE = r'https?://(?:www\.)?promodj\.com' _MEDIA_TYPES_RE = '|'.join(_MEDIA_TYPES) _NOT_PAGE_RE = '|'.join(['radio', *_PAGES]) - _LOGIN_RE = rf'(?:(?!{_NOT_PAGE_RE}).)[\w-]+' + _LOGIN_RE = rf'(?:(?!{_NOT_PAGE_RE}).)[\w.-]+' def _set_url_page(self, url, page): parsed_url = urllib.parse.urlparse(url) @@ -71,27 +71,25 @@ class PromoDJBaseIE(InfoExtractor): qs['page'] = page return parsed_url._replace(query=urllib.parse.urlencode(qs, doseq=True)).geturl() - def _fetch_page(self, url, parsed_media_types, playlist_id, page): + def _fetch_page(self, url, media_types, playlist_id, page): page_url = self._set_url_page(url, page + 1) html = self._download_webpage(page_url, f'{playlist_id}-page-{page + 1}') current_page = int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1') if current_page != page + 1: return - tracks_dump_html = get_element_by_class('tracks_dump', html) - for item_html in get_elements_by_class('player_standard', tracks_dump_html): - if 'music' in parsed_media_types: - a = get_element_by_class('title', item_html) - if 'video' in parsed_media_types and not a: - a = get_element_by_class('h5videoplayer_promodj_video__title', item_html) - if not a: + for a in get_elements_html_by_class('player_standard_tool__play', html): + url = traverse_obj(extract_attributes(a), ('href', {url_or_none})) + if not url: continue - if url := traverse_obj(extract_attributes(a), ('href', {url_or_none})): + url = url.replace('?play=1', '') + is_video = '/videos/' in url + if is_video and 'video' in media_types or not is_video and 'music' in media_types: yield self.url_result(url, PromoDJIE) def _parse_playlist_links(self, html): PLAYLISTS_RE = r'' - DEFAULT_VIDEO_PLAYLIST_RE = r'
Видео
' + DEFAULT_VIDEO_PLAYLIST_RE = r'
Видео
' playlist_links = [] @@ -210,8 +208,8 @@ class PromoDJUserMediaIE(PromoDJBaseIE): def entries(): for playlist_url in self._parse_playlist_links(html): - # TODO: parse only music or videos - yield self.url_result(playlist_url, PromoDJPlaylistIE) + ie = PromoDJMusicPlaylistIE if type == 'music' else PromoDJVideoPlaylistIE + yield self.url_result(playlist_url, ie) return self.playlist_result(entries(), playlist_id=page_id) @@ -231,7 +229,6 @@ class PromoDJUserPagesIE(PromoDJBaseIE): def _parse_pages(self, url, playlist_id): html = self._download_webpage(url, playlist_id) content_html = get_element_by_class('dj_universal', get_element_by_class('dj_bblock', html)) - print(re.findall(r'([^<]+)', content_html)) for page_url, page_title in re.findall(r'([^<]+)', content_html): yield self.url_result(page_url, PromoDJUserPageIE, video_title=page_title) @@ -242,9 +239,7 @@ class PromoDJUserPagesIE(PromoDJBaseIE): if current_page != page + 1: return - for a in get_elements_by_class('post_title', html): - if not a: - continue + for a in get_elements_html_by_class('post_title_moderated', html): if url := traverse_obj(extract_attributes(a), ('href', {url_or_none})): yield self.url_result(url, PromoDJBlogPageIE) @@ -350,6 +345,8 @@ class PromoDJPlaylistIE(PromoDJBaseIE): 'only_matching': True, }] + _MEDIA_TYPES = ['music', 'video'] + def _real_extract(self, url): match = self._match_valid_url(url) login = match.group('login') @@ -358,11 +355,19 @@ class PromoDJPlaylistIE(PromoDJBaseIE): page_size = self._get_playlist_page_size(url) entries = OnDemandPagedList( - functools.partial(self._fetch_page, url, ['music', 'video'], playlist_id), + functools.partial(self._fetch_page, url, self._MEDIA_TYPES, playlist_id), page_size) return self.playlist_result(entries, playlist_id=playlist_id) +class PromoDJMusicPlaylistIE(PromoDJPlaylistIE): + _MEDIA_TYPES = ['music'] + + +class PromoDJVideoPlaylistIE(PromoDJPlaylistIE): + _MEDIA_TYPES = ['video'] + + class PromoDJIE(PromoDJBaseIE): _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/{PromoDJBaseIE._LOGIN_RE}/(?P{PromoDJBaseIE._MEDIA_TYPES_RE})/(?P\d+)(?:/\w+)?', _TESTS = [{ @@ -372,9 +377,13 @@ class PromoDJIE(PromoDJBaseIE): 'url': 'https://promodj.com/j-factory/samples/7560171/Amedici_BW1_Intro', 'only_matching': True, }, { - # no download links in html + # music: no download links in html 'url': 'https://promodj.com/gluk/tracks/4713922/DJ_Glyuk_Folk_ing_DJ_Steven_Smile_Remix_2005', 'only_matching': True, + }, { + # video: no download link in html + 'url': 'https://promodj.com/psywanderer/videos/7559147/Chu_de_sa', + 'only_matching': True, }, { # no player 'url': 'https://promodj.com/gluk/tracks/420310/IMpulse_Zakat', @@ -397,20 +406,37 @@ class PromoDJIE(PromoDJBaseIE): }, { 'url': 'https://promodj.com/djperetse/videos/5868236/Fatalist_Project_feat_DJ_Peretse_Den_pobedi_Videoklip', 'only_matching': True, + }, { + # avi + 'url': 'https://promodj.com/djmikis/videos/5311597/Mikis_Live_SDJ_Show', + 'only_matching': True, + }, { + # asf + 'url': 'https://promodj.com/gigsiphonic/videos/7559341/Gigsiphonic_PODCAST_309_Extended_video_version', + 'only_matching': True, + }, { + # not valid html + 'url': 'https://promodj.com/martin.sehnal/videos/7555841/Martin_Sehnal_CII_33_Plus_CII_32_Clothes_on_the_peg_2_020_2_024_02_01th', + 'only_matching': True, }] _IS_PAID_RE = r'Цена:' - # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит - _FORMATS_RE = r'[^\"]+\.(?:mp3|mp4|wav))\">\s*(?PMP3|MP4|WAV), (?P\d+) Кбит\s*' + # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит + _FORMATS_RE = r'[^\"]+)\">\s*(?P\w+), (?P\d+) Кбит\s*' _VIEW_COUNT_RE = r'(?:Прослушиваний|Просмотров):\s*(\d+)' - # examples: 0:21, 1:07, 74:38 - _DURATION_RE = r'Продолжительность:\s*(\d{1,}:\d{2})' - # examples: 818.4 Кб, 12.9 Мб, 4 Гб, 1.76 Гб - _SIZE_RE = r'Размер:\s*(?P\d{1,3}(?:\.\d{1,2})?)\s*(?PКб|Мб|Гб)' - # examples: сегодня 2:55, вчера 23:17, 1 июня 2016 3:46 + # examples: 0:21 | 1:07 | 74:38 + _DURATION_RE = r'Продолжительность:\s*(\d+:\d{2})' + # examples: 818.4 Кб | 12.9 Мб | 4 Гб | 1.76 Гб | 1001.5 Мб + _SIZE_RE = r'Размер:\s*(?P\d+(?:\.\d+)?)\s*(?PКб|Мб|Гб)' + # examples: сегодня 2:55 | вчера 23:17 | 1 июня 2016 3:46 _TIMESTAMP_RE = r'Публикация:\s*(?Pвчера|сегодня|\d{1,2})(?: (?P[а-я]+) (?P\d{4}))?\s*(?P\d{1,2}):(?P\d{2})' _TAGS_RE = r'([^\n]+)' + # https://regex101.com/r/2ZkUmW/1 + _MUSIC_DATA_REGEX = r'({\"no_preroll\":false,\"seekAny\":true,\"sources\":[^\n]+)\);' + # https://regex101.com/r/b9utBf/1 + _VIDEO_DATA_REGEX = r'({\"video\":true,\"config\":[^\n]+)\);' + def _parse_ru_date(self, raw_date): RU_MONTHS = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря'] day, month, year, hours, minutes = raw_date @@ -435,20 +461,32 @@ class PromoDJIE(PromoDJBaseIE): size, size_unit = raw_size return int(float(size) * pow(1024, RU_SIZE_UNITS.index(size_unit))) - def _parse_media(self, html, id): - meta_html = get_element_by_class('clearfix', get_element_by_class('dj_bblock', html)) + def _parse_media(self, html, id, type): + # html can be invalid + try: + meta_html = get_elements_html_by_class('dj_universal', html)[1] + except Exception: + meta_html = html - is_paid = re.search(self._IS_PAID_RE, meta_html) formats_from_html = re.findall(self._FORMATS_RE, meta_html) + has_formats = len(formats_from_html) != 0 + is_paid = re.search(self._IS_PAID_RE, meta_html) - if is_paid or len(formats_from_html) == 0: - media_data_raw = self._search_regex( - r'({\"no_preroll\":false,\"seekAny\":true,\"sources\":[^\n]+)\);', html, 'media data') + if not has_formats and is_paid: + media_data_raw = self._search_regex(self._MUSIC_DATA_REGEX, html, 'media data') media_data = self._parse_json(media_data_raw, id) formats = [{ 'url': source.get('URL'), 'size': int_or_none(source.get('size')), } for source in traverse_obj(media_data, ('sources')) if url_or_none(source.get('URL'))] + elif not has_formats and type == 'videos': + media_data_raw = self._search_regex(self._VIDEO_DATA_REGEX, html, 'media data') + media_data = self._parse_json(media_data_raw, id) + video_config = self._parse_json(media_data['config'], id) + video = traverse_obj(video_config, ('playlist', 'item', 0)) + formats = [{ + 'url': traverse_obj(video, ('play', '@url', {url_or_none})), + }] else: formats = [{ 'url': url, @@ -462,16 +500,16 @@ class PromoDJIE(PromoDJBaseIE): 'id': id, 'title': clean_html(get_element_by_class('file_title', html)), 'formats': formats, - 'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count')), + 'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count', default=None)), 'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')), 'timestamp': self._parse_ru_date(re.findall(self._TIMESTAMP_RE, meta_html)[0]), 'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '), } def _real_extract(self, url): - id = self._match_id(url) + type, id = self._match_valid_url(url).groups() html = self._download_webpage(url, id) - return self._parse_media(html, id) + return self._parse_media(html, id, type) class PromoDJEmbedIE(PromoDJBaseIE): From 13f116fce62ddab5a77ba5ba2d5c935e1acdb201 Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Wed, 14 Feb 2024 04:40:18 +0300 Subject: [PATCH 04/21] [PromoDJ] Rename media_types to allowed_media_cats --- yt_dlp/extractor/promodj.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index f58be2893..ccfa50d05 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -71,7 +71,7 @@ class PromoDJBaseIE(InfoExtractor): qs['page'] = page return parsed_url._replace(query=urllib.parse.urlencode(qs, doseq=True)).geturl() - def _fetch_page(self, url, media_types, playlist_id, page): + def _fetch_page(self, url, allowed_media_cats, playlist_id, page): page_url = self._set_url_page(url, page + 1) html = self._download_webpage(page_url, f'{playlist_id}-page-{page + 1}') current_page = int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1') @@ -84,7 +84,7 @@ class PromoDJBaseIE(InfoExtractor): continue url = url.replace('?play=1', '') is_video = '/videos/' in url - if is_video and 'video' in media_types or not is_video and 'music' in media_types: + if is_video and 'video' in allowed_media_cats or not is_video and 'music' in allowed_media_cats: yield self.url_result(url, PromoDJIE) def _parse_playlist_links(self, html): @@ -345,7 +345,7 @@ class PromoDJPlaylistIE(PromoDJBaseIE): 'only_matching': True, }] - _MEDIA_TYPES = ['music', 'video'] + _ALLOWED_MEDIA_CATS = ['music', 'video'] def _real_extract(self, url): match = self._match_valid_url(url) @@ -355,17 +355,17 @@ class PromoDJPlaylistIE(PromoDJBaseIE): page_size = self._get_playlist_page_size(url) entries = OnDemandPagedList( - functools.partial(self._fetch_page, url, self._MEDIA_TYPES, playlist_id), + functools.partial(self._fetch_page, url, self._ALLOWED_MEDIA_CATS, playlist_id), page_size) return self.playlist_result(entries, playlist_id=playlist_id) class PromoDJMusicPlaylistIE(PromoDJPlaylistIE): - _MEDIA_TYPES = ['music'] + _ALLOWED_MEDIA_CATS = ['music'] class PromoDJVideoPlaylistIE(PromoDJPlaylistIE): - _MEDIA_TYPES = ['video'] + _ALLOWED_MEDIA_CATS = ['video'] class PromoDJIE(PromoDJBaseIE): From a634e7c5d1b288b145d8acaeb947a64790f368e7 Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Wed, 14 Feb 2024 14:00:03 +0300 Subject: [PATCH 05/21] [PromoDJ] Some refactoring --- yt_dlp/extractor/promodj.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index ccfa50d05..b63113c30 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -74,8 +74,7 @@ class PromoDJBaseIE(InfoExtractor): def _fetch_page(self, url, allowed_media_cats, playlist_id, page): page_url = self._set_url_page(url, page + 1) html = self._download_webpage(page_url, f'{playlist_id}-page-{page + 1}') - current_page = int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1') - if current_page != page + 1: + if self._get_current_page(html) != page + 1: return for a in get_elements_html_by_class('player_standard_tool__play', html): @@ -115,6 +114,9 @@ class PromoDJBaseIE(InfoExtractor): is_default_playlist = '/groups/' not in url return 30 if is_default_playlist else 20 + def _get_current_page(self, html): + return int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1') + def _fetch_media_data(self, ids, video_id): data = {} for i, id in enumerate(ids): @@ -232,11 +234,10 @@ class PromoDJUserPagesIE(PromoDJBaseIE): for page_url, page_title in re.findall(r'([^<]+)', content_html): yield self.url_result(page_url, PromoDJUserPageIE, video_title=page_title) - def _fetch_blog_page(self, url, playlist_id, page): + def _fetch_blogs_page(self, url, playlist_id, page): page_url = self._set_url_page(url, page + 1) html = self._download_webpage(page_url, f'{playlist_id}-page-{page + 1}') - current_page = int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1') - if current_page != page + 1: + if self._get_current_page(html) != page + 1: return for a in get_elements_html_by_class('post_title_moderated', html): @@ -250,7 +251,7 @@ class PromoDJUserPagesIE(PromoDJBaseIE): entries = self._parse_pages(url, playlist_id) elif type == 'blog': entries = OnDemandPagedList( - functools.partial(self._fetch_blog_page, url, playlist_id), + functools.partial(self._fetch_blogs_page, url, playlist_id), self._PAGE_SIZE) return self.playlist_result(entries, playlist_id) @@ -472,14 +473,7 @@ class PromoDJIE(PromoDJBaseIE): has_formats = len(formats_from_html) != 0 is_paid = re.search(self._IS_PAID_RE, meta_html) - if not has_formats and is_paid: - media_data_raw = self._search_regex(self._MUSIC_DATA_REGEX, html, 'media data') - media_data = self._parse_json(media_data_raw, id) - formats = [{ - 'url': source.get('URL'), - 'size': int_or_none(source.get('size')), - } for source in traverse_obj(media_data, ('sources')) if url_or_none(source.get('URL'))] - elif not has_formats and type == 'videos': + if not has_formats and type == 'videos': media_data_raw = self._search_regex(self._VIDEO_DATA_REGEX, html, 'media data') media_data = self._parse_json(media_data_raw, id) video_config = self._parse_json(media_data['config'], id) @@ -487,6 +481,13 @@ class PromoDJIE(PromoDJBaseIE): formats = [{ 'url': traverse_obj(video, ('play', '@url', {url_or_none})), }] + elif not has_formats or is_paid: + media_data_raw = self._search_regex(self._MUSIC_DATA_REGEX, html, 'media data') + media_data = self._parse_json(media_data_raw, id) + formats = [{ + 'url': source.get('URL'), + 'size': int_or_none(source.get('size')), + } for source in traverse_obj(media_data, ('sources')) if url_or_none(source.get('URL'))] else: formats = [{ 'url': url, @@ -558,13 +559,10 @@ class PromoDJShortIE(PromoDJBaseIE): 'only_matching': True, }] - _PAGE_URL_REGEX = r' Date: Wed, 14 Feb 2024 16:23:17 +0300 Subject: [PATCH 06/21] [PromoDJ] Update media info extraction logic --- yt_dlp/extractor/promodj.py | 139 ++++++++++++++++++++++++------------ 1 file changed, 93 insertions(+), 46 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index b63113c30..937fa56c9 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -8,12 +8,17 @@ from .youtube import YoutubeIE from ..utils import ( OnDemandPagedList, clean_html, + dict_get, extract_attributes, ExtractorError, + float_or_none, get_element_by_class, get_elements_html_by_class, int_or_none, + js_to_json, + merge_dicts, parse_duration, + str_or_none, traverse_obj, urlencode_postdata, url_or_none, @@ -127,6 +132,44 @@ class PromoDJBaseIE(InfoExtractor): 'https://promodj.com/api/multi.json', video_id, data=urlencode_postdata(data), headers={'Content-Type': 'application/x-www-form-urlencoded'}) + def _parse_media_data(self, media_data, id): + if player_error := media_data.get('player_error'): + raise ExtractorError(player_error, expected=True) + + if media_data.get('video'): + video = traverse_obj( + self._parse_json(media_data['config'], id), ('playlist', 'item', 0)) + formats = [{ + 'url': traverse_obj(video, ('play', '@url', {url_or_none})), + **traverse_obj(media_data, { + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }) + }] + return { + 'id': id, + 'formats': formats, + **traverse_obj(video, { + 'title': ('title', 'line', 1, 0, '$', {str_or_none}), + 'webpage_url': ('title', '@ico_url', {url_or_none}), + 'duration': ('play', '@duration', {int_or_none}), + 'thumbnail': ('background', '@url', {url_or_none}), + 'channel': ('title', 'line', 0, 0, '$', {str_or_none}), + 'channel_url': ('title', 'line', 0, 0, '@url', {url_or_none}), + }) + } + + formats = [traverse_obj(source, { + 'url': ('URL', {url_or_none}), + 'size': ('size', {int_or_none}), + }) for source in traverse_obj(media_data, ('sources'))] + return { + 'id': id, + 'title': clean_html(dict_get(media_data, ('title_html', 'title'))), + 'formats': formats, + 'webpage_url': traverse_obj(media_data, ('titleURL', {url_or_none})) + } + class PromoDJPageIE(PromoDJBaseIE): _PAGES_RE = '|'.join(PromoDJBaseIE._PAGES) @@ -394,13 +437,21 @@ class PromoDJIE(PromoDJBaseIE): 'url': 'https://promodj.com/djlykov/tracks/7551590', 'only_matching': True, }, { - # lossless + # lossless wav 'url': 'https://promodj.com/modi-glu/tracks/6081339/Modi_Glyu_Anabel', 'only_matching': True, }, { - # paid audio + # lossless flac + 'url': 'https://promodj.com/sashaorbeat/mixes/7422493/Sasha_Orbeat_Pure_Love_3', + 'only_matching': True, + }, { + # paid lossless 'url': 'https://promodj.com/boyko/tracks/1435682/Dj_Boyko_Katy_Queen_Nad_Oblakami', 'only_matching': True, + }, { + # paid lossy + 'url': 'https://promodj.com/tesla/tracks/342938/Library_Of_Bugs', + 'only_matching': True, }, { 'url': 'https://promodj.com/sergeyfedotov306/videos/7457627/V_Matrice_Sboy', 'only_matching': True, @@ -422,8 +473,9 @@ class PromoDJIE(PromoDJBaseIE): }] _IS_PAID_RE = r'Цена:' - # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит - _FORMATS_RE = r'[^\"]+)\">\s*(?P\w+), (?P\d+) Кбит\s*' + # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит + # https://regex101.com/r/2AuaxB/1 + _FORMATS_RE = r'(?:[^\"]+)\">)?\s*(?P\w+), (?P\d+) Кбит' _VIEW_COUNT_RE = r'(?:Прослушиваний|Просмотров):\s*(\d+)' # examples: 0:21 | 1:07 | 74:38 _DURATION_RE = r'Продолжительность:\s*(\d+:\d{2})' @@ -463,49 +515,50 @@ class PromoDJIE(PromoDJBaseIE): return int(float(size) * pow(1024, RU_SIZE_UNITS.index(size_unit))) def _parse_media(self, html, id, type): + # videos always have one format + # audios can have one or two formats + + # always returns only one format + # if audio has two formats, returns only lossy + media_data = self._search_json( + '', html, 'media data', id, + contains_pattern=self._VIDEO_DATA_REGEX if type == 'videos' else self._MUSIC_DATA_REGEX, + transform_source=js_to_json) + metadata = self._parse_media_data(media_data, id) + # html can be invalid try: meta_html = get_elements_html_by_class('dj_universal', html)[1] except Exception: meta_html = html + # returns one or two formats but sometimes without download links + # best quality always comes first formats_from_html = re.findall(self._FORMATS_RE, meta_html) - has_formats = len(formats_from_html) != 0 is_paid = re.search(self._IS_PAID_RE, meta_html) + bitrate_key = 'tbr' if type == 'videos' else 'abr' + for i, match in enumerate(formats_from_html): + url, _, bitrate = match + is_last = i == len(formats_from_html) - 1 + if is_last: + metadata['formats'][0][bitrate_key] = int(bitrate) + elif url_or_none(url) and not is_paid: + metadata['formats'].append({ + 'url': url, + bitrate_key: int(bitrate), + }) - if not has_formats and type == 'videos': - media_data_raw = self._search_regex(self._VIDEO_DATA_REGEX, html, 'media data') - media_data = self._parse_json(media_data_raw, id) - video_config = self._parse_json(media_data['config'], id) - video = traverse_obj(video_config, ('playlist', 'item', 0)) - formats = [{ - 'url': traverse_obj(video, ('play', '@url', {url_or_none})), - }] - elif not has_formats or is_paid: - media_data_raw = self._search_regex(self._MUSIC_DATA_REGEX, html, 'media data') - media_data = self._parse_json(media_data_raw, id) - formats = [{ - 'url': source.get('URL'), - 'size': int_or_none(source.get('size')), - } for source in traverse_obj(media_data, ('sources')) if url_or_none(source.get('URL'))] - else: - formats = [{ - 'url': url, - 'format': format.lower(), - 'tbr': int(bitrate), - } for url, format, bitrate in formats_from_html if url_or_none(url)] - # size field describes best quality. best quality always comes first - formats[0]['size'] = self._parse_ru_size(re.findall(self._SIZE_RE, meta_html)[0]) + # size field describes best quality + size = self._parse_ru_size(re.search(self._SIZE_RE, meta_html).groups()) + metadata['formats'][-1]['size'] = size - return { - 'id': id, + return merge_dicts(metadata, { 'title': clean_html(get_element_by_class('file_title', html)), - 'formats': formats, 'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count', default=None)), 'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')), 'timestamp': self._parse_ru_date(re.findall(self._TIMESTAMP_RE, meta_html)[0]), 'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '), - } + }) def _real_extract(self, url): type, id = self._match_valid_url(url).groups() @@ -535,21 +588,11 @@ class PromoDJEmbedIE(PromoDJBaseIE): 'only_matching': True, }] - def _get_full_url(self, media_data, id): - if player_error := media_data.get('player_error'): - raise ExtractorError(player_error, expected=True) - - if media_data.get('video'): - video_config = self._parse_json(media_data['config'], id) - video = traverse_obj(video_config, ('playlist', 'item', 0)) - return traverse_obj(video, ('title', '@ico_url')) - else: - return media_data.get('titleURL') - def _real_extract(self, url): id = self._match_id(url) - url = self._get_full_url(self._fetch_media_data([id], id)[0], id) - return self.url_result(url, PromoDJIE, id) + metadata = self._parse_media_data( + self._fetch_media_data([id], id)[0], id) + return self.url_result(metadata['webpage_url'], PromoDJIE, id) class PromoDJShortIE(PromoDJBaseIE): @@ -562,7 +605,11 @@ class PromoDJShortIE(PromoDJBaseIE): def _real_extract(self, url): id = self._match_id(url) html = self._download_webpage(url, id) - return self.url_result(self._og_search_url(html), PromoDJIE, id) + try: + url = self._og_search_url(html) + except Exception: + raise ExtractorError('Unable to extract full URL') + return self.url_result(url, PromoDJIE, id) class PromoDJRadioIE(PromoDJBaseIE): From 5b148c0f796fa2d2d0971574235250e9ebe61f1b Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Wed, 14 Feb 2024 17:55:59 +0300 Subject: [PATCH 07/21] [PromoDJ] Improve video formats, update tests --- yt_dlp/extractor/promodj.py | 259 +++++++++++++++++++++++++++++------- 1 file changed, 212 insertions(+), 47 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index 937fa56c9..9a79399fa 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -11,7 +11,6 @@ from ..utils import ( dict_get, extract_attributes, ExtractorError, - float_or_none, get_element_by_class, get_elements_html_by_class, int_or_none, @@ -126,7 +125,7 @@ class PromoDJBaseIE(InfoExtractor): data = {} for i, id in enumerate(ids): data[f'multi[{i}][method]'] = 'players/config' - data[f'multi[{i}][params][kind]'] = 'standalone.big' + data[f'multi[{i}][params][kind]'] = 'cover.big' data[f'multi[{i}][params][fileID]'] = id return self._download_json( 'https://promodj.com/api/multi.json', video_id, data=urlencode_postdata(data), @@ -140,7 +139,7 @@ class PromoDJBaseIE(InfoExtractor): video = traverse_obj( self._parse_json(media_data['config'], id), ('playlist', 'item', 0)) formats = [{ - 'url': traverse_obj(video, ('play', '@url', {url_or_none})), + 'url': traverse_obj(video, ('play', '@url')).replace('?returnurl=1', ''), **traverse_obj(media_data, { 'width': ('width', {int_or_none}), 'height': ('height', {int_or_none}), @@ -163,10 +162,12 @@ class PromoDJBaseIE(InfoExtractor): 'url': ('URL', {url_or_none}), 'size': ('size', {int_or_none}), }) for source in traverse_obj(media_data, ('sources'))] + thumbnails = [{'url': url} for url in traverse_obj(media_data, ('coverURL', ('600', '1200', '2000'))) if url_or_none] return { 'id': id, 'title': clean_html(dict_get(media_data, ('title_html', 'title'))), 'formats': formats, + 'thumbnails': thumbnails, 'webpage_url': traverse_obj(media_data, ('titleURL', {url_or_none})) } @@ -416,60 +417,216 @@ class PromoDJIE(PromoDJBaseIE): _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/{PromoDJBaseIE._LOGIN_RE}/(?P{PromoDJBaseIE._MEDIA_TYPES_RE})/(?P\d+)(?:/\w+)?', _TESTS = [{ 'url': 'https://promodj.com/antonpavlovsky/remixes/6259208/David_Usher_Black_Black_Heart_Anton_Pavlovsky_Cover', - 'only_matching': True, + 'info_dict': { + 'id': '6259208', + 'ext': 'mp3', + 'title': 'David Usher - Black Black Heart (Anton Pavlovsky Cover)', + 'tags': ['Lounge', 'Deep House'], + 'upload_date': '20170323', + 'timestamp': 1490258400.0, + 'duration': 173.0, + 'size': 7654604, + 'view_count': int, + }, }, { 'url': 'https://promodj.com/j-factory/samples/7560171/Amedici_BW1_Intro', - 'only_matching': True, + 'info_dict': { + 'id': '7560171', + 'ext': 'mp3', + 'title': 'Amedici - BW1 - Intro', + 'tags': ['Multitrack master', 'Fx'], + 'upload_date': '20240212', + 'timestamp': 1707748800.0, + 'duration': 21.0, + 'size': 838041, + 'view_count': int, + }, }, { # music: no download links in html 'url': 'https://promodj.com/gluk/tracks/4713922/DJ_Glyuk_Folk_ing_DJ_Steven_Smile_Remix_2005', - 'only_matching': True, + 'info_dict': { + 'id': '4713922', + 'ext': 'mp3', + 'title': 'DJ Глюк - Folk\'ing [DJ Steven Smile Remix] (2005)', + 'tags': ['Pumping House', 'Hard House'], + 'upload_date': '20140404', + 'timestamp': 1396605480.0, + 'duration': 299.0, + 'size': 12058624, + 'view_count': int, + }, }, { # video: no download link in html 'url': 'https://promodj.com/psywanderer/videos/7559147/Chu_de_sa', - 'only_matching': True, + 'info_dict': { + 'id': '7559147', + 'ext': 'mp4', + 'title': 'Чу де са', + 'tags': ['Jazz-Rap', 'Jazzstep'], + 'thumbnail': r're:^https?://', + 'upload_date': '20240210', + 'timestamp': 1707533820.0, + 'duration': 388720, + 'view_count': int, + 'channel': 'PsyWanderer', + 'channel_url': 'https://promodj.com/psywanderer', + }, }, { - # no player + # no player (external link) 'url': 'https://promodj.com/gluk/tracks/420310/IMpulse_Zakat', - 'only_matching': True, + 'info_dict': { + 'id': '420310', + 'ext': 'mp3', + 'title': 'IMpulse - Закат', + 'tags': ['House', 'Electro House'], + 'thumbnail': r're:^https?://', + 'upload_date': '20081024', + 'timestamp': 1224846120.0, + 'duration': 133.0, + 'size': 1048576, + 'view_count': int, + }, + 'params': { + 'skip_download': 'Link is broken', + }, }, { # without slug 'url': 'https://promodj.com/djlykov/tracks/7551590', - 'only_matching': True, + 'info_dict': { + 'id': '7551590', + 'ext': 'mp3', + 'title': 'Lykov - Benjamin (Radio Edit) [MOUSE-P]', + 'tags': ['Dance Pop', 'Eurodance'], + 'upload_date': '20240122', + 'timestamp': 1705919280.0, + 'duration': 233.0, + 'size': 9332326, + 'view_count': int, + }, }, { # lossless wav 'url': 'https://promodj.com/modi-glu/tracks/6081339/Modi_Glyu_Anabel', - 'only_matching': True, + 'info_dict': { + 'id': '6081339', + 'ext': 'wav', + 'title': 'Моди Глю " Анабель"', + 'tags': ['Chillout', 'Downtempo'], + 'upload_date': '20161029', + 'timestamp': 1477767780.0, + 'duration': 236.0, + 'size': 42257612, + 'view_count': int, + }, }, { # lossless flac 'url': 'https://promodj.com/sashaorbeat/mixes/7422493/Sasha_Orbeat_Pure_Love_3', - 'only_matching': True, + 'info_dict': { + 'id': '7422493', + 'ext': 'flac', + 'title': 'Sasha Orbeat — Pure Love 3', + 'tags': ['Lo-Fi', 'Downtempo'], + 'upload_date': '20230213', + 'timestamp': 1676306160.0, + 'duration': 3631.0, + 'size': 685139558, + 'view_count': int, + }, }, { # paid lossless 'url': 'https://promodj.com/boyko/tracks/1435682/Dj_Boyko_Katy_Queen_Nad_Oblakami', - 'only_matching': True, + 'info_dict': { + 'id': '1435682', + 'ext': 'mp3', + 'title': 'Dj Boyko & Katy Queen - Над Облаками', + 'tags': ['House', 'Trance'], + 'upload_date': '20100404', + 'timestamp': 1270376700.0, + 'duration': 321.0, + 'size': 56623104, + 'view_count': int, + }, }, { # paid lossy 'url': 'https://promodj.com/tesla/tracks/342938/Library_Of_Bugs', - 'only_matching': True, - }, { - 'url': 'https://promodj.com/sergeyfedotov306/videos/7457627/V_Matrice_Sboy', - 'only_matching': True, + 'info_dict': { + 'id': '342938', + 'ext': 'mp3', + 'title': 'Library Of Bugs', + 'tags': ['Minimal Techno', 'Tech House'], + 'upload_date': '20080827', + 'timestamp': 1219841220.0, + 'duration': 64.0, + 'size': 2097152, + 'view_count': int, + }, }, { + # mp4 'url': 'https://promodj.com/djperetse/videos/5868236/Fatalist_Project_feat_DJ_Peretse_Den_pobedi_Videoklip', - 'only_matching': True, + 'info_dict': { + 'id': '5868236', + 'ext': 'mp4', + 'title': 'Fatalist Project feat. DJ Peretse - День победы (Видеоклип)', + 'tags': ['House', 'Progressive House'], + 'thumbnail': r're:^https?://', + 'upload_date': '20160505', + 'timestamp': 1462419720.0, + 'duration': 265045, + 'size': 165465292, + 'view_count': int, + 'channel': 'DJ Peretse', + 'channel_url': 'https://promodj.com/djperetse', + }, }, { # avi 'url': 'https://promodj.com/djmikis/videos/5311597/Mikis_Live_SDJ_Show', - 'only_matching': True, + 'info_dict': { + 'id': '5311597', + 'ext': 'avi', + 'title': 'Mikis Live @ SDJ Show', + 'tags': ['Club House'], + 'thumbnail': r're:^https?://', + 'upload_date': '20150409', + 'timestamp': 1428579840.0, + 'duration': 1716240, + 'size': 371195904, + 'view_count': int, + 'channel': 'MIKIS', + 'channel_url': 'https://promodj.com/djmikis', + }, }, { # asf 'url': 'https://promodj.com/gigsiphonic/videos/7559341/Gigsiphonic_PODCAST_309_Extended_video_version', - 'only_matching': True, + 'info_dict': { + 'id': '7559341', + 'ext': 'asf', + 'title': 'Gigsiphonic - PODCAST 309 (Extended video version)', + 'tags': ['Synthwave', 'Synth-Pop'], + 'thumbnail': r're:^https?://', + 'upload_date': '20240210', + 'timestamp': 1707580080.0, + 'duration': 4309200, + 'size': 3715146711, + 'view_count': int, + 'channel': 'Gigsiphonic', + 'channel_url': 'https://promodj.com/gigsiphonic', + }, }, { # not valid html 'url': 'https://promodj.com/martin.sehnal/videos/7555841/Martin_Sehnal_CII_33_Plus_CII_32_Clothes_on_the_peg_2_020_2_024_02_01th', - 'only_matching': True, + 'info_dict': { + 'id': '7555841', + 'ext': 'avi', + 'title': 'Martin Sehnal - CII 33 ( Plus CII 32 ) Clothes on the peg 2 020 ( 2 024 02. 01th ) )', + 'tags': ['Easy Listening', 'Drum & Bass'], + 'thumbnail': r're:^https?://', + 'upload_date': '20240201', + 'timestamp': 1706827560.0, + 'duration': 30000, + 'size': 2340757176, + 'view_count': int, + 'channel_url': 'https://promodj.com/martin.sehnal', + 'channel': 'Martin Sehnal', + }, }] _IS_PAID_RE = r'Цена:' @@ -514,16 +671,19 @@ class PromoDJIE(PromoDJBaseIE): size, size_unit = raw_size return int(float(size) * pow(1024, RU_SIZE_UNITS.index(size_unit))) - def _parse_media(self, html, id, type): - # videos always have one format - # audios can have one or two formats + # music: always have lossy format (mp3), sometimes have lossless (wav or flac) format + # video: sometimes have source format (mp4, avi, asf), always have converted for web format (mp4) + def _real_extract(self, url): + type, id = self._match_valid_url(url).groups() + html = self._download_webpage(url, id) - # always returns only one format - # if audio has two formats, returns only lossy + # always returns only one format: lossy mp3 for music or converted mp4 for video media_data = self._search_json( '', html, 'media data', id, contains_pattern=self._VIDEO_DATA_REGEX if type == 'videos' else self._MUSIC_DATA_REGEX, - transform_source=js_to_json) + transform_source=js_to_json, fatal=False, default=None) + if not media_data: + media_data = self._fetch_media_data([id], id)[0] metadata = self._parse_media_data(media_data, id) # html can be invalid @@ -532,25 +692,35 @@ class PromoDJIE(PromoDJBaseIE): except Exception: meta_html = html - # returns one or two formats but sometimes without download links - # best quality always comes first + # music: lossy format or lossless and lossy formats + # video: source format + # download links can be missing + # best quality format always comes first formats_from_html = re.findall(self._FORMATS_RE, meta_html) is_paid = re.search(self._IS_PAID_RE, meta_html) - bitrate_key = 'tbr' if type == 'videos' else 'abr' - for i, match in enumerate(formats_from_html): - url, _, bitrate = match - is_last = i == len(formats_from_html) - 1 - if is_last: - metadata['formats'][0][bitrate_key] = int(bitrate) - elif url_or_none(url) and not is_paid: - metadata['formats'].append({ - 'url': url, - bitrate_key: int(bitrate), - }) - # size field describes best quality size = self._parse_ru_size(re.search(self._SIZE_RE, meta_html).groups()) - metadata['formats'][-1]['size'] = size + if type == 'videos': + for url, _, bitrate in formats_from_html: + if url_or_none(url): + metadata['formats'].append({ + 'url': url, + 'tbr': int(bitrate), + 'size': size, + 'quality': 1, + }) + else: + for i, match in enumerate(formats_from_html): + url, _, bitrate = match + is_last = i == len(formats_from_html) - 1 + if is_last: + metadata['formats'][0]['abr'] = int(bitrate) + elif url_or_none(url) and not is_paid: + metadata['formats'].append({ + 'url': url, + 'abr': int(bitrate), + }) + metadata['formats'][-1]['size'] = size return merge_dicts(metadata, { 'title': clean_html(get_element_by_class('file_title', html)), @@ -560,11 +730,6 @@ class PromoDJIE(PromoDJBaseIE): 'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '), }) - def _real_extract(self, url): - type, id = self._match_valid_url(url).groups() - html = self._download_webpage(url, id) - return self._parse_media(html, id, type) - class PromoDJEmbedIE(PromoDJBaseIE): _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/embed/(?P\d+)/(?Pcover|big)' From 0c8466572fff01e9ec87fe4189d64d351314f8b9 Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Thu, 15 Feb 2024 00:53:12 +0300 Subject: [PATCH 08/21] [PromoDJ] Add more tests --- yt_dlp/extractor/promodj.py | 306 +++++++++++++++++++++++++++++------- 1 file changed, 246 insertions(+), 60 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index 9a79399fa..fecf30589 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -81,11 +81,11 @@ class PromoDJBaseIE(InfoExtractor): if self._get_current_page(html) != page + 1: return - for a in get_elements_html_by_class('player_standard_tool__play', html): + for a in get_elements_html_by_class('player_standard_tool__comments', html): url = traverse_obj(extract_attributes(a), ('href', {url_or_none})) if not url: continue - url = url.replace('?play=1', '') + url = url.replace('#comments', '') is_video = '/videos/' in url if is_video and 'video' in allowed_media_cats or not is_video and 'music' in allowed_media_cats: yield self.url_result(url, PromoDJIE) @@ -139,6 +139,7 @@ class PromoDJBaseIE(InfoExtractor): video = traverse_obj( self._parse_json(media_data['config'], id), ('playlist', 'item', 0)) formats = [{ + 'format_id': 'web', 'url': traverse_obj(video, ('play', '@url')).replace('?returnurl=1', ''), **traverse_obj(media_data, { 'width': ('width', {int_or_none}), @@ -162,7 +163,9 @@ class PromoDJBaseIE(InfoExtractor): 'url': ('URL', {url_or_none}), 'size': ('size', {int_or_none}), }) for source in traverse_obj(media_data, ('sources'))] - thumbnails = [{'url': url} for url in traverse_obj(media_data, ('coverURL', ('600', '1200', '2000'))) if url_or_none] + thumbnails = [{ + 'url': url, + } for url in traverse_obj(media_data, ('coverURL', ('600', '1200', '2000'))) if url_or_none(url)] return { 'id': id, 'title': clean_html(dict_get(media_data, ('title_html', 'title'))), @@ -178,11 +181,13 @@ class PromoDJPageIE(PromoDJBaseIE): _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{_PAGES_RE})' _TESTS = [{ 'url': 'https://promodj.com/featured', - 'only_matching': True, - }, { - # second page - 'url': 'https://promodj.com/featured/rap?download=1&page=2', - 'only_matching': True, + 'info_dict': { + 'id': 'featured', + }, + 'playlist_count': 40, + 'params': { + 'playlistend': 40, + }, }, { # filtered 'url': 'https://promodj.com/remixes?top=1', @@ -196,9 +201,25 @@ class PromoDJPageIE(PromoDJBaseIE): 'url': 'https://promodj.com/mixes?kind=mixes&styleID=&searchfor=dance', 'only_matching': True, }, { - # no download button + # shop 'url': 'https://promodj.com/shop', - 'only_matching': True, + 'info_dict': { + 'id': 'shop', + }, + 'playlist_count': 20, + 'params': { + 'playlistend': 20, + }, + }, { + # videos + 'url': 'https://promodj.com/videos', + 'info_dict': { + 'id': 'videos', + }, + 'playlist_count': 20, + 'params': { + 'playlistend': 20, + }, }] _PAGE_SIZE = 20 @@ -215,11 +236,25 @@ class PromoDJPageIE(PromoDJBaseIE): class PromoDJUserIE(PromoDJBaseIE): _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})$' _TESTS = [{ - 'url': 'https://promodj.com/djperetse', - 'only_matching': True, - }, { 'url': 'https://promodj.com/dj-trojan', - 'only_matching': True, + 'info_dict': { + 'id': 'dj-trojan', + }, + 'playlist_mincount': 89, + }, { + # with default video playlist + 'url': 'https://promodj.com/djperetse', + 'info_dict': { + 'id': 'djperetse', + }, + 'playlist_mincount': 15, + }, { + # without any playlists + 'url': 'https://promodj.com/slim96', + 'info_dict': { + 'id': 'slim96', + }, + 'playlist_count': 0, }] def _real_extract(self, url): @@ -236,15 +271,31 @@ class PromoDJUserIE(PromoDJBaseIE): class PromoDJUserMediaIE(PromoDJBaseIE): _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?Pmusic|video)$' _TESTS = [{ - 'url': 'https://promodj.com/feel/music', - 'only_matching': True, + 'url': 'https://promodj.com/worobyev/music', + 'info_dict': { + 'id': 'worobyev-music', + }, + 'playlist_mincount': 11, }, { - 'url': 'https://promodj.com/djmikis/video', - 'only_matching': True, + # no music + 'url': 'https://promodj.com/xsev71/music', + 'info_dict': { + 'id': 'xsev71-music', + }, + 'playlist_count': 0, }, { - # a user without any videos + 'url': 'https://promodj.com/cosmonaut/video', + 'info_dict': { + 'id': 'cosmonaut-video', + }, + 'playlist_mincount': 2, + }, { + # no video 'url': 'https://promodj.com/worobyev/video', - 'only_matching': True, + 'info_dict': { + 'id': 'worobyev-video', + }, + 'playlist_count': 0, }] def _real_extract(self, url): @@ -264,19 +315,40 @@ class PromoDJUserPagesIE(PromoDJBaseIE): _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?Ppages|blog)$' _TESTS = [{ 'url': 'https://promodj.com/djperetse/pages', - 'only_matching': True, + 'info_dict': { + 'id': 'djperetse-pages', + }, + 'playlist_count': 10, }, { - 'url': 'https://promodj.com/golub/blog', - 'only_matching': True, + # no pages + 'url': 'https://promodj.com/djlosev/pages', + 'info_dict': { + 'id': 'djlosev-pages', + }, + 'playlist_count': 0, + }, { + 'url': 'https://promodj.com/ivanroudyk/blog', + 'info_dict': { + 'id': 'ivanroudyk-blog', + }, + 'playlist_mincount': 37, + }, { + # no blog + 'url': 'https://promodj.com/worobyev/blog', + 'info_dict': { + 'id': 'worobyev-blog', + }, + 'playlist_count': 0, }] _PAGE_SIZE = 10 def _parse_pages(self, url, playlist_id): html = self._download_webpage(url, playlist_id) - content_html = get_element_by_class('dj_universal', get_element_by_class('dj_bblock', html)) - for page_url, page_title in re.findall(r'([^<]+)', content_html): - yield self.url_result(page_url, PromoDJUserPageIE, video_title=page_title) + content_html = get_element_by_class('dj_content ', html) + if pages_html := get_element_by_class('dj_universal', content_html): + for page_url, page_title in re.findall(r'([^<]+)', pages_html): + yield self.url_result(page_url, PromoDJUserPageIE, video_title=page_title) def _fetch_blogs_page(self, url, playlist_id, page): page_url = self._set_url_page(url, page + 1) @@ -318,7 +390,10 @@ class PromoDJUserPageIE(PromoDJBaseIE): _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?P{_USER_PAGE_RE})$' _TESTS = [{ 'url': 'https://promodj.com/djperetse/MaxMixes', - 'only_matching': True, + 'info_dict': { + 'id': 'djperetse-MaxMixes', + }, + 'playlist_count': 5, }] def _real_extract(self, url): @@ -331,24 +406,40 @@ class PromoDJUserPageIE(PromoDJBaseIE): class PromoDJBlogPageIE(PromoDJBaseIE): - _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/blog/(?P\d+)(?:/(?P\w+))?' + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/blog/(?P\d+)(?:/\w+)?' _TESTS = [{ # with small and big audio players and youtube video 'url': 'https://promodj.com/golub/blog/1163895/DJ_Andrey_Golubev_To_Depeche_Mode_with_love_part_9_special_dj_edits_mix', - 'only_matching': True, + 'info_dict': { + 'id': 'golub-blog-1163895', + }, + 'playlist_count': 13, }, { # with audio and video 'url': 'https://promodj.com/svetmusic/blog/1101958/SVET_I_Like_It_Extra_Sound_Recordings', - 'only_matching': True, + 'info_dict': { + 'id': 'svetmusic-blog-1101958', + }, + 'playlist_count': 5, }, { # without any media 'url': 'https://promodj.com/svetmusic/blog/915878/DJ_SVET_pobeditel_konkursa_Burn_City_Sound', - 'only_matching': True, + 'info_dict': { + 'id': 'svetmusic-blog-915878', + }, + 'playlist_count': 0, + }, { + # with deleted and blocked music + 'url': 'https://promodj.com/djperetse/blog/1048739/DJ_Peretse_i_Coca_Cola_obyavlyayut_MEGAMIX_BATTLE_2015', + 'info_dict': { + 'id': 'djperetse-blog-1048739', + }, + 'playlist_count': 29, }] def _real_extract(self, url): - login, id, slug = self._match_valid_url(url).groups() - page_id = f'{login}-blog-{id}-{slug}' + login, id = self._match_valid_url(url).groups() + page_id = f'{login}-blog-{id}' html = self._download_webpage(url, page_id) content_html = get_element_by_class('post_body', html) return self.playlist_result( @@ -361,29 +452,57 @@ class PromoDJPlaylistIE(PromoDJBaseIE): rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?Pgroups)/(?P\d+)(?:/(?P\w+))?', ] _TESTS = [{ - # default playlist: tracks (audio) + # default playlist: music (with songs without player) 'url': 'https://promodj.com/gluk/tracks', - 'only_matching': True, + 'info_dict': { + 'id': 'gluk-tracks', + }, + 'playlist_mincount': 29, + }, { + # default playlist: with pagination + 'url': 'https://promodj.com/gluk/mixes', + 'info_dict': { + 'id': 'gluk-mixes', + }, + 'playlist_count': 60, + 'params': { + 'playlistend': 60, + }, }, { # default playlist: video 'url': 'https://promodj.com/djperetse/videos', - 'only_matching': True, + 'info_dict': { + 'id': 'djperetse-videos', + }, + 'playlist_mincount': 6, }, { # user playlist: audio 'url': 'https://promodj.com/fonarev/groups/608158/Digital_Emotions_Night', - 'only_matching': True, + 'info_dict': { + 'id': 'fonarev-groups-608158', + }, + 'playlist_mincount': 9, }, { - # two pages + # user playlist: with pagination 'url': 'https://promodj.com/lavrov/groups/677132/VINYL', - 'only_matching': True, + 'info_dict': { + 'id': 'lavrov-groups-677132', + }, + 'playlist_mincount': 33, }, { # user playlist: video 'url': 'https://promodj.com/deeplecture/groups/672782/LAROCCA_TV', - 'only_matching': True, + 'info_dict': { + 'id': 'deeplecture-groups-672782', + }, + 'playlist_mincount': 4, }, { # user playlist: audio and video 'url': 'https://promodj.com/djperetse/groups/637358/Russkie_treki', - 'only_matching': True, + 'info_dict': { + 'id': 'djperetse-groups-637358', + }, + 'playlist_mincount': 17, }, { # 900+ items 'url': 'https://promodj.com/fonarev/groups/17350/Digital_Emotions_Podcast', @@ -406,10 +525,12 @@ class PromoDJPlaylistIE(PromoDJBaseIE): class PromoDJMusicPlaylistIE(PromoDJPlaylistIE): + _VALID_URL = [] _ALLOWED_MEDIA_CATS = ['music'] class PromoDJVideoPlaylistIE(PromoDJPlaylistIE): + _VALID_URL = [] _ALLOWED_MEDIA_CATS = ['video'] @@ -429,18 +550,17 @@ class PromoDJIE(PromoDJBaseIE): 'view_count': int, }, }, { + # samples type 'url': 'https://promodj.com/j-factory/samples/7560171/Amedici_BW1_Intro', - 'info_dict': { - 'id': '7560171', - 'ext': 'mp3', - 'title': 'Amedici - BW1 - Intro', - 'tags': ['Multitrack master', 'Fx'], - 'upload_date': '20240212', - 'timestamp': 1707748800.0, - 'duration': 21.0, - 'size': 838041, - 'view_count': int, - }, + 'only_matching': True, + }, { + # acapellas type + 'url': 'https://promodj.com/cosmonaut/acapellas/200970/Kosmonavt_golosovoe_ID', + 'only_matching': True, + }, { + # realtones type + 'url': 'https://promodj.com/plashstringer/realtones/965489/bomba_bomba', + 'only_matching': True, }, { # music: no download links in html 'url': 'https://promodj.com/gluk/tracks/4713922/DJ_Glyuk_Folk_ing_DJ_Steven_Smile_Remix_2005', @@ -489,6 +609,10 @@ class PromoDJIE(PromoDJBaseIE): 'params': { 'skip_download': 'Link is broken', }, + }, { + # no player (the link from html is broken but the link from API is ok) + 'url': 'https://promodj.com/scratchin/remixes/374580/Katya_First_Perestala_DJ_Ivan_Scratchin_Mix', + 'only_matching': True, }, { # without slug 'url': 'https://promodj.com/djlykov/tracks/7551590', @@ -632,7 +756,7 @@ class PromoDJIE(PromoDJBaseIE): _IS_PAID_RE = r'Цена:' # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит # https://regex101.com/r/2AuaxB/1 - _FORMATS_RE = r'(?:[^\"]+)\">)?\s*(?P\w+), (?P\d+) Кбит' + _FORMATS_RE = r'(?:[^\"]+)\">)?\s*\w+, (?P\d+) Кбит' _VIEW_COUNT_RE = r'(?:Прослушиваний|Просмотров):\s*(\d+)' # examples: 0:21 | 1:07 | 74:38 _DURATION_RE = r'Продолжительность:\s*(\d+:\d{2})' @@ -701,9 +825,10 @@ class PromoDJIE(PromoDJBaseIE): # size field describes best quality size = self._parse_ru_size(re.search(self._SIZE_RE, meta_html).groups()) if type == 'videos': - for url, _, bitrate in formats_from_html: + for url, bitrate in formats_from_html: if url_or_none(url): metadata['formats'].append({ + 'format_id': 'source', 'url': url, 'tbr': int(bitrate), 'size': size, @@ -711,7 +836,7 @@ class PromoDJIE(PromoDJBaseIE): }) else: for i, match in enumerate(formats_from_html): - url, _, bitrate = match + url, bitrate = match is_last = i == len(formats_from_html) - 1 if is_last: metadata['formats'][0]['abr'] = int(bitrate) @@ -726,7 +851,7 @@ class PromoDJIE(PromoDJBaseIE): 'title': clean_html(get_element_by_class('file_title', html)), 'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count', default=None)), 'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')), - 'timestamp': self._parse_ru_date(re.findall(self._TIMESTAMP_RE, meta_html)[0]), + 'timestamp': self._parse_ru_date(re.search(self._TIMESTAMP_RE, meta_html).groups()), 'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '), }) @@ -735,14 +860,47 @@ class PromoDJEmbedIE(PromoDJBaseIE): _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/embed/(?P\d+)/(?Pcover|big)' _TESTS = [{ 'url': 'https://promodj.com/embed/7555440/cover', - 'only_matching': True, + 'info_dict': { + 'id': '7555440', + 'ext': 'mp3', + 'title': 'Kolya Funk - Exclusive Mix (February 2024)', + 'tags': ['House', 'Indie Dance'], + 'upload_date': '20240131', + 'timestamp': 1706738400.0, + 'duration': 3697.0, + 'size': 148478361, + 'view_count': int, + }, }, { 'url': 'https://promodj.com/embed/7540163/big', - 'only_matching': True, + 'info_dict': { + 'id': '7540163', + 'ext': 'mp3', + 'title': 'Khalif - Amore (Akif Pro Remix)', + 'tags': ['Deep House', 'Slap House'], + 'upload_date': '20231224', + 'timestamp': 1703418600.0, + 'duration': 157.0, + 'size': 8178892, + 'view_count': int, + }, }, { # video (can be only big) 'url': 'https://promodj.com/embed/3922099/big', - 'only_matching': True, + 'info_dict': { + 'id': '3922099', + 'ext': 'mp4', + 'title': 'Will I Am & Britney Spears - Scream & Shout (DJ Nejtrino & DJ Stranger Remix) Video Full HD', + 'tags': ['Club House', 'Vocal House'], + 'thumbnail': r're:^https?://', + 'upload_date': '20130211', + 'timestamp': 1360583760.0, + 'duration': 234560, + 'size': 309644492, + 'view_count': int, + 'channel_url': 'https://promodj.com/dj-stranger', + 'channel': 'DJ Stranger', + }, }, { # blocked 'url': 'https://promodj.com/embed/5586967/big', @@ -763,8 +921,36 @@ class PromoDJEmbedIE(PromoDJBaseIE): class PromoDJShortIE(PromoDJBaseIE): _VALID_URL = r'https://pdj.cc/(?P\w+)' _TESTS = [{ + # music 'url': 'https://pdj.cc/fv8VD', - 'only_matching': True, + 'info_dict': { + 'id': '7422493', + 'ext': 'flac', + 'title': 'Sasha Orbeat — Pure Love 3', + 'tags': ['Lo-Fi', 'Downtempo'], + 'upload_date': '20230213', + 'timestamp': 1676306160.0, + 'duration': 3631.0, + 'size': 685139558, + 'view_count': int, + }, + }, { + # video + 'url': 'https://pdj.cc/fvcpX', + 'info_dict': { + 'id': '7435905', + 'ext': 'mp4', + 'title': 'JULIA - DEBRI FM (guest mix 18.03.23)', + 'tags': ['Drum & Bass'], + 'thumbnail': r're:^https?://', + 'upload_date': '20230321', + 'timestamp': 1679441100.0, + 'duration': 2329640, + 'size': 2952790016, + 'view_count': int, + 'channel': 'JULIA', + 'channel_url': 'https://promodj.com/julia-breaks', + }, }] def _real_extract(self, url): From c820715205c9b8999e38c75c324a84eaff8eab84 Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Thu, 15 Feb 2024 11:53:12 +0300 Subject: [PATCH 09/21] [PromoDJ] Fix parse data and size functions --- yt_dlp/extractor/promodj.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index fecf30589..974cad7e2 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -761,7 +761,7 @@ class PromoDJIE(PromoDJBaseIE): # examples: 0:21 | 1:07 | 74:38 _DURATION_RE = r'Продолжительность:\s*(\d+:\d{2})' # examples: 818.4 Кб | 12.9 Мб | 4 Гб | 1.76 Гб | 1001.5 Мб - _SIZE_RE = r'Размер:\s*(?P\d+(?:\.\d+)?)\s*(?PКб|Мб|Гб)' + _SIZE_RE = r'Размер:\s*(?P\d+(?:\.\d+)?)\s*(?PБ|Кб|Мб|Гб|Тб)' # examples: сегодня 2:55 | вчера 23:17 | 1 июня 2016 3:46 _TIMESTAMP_RE = r'Публикация:\s*(?Pвчера|сегодня|\d{1,2})(?: (?P[а-я]+) (?P\d{4}))?\s*(?P\d{1,2}):(?P\d{2})' _TAGS_RE = r'([^\n]+)' @@ -771,9 +771,8 @@ class PromoDJIE(PromoDJBaseIE): # https://regex101.com/r/b9utBf/1 _VIDEO_DATA_REGEX = r'({\"video\":true,\"config\":[^\n]+)\);' - def _parse_ru_date(self, raw_date): + def _parse_ru_date(self, day, month, year, hours, minutes): RU_MONTHS = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря'] - day, month, year, hours, minutes = raw_date if day == 'сегодня': d = datetime.date.today() day = d.day @@ -790,10 +789,9 @@ class PromoDJIE(PromoDJBaseIE): year = int(year) return datetime.datetime(year, month, day, int(hours), int(minutes)).timestamp() - def _parse_ru_size(self, raw_size): - RU_SIZE_UNITS = ['Б', 'Кб', 'Мб', 'Гб'] - size, size_unit = raw_size - return int(float(size) * pow(1024, RU_SIZE_UNITS.index(size_unit))) + def _parse_ru_size(self, size, unit): + RU_SIZE_UNITS = ['Б', 'Кб', 'Мб', 'Гб', 'Тб'] + return int(float(size) * pow(1024, RU_SIZE_UNITS.index(unit))) # music: always have lossy format (mp3), sometimes have lossless (wav or flac) format # video: sometimes have source format (mp4, avi, asf), always have converted for web format (mp4) @@ -823,7 +821,7 @@ class PromoDJIE(PromoDJBaseIE): formats_from_html = re.findall(self._FORMATS_RE, meta_html) is_paid = re.search(self._IS_PAID_RE, meta_html) # size field describes best quality - size = self._parse_ru_size(re.search(self._SIZE_RE, meta_html).groups()) + size = self._parse_ru_size(*re.search(self._SIZE_RE, meta_html).groups()) if type == 'videos': for url, bitrate in formats_from_html: if url_or_none(url): @@ -851,7 +849,7 @@ class PromoDJIE(PromoDJBaseIE): 'title': clean_html(get_element_by_class('file_title', html)), 'view_count': int_or_none(self._search_regex(self._VIEW_COUNT_RE, meta_html, 'view_count', default=None)), 'duration': parse_duration(self._search_regex(self._DURATION_RE, meta_html, 'duration')), - 'timestamp': self._parse_ru_date(re.search(self._TIMESTAMP_RE, meta_html).groups()), + 'timestamp': self._parse_ru_date(*re.search(self._TIMESTAMP_RE, meta_html).groups()), 'tags': self._html_search_regex(self._TAGS_RE, meta_html, 'tags').split(', '), }) From c837d90e126e7e8b2a6f03221a6163d5f3e7a03d Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Thu, 15 Feb 2024 12:21:53 +0300 Subject: [PATCH 10/21] [PromoDJ] Add support for user's best media playlist --- yt_dlp/extractor/promodj.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index 974cad7e2..8d5a14c49 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -114,10 +114,6 @@ class PromoDJBaseIE(InfoExtractor): if YoutubeIE.suitable(iframe_url): yield self.url_result(iframe_url, YoutubeIE) - def _get_playlist_page_size(self, url): - is_default_playlist = '/groups/' not in url - return 30 if is_default_playlist else 20 - def _get_current_page(self, html): return int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1') @@ -382,6 +378,7 @@ class PromoDJUserPageIE(PromoDJBaseIE): 'blog', 'feedback', 'contact', + 'uenno', *PromoDJBaseIE._MEDIA_TYPES, ] _NOT_USER_PAGE_RE = '|'.join(_USER_PAGES) @@ -447,8 +444,11 @@ class PromoDJBlogPageIE(PromoDJBaseIE): class PromoDJPlaylistIE(PromoDJBaseIE): + _PLAYLIST_TYPES = ['uenno', *PromoDJBaseIE._MEDIA_TYPES] + _PLAYLIST_TYPES_RE = '|'.join(_PLAYLIST_TYPES) + _VALID_URL = [ - rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?P{PromoDJBaseIE._MEDIA_TYPES_RE})$', + rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?P{_PLAYLIST_TYPES_RE})$', rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?Pgroups)/(?P\d+)(?:/(?P\w+))?', ] _TESTS = [{ @@ -507,20 +507,36 @@ class PromoDJPlaylistIE(PromoDJBaseIE): # 900+ items 'url': 'https://promodj.com/fonarev/groups/17350/Digital_Emotions_Podcast', 'only_matching': True, + }, { + # user's best music and video + 'url': 'https://promodj.com/djbaribyn/uenno', + 'info_dict': { + 'id': 'djbaribyn-uenno', + }, + 'playlist_count': 15, + 'params': { + 'playlistend': 15, + } }] _ALLOWED_MEDIA_CATS = ['music', 'video'] + def _get_page_size(self, url): + if '/uenno' in url: + return 15 + if '/groups/' in url: + return 30 + return 20 + def _real_extract(self, url): match = self._match_valid_url(url) login = match.group('login') type = match.group('type') playlist_id = f'{login}-{type}' if len(match.groups()) == 2 else f'{login}-{type}-{match.group("id")}' - page_size = self._get_playlist_page_size(url) entries = OnDemandPagedList( functools.partial(self._fetch_page, url, self._ALLOWED_MEDIA_CATS, playlist_id), - page_size) + self._get_page_size(url)) return self.playlist_result(entries, playlist_id=playlist_id) From e6f3e6de0e152640771055618f46a4aa609ca7e1 Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Thu, 15 Feb 2024 12:34:50 +0300 Subject: [PATCH 11/21] [PromoDJ] Fix paid music metadata --- yt_dlp/extractor/promodj.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index 8d5a14c49..4f29a4347 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -769,7 +769,6 @@ class PromoDJIE(PromoDJBaseIE): }, }] - _IS_PAID_RE = r'Цена:' # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит # https://regex101.com/r/2AuaxB/1 _FORMATS_RE = r'(?:[^\"]+)\">)?\s*\w+, (?P\d+) Кбит' @@ -835,7 +834,7 @@ class PromoDJIE(PromoDJBaseIE): # download links can be missing # best quality format always comes first formats_from_html = re.findall(self._FORMATS_RE, meta_html) - is_paid = re.search(self._IS_PAID_RE, meta_html) + is_paid = 'Цена:' in meta_html # size field describes best quality size = self._parse_ru_size(*re.search(self._SIZE_RE, meta_html).groups()) if type == 'videos': @@ -848,13 +847,13 @@ class PromoDJIE(PromoDJBaseIE): 'size': size, 'quality': 1, }) - else: + elif not is_paid: for i, match in enumerate(formats_from_html): url, bitrate = match is_last = i == len(formats_from_html) - 1 if is_last: metadata['formats'][0]['abr'] = int(bitrate) - elif url_or_none(url) and not is_paid: + elif url_or_none(url): metadata['formats'].append({ 'url': url, 'abr': int(bitrate), From 7e96492ba0dd902261c3e308a6c6d8d129c80af8 Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Thu, 15 Feb 2024 12:39:46 +0300 Subject: [PATCH 12/21] [PromoDJ] Fix page size for playlists --- yt_dlp/extractor/promodj.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index 4f29a4347..cdf33741f 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -521,10 +521,10 @@ class PromoDJPlaylistIE(PromoDJBaseIE): _ALLOWED_MEDIA_CATS = ['music', 'video'] - def _get_page_size(self, url): - if '/uenno' in url: + def _get_page_size(self, type): + if type == 'uenno': return 15 - if '/groups/' in url: + if type == 'groups': return 30 return 20 @@ -536,7 +536,7 @@ class PromoDJPlaylistIE(PromoDJBaseIE): entries = OnDemandPagedList( functools.partial(self._fetch_page, url, self._ALLOWED_MEDIA_CATS, playlist_id), - self._get_page_size(url)) + self._get_page_size(type)) return self.playlist_result(entries, playlist_id=playlist_id) From 1b3c186424df4f3acfa89cf9685768d5d150e088 Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Thu, 15 Feb 2024 12:41:29 +0300 Subject: [PATCH 13/21] [PromoDJ] Fix page size for playlists --- yt_dlp/extractor/promodj.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index cdf33741f..dae880352 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -525,8 +525,8 @@ class PromoDJPlaylistIE(PromoDJBaseIE): if type == 'uenno': return 15 if type == 'groups': - return 30 - return 20 + return 20 + return 30 def _real_extract(self, url): match = self._match_valid_url(url) From 99dec4d6ed065ba8da0ac2a7533d72f1b8759b6b Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Thu, 15 Feb 2024 13:48:13 +0300 Subject: [PATCH 14/21] [PromoDJ] Add music format ids --- yt_dlp/extractor/promodj.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index dae880352..0c61b039f 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -155,10 +155,11 @@ class PromoDJBaseIE(InfoExtractor): }) } - formats = [traverse_obj(source, { - 'url': ('URL', {url_or_none}), - 'size': ('size', {int_or_none}), - }) for source in traverse_obj(media_data, ('sources'))] + formats = [{ + 'format_id': 'lossy', + 'url': traverse_obj(source, ('URL', {url_or_none})), + 'size': traverse_obj(source, ('size', {int_or_none})), + } for source in traverse_obj(media_data, ('sources'))] thumbnails = [{ 'url': url, } for url in traverse_obj(media_data, ('coverURL', ('600', '1200', '2000'))) if url_or_none(url)] @@ -855,6 +856,7 @@ class PromoDJIE(PromoDJBaseIE): metadata['formats'][0]['abr'] = int(bitrate) elif url_or_none(url): metadata['formats'].append({ + 'format_id': 'lossless', 'url': url, 'abr': int(bitrate), }) From 900bc5f708199d3c4bf5190d156942d686e935ed Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Thu, 15 Feb 2024 21:32:19 +0300 Subject: [PATCH 15/21] [PromoDJ] Refactor fetch_media_data and regexes --- yt_dlp/extractor/promodj.py | 60 ++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index 0c61b039f..e629efb01 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -65,9 +65,8 @@ class PromoDJBaseIE(InfoExtractor): _PAGES = ['featured', 'shop', *_MEDIA_TYPES] _BASE_URL_RE = r'https?://(?:www\.)?promodj\.com' - _MEDIA_TYPES_RE = '|'.join(_MEDIA_TYPES) - _NOT_PAGE_RE = '|'.join(['radio', *_PAGES]) - _LOGIN_RE = rf'(?:(?!{_NOT_PAGE_RE}).)[\w.-]+' + _NOT_LOGIN_LIST = '|'.join(['radio', *_PAGES]) + _LOGIN_RE = rf'(?:(?!{_NOT_LOGIN_LIST}).)[\w.-]+' def _set_url_page(self, url, page): parsed_url = urllib.parse.urlparse(url) @@ -117,15 +116,15 @@ class PromoDJBaseIE(InfoExtractor): def _get_current_page(self, html): return int(clean_html(get_element_by_class('NavigatorCurrentPage', html)) or '1') - def _fetch_media_data(self, ids, video_id): - data = {} - for i, id in enumerate(ids): - data[f'multi[{i}][method]'] = 'players/config' - data[f'multi[{i}][params][kind]'] = 'cover.big' - data[f'multi[{i}][params][fileID]'] = id + def _fetch_media_data(self, id): + data = { + 'multi[0][method]': 'players/config', + 'multi[0][params][kind]': 'cover.big', + 'multi[0][params][fileID]': id, + } return self._download_json( - 'https://promodj.com/api/multi.json', video_id, data=urlencode_postdata(data), - headers={'Content-Type': 'application/x-www-form-urlencoded'}) + 'https://promodj.com/api/multi.json', id, data=urlencode_postdata(data), + headers={'Content-Type': 'application/x-www-form-urlencoded'})[0] def _parse_media_data(self, media_data, id): if player_error := media_data.get('player_error'): @@ -173,9 +172,9 @@ class PromoDJBaseIE(InfoExtractor): class PromoDJPageIE(PromoDJBaseIE): - _PAGES_RE = '|'.join(PromoDJBaseIE._PAGES) + _PAGES_LIST = '|'.join(PromoDJBaseIE._PAGES) - _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{_PAGES_RE})' + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{_PAGES_LIST})' _TESTS = [{ 'url': 'https://promodj.com/featured', 'info_dict': { @@ -370,7 +369,7 @@ class PromoDJUserPagesIE(PromoDJBaseIE): class PromoDJUserPageIE(PromoDJBaseIE): - _USER_PAGES = [ + _USER_PATHS = [ 'pages', 'music', 'video', @@ -382,8 +381,8 @@ class PromoDJUserPageIE(PromoDJBaseIE): 'uenno', *PromoDJBaseIE._MEDIA_TYPES, ] - _NOT_USER_PAGE_RE = '|'.join(_USER_PAGES) - _USER_PAGE_RE = rf'(?:(?!{_NOT_USER_PAGE_RE}).)[\w-]+' + _NOT_USER_PAGE_LIST = '|'.join(_USER_PATHS) + _USER_PAGE_RE = rf'(?:(?!{_NOT_USER_PAGE_LIST}).)[\w-]+' _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?P{_USER_PAGE_RE})$' _TESTS = [{ @@ -445,12 +444,11 @@ class PromoDJBlogPageIE(PromoDJBaseIE): class PromoDJPlaylistIE(PromoDJBaseIE): - _PLAYLIST_TYPES = ['uenno', *PromoDJBaseIE._MEDIA_TYPES] - _PLAYLIST_TYPES_RE = '|'.join(_PLAYLIST_TYPES) + _PLAYLIST_TYPES_LIST = '|'.join(['uenno', *PromoDJBaseIE._MEDIA_TYPES]) _VALID_URL = [ - rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?P{_PLAYLIST_TYPES_RE})$', - rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?Pgroups)/(?P\d+)(?:/(?P\w+))?', + rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?P{_PLAYLIST_TYPES_LIST})$', + rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?Pgroups)/(?P\d+)(?:/\w+)?', ] _TESTS = [{ # default playlist: music (with songs without player) @@ -552,7 +550,9 @@ class PromoDJVideoPlaylistIE(PromoDJPlaylistIE): class PromoDJIE(PromoDJBaseIE): - _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/{PromoDJBaseIE._LOGIN_RE}/(?P{PromoDJBaseIE._MEDIA_TYPES_RE})/(?P\d+)(?:/\w+)?', + _MEDIA_TYPES_LIST = '|'.join(PromoDJBaseIE._MEDIA_TYPES) + + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/{PromoDJBaseIE._LOGIN_RE}/(?P{_MEDIA_TYPES_LIST})/(?P\d+)(?:/\w+)?', _TESTS = [{ 'url': 'https://promodj.com/antonpavlovsky/remixes/6259208/David_Usher_Black_Black_Heart_Anton_Pavlovsky_Cover', 'info_dict': { @@ -683,7 +683,7 @@ class PromoDJIE(PromoDJBaseIE): 'upload_date': '20100404', 'timestamp': 1270376700.0, 'duration': 321.0, - 'size': 56623104, + 'size': 5128821, 'view_count': int, }, }, { @@ -697,7 +697,7 @@ class PromoDJIE(PromoDJBaseIE): 'upload_date': '20080827', 'timestamp': 1219841220.0, 'duration': 64.0, - 'size': 2097152, + 'size': 1014431, 'view_count': int, }, }, { @@ -783,9 +783,9 @@ class PromoDJIE(PromoDJBaseIE): _TAGS_RE = r'([^\n]+)' # https://regex101.com/r/2ZkUmW/1 - _MUSIC_DATA_REGEX = r'({\"no_preroll\":false,\"seekAny\":true,\"sources\":[^\n]+)\);' + _MUSIC_DATA_RE = r'({\"no_preroll\":false,\"seekAny\":true,\"sources\":[^\n]+)\);' # https://regex101.com/r/b9utBf/1 - _VIDEO_DATA_REGEX = r'({\"video\":true,\"config\":[^\n]+)\);' + _VIDEO_DATA_RE = r'({\"video\":true,\"config\":[^\n]+)\);' def _parse_ru_date(self, day, month, year, hours, minutes): RU_MONTHS = ['января', 'февраля', 'марта', 'апреля', 'мая', 'июня', 'июля', 'августа', 'сентября', 'октября', 'ноября', 'декабря'] @@ -818,10 +818,10 @@ class PromoDJIE(PromoDJBaseIE): # always returns only one format: lossy mp3 for music or converted mp4 for video media_data = self._search_json( '', html, 'media data', id, - contains_pattern=self._VIDEO_DATA_REGEX if type == 'videos' else self._MUSIC_DATA_REGEX, + contains_pattern=self._VIDEO_DATA_RE if type == 'videos' else self._MUSIC_DATA_RE, transform_source=js_to_json, fatal=False, default=None) if not media_data: - media_data = self._fetch_media_data([id], id)[0] + media_data = self._fetch_media_data(id) metadata = self._parse_media_data(media_data, id) # html can be invalid @@ -872,7 +872,7 @@ class PromoDJIE(PromoDJBaseIE): class PromoDJEmbedIE(PromoDJBaseIE): - _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/embed/(?P\d+)/(?Pcover|big)' + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/embed/(?P\d+)/(?:cover|big)' _TESTS = [{ 'url': 'https://promodj.com/embed/7555440/cover', 'info_dict': { @@ -929,12 +929,12 @@ class PromoDJEmbedIE(PromoDJBaseIE): def _real_extract(self, url): id = self._match_id(url) metadata = self._parse_media_data( - self._fetch_media_data([id], id)[0], id) + self._fetch_media_data(id), id) return self.url_result(metadata['webpage_url'], PromoDJIE, id) class PromoDJShortIE(PromoDJBaseIE): - _VALID_URL = r'https://pdj.cc/(?P\w+)' + _VALID_URL = r'https://(?:www\\.)?pdj.cc/(?P\w+)' _TESTS = [{ # music 'url': 'https://pdj.cc/fv8VD', From ed61b73bcd3b9f14746d4f6be203ed2da5727b2b Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Thu, 15 Feb 2024 21:50:13 +0300 Subject: [PATCH 16/21] [PromoDJ] Remove player's width and height --- yt_dlp/extractor/promodj.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index e629efb01..4fdcad85a 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -136,10 +136,6 @@ class PromoDJBaseIE(InfoExtractor): formats = [{ 'format_id': 'web', 'url': traverse_obj(video, ('play', '@url')).replace('?returnurl=1', ''), - **traverse_obj(media_data, { - 'width': ('width', {int_or_none}), - 'height': ('height', {int_or_none}), - }) }] return { 'id': id, From 345d01a175f0609c175a2141f7d552d919fae05e Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Fri, 16 Feb 2024 23:22:57 +0300 Subject: [PATCH 17/21] [PromoDJ] Fix negative lookahead check --- yt_dlp/extractor/promodj.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index 4fdcad85a..da1beb8b2 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -66,7 +66,7 @@ class PromoDJBaseIE(InfoExtractor): _BASE_URL_RE = r'https?://(?:www\.)?promodj\.com' _NOT_LOGIN_LIST = '|'.join(['radio', *_PAGES]) - _LOGIN_RE = rf'(?:(?!{_NOT_LOGIN_LIST}).)[\w.-]+' + _LOGIN_RE = rf'(?!{_NOT_LOGIN_LIST})[\w.-]+' def _set_url_page(self, url, page): parsed_url = urllib.parse.urlparse(url) @@ -378,7 +378,7 @@ class PromoDJUserPageIE(PromoDJBaseIE): *PromoDJBaseIE._MEDIA_TYPES, ] _NOT_USER_PAGE_LIST = '|'.join(_USER_PATHS) - _USER_PAGE_RE = rf'(?:(?!{_NOT_USER_PAGE_LIST}).)[\w-]+' + _USER_PAGE_RE = rf'(?!{_NOT_USER_PAGE_LIST})[\w-]+' _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?P{_USER_PAGE_RE})$' _TESTS = [{ From e32ba3fc218d15ff59c63d95e3c6210184199c20 Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Sat, 17 Feb 2024 04:16:05 +0300 Subject: [PATCH 18/21] [PromoDJ] Fix login regex --- yt_dlp/extractor/promodj.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index da1beb8b2..f42d6ed2d 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -65,8 +65,8 @@ class PromoDJBaseIE(InfoExtractor): _PAGES = ['featured', 'shop', *_MEDIA_TYPES] _BASE_URL_RE = r'https?://(?:www\.)?promodj\.com' - _NOT_LOGIN_LIST = '|'.join(['radio', *_PAGES]) - _LOGIN_RE = rf'(?!{_NOT_LOGIN_LIST})[\w.-]+' + _NOT_LOGIN_LIST = '|'.join(['radio', 'embed', *_PAGES]) + _LOGIN_RE = rf'(?!(?:{_NOT_LOGIN_LIST})(?:/|$))[\w.-]+' def _set_url_page(self, url, page): parsed_url = urllib.parse.urlparse(url) @@ -247,6 +247,10 @@ class PromoDJUserIE(PromoDJBaseIE): 'id': 'slim96', }, 'playlist_count': 0, + }, { + # login starts with page name + 'url': 'https://promodj.com/radio.remix', + 'only_matching': True, }] def _real_extract(self, url): @@ -288,6 +292,10 @@ class PromoDJUserMediaIE(PromoDJBaseIE): 'id': 'worobyev-video', }, 'playlist_count': 0, + }, { + # login starts with page name + 'url': 'https://promodj.com/radio.remix/music', + 'only_matching': True, }] def _real_extract(self, url): @@ -378,15 +386,18 @@ class PromoDJUserPageIE(PromoDJBaseIE): *PromoDJBaseIE._MEDIA_TYPES, ] _NOT_USER_PAGE_LIST = '|'.join(_USER_PATHS) - _USER_PAGE_RE = rf'(?!{_NOT_USER_PAGE_LIST})[\w-]+' - _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?P{_USER_PAGE_RE})$' + _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/(?P{PromoDJBaseIE._LOGIN_RE})/(?P(?!(?:{_NOT_USER_PAGE_LIST})$)[\w-]+$)' _TESTS = [{ 'url': 'https://promodj.com/djperetse/MaxMixes', 'info_dict': { 'id': 'djperetse-MaxMixes', }, 'playlist_count': 5, + }, { + # user page starts with media type (not a real link) + 'url': 'https://promodj.com/djperetse/remixes-best', + 'only_matching': True, }] def _real_extract(self, url): From 49ac5d31a38dc4cffe46b907544cdff619255e48 Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Sat, 17 Feb 2024 05:06:06 +0300 Subject: [PATCH 19/21] [PromoDJ] Update radio extractor and add tests --- yt_dlp/extractor/promodj.py | 47 ++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index f42d6ed2d..8600b93d1 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -6,16 +6,17 @@ import urllib.parse from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( - OnDemandPagedList, clean_html, dict_get, extract_attributes, ExtractorError, get_element_by_class, + get_element_html_by_id, get_elements_html_by_class, int_or_none, js_to_json, merge_dicts, + OnDemandPagedList, parse_duration, str_or_none, traverse_obj, @@ -989,19 +990,53 @@ class PromoDJRadioIE(PromoDJBaseIE): _VALID_URL = rf'{PromoDJBaseIE._BASE_URL_RE}/radio#(?P\w+)' _TESTS = [{ 'url': 'https://promodj.com/radio#dubstep', - 'only_matching': True, + 'info_dict': { + 'id': 'dubstep', + 'ext': 'mp3', + 'title': r're:^Dubstep ', + 'description': 'Всё лучше под дабстеп', + 'thumbnail': r're:^https?://', + 'live_status': 'is_live', + }, }, { 'url': 'https://promodj.com/radio#oldschool', - 'only_matching': True, + 'info_dict': { + 'id': 'oldschool', + 'ext': 'mp3', + 'title': r're:^Old-School ', + 'description': 'То самое доброе, старое, вечное', + 'thumbnail': r're:^https?://', + 'live_status': 'is_live', + }, }] def _real_extract(self, url): - id = self._match_id(url) + slug = self._match_id(url) + html = self._download_webpage(url, slug) + radio_span = get_element_html_by_id(f'radio_{slug}', html) + if not radio_span: + raise ExtractorError('Radio channel is offline or not exists', expected=True) + id = self._search_regex(r'amba="radio:(\d+)"', radio_span, 'id') + tooltip_html = self._download_webpage( + f'https://promodj.com/ajax/tooltip.html?wtf=radio:{id}', slug, + note='Downloading tooltip webpage') + title = clean_html(self._search_regex( + r']*>([^<]+)', tooltip_html, 'title', default=None)) + description = clean_html(self._search_regex( + r'
([^<]+)
', tooltip_html, 'description', default=None)) + thumbnail = self._search_regex( + rf'#radio_{slug}:after {{ background-image: url\(([^)]+)\); }}', + html, 'thumbnail', default=None) + return { - 'id': id, + 'id': slug, + 'title': title, + 'description': description, + 'thumbnail': url_or_none(thumbnail), 'formats': [{ - 'url': f'https://radio.promodj.com/{id}-192', + 'url': f'https://radio.promodj.com/{slug}-192', 'abr': 192, + 'ext': 'mp3', }], 'is_live': True, } From 2416fddcfbd3dd22caa78fc2cf7018d82a7d2efc Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Sat, 17 Feb 2024 05:24:49 +0300 Subject: [PATCH 20/21] [PromoDJ] Add codecs --- yt_dlp/extractor/promodj.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index 8600b93d1..235ff0187 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -155,6 +155,8 @@ class PromoDJBaseIE(InfoExtractor): 'format_id': 'lossy', 'url': traverse_obj(source, ('URL', {url_or_none})), 'size': traverse_obj(source, ('size', {int_or_none})), + 'acodec': 'mp3', + 'vcodec': 'none', } for source in traverse_obj(media_data, ('sources'))] thumbnails = [{ 'url': url, @@ -780,7 +782,7 @@ class PromoDJIE(PromoDJBaseIE): # examples: MP3, 320 Кбит | MP4, 20157 Кбит | WAV, 1412 Кбит | AVI, 1731 Кбит | ASF, 6905 Кбит | FLAC, 1509 Кбит # https://regex101.com/r/2AuaxB/1 - _FORMATS_RE = r'(?:[^\"]+)\">)?\s*\w+, (?P\d+) Кбит' + _FORMATS_RE = r'(?:[^\"]+)\">)?\s*(?P\w+), (?P\d+) Кбит' _VIEW_COUNT_RE = r'(?:Прослушиваний|Просмотров):\s*(\d+)' # examples: 0:21 | 1:07 | 74:38 _DURATION_RE = r'Продолжительность:\s*(\d+:\d{2})' @@ -847,18 +849,19 @@ class PromoDJIE(PromoDJBaseIE): # size field describes best quality size = self._parse_ru_size(*re.search(self._SIZE_RE, meta_html).groups()) if type == 'videos': - for url, bitrate in formats_from_html: + for url, format, bitrate in formats_from_html: if url_or_none(url): metadata['formats'].append({ 'format_id': 'source', 'url': url, 'tbr': int(bitrate), 'size': size, + 'container': format.lower(), 'quality': 1, }) elif not is_paid: for i, match in enumerate(formats_from_html): - url, bitrate = match + url, format, bitrate = match is_last = i == len(formats_from_html) - 1 if is_last: metadata['formats'][0]['abr'] = int(bitrate) @@ -867,6 +870,8 @@ class PromoDJIE(PromoDJBaseIE): 'format_id': 'lossless', 'url': url, 'abr': int(bitrate), + 'acodec': format.lower(), + 'vcodec': 'none', }) metadata['formats'][-1]['size'] = size @@ -1037,6 +1042,8 @@ class PromoDJRadioIE(PromoDJBaseIE): 'url': f'https://radio.promodj.com/{slug}-192', 'abr': 192, 'ext': 'mp3', + 'acodec': 'mp3', + 'vcodec': 'none', }], 'is_live': True, } From 107bed866fc9017373607256aa8ee37fabea6555 Mon Sep 17 00:00:00 2001 From: DmitryScaletta Date: Tue, 27 Feb 2024 02:32:49 +0300 Subject: [PATCH 21/21] [PromoDJ] Sort imports --- yt_dlp/extractor/promodj.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/promodj.py b/yt_dlp/extractor/promodj.py index 235ff0187..494652123 100644 --- a/yt_dlp/extractor/promodj.py +++ b/yt_dlp/extractor/promodj.py @@ -6,17 +6,17 @@ import urllib.parse from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( + ExtractorError, + OnDemandPagedList, clean_html, dict_get, extract_attributes, - ExtractorError, get_element_by_class, get_element_html_by_id, get_elements_html_by_class, int_or_none, js_to_json, merge_dicts, - OnDemandPagedList, parse_duration, str_or_none, traverse_obj,