From 85a0ad0117cc59e039fb05ccf0f0c845be98492f Mon Sep 17 00:00:00 2001 From: lyz-code Date: Wed, 16 Feb 2022 15:56:17 +0000 Subject: [PATCH] [bandcamp] Fix user URLs (#2800) Authored by: lyz-code --- yt_dlp/extractor/bandcamp.py | 68 ++++++++++++++++++++-------------- yt_dlp/extractor/extractors.py | 2 +- 2 files changed, 42 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index b664145a18..42223dab72 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -212,7 +212,7 @@ class BandcampIE(InfoExtractor): class BandcampAlbumIE(BandcampIE): IE_NAME = 'Bandcamp:album' - _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?!/music)(?:/album/(?P<id>[^/?#&]+))?' + _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com/album/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', @@ -257,14 +257,6 @@ class BandcampAlbumIE(BandcampIE): 'id': 'hierophany-of-the-open-grave', }, 'playlist_mincount': 9, - }, { - 'url': 'http://dotscale.bandcamp.com', - 'info_dict': { - 'title': 'Loom', - 'id': 'dotscale', - 'uploader_id': 'dotscale', - }, - 'playlist_mincount': 7, }, { # with escaped quote in title 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep', @@ -391,41 +383,63 @@ class BandcampWeeklyIE(BandcampIE): } -class BandcampMusicIE(InfoExtractor): - _VALID_URL = r'https?://(?P<id>[^/]+)\.bandcamp\.com/music' +class BandcampUserIE(InfoExtractor): + IE_NAME = 'Bandcamp:user' + _VALID_URL = r'https?://(?!www\.)(?P<id>[^.]+)\.bandcamp\.com(?:/music)?/?(?:[#?]|$)' + _TESTS = [{ + # Type 1 Bandcamp user page. 
+ 'url': 'https://adrianvonziegler.bandcamp.com', + 'info_dict': { + 'id': 'adrianvonziegler', + 'title': 'Discography of adrianvonziegler', + }, + 'playlist_mincount': 23, + }, { + # Bandcamp user page with only one album + 'url': 'http://dotscale.bandcamp.com', + 'info_dict': { + 'id': 'dotscale', + 'title': 'Discography of dotscale' + }, + 'playlist_count': 1, + }, { + # Type 2 Bandcamp user page. + 'url': 'https://nightcallofficial.bandcamp.com', + 'info_dict': { + 'id': 'nightcallofficial', + 'title': 'Discography of nightcallofficial', + }, + 'playlist_count': 4, + }, { 'url': 'https://steviasphere.bandcamp.com/music', 'playlist_mincount': 47, 'info_dict': { 'id': 'steviasphere', + 'title': 'Discography of steviasphere', }, }, { 'url': 'https://coldworldofficial.bandcamp.com/music', 'playlist_mincount': 10, 'info_dict': { 'id': 'coldworldofficial', + 'title': 'Discography of coldworldofficial', }, }, { 'url': 'https://nuclearwarnowproductions.bandcamp.com/music', 'playlist_mincount': 399, 'info_dict': { 'id': 'nuclearwarnowproductions', + 'title': 'Discography of nuclearwarnowproductions', }, - } - ] - - _TYPE_IE_DICT = { - 'album': BandcampAlbumIE.ie_key(), - 'track': BandcampIE.ie_key() - } + }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) - items = re.findall(r'href\=\"\/(?P<path>(?P<type>album|track)+/[^\"]+)', webpage) - entries = [ - self.url_result( - f'https://{id}.bandcamp.com/{item[0]}', - ie=self._TYPE_IE_DICT[item[1]]) - for item in items] - return self.playlist_result(entries, id) + uploader = self._match_id(url) + webpage = self._download_webpage(url, uploader) + + discography_data = (re.findall(r'
<li data-item-id=["\'][^>]+>\s*<a href=["\']([^"\']+)', webpage) + or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage)) + + return self.playlist_from_matches( + discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x)) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index c3f3eb9745..b240d6553b 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -118,7 +118,7 @@ from .bandcamp import ( BandcampIE, BandcampAlbumIE, BandcampWeeklyIE, - BandcampMusicIE, + BandcampUserIE, ) from .bannedvideo import BannedVideoIE from .bbc import (