From 071326c0cc880f844ec5d3da55ea7fe3a97290f3 Mon Sep 17 00:00:00 2001
From: Lev Plyusnin
Date: Wed, 3 Jan 2024 08:35:28 +0700
Subject: [PATCH] [ie] Add new fields with proper support for multiple values

---
Review notes: hunk sizes/offsets and the diffstat were recomputed for the
amended additions; the blob ids on the "index" lines were left untouched and
are therefore stale.

 yt_dlp/YoutubeDL.py                     | 24 +++++++++++--
 yt_dlp/__init__.py                      |  5 +++
 yt_dlp/extractor/common.py              | 19 ++++++----
 yt_dlp/postprocessor/__init__.py        |  1 +
 yt_dlp/postprocessor/ffmpeg.py          | 14 +++++---
 yt_dlp/postprocessor/mutagenmetadata.py | 47 +++++++++++++++++++++++++
 6 files changed, 97 insertions(+), 13 deletions(-)
 create mode 100644 yt_dlp/postprocessor/mutagenmetadata.py

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 8d96498a67..ea03e7274c 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -24,6 +24,7 @@ import traceback
 import unicodedata
 
 from .cache import Cache
+
 from .compat import functools, urllib  # isort: split
 from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
 from .cookies import LenientSimpleCookie, load_cookies
@@ -1735,6 +1736,7 @@ class YoutubeDL:
                 '_type': 'compat_list',
                 'entries': ie_result,
             }
+        self.fix_deprecated_fields(ie_result)
         if extra_info.get('original_url'):
             ie_result.setdefault('original_url', extra_info['original_url'])
         self.add_default_extra_info(ie_result, ie, url)
@@ -1744,6 +1746,23 @@ class YoutubeDL:
         else:
             return ie_result
 
+    def fix_deprecated_fields(self, ie_result):
+        """Fill the "*_list" fields of ie_result from their deprecated comma-separated counterparts"""
+        deprecated_multivalue_fields = {
+            'artist': 'artist_list',
+            'composer': 'composer_list',
+            'album_artist': 'album_artist_list',
+            'genre': 'genre_list',
+        }
+        for deprecated_field, new_field in deprecated_multivalue_fields.items():
+            value = ie_result.get(deprecated_field)
+            # The deprecated field may be None/empty or a non-string (re.split would raise);
+            # a list that is already present under the new name must not be overwritten
+            if not isinstance(value, str) or not value or ie_result.get(new_field):
+                continue
+            self.deprecation_warning(f'"{deprecated_field}" field is deprecated. Use "{new_field}" instead')
+            ie_result[new_field] = re.split(r', ?', value)
+
     def add_default_extra_info(self, ie_result, ie, url):
         if url is not None:
             self.add_extra_info(ie_result, {
@@ -3918,10 +3937,9 @@ class YoutubeDL:
 
         # These imports can be slow. So import them only as needed
         from .extractor.extractors import _LAZY_LOADER
-        from .extractor.extractors import (
-            _PLUGIN_CLASSES as plugin_ies,
+        from .extractor.extractors import _PLUGIN_CLASSES as plugin_ies
+        from .extractor.extractors import \
             _PLUGIN_OVERRIDES as plugin_ie_overrides
-        )
 
         def get_encoding(stream):
             ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 57a4871575..025b97a315 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -670,6 +670,11 @@ def get_postprocessors(opts):
             'add_metadata': opts.addmetadata,
             'add_infojson': opts.embed_infojson,
         }
+    # MutagenMetadata must run after FFmpegMetadata
+    if opts.addmetadata:
+        yield {
+            'key': 'MutagenMetadata',
+        }
     # Deprecated
     # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment
     # but must be below EmbedSubtitle and FFmpegMetadata
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index af534775f0..b614472b57 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -422,16 +422,23 @@ class InfoExtractor:
     track_number:   Number of the track within an album or a disc, as an integer.
     track_id:       Id of the track (useful in case of custom indexing, e.g. 6.iii),
                     as a unicode string.
-    artist:         Artist(s) of the track.
-    genre:          Genre(s) of the track.
+    artist_list:    Artist(s) of the track, as a list of unicode strings.
+    composer_list:  Composer(s) of the piece, as a list of unicode strings.
+    genre_list:     Genre(s) of the track, as a list of unicode strings.
+
     album:          Title of the album the track belongs to.
     album_type:     Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
-    album_artist:   List of all artists appeared on the album (e.g.
-                    "Ash Borer / Fell Voices" or "Various Artists", useful for splits
-                    and compilations).
+    album_artist_list: All artists that appear on the album, as a list of unicode strings
+                    (e.g. ["Ash Borer", "Fell Voices"] or ["Various Artists"],
+                    useful for splits and compilations).
     disc_number:    Number of the disc or other physical medium the track belongs to,
                     as an integer.
-    composer:       Composer of the piece
+    composer:       Deprecated, use "composer_list" instead. Composer(s) of the piece,
+                    comma-separated.
+    artist:         Deprecated, use "artist_list" instead. Artist(s) of the track, comma-separated.
+    genre:          Deprecated, use "genre_list" instead. Genre(s) of the track, comma-separated.
+    album_artist:   Deprecated, use "album_artist_list" instead. All artists that appear on the
+                    album, comma-separated.
 
     The following fields should only be set for clips that should be cut from the original video:
 
diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py
index bfe9df733b..e96a015e60 100644
--- a/yt_dlp/postprocessor/__init__.py
+++ b/yt_dlp/postprocessor/__init__.py
@@ -30,6 +30,7 @@ from .metadataparser import (
 )
 from .modify_chapters import ModifyChaptersPP
 from .movefilesafterdownload import MoveFilesAfterDownloadPP
+from .mutagenmetadata import MutagenMetadataPP
 from .sponskrub import SponSkrubPP
 from .sponsorblock import SponsorBlockPP
 from .xattrpp import XAttrMetadataPP
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 7c904417ba..b7e08de4be 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -23,6 +23,7 @@ from ..utils import (
     encodeFilename,
     filter_dict,
     float_or_none,
+    is_iterable_like,
     is_outdated_version,
     orderedSet,
     prepend_extension,
@@ -738,9 +739,13 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
 
         def add(meta_list, info_list=None):
             value = next((
-                str(info[key]) for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
+                info[key] for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list))
                 if info.get(key) is not None), None)
+            if is_iterable_like(value):
+                # Join before the emptiness check so that an empty list does not produce an empty tag
+                value = ', '.join(map(str, value))
             if value not in ('', None):
+                value = str(value)
                 value = value.replace('\0', '')  # nul character cannot be passed in command line
                 metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)})
 
@@ -754,10 +759,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
         add(('description', 'synopsis'), 'description')
         add(('purl', 'comment'), 'webpage_url')
         add('track', 'track_number')
-        add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
-        add('genre')
+        add('artist', ('artist_list', 'creator', 'uploader', 'uploader_id'))
+        add('composer', 'composer_list')
+        add('genre', 'genre_list')
         add('album')
-        add('album_artist')
+        add('album_artist', 'album_artist_list')
         add('disc', 'disc_number')
         add('show', 'series')
         add('season_number')
diff --git a/yt_dlp/postprocessor/mutagenmetadata.py b/yt_dlp/postprocessor/mutagenmetadata.py
new file mode 100644
index 0000000000..25d7f016ca
--- /dev/null
+++ b/yt_dlp/postprocessor/mutagenmetadata.py
@@ -0,0 +1,47 @@
+from .common import PostProcessor
+from ..dependencies import mutagen
+
+if mutagen:
+    from mutagen.easymp4 import EasyMP4
+    from mutagen.flac import FLAC
+    from mutagen.mp3 import EasyMP3
+    from mutagen.musepack import Musepack
+    from mutagen.oggopus import OggOpus
+    from mutagen.oggvorbis import OggVorbis
+
+
+class MutagenMetadataPP(PostProcessor):
+    """Set the tags that can hold multiple values (artist, album artist, genre, composer) using mutagen"""
+
+    def __init__(self, downloader):
+        PostProcessor.__init__(self, downloader)
+
+    @PostProcessor._restrict_to(images=False)
+    def run(self, information):
+        extension = information['ext']
+        ret = [], information
+        if not mutagen:
+            if extension in ['mp3', 'm4a', 'ogg', 'opus', 'flac', 'mpc']:
+                self.report_warning('module mutagen was not found. Tags with multiple values (e.g. artist, album artist and genre) may be set incorrectly. Please install using `python -m pip install mutagen`')
+            return ret
+        tag_mapping = {
+            'artist': 'artist_list',
+            'albumartist': 'album_artist_list',
+            'genre': 'genre_list',
+            'composer': 'composer_list',
+        }
+        supported_formats = [EasyMP3, EasyMP4, OggVorbis, OggOpus, FLAC, Musepack]
+        file = mutagen.File(information['filepath'], supported_formats)
+        # Only None means "unrecognized file"; the truthiness of the dict-like
+        # file object may depend on whether it already carries any tags
+        if file is None:
+            return ret
+        if isinstance(file, EasyMP4):
+            # Map the "composer" key to the "\251wrt" atom so that it can be assigned below
+            file.RegisterTextKey('composer', '\251wrt')
+        for tag_key, info_key in tag_mapping.items():
+            value = information.get(info_key)
+            if value:
+                file[tag_key] = value
+        file.save()
+        return ret