From 1732eccc0a40256e076bf0435a29f0f1d8419280 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Sun, 26 Nov 2023 03:12:05 +0100 Subject: [PATCH 1/9] [core] Parse `release_year` from `release_date` (#8524) Closes #7263 Authored by: seproDev --- README.md | 2 +- test/helper.py | 6 +++++- yt_dlp/YoutubeDL.py | 3 +++ yt_dlp/extractor/archiveorg.py | 2 -- yt_dlp/extractor/common.py | 4 +++- yt_dlp/extractor/harpodeon.py | 10 +++++----- yt_dlp/extractor/monstercat.py | 2 -- yt_dlp/extractor/youtube.py | 1 - 8 files changed, 17 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 6fe7fab6a2..f67cab572d 100644 --- a/README.md +++ b/README.md @@ -1309,6 +1309,7 @@ The available fields are: - `upload_date` (string): Video upload date in UTC (YYYYMMDD) - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released - `release_date` (string): The date (YYYYMMDD) when the video was released in UTC + - `release_year` (numeric): Year (YYYY) when the video or album was released - `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified - `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC - `uploader_id` (string): Nickname or id of the video uploader @@ -1382,7 +1383,6 @@ Available for the media that is a track or a part of a music album: - `album_type` (string): Type of the album - `album_artist` (string): List of all artists appeared on the album - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to - - `release_year` (numeric): Year (YYYY) when the album was released Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters: diff --git a/test/helper.py b/test/helper.py index 7503840be8..e5ace8fe2c 100644 --- a/test/helper.py +++ b/test/helper.py @@ -10,7 +10,7 @@ import types import yt_dlp.extractor from yt_dlp import YoutubeDL from yt_dlp.compat import compat_os_name -from yt_dlp.utils import preferredencoding, write_string +from yt_dlp.utils import preferredencoding, try_call, write_string if 'pytest' in sys.modules: import pytest @@ -223,6 +223,10 @@ def sanitize_got_info_dict(got_dict): if test_info_dict.get('display_id') == test_info_dict.get('id'): test_info_dict.pop('display_id') + # release_year may be generated from release_date + if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])): + test_info_dict.pop('release_year') + # Check url for flat entries if got_dict.get('_type', 'video') != 'video' and got_dict.get('url'): test_info_dict['url'] = got_dict['url'] diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 32ae25aa01..e65bef862c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2589,6 +2589,9 @@ class YoutubeDL: upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc) info_dict[date_key] = upload_date.strftime('%Y%m%d') + if not info_dict.get('release_year'): + info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])})) + live_keys = ('is_live', 'was_live') live_status = info_dict.get('live_status') if live_status is None: diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index a0b26ac5a0..3bb6f2e311 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -52,7 +52,6 @@ class ArchiveOrgIE(InfoExtractor): 'creator': 'SRI International', 'uploader': 'laura@archive.org', 'thumbnail': r're:https://archive\.org/download/.*\.jpg', - 'release_year': 1968, 'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr', 'track': 'XD300-23 68HighlightsAResearchCntAugHumanIntellect', @@ -134,7 +133,6 @@ class ArchiveOrgIE(InfoExtractor): 'album': '1977-05-08 - Barton Hall - Cornell University', 'release_date': '19770508', 'display_id': 'gd1977-05-08d01t07.flac', - 'release_year': 1977, 'track_number': 7, }, }, { diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 507ef34cf5..b179f40382 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -286,6 +286,9 @@ class InfoExtractor: If it is not clear whether to use timestamp or this, use the former release_date: The date (YYYYMMDD) when the video was released in UTC. If not explicitly set, calculated from release_timestamp + release_year: Year (YYYY) as integer when the video or album was released. + To be used if no exact release date is known. + If not explicitly set, calculated from release_date. modified_timestamp: UNIX timestamp of the moment the video was last modified. modified_date: The date (YYYYMMDD) when the video was last modified in UTC. If not explicitly set, calculated from modified_timestamp @@ -427,7 +430,6 @@ class InfoExtractor: and compilations). disc_number: Number of the disc or other physical medium the track belongs to, as an integer. - release_year: Year (YYYY) when the album was released. composer: Composer of the piece The following fields should only be set for clips that should be cut from the original video: diff --git a/yt_dlp/extractor/harpodeon.py b/yt_dlp/extractor/harpodeon.py index 0aa47337ff..46eaddb32f 100644 --- a/yt_dlp/extractor/harpodeon.py +++ b/yt_dlp/extractor/harpodeon.py @@ -1,5 +1,5 @@ from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import int_or_none class HarpodeonIE(InfoExtractor): @@ -14,7 +14,7 @@ class HarpodeonIE(InfoExtractor): 'title': 'The Smoking Out of Bella Butts', 'description': 'md5:47e16bdb41fc8a79c83ab83af11c8b77', 'creator': 'Vitagraph Company of America', - 'release_date': '19150101' + 'release_year': 1915, } }, { 'url': 'https://www.harpodeon.com/preview/The_Smoking_Out_of_Bella_Butts/268068288', @@ -25,7 +25,7 @@ class HarpodeonIE(InfoExtractor): 'title': 'The Smoking Out of Bella Butts', 'description': 'md5:47e16bdb41fc8a79c83ab83af11c8b77', 'creator': 'Vitagraph Company of America', - 'release_date': '19150101' + 'release_year': 1915, } }, { 'url': 'https://www.harpodeon.com/preview/Behind_the_Screen/421838710', @@ -36,7 +36,7 @@ class HarpodeonIE(InfoExtractor): 'title': 'Behind the Screen', 'description': 'md5:008972a3dc51fba3965ee517d2ba9155', 'creator': 'Lone Star Corporation', - 'release_date': '19160101' + 'release_year': 1916, } }] @@ -66,5 +66,5 @@ class HarpodeonIE(InfoExtractor): 'http_headers': {'Referer': url}, 'description': self._html_search_meta('description', webpage, fatal=False), 'creator': creator, - 'release_date': unified_strdate(f'{release_year}0101') + 'release_year': int_or_none(release_year), } diff --git a/yt_dlp/extractor/monstercat.py b/yt_dlp/extractor/monstercat.py index 7f04825fcd..cf5e099691 100644 --- a/yt_dlp/extractor/monstercat.py +++ b/yt_dlp/extractor/monstercat.py @@ -24,7 +24,6 @@ class MonstercatIE(InfoExtractor): 'title': 'The Secret Language of Trees', 'id': '742779548009', 'thumbnail': 'https://www.monstercat.com/release/742779548009/cover', - 'release_year': 2023, 'release_date': '20230711', 'album': 'The Secret Language of Trees', 'album_artist': 'BT', @@ -71,7 +70,6 @@ class MonstercatIE(InfoExtractor): 'thumbnail': f'https://www.monstercat.com/release/{url_id}/cover', 'album_artist': try_call( lambda: get_element_by_class('h-normal text-uppercase mb-desktop-medium mb-smallish', html)), - 'release_year': int_or_none(date[:4]) if date else None, 'release_date': date, } diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index f6caf09708..73fe233eff 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2072,7 +2072,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'track': 'Voyeur Girl', 'album': 'it\'s too much love to know my dear', 'release_date': '20190313', - 'release_year': 2019, 'alt_title': 'Voyeur Girl', 'view_count': int, 'playable_in_embed': True, From 9cafb9ff17e14475a35c9a58b5bb010c86c9db4b Mon Sep 17 00:00:00 2001 From: kclauhk <78251477+kclauhk@users.noreply.github.com> Date: Sun, 26 Nov 2023 10:17:16 +0800 Subject: [PATCH 2/9] [ie/facebook] Improve subtitles extraction (#8296) Authored by: kclauhk --- yt_dlp/extractor/facebook.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 50a750d3b1..58162cc5fc 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -16,6 +16,7 @@ from ..utils import ( determine_ext, error_to_compat_str, float_or_none, + format_field, get_element_by_id, get_first, int_or_none, @@ -420,6 +421,29 @@ class FacebookIE(InfoExtractor): r'data-sjs>({.*?ScheduledServerJS.*?})', webpage)] post = traverse_obj(post_data, ( ..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or [] + + automatic_captions, subtitles = {}, {} + subs_data = traverse_obj(post, (..., 'video', ..., 'attachments', ..., lambda k, v: ( + k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video'))) + is_video_broadcast = get_first(subs_data, 'is_video_broadcast', expected_type=bool) + captions = get_first(subs_data, 'video_available_captions_locales', 'captions_url') + if url_or_none(captions): # if subs_data only had a 'captions_url' + locale = self._html_search_meta(['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US') + subtitles[locale] = [{'url': captions}] + # or else subs_data had 'video_available_captions_locales', a list of dicts + for caption in traverse_obj(captions, ( + {lambda x: sorted(x, key=lambda c: c['locale'])}, lambda _, v: v['captions_url']) + ): + lang = caption.get('localized_language') or '' + subs = { + 'url': caption['captions_url'], + 'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang), + } + if caption.get('localized_creation_method') or is_video_broadcast: + automatic_captions.setdefault(caption['locale'], []).append(subs) + else: + subtitles.setdefault(caption['locale'], []).append(subs) + media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: ( k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict) title = get_first(media, ('title', 'text')) @@ -463,6 +487,8 @@ class FacebookIE(InfoExtractor): webpage, 'view count', default=None)), 'concurrent_view_count': get_first(post, ( ('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})), + 'automatic_captions': automatic_captions, + 'subtitles': subtitles, } info_json_ld = self._search_json_ld(webpage, video_id, default={}) From 628fa244bbce2ad39775a5959e99588f30cac152 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Sun, 26 Nov 2023 03:20:10 +0100 Subject: [PATCH 3/9] [ie/floatplane] Add extractors (#8639) Closes #5877, Closes #5912 Authored by: seproDev --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/floatplane.py | 268 ++++++++++++++++++++++++++++++++ 2 files changed, 272 insertions(+) create mode 100644 yt_dlp/extractor/floatplane.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 128b86c1af..ad8c7d6611 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -642,6 +642,10 @@ from .filmweb import FilmwebIE from .firsttv import FirstTVIE from .fivetv import FiveTVIE from .flickr import FlickrIE +from .floatplane import ( + FloatplaneIE, + FloatplaneChannelIE, +) from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE diff --git a/yt_dlp/extractor/floatplane.py b/yt_dlp/extractor/floatplane.py new file mode 100644 index 0000000000..09abb40bf6 --- /dev/null +++ b/yt_dlp/extractor/floatplane.py @@ -0,0 +1,268 @@ +import functools + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + OnDemandPagedList, + clean_html, + determine_ext, + format_field, + int_or_none, + join_nonempty, + parse_codecs, + parse_iso8601, + urljoin, +) +from ..utils.traversal import traverse_obj + + +class FloatplaneIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/post/(?P\w+)' + _TESTS = [{ + 'url': 'https://www.floatplane.com/post/2Yf3UedF7C', + 'info_dict': { + 'id': 'yuleLogLTT', + 'ext': 'mp4', + 'display_id': '2Yf3UedF7C', + 'title': '8K Yule Log Fireplace with Crackling Fire Sounds - 10 Hours', + 'description': 'md5:adf2970e0de1c5e3df447818bb0309f6', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'duration': 36035, + 'comment_count': int, + 'like_count': int, + 'dislike_count': int, + 'release_date': '20191206', + 'release_timestamp': 1575657000, + 'uploader': 'LinusTechTips', + 'uploader_id': '59f94c0bdd241b70349eb72b', + 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home', + 'channel': 'Linus Tech Tips', + 'channel_id': '63fe42c309e691e4e36de93d', + 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/main', + 'availability': 'subscriber_only', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.floatplane.com/post/j2jqG3JmgJ', + 'info_dict': { + 'id': 'j2jqG3JmgJ', + 'title': 'TJM: Does Anyone Care About Avatar: The Way of Water?', + 'description': 'md5:00bf17dc5733e4031e99b7fd6489f274', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'comment_count': int, + 'like_count': int, + 'dislike_count': int, + 'release_timestamp': 1671915900, + 'release_date': '20221224', + 'uploader': 'LinusTechTips', + 'uploader_id': '59f94c0bdd241b70349eb72b', + 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home', + 'channel': "They're Just Movies", + 'channel_id': '64135f82fc76ab7f9fbdc876', + 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/tajm', + 'availability': 'subscriber_only', + }, + 'playlist_count': 2, + }, { + 'url': 'https://www.floatplane.com/post/3tK2tInhoN', + 'info_dict': { + 'id': '3tK2tInhoN', + 'title': 'Extras - How Linus Communicates with Editors (Compensator 4)', + 'description': 'md5:83cd40aae1ce124df33769600c80ca5b', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'comment_count': int, + 'like_count': int, + 'dislike_count': int, + 'release_timestamp': 1700529120, + 'release_date': '20231121', + 'uploader': 'LinusTechTips', + 'uploader_id': '59f94c0bdd241b70349eb72b', + 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home', + 'channel': 'FP Exclusives', + 'channel_id': '6413623f5b12cca228a28e78', + 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/fpexclusive', + 'availability': 'subscriber_only', + }, + 'playlist_count': 2, + }, { + 'url': 'https://beta.floatplane.com/post/d870PEFXS1', + 'info_dict': { + 'id': 'bg9SuYKEww', + 'ext': 'mp4', + 'display_id': 'd870PEFXS1', + 'title': 'LCS Drama, TLOU 2 Remaster, Destiny 2 Player Count Drops, + More!', + 'description': 'md5:80d612dcabf41b17487afcbe303ec57d', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'release_timestamp': 1700622000, + 'release_date': '20231122', + 'duration': 513, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'uploader': 'LinusTechTips', + 'uploader_id': '59f94c0bdd241b70349eb72b', + 'uploader_url': 'https://www.floatplane.com/channel/linustechtips/home', + 'channel': 'GameLinked', + 'channel_id': '649dbade3540dbc3945eeda7', + 'channel_url': 'https://www.floatplane.com/channel/linustechtips/home/gamelinked', + 'availability': 'subscriber_only', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_initialize(self): + if not self._get_cookies('https://www.floatplane.com').get('sails.sid'): + self.raise_login_required() + + def _real_extract(self, url): + post_id = self._match_id(url) + + post_data = self._download_json( + 'https://www.floatplane.com/api/v3/content/post', post_id, query={'id': post_id}, + note='Downloading post data', errnote='Unable to download post data') + + if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))): + raise ExtractorError('Post does not contain a video or audio track', expected=True) + + items = [] + for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)): + media_id = media['id'] + media_typ = media.get('type') or 'video' + + metadata = self._download_json( + f'https://www.floatplane.com/api/v3/content/{media_typ}', media_id, query={'id': media_id}, + note=f'Downloading {media_typ} metadata') + + stream = self._download_json( + 'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={ + 'type': 'vod' if media_typ == 'video' else 'aod', + 'guid': metadata['guid'] + }, note=f'Downloading {media_typ} stream data') + + path_template = traverse_obj(stream, ('resource', 'uri', {str})) + + def format_path(params): + path = path_template + for i, val in (params or {}).items(): + path = path.replace(f'{{qualityLevelParams.{i}}}', val) + return path + + formats = [] + for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)): + url = urljoin(stream['cdn'], format_path(traverse_obj( + stream, ('resource', 'data', 'qualityLevelParams', quality['name'])))) + formats.append({ + **traverse_obj(quality, { + 'format_id': 'name', + 'format_note': 'label', + 'width': ('width', {int}), + 'height': ('height', {int}), + }), + **parse_codecs(quality.get('codecs')), + 'url': url, + 'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'), + }) + + items.append({ + 'id': media_id, + **traverse_obj(metadata, { + 'title': 'title', + 'duration': ('duration', {int_or_none}), + 'thumbnail': ('thumbnail', 'path'), + }), + 'formats': formats, + }) + + uploader_url = format_field(traverse_obj( + post_data, 'creator'), 'urlname', 'https://www.floatplane.com/channel/%s/home', default=None) + channel_url = urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))) + + post_info = { + 'id': post_id, + 'display_id': post_id, + **traverse_obj(post_data, { + 'title': 'title', + 'description': ('text', {clean_html}), + 'uploader': ('creator', 'title'), + 'uploader_id': ('creator', 'id'), + 'channel': ('channel', 'title'), + 'channel_id': ('channel', 'id'), + 'like_count': ('likes', {int_or_none}), + 'dislike_count': ('dislikes', {int_or_none}), + 'comment_count': ('comments', {int_or_none}), + 'release_timestamp': ('releaseDate', {parse_iso8601}), + 'thumbnail': ('thumbnail', 'path'), + }), + 'uploader_url': uploader_url, + 'channel_url': channel_url, + 'availability': self._availability(needs_subscription=True), + } + + if len(items) > 1: + return self.playlist_result(items, **post_info) + + post_info.update(items[0]) + return post_info + + +class FloatplaneChannelIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P[\w-]+)/home(?:/(?P[\w-]+))?' + _PAGE_SIZE = 20 + _TESTS = [{ + 'url': 'https://www.floatplane.com/channel/linustechtips/home/ltxexpo', + 'info_dict': { + 'id': 'linustechtips/ltxexpo', + 'title': 'LTX Expo', + 'description': 'md5:9819002f9ebe7fd7c75a3a1d38a59149', + }, + 'playlist_mincount': 51, + }, { + 'url': 'https://www.floatplane.com/channel/ShankMods/home', + 'info_dict': { + 'id': 'ShankMods', + 'title': 'Shank Mods', + 'description': 'md5:6dff1bb07cad8e5448e04daad9be1b30', + }, + 'playlist_mincount': 14, + }, { + 'url': 'https://beta.floatplane.com/channel/bitwit_ultra/home', + 'info_dict': { + 'id': 'bitwit_ultra', + 'title': 'Bitwit Ultra', + 'description': 'md5:1452f280bb45962976d4789200f676dd', + }, + 'playlist_mincount': 200, + }] + + def _fetch_page(self, display_id, creator_id, channel_id, page): + query = { + 'id': creator_id, + 'limit': self._PAGE_SIZE, + 'fetchAfter': page * self._PAGE_SIZE, + } + if channel_id: + query['channel'] = channel_id + page_data = self._download_json( + 'https://www.floatplane.com/api/v3/content/creator', display_id, + query=query, note=f'Downloading page {page + 1}') + for post in page_data or []: + yield self.url_result( + f'https://www.floatplane.com/post/{post["id"]}', + ie=FloatplaneIE, video_id=post['id'], video_title=post.get('title'), + release_timestamp=parse_iso8601(post.get('releaseDate'))) + + def _real_extract(self, url): + creator, channel = self._match_valid_url(url).group('id', 'channel') + display_id = join_nonempty(creator, channel, delim='/') + + creator_data = self._download_json( + 'https://www.floatplane.com/api/v3/creator/named', + display_id, query={'creatorURL[0]': creator})[0] + + channel_data = traverse_obj( + creator_data, ('channels', lambda _, v: v['urlname'] == channel), get_all=False) or {} + + return self.playlist_result(OnDemandPagedList(functools.partial( + self._fetch_page, display_id, creator_data['id'], channel_data.get('id')), self._PAGE_SIZE), + display_id, playlist_title=channel_data.get('title') or creator_data.get('title'), + playlist_description=channel_data.get('about') or creator_data.get('about')) From bb5a54e6db2422bbd155d93a0e105b6616c09467 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 Nov 2023 20:21:29 -0600 Subject: [PATCH 4/9] [ie/youtube] Improve detection of faulty HLS formats (#8646) Closes #7747 Authored by: bashonly --- yt_dlp/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 73fe233eff..449d9d1a56 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4562,7 +4562,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'): # Newly uploaded videos' HLS formats are potentially problematic and need to be checked upload_datetime = datetime_from_str(upload_date).replace(tzinfo=datetime.timezone.utc) - if upload_datetime >= datetime_from_str('today-1day'): + if upload_datetime >= datetime_from_str('today-2days'): for fmt in info['formats']: if fmt.get('protocol') == 'm3u8_native': fmt['__needs_testing'] = True From deeb13eae82e60f82a2c0c5861f460399a997528 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 Nov 2023 20:40:09 -0600 Subject: [PATCH 5/9] [pp/FFmpegMetadata] Embed stream metadata in single format downloads (#8647) Closes #8568 Authored by: bashonly --- yt_dlp/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 323f4303c0..7c904417ba 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -780,7 +780,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): yield ('-metadata', f'{name}={value}') stream_idx = 0 - for fmt in info.get('requested_formats') or []: + for fmt in info.get('requested_formats') or [info]: stream_count = 2 if 'none' not in (fmt.get('vcodec'), fmt.get('acodec')) else 1 lang = ISO639Utils.short2long(fmt.get('language') or '') or fmt.get('language') for i in range(stream_idx, stream_idx + stream_count): From ff2fde1b8f922fd34bae6172602008cd67c07c93 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 Nov 2023 20:47:48 -0600 Subject: [PATCH 6/9] [ie/TwitCastingUser] Fix extraction (#8650) Closes #8653 Authored by: bashonly --- yt_dlp/extractor/twitcasting.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 3c303bdbbd..28ea16cc21 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -289,8 +289,7 @@ class TwitCastingUserIE(InfoExtractor): webpage = self._download_webpage( next_url, uploader_id, query={'filter': 'watchable'}, note='Downloading page %d' % page_num) matches = re.finditer( - r'''(?isx)/[^/]+/movie/\d+)"\s*>.+?''', - webpage) + r'(?s)/[^/"]+/movie/\d+)"', webpage) for mobj in matches: yield self.url_result(urljoin(base_url, mobj.group('url'))) From 4903f452b68efb62dadf22e81be8c7934fc743e7 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 Nov 2023 20:49:18 -0600 Subject: [PATCH 7/9] [ie/bfmtv] Fix extractors (#8651) Closes #8425 Authored by: bashonly --- yt_dlp/extractor/bfmtv.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/bfmtv.py b/yt_dlp/extractor/bfmtv.py index a7be0e67de..5d0c73ff32 100644 --- a/yt_dlp/extractor/bfmtv.py +++ b/yt_dlp/extractor/bfmtv.py @@ -7,7 +7,7 @@ from ..utils import extract_attributes class BFMTVBaseIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/' _VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P\d{12})\.html' - _VIDEO_BLOCK_REGEX = r'(]+class="video_block"[^>]*>)' + _VIDEO_BLOCK_REGEX = r'(]+class="video_block[^"]*"[^>]*>)' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' def _brightcove_url_result(self, video_id, video_block): @@ -55,8 +55,11 @@ class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE 'ext': 'mp4', 'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'uploader_id': '876450610001', - 'upload_date': '20171018', - 'timestamp': 1508329950, + 'upload_date': '20220926', + 'timestamp': 1664207191, + 'live_status': 'is_live', + 'thumbnail': r're:https://.+/image\.jpg', + 'tags': [], }, 'params': { 'skip_download': True, From 5a230233d6fce06f4abd1fce0dc92b948e6f780b Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 Nov 2023 20:50:23 -0600 Subject: [PATCH 8/9] [ie/box] Fix formats extraction (#8649) Closes #5098 Authored by: bashonly --- yt_dlp/extractor/box.py | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py index 8ab149626b..7281b3c6a6 100644 --- a/yt_dlp/extractor/box.py +++ b/yt_dlp/extractor/box.py @@ -1,16 +1,17 @@ import json +import urllib.parse from .common import InfoExtractor from ..utils import ( - determine_ext, parse_iso8601, - # try_get, update_url_query, + url_or_none, ) +from ..utils.traversal import traverse_obj class BoxIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P[^/]+)/file/(?P\d+)' + _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P[^/?#]+)/file/(?P\d+)' _TEST = { 'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538', 'md5': '1f81b2fd3960f38a40a3b8823e5fcd43', @@ -18,11 +19,12 @@ class BoxIE(InfoExtractor): 'id': '510727257538', 'ext': 'mp4', 'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4', - 'uploader': 'MLS Video', + 'uploader': '', 'timestamp': 1566320259, 'upload_date': '20190820', 'uploader_id': '235196876', - } + }, + 'params': {'skip_download': 'dash fragment too small'}, } def _real_extract(self, url): @@ -58,26 +60,15 @@ class BoxIE(InfoExtractor): formats = [] - # for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []): - # entry_url_template = try_get( - # entry, lambda x: x['content']['url_template']) - # if not entry_url_template: - # continue - # representation = entry.get('representation') - # if representation == 'dash': - # TODO: append query to every fragment URL - # formats.extend(self._extract_mpd_formats( - # entry_url_template.replace('{+asset_path}', 'manifest.mpd'), - # file_id, query=query)) - - authenticated_download_url = f.get('authenticated_download_url') - if authenticated_download_url and f.get('is_download_available'): - formats.append({ - 'ext': f.get('extension') or determine_ext(title), - 'filesize': f.get('size'), - 'format_id': 'download', - 'url': update_url_query(authenticated_download_url, query), - }) + for url_tmpl in traverse_obj(f, ( + 'representations', 'entries', lambda _, v: v['representation'] == 'dash', + 'content', 'url_template', {url_or_none} + )): + manifest_url = update_url_query(url_tmpl.replace('{+asset_path}', 'manifest.mpd'), query) + fmts = self._extract_mpd_formats(manifest_url, file_id) + for fmt in fmts: + fmt['extra_param_to_segment_url'] = urllib.parse.urlparse(manifest_url).query + formats.extend(fmts) creator = f.get('created_by') or {} From 9751a457cfdb18bf99d9ee0d10e4e6a594502bbf Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Sun, 26 Nov 2023 04:09:59 +0100 Subject: [PATCH 9/9] [cleanup] Remove dead extractors (#8604) Closes #1609, Closes #3232, Closes #4763, Closes #6026, Closes #6322, Closes #7912 Authored by: seproDev --- yt_dlp/extractor/_extractors.py | 156 +------ yt_dlp/extractor/airmozilla.py | 63 --- yt_dlp/extractor/aol.py | 1 + yt_dlp/extractor/atttechchannel.py | 53 --- yt_dlp/extractor/behindkink.py | 1 + yt_dlp/extractor/bet.py | 3 +- yt_dlp/extractor/bfi.py | 1 + yt_dlp/extractor/biqle.py | 110 ----- yt_dlp/extractor/bitwave.py | 58 --- yt_dlp/extractor/bleacherreport.py | 4 +- yt_dlp/extractor/br.py | 144 +------ yt_dlp/extractor/breakcom.py | 86 ---- yt_dlp/extractor/byutv.py | 16 +- yt_dlp/extractor/camwithher.py | 87 ---- yt_dlp/extractor/carambatv.py | 105 ----- yt_dlp/extractor/channel9.py | 252 ----------- yt_dlp/extractor/chirbit.py | 88 ---- yt_dlp/extractor/cinchcast.py | 56 --- yt_dlp/extractor/clipsyndicate.py | 52 --- yt_dlp/extractor/cloudy.py | 57 --- yt_dlp/extractor/clubic.py | 1 + yt_dlp/extractor/cmt.py | 1 + yt_dlp/extractor/daftsex.py | 150 ------- yt_dlp/extractor/defense.py | 37 -- yt_dlp/extractor/dhm.py | 1 + yt_dlp/extractor/dotsub.py | 81 ---- yt_dlp/extractor/echomsk.py | 43 -- yt_dlp/extractor/ehow.py | 36 -- yt_dlp/extractor/elevensports.py | 59 --- yt_dlp/extractor/ellentube.py | 130 ------ yt_dlp/extractor/engadget.py | 15 - yt_dlp/extractor/escapist.py | 108 ----- yt_dlp/extractor/esri.py | 70 --- yt_dlp/extractor/expotv.py | 74 ---- yt_dlp/extractor/extremetube.py | 48 --- yt_dlp/extractor/fourzerostudio.py | 106 ----- yt_dlp/extractor/foxgay.py | 58 --- yt_dlp/extractor/fusion.py | 81 ---- yt_dlp/extractor/generic.py | 61 +-- yt_dlp/extractor/gfycat.py | 145 ------- yt_dlp/extractor/groupon.py | 1 - yt_dlp/extractor/helsinki.py | 38 -- yt_dlp/extractor/hitbox.py | 209 --------- yt_dlp/extractor/howcast.py | 41 -- yt_dlp/extractor/howstuffworks.py | 86 ---- yt_dlp/extractor/keezmovies.py | 125 ------ yt_dlp/extractor/kinja.py | 7 - yt_dlp/extractor/laola1tv.py | 261 ------------ yt_dlp/extractor/linuxacademy.py | 238 ----------- yt_dlp/extractor/m6.py | 22 - yt_dlp/extractor/meta.py | 70 --- yt_dlp/extractor/metacafe.py | 281 ------------ yt_dlp/extractor/mgoon.py | 81 ---- yt_dlp/extractor/miomio.py | 134 ------ yt_dlp/extractor/mnet.py | 85 ---- yt_dlp/extractor/moevideo.py | 74 ---- yt_dlp/extractor/mofosex.py | 70 --- yt_dlp/extractor/movieclips.py | 47 -- yt_dlp/extractor/msn.py | 1 + yt_dlp/extractor/mwave.py | 87 ---- yt_dlp/extractor/mychannels.py | 35 -- yt_dlp/extractor/myvi.py | 100 ----- yt_dlp/extractor/newstube.py | 75 ---- yt_dlp/extractor/nick.py | 20 - yt_dlp/extractor/normalboots.py | 51 --- yt_dlp/extractor/nosvideo.py | 72 ---- yt_dlp/extractor/nrl.py | 1 + yt_dlp/extractor/ooyala.py | 230 ---------- yt_dlp/extractor/pandoratv.py | 128 ------ yt_dlp/extractor/people.py | 29 -- yt_dlp/extractor/playfm.py | 70 --- yt_dlp/extractor/plays.py | 49 --- yt_dlp/extractor/playvid.py | 90 ---- yt_dlp/extractor/porncom.py | 99 ----- yt_dlp/extractor/pornez.py | 60 --- yt_dlp/extractor/pornhd.py | 116 ----- yt_dlp/extractor/radiobremen.py | 59 --- yt_dlp/extractor/recurbate.py | 42 -- yt_dlp/extractor/rice.py | 112 ----- yt_dlp/extractor/rtl2.py | 100 +---- yt_dlp/extractor/rtvnh.py | 58 --- yt_dlp/extractor/ruhd.py | 42 -- yt_dlp/extractor/scte.py | 2 + yt_dlp/extractor/shared.py | 138 ------ yt_dlp/extractor/sky.py | 33 +- yt_dlp/extractor/spankwire.py | 174 -------- yt_dlp/extractor/srmediathek.py | 1 + yt_dlp/extractor/streamcloud.py | 75 ---- yt_dlp/extractor/swrmediathek.py | 111 ----- yt_dlp/extractor/techtalks.py | 80 ---- yt_dlp/extractor/telecinco.py | 1 - yt_dlp/extractor/tinypic.py | 54 --- yt_dlp/extractor/tokentube.py | 153 ------- yt_dlp/extractor/toypics.py | 2 + yt_dlp/extractor/trilulilu.py | 100 ----- yt_dlp/extractor/tube8.py | 95 ++++- yt_dlp/extractor/tunepk.py | 87 ---- yt_dlp/extractor/tvnet.py | 138 ------ yt_dlp/extractor/tvnow.py | 639 ---------------------------- yt_dlp/extractor/twentyfourvideo.py | 128 ------ yt_dlp/extractor/unscripted.py | 53 --- yt_dlp/extractor/veehd.py | 116 ----- yt_dlp/extractor/vice.py | 6 - yt_dlp/extractor/vidbit.py | 82 ---- yt_dlp/extractor/vimple.py | 58 --- yt_dlp/extractor/vodlocker.py | 73 ---- yt_dlp/extractor/voicerepublic.py | 59 --- yt_dlp/extractor/voot.py | 2 + yt_dlp/extractor/voxmedia.py | 8 +- yt_dlp/extractor/vrak.py | 77 ---- yt_dlp/extractor/vrv.py | 269 ------------ yt_dlp/extractor/vshare.py | 57 --- yt_dlp/extractor/vupload.py | 52 --- yt_dlp/extractor/vyborymos.py | 52 --- yt_dlp/extractor/vzaar.py | 100 ----- yt_dlp/extractor/wakanim.py | 75 ---- yt_dlp/extractor/watchbox.py | 153 ------- yt_dlp/extractor/watchindianporn.py | 65 --- yt_dlp/extractor/willow.py | 56 --- yt_dlp/extractor/xbef.py | 42 -- yt_dlp/extractor/xtube.py | 214 ---------- yt_dlp/extractor/xuite.py | 149 ------- yt_dlp/extractor/yesjapan.py | 56 --- yt_dlp/extractor/yinyuetai.py | 52 --- yt_dlp/extractor/ynet.py | 48 --- 125 files changed, 129 insertions(+), 10150 deletions(-) delete mode 100644 yt_dlp/extractor/airmozilla.py delete mode 100644 yt_dlp/extractor/atttechchannel.py delete mode 100644 yt_dlp/extractor/biqle.py delete mode 100644 yt_dlp/extractor/bitwave.py delete mode 100644 yt_dlp/extractor/breakcom.py delete mode 100644 yt_dlp/extractor/camwithher.py delete mode 100644 yt_dlp/extractor/carambatv.py delete mode 100644 yt_dlp/extractor/channel9.py delete mode 100644 yt_dlp/extractor/chirbit.py delete mode 100644 yt_dlp/extractor/cinchcast.py delete mode 100644 yt_dlp/extractor/clipsyndicate.py delete mode 100644 yt_dlp/extractor/cloudy.py delete mode 100644 yt_dlp/extractor/daftsex.py delete mode 100644 yt_dlp/extractor/defense.py delete mode 100644 yt_dlp/extractor/dotsub.py delete mode 100644 yt_dlp/extractor/echomsk.py delete mode 100644 yt_dlp/extractor/ehow.py delete mode 100644 yt_dlp/extractor/elevensports.py delete mode 100644 yt_dlp/extractor/ellentube.py delete mode 100644 yt_dlp/extractor/engadget.py delete mode 100644 yt_dlp/extractor/escapist.py delete mode 100644 yt_dlp/extractor/esri.py delete mode 100644 yt_dlp/extractor/expotv.py delete mode 100644 yt_dlp/extractor/extremetube.py delete mode 100644 yt_dlp/extractor/fourzerostudio.py delete mode 100644 yt_dlp/extractor/foxgay.py delete mode 100644 yt_dlp/extractor/fusion.py delete mode 100644 yt_dlp/extractor/gfycat.py delete mode 100644 yt_dlp/extractor/helsinki.py delete mode 100644 yt_dlp/extractor/hitbox.py delete mode 100644 yt_dlp/extractor/howcast.py delete mode 100644 yt_dlp/extractor/howstuffworks.py delete mode 100644 yt_dlp/extractor/keezmovies.py delete mode 100644 yt_dlp/extractor/laola1tv.py delete mode 100644 yt_dlp/extractor/linuxacademy.py delete mode 100644 yt_dlp/extractor/m6.py delete mode 100644 yt_dlp/extractor/meta.py delete mode 100644 yt_dlp/extractor/metacafe.py delete mode 100644 yt_dlp/extractor/mgoon.py delete mode 100644 yt_dlp/extractor/miomio.py delete mode 100644 yt_dlp/extractor/mnet.py delete mode 100644 yt_dlp/extractor/moevideo.py delete mode 100644 yt_dlp/extractor/mofosex.py delete mode 100644 yt_dlp/extractor/movieclips.py delete mode 100644 yt_dlp/extractor/mwave.py delete mode 100644 yt_dlp/extractor/mychannels.py delete mode 100644 yt_dlp/extractor/myvi.py delete mode 100644 yt_dlp/extractor/newstube.py delete mode 100644 yt_dlp/extractor/normalboots.py delete mode 100644 yt_dlp/extractor/nosvideo.py delete mode 100644 yt_dlp/extractor/ooyala.py delete mode 100644 yt_dlp/extractor/pandoratv.py delete mode 100644 yt_dlp/extractor/people.py delete mode 100644 yt_dlp/extractor/playfm.py delete mode 100644 yt_dlp/extractor/plays.py delete mode 100644 yt_dlp/extractor/playvid.py delete mode 100644 yt_dlp/extractor/porncom.py delete mode 100644 yt_dlp/extractor/pornez.py delete mode 100644 yt_dlp/extractor/pornhd.py delete mode 100644 yt_dlp/extractor/radiobremen.py delete mode 100644 yt_dlp/extractor/recurbate.py delete mode 100644 yt_dlp/extractor/rice.py delete mode 100644 yt_dlp/extractor/rtvnh.py delete mode 100644 yt_dlp/extractor/ruhd.py delete mode 100644 yt_dlp/extractor/shared.py delete mode 100644 yt_dlp/extractor/spankwire.py delete mode 100644 yt_dlp/extractor/streamcloud.py delete mode 100644 yt_dlp/extractor/swrmediathek.py delete mode 100644 yt_dlp/extractor/techtalks.py delete mode 100644 yt_dlp/extractor/tinypic.py delete mode 100644 yt_dlp/extractor/tokentube.py delete mode 100644 yt_dlp/extractor/trilulilu.py delete mode 100644 yt_dlp/extractor/tunepk.py delete mode 100644 yt_dlp/extractor/tvnet.py delete mode 100644 yt_dlp/extractor/tvnow.py delete mode 100644 yt_dlp/extractor/twentyfourvideo.py delete mode 100644 yt_dlp/extractor/unscripted.py delete mode 100644 yt_dlp/extractor/veehd.py delete mode 100644 yt_dlp/extractor/vidbit.py delete mode 100644 yt_dlp/extractor/vimple.py delete mode 100644 yt_dlp/extractor/vodlocker.py delete mode 100644 yt_dlp/extractor/voicerepublic.py delete mode 100644 yt_dlp/extractor/vrak.py delete mode 100644 yt_dlp/extractor/vrv.py delete mode 100644 yt_dlp/extractor/vshare.py delete mode 100644 yt_dlp/extractor/vupload.py delete mode 100644 yt_dlp/extractor/vyborymos.py delete mode 100644 yt_dlp/extractor/vzaar.py delete mode 100644 yt_dlp/extractor/wakanim.py delete mode 100644 yt_dlp/extractor/watchbox.py delete mode 100644 yt_dlp/extractor/watchindianporn.py delete mode 100644 yt_dlp/extractor/willow.py delete mode 100644 yt_dlp/extractor/xbef.py delete mode 100644 yt_dlp/extractor/xtube.py delete mode 100644 yt_dlp/extractor/xuite.py delete mode 100644 yt_dlp/extractor/yesjapan.py delete mode 100644 yt_dlp/extractor/yinyuetai.py delete mode 100644 yt_dlp/extractor/ynet.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index ad8c7d6611..9b96bd5b45 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -77,7 +77,6 @@ from .agora import ( WyborczaPodcastIE, WyborczaVideoIE, ) -from .airmozilla import AirMozillaIE from .airtv import AirTVIE from .aitube import AitubeKZVideoIE from .aljazeera import AlJazeeraIE @@ -147,7 +146,6 @@ from .arte import ( from .arnes import ArnesIE from .atresplayer import AtresPlayerIE from .atscaleconf import AtScaleConfEventIE -from .atttechchannel import ATTTechChannelIE from .atvat import ATVAtIE from .audimedia import AudiMediaIE from .audioboom import AudioBoomIE @@ -243,11 +241,6 @@ from .bitchute import ( BitChuteIE, BitChuteChannelIE, ) -from .bitwave import ( - BitwaveReplayIE, - BitwaveStreamIE, -) -from .biqle import BIQLEIE from .blackboardcollaborate import BlackboardCollaborateIE from .bleacherreport import ( BleacherReportIE, @@ -262,10 +255,7 @@ from .bostonglobe import BostonGlobeIE from .box import BoxIE from .boxcast import BoxCastVideoIE from .bpb import BpbIE -from .br import ( - BRIE, - BRMediathekIE, -) +from .br import BRIE from .bravotv import BravoTVIE from .brainpop import ( BrainPOPIE, @@ -275,7 +265,6 @@ from .brainpop import ( BrainPOPFrIE, BrainPOPIlIE, ) -from .breakcom import BreakIE from .breitbart import BreitBartIE from .brightcove import ( BrightcoveLegacyIE, @@ -305,16 +294,11 @@ from .camfm import ( from .cammodels import CamModelsIE from .camsoda import CamsodaIE from .camtasia import CamtasiaEmbedIE -from .camwithher import CamWithHerIE from .canal1 import Canal1IE from .canalalpha import CanalAlphaIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE from .caracoltv import CaracolTvPlayIE -from .carambatv import ( - CarambaTVIE, - CarambaTVPageIE, -) from .cartoonnetwork import CartoonNetworkIE from .cbc import ( CBCIE, @@ -353,7 +337,6 @@ from .cda import CDAIE from .cellebrite import CellebriteIE from .ceskatelevize import CeskaTelevizeIE from .cgtn import CGTNIE -from .channel9 import Channel9IE from .charlierose import CharlieRoseIE from .chaturbate import ChaturbateIE from .chilloutzone import ChilloutzoneIE @@ -361,11 +344,6 @@ from .chingari import ( ChingariIE, ChingariUserIE, ) -from .chirbit import ( - ChirbitIE, - ChirbitProfileIE, -) -from .cinchcast import CinchcastIE from .cinemax import CinemaxIE from .cinetecamilano import CinetecaMilanoIE from .cineverse import ( @@ -382,10 +360,8 @@ from .clipchamp import ClipchampIE from .cliphunter import CliphunterIE from .clippit import ClippitIE from .cliprs import ClipRsIE -from .clipsyndicate import ClipsyndicateIE from .closertotruth import CloserToTruthIE from .cloudflarestream import CloudflareStreamIE -from .cloudy import CloudyIE from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE @@ -452,7 +428,6 @@ from .dacast import ( DacastVODIE, DacastPlaylistIE, ) -from .daftsex import DaftsexIE from .dailymail import DailyMailIE from .dailymotion import ( DailymotionIE, @@ -489,7 +464,6 @@ from .dlf import ( from .dfb import DFBIE from .dhm import DHMIE from .digg import DiggIE -from .dotsub import DotsubIE from .douyutv import ( DouyuShowIE, DouyuTVIE, @@ -536,7 +510,6 @@ from .duboku import ( DubokuPlaylistIE ) from .dumpert import DumpertIE -from .defense import DefenseGouvFrIE from .deuxm import ( DeuxMIE, DeuxMNewsIE @@ -559,26 +532,17 @@ from .dw import ( from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE from .ebaumsworld import EbaumsWorldIE from .ebay import EbayIE -from .echomsk import EchoMskIE from .egghead import ( EggheadCourseIE, EggheadLessonIE, ) -from .ehow import EHowIE from .eighttracks import EightTracksIE from .einthusan import EinthusanIE from .eitb import EitbIE -from .elevensports import ElevenSportsIE -from .ellentube import ( - EllenTubeIE, - EllenTubeVideoIE, - EllenTubePlaylistIE, -) from .elonet import ElonetIE from .elpais import ElPaisIE from .eltrecetv import ElTreceTVIE from .embedly import EmbedlyIE -from .engadget import EngadgetIE from .epicon import ( EpiconIE, EpiconSeriesIE, @@ -596,7 +560,6 @@ from .ertgr import ( ERTFlixIE, ERTWebtvEmbedIE, ) -from .escapist import EscapistIE from .espn import ( ESPNIE, WatchESPNIE, @@ -604,15 +567,12 @@ from .espn import ( FiveThirtyEightIE, ESPNCricInfoIE, ) -from .esri import EsriVideoIE from .ettutv import EttuTvIE from .europa import EuropaIE, EuroParlWebstreamIE from .europeantour import EuropeanTourIE from .eurosport import EurosportIE from .euscreen import EUScreenIE -from .expotv import ExpoTVIE from .expressen import ExpressenIE -from .extremetube import ExtremeTubeIE from .eyedotv import EyedoTVIE from .facebook import ( FacebookIE, @@ -655,16 +615,11 @@ from .fourtube import ( PornerBrosIE, FuxIE, ) -from .fourzerostudio import ( - FourZeroStudioArchiveIE, - FourZeroStudioClipIE, -) from .fox import FOXIE from .fox9 import ( FOX9IE, FOX9NewsIE, ) -from .foxgay import FoxgayIE from .foxnews import ( FoxNewsIE, FoxNewsArticleIE, @@ -697,7 +652,6 @@ from .funimation import ( ) from .funk import FunkIE from .funker530 import Funker530IE -from .fusion import FusionIE from .fuyintv import FuyinTVIE from .gab import ( GabTVIE, @@ -728,7 +682,6 @@ from .gettr import ( GettrIE, GettrStreamingIE, ) -from .gfycat import GfycatIE from .giantbomb import GiantBombIE from .giga import GigaIE from .glide import GlideIE @@ -774,12 +727,10 @@ from .hbo import HBOIE from .hearthisat import HearThisAtIE from .heise import HeiseIE from .hellporno import HellPornoIE -from .helsinki import HelsinkiIE from .hgtv import HGTVComShowIE from .hketv import HKETVIE from .hidive import HiDiveIE from .historicfilms import HistoricFilmsIE -from .hitbox import HitboxIE, HitboxLiveIE from .hitrecord import HitRecordIE from .hollywoodreporter import ( HollywoodReporterIE, @@ -794,8 +745,6 @@ from .hotstar import ( HotStarSeasonIE, HotStarSeriesIE, ) -from .howcast import HowcastIE -from .howstuffworks import HowStuffWorksIE from .hrefli import HrefLiRedirectIE from .hrfensehen import HRFernsehenIE from .hrti import ( @@ -927,7 +876,6 @@ from .kanal2 import Kanal2IE from .kankanews import KankaNewsIE from .karaoketv import KaraoketvIE from .karrierevideos import KarriereVideosIE -from .keezmovies import KeezMoviesIE from .kelbyone import KelbyOneIE from .khanacademy import ( KhanAcademyIE, @@ -962,12 +910,6 @@ from .la7 import ( LA7PodcastEpisodeIE, LA7PodcastIE, ) -from .laola1tv import ( - Laola1TvEmbedIE, - Laola1TvIE, - EHFTVIE, - ITTFIE, -) from .lastfm import ( LastFMIE, LastFMPlaylistIE, @@ -1022,7 +964,6 @@ from .linkedin import ( LinkedInLearningIE, LinkedInLearningCourseIE, ) -from .linuxacademy import LinuxAcademyIE from .liputan6 import Liputan6IE from .listennotes import ListenNotesIE from .litv import LiTVIE @@ -1050,7 +991,6 @@ from .lynda import ( LyndaIE, LyndaCourseIE ) -from .m6 import M6IE from .magellantv import MagellanTVIE from .magentamusik360 import MagentaMusik360IE from .mailru import ( @@ -1101,10 +1041,7 @@ from .medici import MediciIE from .megaphone import MegaphoneIE from .meipai import MeipaiIE from .melonvod import MelonVODIE -from .meta import METAIE -from .metacafe import MetacafeIE from .metacritic import MetacriticIE -from .mgoon import MgoonIE from .mgtv import MGTVIE from .miaopai import MiaoPaiIE from .microsoftstream import MicrosoftStreamIE @@ -1126,7 +1063,6 @@ from .minds import ( ) from .ministrygrid import MinistryGridIE from .minoto import MinotoIE -from .miomio import MioMioIE from .mirrativ import ( MirrativIE, MirrativUserIE, @@ -1150,13 +1086,7 @@ from .mlb import ( MLBArticleIE, ) from .mlssoccer import MLSSoccerIE -from .mnet import MnetIE from .mocha import MochaVideoIE -from .moevideo import MoeVideoIE -from .mofosex import ( - MofosexIE, - MofosexEmbedIE, -) from .mojvideo import MojvideoIE from .monstercat import MonstercatIE from .morningstar import MorningstarIE @@ -1166,7 +1096,6 @@ from .motherless import ( MotherlessGalleryIE, ) from .motorsport import MotorsportIE -from .movieclips import MovieClipsIE from .moviepilot import MoviepilotIE from .moview import MoviewPlayIE from .moviezine import MoviezineIE @@ -1191,18 +1120,12 @@ from .musicdex import ( MusicdexArtistIE, MusicdexPlaylistIE, ) -from .mwave import MwaveIE, MwaveMeetGreetIE from .mxplayer import ( MxplayerIE, MxplayerShowIE, ) -from .mychannels import MyChannelsIE from .myspace import MySpaceIE, MySpaceAlbumIE from .myspass import MySpassIE -from .myvi import ( - MyviIE, - MyviEmbedIE, -) from .myvideoge import MyVideoGeIE from .myvidster import MyVidsterIE from .mzaalo import MzaaloIE @@ -1278,7 +1201,6 @@ from .newgrounds import ( NewgroundsUserIE, ) from .newspicks import NewsPicksIE -from .newstube import NewstubeIE from .newsy import NewsyIE from .nextmedia import ( NextMediaIE, @@ -1313,7 +1235,6 @@ from .nick import ( NickIE, NickBrIE, NickDeIE, - NickNightIE, NickRuIE, ) from .niconico import ( @@ -1346,8 +1267,6 @@ from .noice import NoicePodcastIE from .nonktube import NonkTubeIE from .noodlemagazine import NoodleMagazineIE from .noovo import NoovoIE -from .normalboots import NormalbootsIE -from .nosvideo import NosVideoIE from .nosnl import NOSNLArticleIE from .nova import ( NovaEmbedIE, @@ -1422,10 +1341,6 @@ from .onet import ( OnetPlIE, ) from .onionstudios import OnionStudiosIE -from .ooyala import ( - OoyalaIE, - OoyalaExternalIE, -) from .opencast import ( OpencastIE, OpencastPlaylistIE, @@ -1454,7 +1369,6 @@ from .palcomp3 import ( PalcoMP3ArtistIE, PalcoMP3VideoIE, ) -from .pandoratv import PandoraTVIE from .panopto import ( PanoptoIE, PanoptoListIE, @@ -1482,7 +1396,6 @@ from .peloton import ( PelotonIE, PelotonLiveIE ) -from .people import PeopleIE from .performgroup import PerformGroupIE from .periscope import ( PeriscopeIE, @@ -1514,13 +1427,10 @@ from .platzi import ( PlatziIE, PlatziCourseIE, ) -from .playfm import PlayFMIE from .playplustv import PlayPlusTVIE -from .plays import PlaysTVIE from .playstuff import PlayStuffIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE -from .playvid import PlayvidIE from .playwire import PlaywireIE from .plutotv import PlutoTVIE from .pluralsight import ( @@ -1552,9 +1462,7 @@ from .popcorntimes import PopcorntimesIE from .popcorntv import PopcornTVIE from .porn91 import Porn91IE from .pornbox import PornboxIE -from .porncom import PornComIE from .pornflip import PornFlipIE -from .pornhd import PornHdIE from .pornhub import ( PornHubIE, PornHubUserIE, @@ -1565,7 +1473,6 @@ from .pornhub import ( from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE -from .pornez import PornezIE from .puhutv import ( PuhuTVIE, PuhuTVSerieIE, @@ -1609,7 +1516,6 @@ from .radiocomercial import ( ) from .radiode import RadioDeIE from .radiojavan import RadioJavanIE -from .radiobremen import RadioBremenIE from .radiofrance import ( FranceCultureIE, RadioFranceIE, @@ -1661,7 +1567,6 @@ from .rcti import ( RCTIPlusTVIE, ) from .rds import RDSIE -from .recurbate import RecurbateIE from .redbee import ParliamentLiveUKIE, RTBFIE from .redbulltv import ( RedBullTVIE, @@ -1685,7 +1590,6 @@ from .restudy import RestudyIE from .reuters import ReutersIE from .reverbnation import ReverbNationIE from .rheinmaintv import RheinMainTVIE -from .rice import RICEIE from .rmcdecouverte import RMCDecouverteIE from .rockstargames import RockstarGamesIE from .rokfin import ( @@ -1709,11 +1613,7 @@ from .rtlnl import ( RTLLuLiveIE, RTLLuRadioIE, ) -from .rtl2 import ( - RTL2IE, - RTL2YouIE, - RTL2YouSeriesIE, -) +from .rtl2 import RTL2IE from .rtnews import ( RTNewsIE, RTDocumentryIE, @@ -1735,10 +1635,8 @@ from .rtve import ( RTVEInfantilIE, RTVETelevisionIE, ) -from .rtvnh import RTVNHIE from .rtvs import RTVSIE from .rtvslo import RTVSLOIE -from .ruhd import RUHDIE from .rule34video import Rule34VideoIE from .rumble import ( RumbleEmbedIE, @@ -1820,10 +1718,6 @@ from .shahid import ( ShahidIE, ShahidShowIE, ) -from .shared import ( - SharedIE, - VivoIE, -) from .sharevideos import ShareVideosEmbedIE from .sibnet import SibnetEmbedIE from .shemaroome import ShemarooMeIE @@ -1901,7 +1795,6 @@ from .spankbang import ( SpankBangIE, SpankBangPlaylistIE, ) -from .spankwire import SpankwireIE from .spiegel import SpiegelIE from .spike import ( BellatorIE, @@ -1951,7 +1844,6 @@ from .storyfire import ( StoryFireSeriesIE, ) from .streamable import StreamableIE -from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streamff import StreamFFIE from .streetvoice import StreetVoiceIE @@ -1971,7 +1863,6 @@ from .svt import ( SVTSeriesIE, ) from .swearnet import SwearnetEpisodeIE -from .swrmediathek import SWRMediathekIE from .syvdk import SYVDKIE from .syfy import SyfyIE from .sztvhu import SztvHuIE @@ -1998,7 +1889,6 @@ from .teamcoco import ( ConanClassicIE, ) from .teamtreehouse import TeamTreeHouseIE -from .techtalks import TechTalksIE from .ted import ( TedEmbedIE, TedPlaylistIE, @@ -2075,7 +1965,6 @@ from .tiktok import ( TikTokLiveIE, DouyinIE, ) -from .tinypic import TinyPicIE from .tmz import TMZIE from .tnaflix import ( TNAFlixNetworkEmbedIE, @@ -2090,10 +1979,6 @@ from .toggle import ( from .toggo import ( ToggoIE, ) -from .tokentube import ( - TokentubeIE, - TokentubeChannelIE -) from .tonline import TOnlineIE from .toongoggles import ToonGogglesIE from .toutv import TouTvIE @@ -2104,7 +1989,6 @@ from .triller import ( TrillerUserIE, TrillerShortIE, ) -from .trilulilu import TriluliluIE from .trovo import ( TrovoIE, TrovoVodIE, @@ -2129,7 +2013,6 @@ from .tunein import ( TuneInPodcastEpisodeIE, TuneInShortenerIE, ) -from .tunepk import TunePkIE from .turbo import TurboIE from .tv2 import ( TV2IE, @@ -2171,16 +2054,7 @@ from .tvigle import TvigleIE from .tviplayer import TVIPlayerIE from .tvland import TVLandIE from .tvn24 import TVN24IE -from .tvnet import TVNetIE from .tvnoe import TVNoeIE -from .tvnow import ( - TVNowIE, - TVNowFilmIE, - TVNowNewIE, - TVNowSeasonIE, - TVNowAnnualIE, - TVNowShowIE, -) from .tvopengr import ( TVOpenGrWatchIE, TVOpenGrEmbedIE, @@ -2198,7 +2072,6 @@ from .tvplay import ( ) from .tvplayer import TVPlayerIE from .tweakers import TweakersIE -from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE from .twentythreevideo import TwentyThreeVideoIE from .twitcasting import ( @@ -2247,7 +2120,6 @@ from .drooble import DroobleIE from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE -from .unscripted import UnscriptedNewsVideoIE from .unsupported import KnownDRMIE, KnownPiracyIE from .uol import UOLIE from .uplynk import ( @@ -2266,7 +2138,6 @@ from .ustudio import ( from .utreon import UtreonIE from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE -from .veehd import VeeHDIE from .veo import VeoIE from .veoh import ( VeohIE, @@ -2288,7 +2159,6 @@ from .vice import ( ViceArticleIE, ViceShowIE, ) -from .vidbit import VidbitIE from .viddler import ViddlerIE from .videa import VideaIE from .videocampus_sachsen import ( @@ -2339,7 +2209,6 @@ from .vimm import ( VimmIE, VimmRecordingIE, ) -from .vimple import VimpleIE from .vine import ( VineIE, VineUserIE, @@ -2363,10 +2232,8 @@ from .vk import ( VKPlayLiveIE, ) from .vocaroo import VocarooIE -from .vodlocker import VodlockerIE from .vodpl import VODPlIE from .vodplatform import VODPlatformIE -from .voicerepublic import VoiceRepublicIE from .voicy import ( VoicyIE, VoicyChannelIE, @@ -2386,23 +2253,13 @@ from .vrt import ( KetnetIE, DagelijkseKostIE, ) -from .vrak import VrakIE -from .vrv import ( - VRVIE, - VRVSeriesIE, -) -from .vshare import VShareIE from .vtm import VTMIE from .medialaan import MedialaanIE from .vuclip import VuClipIE -from .vupload import VuploadIE from .vvvvid import ( VVVVIDIE, VVVVIDShowIE, ) -from .vyborymos import VyboryMosIE -from .vzaar import VzaarIE -from .wakanim import WakanimIE from .walla import WallaIE from .washingtonpost import ( WashingtonPostIE, @@ -2414,8 +2271,6 @@ from .wasdtv import ( WASDTVClipIE, ) from .wat import WatIE -from .watchbox import WatchBoxIE -from .watchindianporn import WatchIndianPornIE from .wdr import ( WDRIE, WDRPageIE, @@ -2449,7 +2304,6 @@ from .wevidi import WeVidiIE from .weyyak import WeyyakIE from .whyp import WhypIE from .wikimedia import WikimediaIE -from .willow import WillowIE from .wimbledon import WimbledonIE from .wimtv import WimTVIE from .whowatch import WhoWatchIE @@ -2483,7 +2337,6 @@ from .wykop import ( WykopPostCommentIE, ) from .xanimu import XanimuIE -from .xbef import XBefIE from .xboxclips import XboxClipsIE from .xfileshare import XFileShareIE from .xhamster import ( @@ -2499,8 +2352,6 @@ from .xinpianchang import XinpianchangIE from .xminus import XMinusIE from .xnxx import XNXXIE from .xstream import XstreamIE -from .xtube import XTubeUserIE, XTubeIE -from .xuite import XuiteIE from .xvideos import ( XVideosIE, XVideosQuickiesIE @@ -2530,10 +2381,7 @@ from .yappy import ( YappyIE, YappyProfileIE, ) -from .yesjapan import YesJapanIE -from .yinyuetai import YinYueTaiIE from .yle_areena import YleAreenaIE -from .ynet import YnetIE from .youjizz import YouJizzIE from .youku import ( YoukuIE, diff --git a/yt_dlp/extractor/airmozilla.py b/yt_dlp/extractor/airmozilla.py deleted file mode 100644 index 669556b98f..0000000000 --- a/yt_dlp/extractor/airmozilla.py +++ /dev/null @@ -1,63 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - parse_iso8601, -) - - -class AirMozillaIE(InfoExtractor): - _VALID_URL = r'https?://air\.mozilla\.org/(?P[0-9a-z-]+)/?' - _TEST = { - 'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/', - 'md5': '8d02f53ee39cf006009180e21df1f3ba', - 'info_dict': { - 'id': '6x4q2w', - 'ext': 'mp4', - 'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco', - 'thumbnail': r're:https?://.*/poster\.jpg', - 'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...', - 'timestamp': 1422487800, - 'upload_date': '20150128', - 'location': 'SFO Commons', - 'duration': 3780, - 'view_count': int, - 'categories': ['Main', 'Privacy'], - } - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id') - - embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id) - jwconfig = self._parse_json(self._search_regex( - r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config'] - - info_dict = self._parse_jwplayer_data(jwconfig, video_id) - view_count = int_or_none(self._html_search_regex( - r'Views since archived: ([0-9]+)', - webpage, 'view count', fatal=False)) - timestamp = parse_iso8601(self._html_search_regex( - r'