diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml
index af0320569c..cdbb867603 100644
--- a/.github/ISSUE_TEMPLATE/1_broken_site.yml
+++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml
@@ -1,5 +1,5 @@
 name: Broken site
-description: Report broken or misfunctioning site
+description: Report error in a supported site
 labels: [triage, site-bug]
 body:
   - type: checkboxes
@@ -7,7 +7,7 @@ body:
       label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
       description: Fill all fields even if you think it is irrelevant for the issue
       options:
-        - label: I understand that I will be **blocked** if I remove or skip any mandatory\* field
+        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
           required: true
   - type: checkboxes
     id: checklist
@@ -16,15 +16,15 @@ body:
       description: |
         Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
       options:
-        - label: I'm reporting a broken site
+        - label: I'm reporting that a **supported** site is broken
           required: true
-        - label: I've verified that I'm running yt-dlp version **2022.09.01** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.03.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
           required: true
         - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
           required: true
-        - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
           required: true
         - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
           required: true
@@ -50,6 +50,8 @@ body:
       options:
         - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU <your command line>`)
           required: true
+        - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
+          required: false
         - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
           required: true
   - type: textarea
@@ -62,7 +64,7 @@ body:
        [debug] Command-line config: ['-vU', 'test:youtube']
        [debug] Portable config "yt-dlp.conf": ['-i']
        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-       [debug] yt-dlp version 2022.09.01 [9d339c4] (win32_exe)
+       [debug] yt-dlp version 2023.03.04 [9d339c4] (win32_exe)
        [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
        [debug] Checking exe version: ffmpeg -bsfs
        [debug] Checking exe version: ffprobe -bsfs
@@ -70,8 +72,8 @@ body:
        [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
        [debug] Proxy map: {}
        [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-       Latest version: 2022.09.01, Current version: 2022.09.01
-       yt-dlp is up to date (2022.09.01)
+       Latest version: 2023.03.04, Current version: 2023.03.04
+       yt-dlp is up to date (2023.03.04)
       render: shell
     validations:
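The new checklist line added above points API users at the `verbose` parameter. For reference, a minimal sketch of what that looks like when embedding yt-dlp (the URL below is a placeholder, not part of the template):

```python
# API counterpart of the `-v` part of `yt-dlp -vU <your command line>`:
# passing 'verbose': True to YoutubeDL prints the same `[debug] ...` output
# that the template asks reporters to copy in full.
from yt_dlp import YoutubeDL

with YoutubeDL({'verbose': True}) as ydl:
    ydl.download(['https://example.com/watch/placeholder'])  # placeholder URL
```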
diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml
index 55ee9d3b7e..890df48fac 100644
--- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml
+++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml
@@ -7,7 +7,7 @@ body:
       label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
       description: Fill all fields even if you think it is irrelevant for the issue
       options:
-        - label: I understand that I will be **blocked** if I remove or skip any mandatory\* field
+        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
           required: true
   - type: checkboxes
     id: checklist
@@ -18,13 +18,13 @@ body:
       options:
         - label: I'm reporting a new site support request
           required: true
-        - label: I've verified that I'm running yt-dlp version **2022.09.01** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+        - label: I've verified that I'm running yt-dlp version **2023.03.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
           required: true
         - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
           required: true
         - label: I've checked that none of provided URLs [violate any copyrights](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge
           required: true
-        - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
+        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**.
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true @@ -62,6 +62,8 @@ body: options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea @@ -74,7 +76,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.09.01 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.03.04 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -82,8 +84,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.09.01, Current version: 2022.09.01 - yt-dlp is up to date (2022.09.01) + Latest version: 2023.03.04, Current version: 2023.03.04 + yt-dlp is up to date (2023.03.04) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index a3a786e387..ef9bda36a8 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -7,7 +7,7 @@ body: label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE description: Fill all fields even if you think it is irrelevant for the issue options: - - label: I understand that I will be **blocked** if I remove or skip any mandatory\* field + - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field required: true - type: checkboxes id: checklist @@ -18,11 +18,11 @@ body: options: - label: I'm requesting a site-specific feature required: true - - label: I've verified that I'm running yt-dlp version **2022.09.01** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.03.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true @@ -58,6 +58,8 @@ body: options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea @@ -70,7 +72,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.09.01 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.03.04 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -78,8 +80,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.09.01, Current version: 2022.09.01 - yt-dlp is up to date (2022.09.01) + Latest version: 2023.03.04, Current version: 2023.03.04 + yt-dlp is up to date (2023.03.04) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 4613fd35d1..bf1d97bbae 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -7,7 +7,7 @@ body: label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE description: Fill all fields even if you think it is irrelevant for the issue options: - - label: I understand that I will be **blocked** if I remove or skip any mandatory\* field + - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field required: true - type: checkboxes id: checklist @@ -18,13 +18,13 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2022.09.01** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.03.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true @@ -43,6 +43,8 @@ body: options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea @@ -55,7 +57,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.09.01 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.03.04 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -63,8 +65,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.09.01, Current version: 2022.09.01 - yt-dlp is up to date (2022.09.01) + Latest version: 2023.03.04, Current version: 2023.03.04 + yt-dlp is up to date (2023.03.04) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 0eaee4441b..b17c656587 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -7,7 +7,7 @@ body: label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE description: Fill all fields even if you think it is irrelevant for the issue options: - - label: I understand that I will be **blocked** if I remove or skip any mandatory\* field + - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field required: true - type: checkboxes id: checklist @@ -20,9 +20,9 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2022.09.01** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.03.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true @@ -40,6 +40,8 @@ body: label: Provide verbose output that clearly demonstrates the problem options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below - type: textarea id: log @@ -51,7 +53,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.09.01 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.03.04 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -59,7 +61,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.09.01, Current version: 2022.09.01 - yt-dlp is up to date (2022.09.01) + Latest version: 2023.03.04, Current version: 2023.03.04 + yt-dlp is up to date (2023.03.04) render: shell diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index acfbeb74b9..c694e5a5a1 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -7,7 +7,7 @@ body: label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE description: Fill all fields even if you think it is irrelevant for the issue options: - - label: I understand that I will be **blocked** if I remove or skip any mandatory\* field + - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field required: true - type: markdown attributes: @@ -26,9 +26,9 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2022.09.01** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.03.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true @@ -46,6 +46,8 @@ body: label: Provide verbose output that clearly demonstrates the problem options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below - type: textarea id: log @@ -57,7 +59,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2022.09.01 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.03.04 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -65,7 +67,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2022.09.01, Current version: 2022.09.01 - yt-dlp is up to date (2022.09.01) + Latest version: 2023.03.04, Current version: 2023.03.04 + yt-dlp is up to date (2023.03.04) render: shell diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml index e1b1e51380..1f6f926341 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml @@ -1,5 +1,5 @@ name: Broken site -description: Report broken or misfunctioning site +description: Report error in a supported site labels: [triage, site-bug] body: %(no_skip)s @@ -10,7 +10,7 @@ body: description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - - label: I'm reporting a broken site + - label: I'm reporting that a **supported** site is broken required: true - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true @@ -18,7 +18,7 @@ body: required: true - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml index 12a1c65987..75d62e7bb2 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml @@ -18,7 +18,7 @@ body: required: true - label: I've checked that none of provided URLs [violate any copyrights](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml index 2b46650f70..18b30f5783 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml @@ -16,7 +16,7 @@ body: required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml index 377efbe338..90f59e70b0 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml @@ -18,7 +18,7 @@ body: required: true - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml index 8bbc5d733f..ef3bb22693 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml @@ -16,7 +16,7 @@ body: required: true - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml index ee09e82a38..4bef82d5af 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml @@ -22,7 +22,7 @@ body: required: true - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates + - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 5abc6ce41e..c4d3e812e2 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,8 +2,6 @@ ### Description of your *pull request* and other information - - +### 2023.03.04 + +#### Extractor changes +- bilibili + - [Fix for downloading wrong subtitles](https://github.com/yt-dlp/yt-dlp/commit/8a83baaf218ab89e6e7faa76b7c7be3a2ec19e3a) ([#6358](https://github.com/yt-dlp/yt-dlp/issues/6358)) by [LXYan2333](https://github.com/LXYan2333) +- ESPNcricinfo + - [Handle new URL pattern](https://github.com/yt-dlp/yt-dlp/commit/640c934823fc2d1ec77ec932566078014058635f) ([#6321](https://github.com/yt-dlp/yt-dlp/issues/6321)) by [venkata-krishnas](https://github.com/venkata-krishnas) +- lefigaro + - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/eb8fd6d044e8926532772b72be0645c6b8ecb3aa) ([#6309](https://github.com/yt-dlp/yt-dlp/issues/6309)) by [elyse0](https://github.com/elyse0) +- lumni + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1f8489cccbdc6e96027ef527b88717458f0900e8) ([#6302](https://github.com/yt-dlp/yt-dlp/issues/6302)) by [carusocr](https://github.com/carusocr) +- Prankcast + - [Fix tags](https://github.com/yt-dlp/yt-dlp/commit/ed4cc4ea793314c50ae3f82e98248c1de1c25694) ([#6316](https://github.com/yt-dlp/yt-dlp/issues/6316)) by [columndeeply](https://github.com/columndeeply) +- rutube + - [Extract chapters from description](https://github.com/yt-dlp/yt-dlp/commit/22ccd5420b3eb0782776071f12cccd1fedaa1fd0) ([#6345](https://github.com/yt-dlp/yt-dlp/issues/6345)) by [mushbite](https://github.com/mushbite) +- SportDeutschland + - [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/45db357289b4e1eec09093c8bc5446520378f426) by [pukkandan](https://github.com/pukkandan) +- telecaribe + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b40471282286bd2b09c485bf79afd271d229272c) ([#6311](https://github.com/yt-dlp/yt-dlp/issues/6311)) by [elyse0](https://github.com/elyse0) +- tubetugraz + - [Support `--twofactor` (#6424)](https://github.com/yt-dlp/yt-dlp/commit/f44cb4e77bb9be8be291d02ab6f79dc0b4c0d4a1) ([#6427](https://github.com/yt-dlp/yt-dlp/issues/6427)) by [Ferdi265](https://github.com/Ferdi265) +- tunein + - [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/46580ced56c90b559885aded6aa8f46f20a9cdce) ([#6310](https://github.com/yt-dlp/yt-dlp/issues/6310)) by [elyse0](https://github.com/elyse0) +- twitch + - [Update for GraphQL API changes](https://github.com/yt-dlp/yt-dlp/commit/4a6272c6d1bff89969b67cd22b26ebe6d7e72279) ([#6318](https://github.com/yt-dlp/yt-dlp/issues/6318)) by [elyse0](https://github.com/elyse0) +- twitter + - [Fix retweet extraction](https://github.com/yt-dlp/yt-dlp/commit/cf605226521e99c89fc8dff26a319025810e63a0) ([#6422](https://github.com/yt-dlp/yt-dlp/issues/6422)) by [selfisekai](https://github.com/selfisekai) +- xvideos + - quickies: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/283a0b5bc511f3b350eead4488158f50c20ec526) ([#6414](https://github.com/yt-dlp/yt-dlp/issues/6414)) by [Yakabuff](https://github.com/Yakabuff) + +#### Misc. 
changes +- build + - [Fix publishing to PyPI and homebrew](https://github.com/yt-dlp/yt-dlp/commit/55676fe498345a389a2539d8baaba958d6d61c3e) by [bashonly](https://github.com/bashonly) + - [Only archive if `vars.ARCHIVE_REPO` is set](https://github.com/yt-dlp/yt-dlp/commit/08ff6d59f97b5f5f0128f6bf6fbef56fd836cc52) by [Grub4K](https://github.com/Grub4K) +- cleanup + - Miscellaneous: [392389b](https://github.com/yt-dlp/yt-dlp/commit/392389b7df7b818f794b231f14dc396d4875fbad) by [pukkandan](https://github.com/pukkandan) +- devscripts + - `make_changelog`: [Stop at `Release ...` commit](https://github.com/yt-dlp/yt-dlp/commit/7accdd9845fe7ce9d0aa5a9d16faaa489c1294eb) by [pukkandan](https://github.com/pukkandan) + +### 2023.03.03 + +#### Important changes +- **A new release type has been added!** + * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs). + * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`). + * The `--update-to` option has been added allowing the user more control over program upgrades (or downgrades). + * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags. + * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG` +- **YouTube throttling fixes!** + +#### Core changes +- [Add option `--break-match-filters`](https://github.com/yt-dlp/yt-dlp/commit/fe2ce85aff0aa03735fc0152bb8cb9c3d4ef0753) by [pukkandan](https://github.com/pukkandan) +- [Fix `--break-on-existing` with `--lazy-playlist`](https://github.com/yt-dlp/yt-dlp/commit/d21056f4cf0a1623daa107f9181074f5725ac436) by [pukkandan](https://github.com/pukkandan) +- dependencies + - [Simplify `Cryptodome`](https://github.com/yt-dlp/yt-dlp/commit/65f6e807804d2af5e00f2aecd72bfc43af19324a) by [pukkandan](https://github.com/pukkandan) +- jsinterp + - [Handle `Date` at epoch 0](https://github.com/yt-dlp/yt-dlp/commit/9acf1ee25f7ad3920ede574a9de95b8c18626af4) by [pukkandan](https://github.com/pukkandan) +- plugins + - [Don't look in `.egg` directories](https://github.com/yt-dlp/yt-dlp/commit/b059188383eee4fa336ef728dda3ff4bb7335625) by [pukkandan](https://github.com/pukkandan) +- update + - [Add option `--update-to`, including to nightly](https://github.com/yt-dlp/yt-dlp/commit/77df20f14cc9ed41dfe3a1fe2d77fd27f5365a94) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) +- utils + - `LenientJSONDecoder`: [Parse unclosed objects](https://github.com/yt-dlp/yt-dlp/commit/cc09083636ce21e58ff74f45eac2dbda507462b0) by [pukkandan](https://github.com/pukkandan) + - `Popen`: [Shim undocumented `text_mode` property](https://github.com/yt-dlp/yt-dlp/commit/da8e2912b165005f76779a115a071cd6132ceedf) by [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- [Fix DRM detection in m3u8](https://github.com/yt-dlp/yt-dlp/commit/43a3eaf96393b712d60cbcf5c6cb1e90ed7f42f5) by [pukkandan](https://github.com/pukkandan) +- generic + - [Detect manifest links via extension](https://github.com/yt-dlp/yt-dlp/commit/b38cae49e6f4849c8ee2a774bdc3c1c647ae5f0e) by [bashonly](https://github.com/bashonly) + - [Handle basic-auth when checking redirects](https://github.com/yt-dlp/yt-dlp/commit/8e9fe43cd393e69fa49b3d842aa3180c1d105b8f) by [pukkandan](https://github.com/pukkandan) +- 
GoogleDrive + - [Fix some audio](https://github.com/yt-dlp/yt-dlp/commit/4d248e29d20d983ededab0b03d4fe69dff9eb4ed) by [pukkandan](https://github.com/pukkandan) +- iprima + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9fddc12ab022a31754e0eaa358fc4e1dfa974587) ([#6291](https://github.com/yt-dlp/yt-dlp/issues/6291)) by [std-move](https://github.com/std-move) +- mediastream + - [Improve WinSports support](https://github.com/yt-dlp/yt-dlp/commit/2d5a8c5db2bd4ff1c2e45e00cd890a10f8ffca9e) ([#6401](https://github.com/yt-dlp/yt-dlp/issues/6401)) by [bashonly](https://github.com/bashonly) +- ntvru + - [Extract HLS and DASH formats](https://github.com/yt-dlp/yt-dlp/commit/77d6d136468d0c23c8e79bc937898747804f585a) ([#6403](https://github.com/yt-dlp/yt-dlp/issues/6403)) by [bashonly](https://github.com/bashonly) +- tencent + - [Add more formats and info](https://github.com/yt-dlp/yt-dlp/commit/18d295c9e0f95adc179eef345b7af64d6372db78) ([#5950](https://github.com/yt-dlp/yt-dlp/issues/5950)) by [Hill-98](https://github.com/Hill-98) +- yle_areena + - [Extract non-Kaltura videos](https://github.com/yt-dlp/yt-dlp/commit/40d77d89027cd0e0ce31d22aec81db3e1d433900) ([#6402](https://github.com/yt-dlp/yt-dlp/issues/6402)) by [bashonly](https://github.com/bashonly) +- youtube + - [Construct dash formats with `range` query](https://github.com/yt-dlp/yt-dlp/commit/5038f6d713303e0967d002216e7a88652401c22a) by [pukkandan](https://github.com/pukkandan) (With fixes in [f34804b](https://github.com/yt-dlp/yt-dlp/commit/f34804b2f920f62a6e893a14a9e2a2144b14dd23) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)) + - [Detect and break on looping comments](https://github.com/yt-dlp/yt-dlp/commit/7f51861b1820c37b157a239b1fe30628d907c034) ([#6301](https://github.com/yt-dlp/yt-dlp/issues/6301)) by [coletdjnz](https://github.com/coletdjnz) + - [Extract channel `view_count` when `/about` tab is passed](https://github.com/yt-dlp/yt-dlp/commit/31e183557fcd1b937582f9429f29207c1261f501) by [pukkandan](https://github.com/pukkandan) + +#### Misc. 
changes +- build + - [Add `cffi` as a dependency for `yt_dlp_linux`](https://github.com/yt-dlp/yt-dlp/commit/776d1c3f0c9b00399896dd2e40e78e9a43218109) by [bashonly](https://github.com/bashonly) + - [Automated builds and nightly releases](https://github.com/yt-dlp/yt-dlp/commit/29cb20bd563c02671b31dd840139e93dd37150a1) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) (With fixes in [bfc861a](https://github.com/yt-dlp/yt-dlp/commit/bfc861a91ee65c9b0ac169754f512e052c6827cf) by [pukkandan](https://github.com/pukkandan)) + - [Sign SHA files and release public key](https://github.com/yt-dlp/yt-dlp/commit/12647e03d417feaa9ea6a458bea5ebd747494a53) by [Grub4K](https://github.com/Grub4K) +- cleanup + - [Fix `Changelog`](https://github.com/yt-dlp/yt-dlp/commit/17ca19ab60a6a13eb8a629c51442b5248b0d8394) by [pukkandan](https://github.com/pukkandan) + - jsinterp: [Give functions names to help debugging](https://github.com/yt-dlp/yt-dlp/commit/b2e0343ba0fc5d8702e90f6ba2b71358e2677e0b) by [pukkandan](https://github.com/pukkandan) + - Miscellaneous: [4815bbf](https://github.com/yt-dlp/yt-dlp/commit/4815bbfc41cf641e4a0650289dbff968cb3bde76), [5b28cef](https://github.com/yt-dlp/yt-dlp/commit/5b28cef72db3b531680d89c121631c73ae05354f) by [pukkandan](https://github.com/pukkandan) +- devscripts + - [Script to generate changelog](https://github.com/yt-dlp/yt-dlp/commit/d400e261cf029a3f20d364113b14de973be75404) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [Grub4K](https://github.com/Grub4K) (With fixes in [9344964](https://github.com/yt-dlp/yt-dlp/commit/93449642815a6973a4b09b289982ca7e1f961b5f)) + +### 2023.02.17 + +* Merge youtube-dl: Upto [commit/2dd6c6e](https://github.com/ytdl-org/youtube-dl/commit/2dd6c6e) +* Fix `--concat-playlist` +* Imply `--no-progress` when `--print` +* Improve default subtitle language selection by [sdht0](https://github.com/sdht0) +* Make `title` completely non-fatal +* Sanitize formats before sorting by [pukkandan](https://github.com/pukkandan) +* Support module level `__bool__` and `property` +* [dependencies] Standardize `Cryptodome` imports +* [hls] Allow extractors to provide AES key by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [ExtractAudio] Handle outtmpl without ext by [carusocr](https://github.com/carusocr) +* [extractor/common] Fix `_search_nuxt_data` by [LowSuggestion912](https://github.com/LowSuggestion912) +* [extractor/generic] Avoid catastrophic backtracking in KVS regex by [bashonly](https://github.com/bashonly) +* [jsinterp] Support `if` statements +* [plugins] Fix zip search paths +* [utils] `traverse_obj`: Various improvements by [Grub4K](https://github.com/Grub4K) +* [utils] `traverse_obj`: Fix more bugs +* [utils] `traverse_obj`: Fix several behavioral problems by [Grub4K](https://github.com/Grub4K) +* [utils] Don't use Content-length with encoding by [felixonmars](https://github.com/felixonmars) +* [utils] Fix `time_seconds` to use the provided TZ by [Grub4K](https://github.com/Grub4K), [Lesmiscore](https://github.com/Lesmiscore) +* [utils] Fix race condition in `make_dir` by [aionescu](https://github.com/aionescu) +* [utils] Use local kernel32 for file locking on Windows by [Grub4K](https://github.com/Grub4K) +* [compat_utils] Improve `passthrough_module` +* [compat_utils] Simplify `EnhancedModule` +* [build] Update pyinstaller +* [pyinst] Fix for pyinstaller 5.8 +* [devscripts] Provide `pyinstaller` hooks +* 
[devscripts/pyinstaller] Analyze sub-modules of `Cryptodome`
+* [cleanup] Misc fixes and cleanup
+* [extractor/anchorfm] Add episode extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly)
+* [extractor/boxcast] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/ebay] Add extractor by [JChris246](https://github.com/JChris246)
+* [extractor/hypergryph] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly)
+* [extractor/NZOnScreen] Add extractor by [gregsadetsky](https://github.com/gregsadetsky), [pukkandan](https://github.com/pukkandan)
+* [extractor/rozhlas] Add extractor RozhlasVltavaIE by [amra](https://github.com/amra)
+* [extractor/tempo] Add IVXPlayer extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/txxx] Add extractors by [chio0hai](https://github.com/chio0hai)
+* [extractor/vocaroo] Add extractor by [SuperSonicHub1](https://github.com/SuperSonicHub1), [qbnu](https://github.com/qbnu)
+* [extractor/wrestleuniverse] Add extractors by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* [extractor/yappy] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [dirkf](https://github.com/dirkf)
+* [extractor/youtube] **Fix `uploader_id` extraction** by [bashonly](https://github.com/bashonly)
+* [extractor/youtube] Add hyperpipe instances by [Generator](https://github.com/Generator)
+* [extractor/youtube] Handle `consent.youtube`
+* [extractor/youtube] Support `/live/` URL
+* [extractor/youtube] Update invidious and piped instances by [rohieb](https://github.com/rohieb)
+* [extractor/91porn] Fix title and comment extraction by [pmitchell86](https://github.com/pmitchell86)
+* [extractor/AbemaTV] Cache user token whenever appropriate by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/bfmtv] Support `rmc` prefix by [carusocr](https://github.com/carusocr)
+* [extractor/biliintl] Add intro and ending chapters by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/clyp] Support `wav` by [qulaz](https://github.com/qulaz)
+* [extractor/crunchyroll] Add intro chapter by [ByteDream](https://github.com/ByteDream)
+* [extractor/crunchyroll] Better message for premium videos
+* [extractor/crunchyroll] Fix incorrect premium-only error by [Grub4K](https://github.com/Grub4K)
+* [extractor/DouyuTV] Use new API by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [extractor/embedly] Embedded links may be for other extractors
+* [extractor/freesound] Workaround invalid URL in webpage by [rebane2001](https://github.com/rebane2001)
+* [extractor/GoPlay] Use new API by [jeroenj](https://github.com/jeroenj)
+* [extractor/Hidive] Fix subtitles and age-restriction by [chexxor](https://github.com/chexxor)
+* [extractor/huya] Support HD streams by [felixonmars](https://github.com/felixonmars)
+* [extractor/moviepilot] Fix extractor by [panatexxa](https://github.com/panatexxa)
+* [extractor/nbc] Fix `NBC` and `NBCStations` extractors by [bashonly](https://github.com/bashonly)
+* [extractor/nbc] Fix XML parsing by [bashonly](https://github.com/bashonly)
+* [extractor/nebula] Remove broken cookie support by [hheimbuerger](https://github.com/hheimbuerger)
+* [extractor/nfl] Add `NFLPlus` extractors by [bashonly](https://github.com/bashonly)
+* [extractor/niconico] Add support for like history by [Matumo](https://github.com/Matumo), [pukkandan](https://github.com/pukkandan)
+* [extractor/nitter] Update instance list by [OIRNOIR](https://github.com/OIRNOIR)
+* [extractor/npo] Fix extractor and add HD support by [seproDev](https://github.com/seproDev)
+* [extractor/odkmedia] Add `OnDemandChinaEpisodeIE` by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
+* [extractor/pornez] Handle relative URLs in iframe by [JChris246](https://github.com/JChris246)
+* [extractor/radiko] Fix format sorting for Time Free by [road-master](https://github.com/road-master)
+* [extractor/rcs] Fix extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
+* [extractor/reddit] Support user posts by [OMEGARAZER](https://github.com/OMEGARAZER)
+* [extractor/rumble] Fix format sorting by [pukkandan](https://github.com/pukkandan)
+* [extractor/servus] Rewrite extractor by [Ashish0804](https://github.com/Ashish0804), [FrankZ85](https://github.com/FrankZ85), [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
+* [extractor/slideslive] Fix slides and chapters/duration by [bashonly](https://github.com/bashonly)
+* [extractor/SportDeutschland] Fix extractor by [FriedrichRehren](https://github.com/FriedrichRehren)
+* [extractor/Stripchat] Fix extractor by [JChris246](https://github.com/JChris246), [bashonly](https://github.com/bashonly)
+* [extractor/tnaflix] Fix extractor by [bashonly](https://github.com/bashonly), [oxamun](https://github.com/oxamun)
+* [extractor/tvp] Support `stream.tvp.pl` by [selfisekai](https://github.com/selfisekai)
+* [extractor/twitter] Fix `--no-playlist` and add media `view_count` when using GraphQL by [Grub4K](https://github.com/Grub4K)
+* [extractor/twitter] Fix graphql extraction on some tweets by [selfisekai](https://github.com/selfisekai)
+* [extractor/vimeo] Fix `playerConfig` extraction by [LeoniePhiline](https://github.com/LeoniePhiline), [bashonly](https://github.com/bashonly)
+* [extractor/viu] Add `ViuOTTIndonesiaIE` extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/vk] Fix playlists for new API by [the-marenga](https://github.com/the-marenga)
+* [extractor/vlive] Replace with `VLiveWebArchiveIE` by [seproDev](https://github.com/seproDev)
+* [extractor/ximalaya] Update album `_VALID_URL` by [carusocr](https://github.com/carusocr)
+* [extractor/zdf] Use android API endpoint for UHD downloads by [seproDev](https://github.com/seproDev)
+* [extractor/drtv] Fix bug in [ab4cbef](https://github.com/yt-dlp/yt-dlp/commit/ab4cbef) by [bashonly](https://github.com/bashonly)
+
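The three `traverse_obj` entries in the list above concern `yt_dlp.utils.traverse_obj`. As a rough sketch of the call style those fixes relate to (illustrative values only; not an exhaustive reference):

```python
from yt_dlp.utils import traverse_obj

data = {'items': [{'id': 'a', 'meta': {'views': 10}},
                  {'id': 'b'}]}

# Walk a fixed path of keys/indices; a missing step yields the default
# (None) instead of raising.
views = traverse_obj(data, ('items', 0, 'meta', 'views'))    # -> 10
missing = traverse_obj(data, ('items', 1, 'meta', 'views'))  # -> None

# `...` branches over all items at that level; branching returns a list
# (one of the behaviours pinned down by the fixes above).
ids = traverse_obj(data, ('items', ..., 'id'))               # -> ['a', 'b']
```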
+
+
+### 2023.01.06
+
+* Fix config locations by [Grub4K](https://github.com/Grub4K), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [downloader/aria2c] Disable native progress
+* [utils] `mimetype2ext`: `weba` is not standard
+* [utils] `windows_enable_vt_mode`: Better error handling
+* [build] Add minimal `pyproject.toml`
+* [update] Fix updater file removal on windows by [Grub4K](https://github.com/Grub4K)
+* [cleanup] Misc fixes and cleanup
+* [extractor/aitube] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/drtv] Add series extractors by [FrederikNS](https://github.com/FrederikNS)
+* [extractor/volejtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/xanimu] Add extractor by [JChris246](https://github.com/JChris246)
+* [extractor/youtube] Retry manifest refresh for live-from-start by [mzhou](https://github.com/mzhou)
+* [extractor/biliintl] Add `/media` to `VALID_URL` by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/biliIntl] Add fallback to `video_data` by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/crunchyroll:show] Add `language` to entries by [Chrissi2812](https://github.com/Chrissi2812)
+* [extractor/joj] Fix extractor by [OndrejBakan](https://github.com/OndrejBakan), [pukkandan](https://github.com/pukkandan)
+* [extractor/nbc] Update graphql query by [jacobtruman](https://github.com/jacobtruman)
+* [extractor/reddit] Add subreddit as `channel_id` by [gschizas](https://github.com/gschizas)
+* [extractor/tiktok] Add `TikTokLive` extractor by [JC-Chung](https://github.com/JC-Chung)
+
+### 2023.01.02
+
+* **Improve plugin architecture** by [Grub4K](https://github.com/Grub4K), [coletdjnz](https://github.com/coletdjnz), [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan)
+    * Plugins can be loaded in any distribution of yt-dlp (binary, pip, source, etc.) and can be distributed and installed as packages. See [the readme](https://github.com/yt-dlp/yt-dlp/tree/05997b6e98e638d97d409c65bb5eb86da68f3b64#plugins) for more information (a short illustrative sketch follows at the end of this version's list of changes)
+* Add `--compat-options 2021,2022`
+    * This allows devs to change defaults and make other potentially breaking changes more easily. If you need everything to work exactly as-is, use `--compat-options 2022` in your config to guard against future compat changes
+* [downloader/aria2c] Native progress for aria2c via RPC by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan)
+* Merge youtube-dl: Upto [commit/195f22f](https://github.com/ytdl-org/youtube-dl/commit/195f22f6) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+* Add pre-processor stage `video`
+* Let `--parse/replace-in-metadata` run at any post-processing stage
+* Add `--enable-file-urls` by [coletdjnz](https://github.com/coletdjnz)
+* Add new field `aspect_ratio`
+* Add `ac4` to known codecs
+* Add `weba` to known extensions
+* [FFmpegVideoConvertor] Add `gif` to `--recode-video`
+* Add message when there are no subtitles/thumbnails
+* Deprioritize HEVC-over-FLV formats by [Lesmiscore](https://github.com/Lesmiscore)
+* Make early reject of `--match-filter` stricter
+* Fix `--cookies-from-browser` CLI parsing
+* Fix `original_url` in playlists
+* Fix bug in writing playlist info-json
+* Fix bugs in `PlaylistEntries`
+* [downloader/ffmpeg] Fix headers for video+audio formats by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* [extractor] Add a way to distinguish IEs that returns only videos
+* [extractor] Implement universal format sorting and deprecate `_sort_formats`
+* [extractor] Let `_extract_format` functions obey `--ignore-no-formats`
+* [extractor/generic] Add `fragment_query` extractor arg for DASH and HLS by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+* [extractor/generic] Decode unicode-escaped embed URLs by [bashonly](https://github.com/bashonly)
+* [extractor/generic] Don't report redirect to https
+* [extractor/generic] Fix JSON LD manifest extraction by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+* [extractor/generic] Use `Accept-Encoding: identity` for initial request by [coletdjnz](https://github.com/coletdjnz)
+* [FormatSort] Add `mov` to `vext`
+* [jsinterp] Escape regex that looks like nested set
+* [webvtt] Handle premature EOF by [flashdagger](https://github.com/flashdagger)
+* [utils] `classproperty`: Add cache support
+* [utils] `get_exe_version`: Detect broken executables by
[dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan) +* [utils] `js_to_json`: Fix bug in [f55523c](https://github.com/yt-dlp/yt-dlp/commit/f55523c) by [ChillingPepper](https://github.com/ChillingPepper), [pukkandan](https://github.com/pukkandan) +* [utils] Make `ExtractorError` mutable +* [utils] Move `FileDownloader.parse_bytes` into utils +* [utils] Move format sorting code into `utils` +* [utils] `windows_enable_vt_mode`: Proper implementation by [Grub4K](https://github.com/Grub4K) +* [update] Workaround [#5632](https://github.com/yt-dlp/yt-dlp/issues/5632) +* [docs] Improvements +* [cleanup] Misc fixes and cleanup +* [cleanup] Use `random.choices` by [freezboltz](https://github.com/freezboltz) +* [extractor/airtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/amazonminitv] Add extractors by [GautamMKGarg](https://github.com/GautamMKGarg), [nyuszika7h](https://github.com/nyuszika7h) +* [extractor/beatbump] Add extractors by [Bobscorn](https://github.com/Bobscorn), [pukkandan](https://github.com/pukkandan) +* [extractor/europarl] Add EuroParlWebstream extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/kanal2] Add extractor by [bashonly](https://github.com/bashonly), [glensc](https://github.com/glensc), [pukkandan](https://github.com/pukkandan) +* [extractor/kankanews] Add extractor by [synthpop123](https://github.com/synthpop123) +* [extractor/kick] Add extractor by [bashonly](https://github.com/bashonly) +* [extractor/mediastream] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [elyse0](https://github.com/elyse0) +* [extractor/noice] Add NoicePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/oneplace] Add OnePlacePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/rumble] Add RumbleIE extractor by [flashdagger](https://github.com/flashdagger) +* [extractor/screencastify] Add extractor by [bashonly](https://github.com/bashonly) +* [extractor/trtcocuk] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/Veoh] Add user extractor by [tntmod54321](https://github.com/tntmod54321) +* [extractor/videoken] Add extractors by [bashonly](https://github.com/bashonly) +* [extractor/webcamerapl] Add extractor by [milkknife](https://github.com/milkknife) +* [extractor/amazon] Add `AmazonReviews` extractor by [bashonly](https://github.com/bashonly) +* [extractor/netverse] Add `NetverseSearch` extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/vimeo] Add `VimeoProIE` by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/xiami] Remove extractors by [synthpop123](https://github.com/synthpop123) +* [extractor/youtube] Add `piped.video` by [Bnyro](https://github.com/Bnyro) +* [extractor/youtube] Consider language in format de-duplication +* [extractor/youtube] Extract DRC formats +* [extractor/youtube] Fix `ytuser:` +* [extractor/youtube] Fix bug in handling of music URLs +* [extractor/youtube] Subtitles cannot be translated to `und` +* [extractor/youtube:tab] Extract metadata from channel items by [coletdjnz](https://github.com/coletdjnz) +* [extractor/ARD] Add vtt subtitles by [CapacitorSet](https://github.com/CapacitorSet) +* [extractor/ArteTV] Extract chapters by [bashonly](https://github.com/bashonly), [iw0nderhow](https://github.com/iw0nderhow) +* [extractor/bandcamp] Add `album_artist` by [stelcodes](https://github.com/stelcodes) +* [extractor/bilibili] Fix 
`--no-playlist` for anthology
+* [extractor/bilibili] Improve `_VALID_URL` by [skbeh](https://github.com/skbeh)
+* [extractor/biliintl:series] Make partial download of series faster
+* [extractor/BiliLive] Fix extractor
+* [extractor/brightcove] Add `BrightcoveNewBaseIE` and fix embed extraction
+* [extractor/cda] Support premium and misc improvements by [selfisekai](https://github.com/selfisekai)
+* [extractor/ciscowebex] Support password-protected videos by [damianoamatruda](https://github.com/damianoamatruda)
+* [extractor/curiositystream] Fix auth by [mnn](https://github.com/mnn)
+* [extractor/embedly] Handle vimeo embeds
+* [extractor/fifa] Fix Preplay extraction by [dirkf](https://github.com/dirkf)
+* [extractor/foxsports] Fix extractor by [bashonly](https://github.com/bashonly)
+* [extractor/gronkh] Fix `_VALID_URL` by [muddi900](https://github.com/muddi900)
+* [extractor/hotstar] Improve format metadata
+* [extractor/iqiyi] Fix `Iq` JS regex by [bashonly](https://github.com/bashonly)
+* [extractor/la7] Improve extractor by [nixxo](https://github.com/nixxo)
+* [extractor/mediaset] Better embed detection and error messages by [nixxo](https://github.com/nixxo)
+* [extractor/mixch] Support `--wait-for-video`
+* [extractor/naver] Improve `_VALID_URL` for `NaverNowIE` by [bashonly](https://github.com/bashonly)
+* [extractor/naver] Treat fan subtitles as separate language
+* [extractor/netverse] Extract comments by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/nosnl] Add support for /video by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/odnoklassniki] Extract subtitles by [bashonly](https://github.com/bashonly)
+* [extractor/pinterest] Fix extractor by [bashonly](https://github.com/bashonly)
+* [extractor/plutotv] Fix videos with non-zero start by [digitall](https://github.com/digitall)
+* [extractor/polskieradio] Adapt to next.js redesigns by [selfisekai](https://github.com/selfisekai)
+* [extractor/reddit] Add vcodec to fallback format by [chengzhicn](https://github.com/chengzhicn)
+* [extractor/reddit] Extract crossposted media by [bashonly](https://github.com/bashonly)
+* [extractor/reddit] Extract video embeds in text posts by [bashonly](https://github.com/bashonly)
+* [extractor/rutube] Support private videos by [mexus](https://github.com/mexus)
+* [extractor/sibnet] Separate from VKIE
+* [extractor/slideslive] Fix extractor by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* [extractor/slideslive] Support embeds and slides by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+* [extractor/soundcloud] Support user permalink by [nosoop](https://github.com/nosoop)
+* [extractor/spankbang] Fix extractor by [JChris246](https://github.com/JChris246)
+* [extractor/stv] Detect DRM
+* [extractor/swearnet] Fix description bug
+* [extractor/tencent] Fix geo-restricted video by [elyse0](https://github.com/elyse0)
+* [extractor/tiktok] Fix subs, `DouyinIE`, improve `_VALID_URL` by [bashonly](https://github.com/bashonly)
+* [extractor/tiktok] Update `_VALID_URL`, add `api_hostname` arg by [bashonly](https://github.com/bashonly)
+* [extractor/tiktok] Update API hostname by [redraskal](https://github.com/redraskal)
+* [extractor/twitcasting] Fix videos with password by [Spicadox](https://github.com/Spicadox), [bashonly](https://github.com/bashonly)
+* [extractor/twitter] Heed `--no-playlist` for multi-video tweets by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* [extractor/twitter] Refresh guest token when expired by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* [extractor/twitter:spaces] Add `Referer` to m3u8 by [nixxo](https://github.com/nixxo)
+* [extractor/udemy] Fix lectures that have no URL and detect DRM
+* [extractor/unsupported] Add more URLs
+* [extractor/urplay] Support for audio-only formats by [barsnick](https://github.com/barsnick)
+* [extractor/wistia] Improve extension detection by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+* [extractor/yle_areena] Support restricted videos by [docbender](https://github.com/docbender)
+* [extractor/youku] Fix extractor by [KurtBestor](https://github.com/KurtBestor)
+* [extractor/youporn] Fix metadata by [marieell](https://github.com/marieell)
+* [extractor/redgifs] Fix bug in [8c188d5](https://github.com/yt-dlp/yt-dlp/commit/8c188d5d09177ed213a05c900d3523867c5897fd)
+
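Since the plugin overhaul headlines this release, here is a minimal sketch of a package-style extractor plugin under the new layout (all names and the URL pattern are illustrative; the readme linked in the first entry above is authoritative):

```python
# yt_dlp_plugins/extractor/example.py -- illustrative path: plugins live in a
# `yt_dlp_plugins` namespace package, and extractor classes whose names end
# in `IE` are picked up automatically.
from yt_dlp.extractor.common import InfoExtractor


class ExamplePluginIE(InfoExtractor):
    # Hypothetical site; real plugins match the site they support.
    _VALID_URL = r'https?://(?:www\.)?example\.com/video/(?P<id>\w+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': self._og_search_video_url(webpage),
        }
```

Dropping such a file into a `yt_dlp_plugins/extractor/` directory on the plugin search path should be enough for it to be loaded; see the readme for the exact locations searched.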
+
+### 2022.11.11
+
+* Merge youtube-dl: Upto [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128)
+* Backport SSL configuration from Python 3.10 by [coletdjnz](https://github.com/coletdjnz)
+* Do more processing in `--flat-playlist`
+* Fix `--list` options not implying `-s` in some cases by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
+* Fix end time of clips by [cruel-efficiency](https://github.com/cruel-efficiency)
+* Fix for `formats=None`
+* Write API params in debug head
+* [outtmpl] Ensure ASCII in json and add option for Unicode
+* [SponsorBlock] Add `type` field, obey `--retry-sleep extractor`, relax duration check for large segments
+* [SponsorBlock] **Support `chapter` category** by [ajayyy](https://github.com/ajayyy), [pukkandan](https://github.com/pukkandan)
+* [ThumbnailsConvertor] Fix filename escaping by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [ModifyChapters] Handle the entire video being marked for removal
+* [embedthumbnail] Fix thumbnail name in mp3 by [How-Bout-No](https://github.com/How-Bout-No)
+* [downloader/fragment] HLS download can continue without first fragment
+* [cookies] Improve `LenientSimpleCookie` by [Grub4K](https://github.com/Grub4K)
+* [jsinterp] Improve separating regex
+* [extractor/common] Fix `fatal=False` for `_search_nuxt_data`
+* [extractor/common] Improve `_generic_title`
+* [extractor/common] Fix `json_ld` type checks by [Grub4K](https://github.com/Grub4K)
+* [extractor/generic] Separate embed extraction into own function
+* [extractor/generic:quoted-html] Add extractor by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/unsupported] Raise error on known DRM-only sites by [coletdjnz](https://github.com/coletdjnz)
+* [utils] `js_to_json`: Improve escape handling by [Grub4K](https://github.com/Grub4K)
+* [utils] `strftime_or_none`: Workaround Python bug on Windows
+* [utils] `traverse_obj`: Always return list when branching, allow `re.Match` objects by [Grub4K](https://github.com/Grub4K)
+* [build, test] Harden workflows' security by [sashashura](https://github.com/sashashura)
+* [build] `py2exe`: Migrate to freeze API by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan)
+* [build] Create `armv7l` and `aarch64` releases by [MrOctopus](https://github.com/MrOctopus), [pukkandan](https://github.com/pukkandan)
+* [build] Make linux binary truly standalone using `conda` by
[mlampe](https://github.com/mlampe) +* [build] Replace `set-output` with `GITHUB_OUTPUT` by [Lesmiscore](https://github.com/Lesmiscore) +* [update] Use error code `100` for update errors +* [compat] Fix `shutils.move` in restricted ACL mode on BSD by [ClosedPort22](https://github.com/ClosedPort22), [pukkandan](https://github.com/pukkandan) +* [docs, devscripts] Document `pyinst`'s argument passthrough by [jahway603](https://github.com/jahway603) +* [test] Allow `extract_flat` in download tests by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [cleanup] Misc fixes and cleanup by [pukkandan](https://github.com/pukkandan), [Alienmaster](https://github.com/Alienmaster) +* [extractor/aeon] Add extractor by [DoubleCouponDay](https://github.com/DoubleCouponDay) +* [extractor/agora] Add extractors by [selfisekai](https://github.com/selfisekai) +* [extractor/camsoda] Add extractor by [zulaport](https://github.com/zulaport) +* [extractor/cinetecamilano] Add extractor by [timendum](https://github.com/timendum) +* [extractor/deuxm] Add extractors by [CrankDatSouljaBoy](https://github.com/CrankDatSouljaBoy) +* [extractor/genius] Add extractors by [bashonly](https://github.com/bashonly) +* [extractor/japandiet] Add extractors by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/listennotes] Add extractor by [lksj](https://github.com/lksj), [pukkandan](https://github.com/pukkandan) +* [extractor/nos.nl] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/oftv] Add extractors by [DoubleCouponDay](https://github.com/DoubleCouponDay) +* [extractor/podbayfm] Add extractor by [schnusch](https://github.com/schnusch) +* [extractor/qingting] Add extractor by [bashonly](https://github.com/bashonly), [changren-wcr](https://github.com/changren-wcr) +* [extractor/screen9] Add extractor by [tpikonen](https://github.com/tpikonen) +* [extractor/swearnet] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/YleAreena] Add extractor by [pukkandan](https://github.com/pukkandan), [vitkhab](https://github.com/vitkhab) +* [extractor/zeenews] Add extractor by [m4tu4g](https://github.com/m4tu4g), [pukkandan](https://github.com/pukkandan) +* [extractor/youtube:tab] **Update tab handling for redesign** by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) + * Channel URLs download all uploads of the channel as multiple playlists, separated by tab +* [extractor/youtube] Differentiate between no comments and disabled comments by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube] Extract `concurrent_view_count` for livestreams by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube] Fix `duration` for premieres by [nosoop](https://github.com/nosoop) +* [extractor/youtube] Fix `live_status` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [extractor/youtube] Ignore incomplete data error for comment replies by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube] Improve chapter parsing from description +* [extractor/youtube] Mark videos as fully watched by [bsun0000](https://github.com/bsun0000) +* [extractor/youtube] Update piped instances by [Generator](https://github.com/Generator) +* [extractor/youtube] Update playlist metadata extraction for new layout by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube:tab] Fix video metadata from tabs by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube:tab] 
Let `approximate_date` return timestamp +* [extractor/americastestkitchen] Fix extractor by [bashonly](https://github.com/bashonly) +* [extractor/bbc] Support onion domains by [DoubleCouponDay](https://github.com/DoubleCouponDay) +* [extractor/bilibili] Add chapters and misc cleanup by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan) +* [extractor/bilibili] Fix BilibiliIE and Bangumi extractors by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan) +* [extractor/bitchute] Better error for geo-restricted videos by [flashdagger](https://github.com/flashdagger) +* [extractor/bitchute] Improve `BitChuteChannelIE` by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan) +* [extractor/bitchute] Simplify extractor by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan) +* [extractor/cda] Support login through API by [selfisekai](https://github.com/selfisekai) +* [extractor/crunchyroll] Beta is now the only layout by [tejing1](https://github.com/tejing1) +* [extractor/detik] Avoid unnecessary extraction +* [extractor/doodstream] Remove extractor +* [extractor/dplay] Add MotorTrendOnDemand extractor by [bashonly](https://github.com/bashonly) +* [extractor/epoch] Support videos without data-trailer by [gibson042](https://github.com/gibson042), [pukkandan](https://github.com/pukkandan) +* [extractor/fox] Extract thumbnail by [vitkhab](https://github.com/vitkhab) +* [extractor/foxnews] Add `FoxNewsVideo` extractor +* [extractor/hotstar] Add season support by [m4tu4g](https://github.com/m4tu4g) +* [extractor/hotstar] Refactor v1 API calls +* [extractor/iprima] Make json+ld non-fatal by [bashonly](https://github.com/bashonly) +* [extractor/iq] Increase phantomjs timeout +* [extractor/kaltura] Support playlists by [jwoglom](https://github.com/jwoglom), [pukkandan](https://github.com/pukkandan) +* [extractor/lbry] Authenticate with cookies by [flashdagger](https://github.com/flashdagger) +* [extractor/livestreamfails] Support posts by [invertico](https://github.com/invertico) +* [extractor/mlb] Add `MLBArticle` extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/mxplayer] Improve extractor by [m4tu4g](https://github.com/m4tu4g) +* [extractor/niconico] Always use HTTPS for requests +* [extractor/nzherald] Support new video embed by [coletdjnz](https://github.com/coletdjnz) +* [extractor/odnoklassniki] Support boosty.to embeds by [Lesmiscore](https://github.com/Lesmiscore), [megapro17](https://github.com/megapro17), [pukkandan](https://github.com/pukkandan) +* [extractor/paramountplus] Update API token by [bashonly](https://github.com/bashonly) +* [extractor/reddit] Add fallback format by [bashonly](https://github.com/bashonly) +* [extractor/redgifs] Fix extractors by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/redgifs] Refresh auth token for 401 by [endotronic](https://github.com/endotronic), [pukkandan](https://github.com/pukkandan) +* [extractor/rumble] Add HLS formats and extract more metadata by [flashdagger](https://github.com/flashdagger) +* [extractor/sbs] Improve `_VALID_URL` by [bashonly](https://github.com/bashonly) +* [extractor/skyit] Fix extractors by [nixxo](https://github.com/nixxo) +* [extractor/stripchat] Fix hostname for HLS stream by [zulaport](https://github.com/zulaport) +* [extractor/stripchat] Improve error message by [freezboltz](https://github.com/freezboltz) +* 
[extractor/telegram] Add playlist support and more metadata by [bashonly](https://github.com/bashonly), [bsun0000](https://github.com/bsun0000) +* [extractor/Tnaflix] Fix for HTTP 500 by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan) +* [extractor/tubitv] Better DRM detection by [bashonly](https://github.com/bashonly) +* [extractor/tvp] Update extractors by [selfisekai](https://github.com/selfisekai) +* [extractor/twitcasting] Fix `data-movie-playlist` extraction by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/twitter] Add onion site to `_VALID_URL` by [DoubleCouponDay](https://github.com/DoubleCouponDay) +* [extractor/twitter] Add Spaces extractor and GraphQL API by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan) +* [extractor/twitter] Support multi-video posts by [Grub4K](https://github.com/Grub4K) +* [extractor/uktvplay] Fix `_VALID_URL` +* [extractor/viu] Support subtitles of on-screen text by [tkgmomosheep](https://github.com/tkgmomosheep) +* [extractor/VK] Fix playlist URLs by [the-marenga](https://github.com/the-marenga) +* [extractor/vlive] Extract `release_timestamp` +* [extractor/voot] Improve `_VALID_URL` by [freezboltz](https://github.com/freezboltz) +* [extractor/wordpress:mb.miniAudioPlayer] Add embed extractor by [coletdjnz](https://github.com/coletdjnz) +* [extractor/YoutubeWebArchive] Improve metadata extraction by [coletdjnz](https://github.com/coletdjnz) +* [extractor/zee5] Improve `_VALID_URL` by [m4tu4g](https://github.com/m4tu4g) +* [extractor/zenyandex] Fix extractors by [lksj](https://github.com/lksj), [puc9](https://github.com/puc9), [pukkandan](https://github.com/pukkandan) + + ### 2022.10.04 + +* Allow a `set` to be passed as `download_archive` by [pukkandan](https://github.com/pukkandan), [bashonly](https://github.com/bashonly) +* Allow open ranges for time ranges by [Lesmiscore](https://github.com/Lesmiscore) +* Allow plugin extractors to replace the built-in ones +* Don't download entire video when no matching `--download-sections` +* Fix `--config-location -` +* Improve [5736d79](https://github.com/yt-dlp/yt-dlp/pull/5044/commits/5736d79172c47ff84740d5720467370a560febad) +* Fix for when playlists don't have `webpage_url` +* Support environment variables in `--ffmpeg-location` +* Workaround `libc_ver` not being available on Windows Store version of Python +* [outtmpl] Curly braces to filter keys by [pukkandan](https://github.com/pukkandan) +* [outtmpl] Make `%s` work in strftime format for all systems +* [jsinterp] Workaround operator associativity issue +* [cookies] Let `_get_mac_keyring_password` fail gracefully +* [cookies] Parse cookies leniently by [Grub4K](https://github.com/Grub4K) +* [phantomjs] Fix bug in [587021c](https://github.com/yt-dlp/yt-dlp/commit/587021cd9f717181b44e881941aca3f8d753758b) by [elyse0](https://github.com/elyse0) +* [downloader/aria2c] Fix filename containing leading whitespace by [std-move](https://github.com/std-move) +* [downloader/ism] Support ec-3 codec by [nixxo](https://github.com/nixxo) +* [extractor] Fix `fatal=False` in `RetryManager` +* [extractor] Improve json-ld extraction +* [extractor] Make `_search_json` able to parse lists +* [extractor] Escape `%` in `representation_id` of m3u8 +* [extractor/generic] Pass through referer from json-ld +* [utils] `base_url`: URL paths can contain `&` by [elyse0](https://github.com/elyse0) +* [utils] `js_to_json`: Improve +* [utils] `Popen.run`:
Fix default return in binary mode +* [utils] `traverse_obj`: Rewrite, document and add tests by [Grub4K](https://github.com/Grub4K) +* [devscripts] `make_lazy_extractors`: Fix for Docker by [josanabr](https://github.com/josanabr) +* [docs] Misc Improvements +* [cleanup] Misc fixes and cleanup by [pukkandan](https://github.com/pukkandan), [gamer191](https://github.com/gamer191) +* [extractor/24tv.ua] Add extractors by [coletdjnz](https://github.com/coletdjnz) +* [extractor/BerufeTV] Add extractor by [Fabi019](https://github.com/Fabi019) +* [extractor/booyah] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [elyse0](https://github.com/elyse0) +* [extractor/bundesliga] Add extractor by [Fabi019](https://github.com/Fabi019) +* [extractor/GoPlay] Add extractor by [CNugteren](https://github.com/CNugteren), [basrieter](https://github.com/basrieter), [jeroenj](https://github.com/jeroenj) +* [extractor/iltalehti] Add extractor by [tpikonen](https://github.com/tpikonen) +* [extractor/IsraelNationalNews] Add extractor by [Bobscorn](https://github.com/Bobscorn) +* [extractor/mediaworksnzvod] Add extractor by [coletdjnz](https://github.com/coletdjnz) +* [extractor/MicrosoftEmbed] Add extractor by [DoubleCouponDay](https://github.com/DoubleCouponDay) +* [extractor/nbc] Add NBCStations extractor by [bashonly](https://github.com/bashonly) +* [extractor/onenewsnz] Add extractor by [coletdjnz](https://github.com/coletdjnz) +* [extractor/prankcast] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [columndeeply](https://github.com/columndeeply) +* [extractor/Smotrim] Add extractor by [Lesmiscore](https://github.com/Lesmiscore), [nikita-moor](https://github.com/nikita-moor) +* [extractor/tencent] Add Iflix extractor by [elyse0](https://github.com/elyse0) +* [extractor/unscripted] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/adobepass] Add MSO AlticeOne (Optimum TV) by [CplPwnies](https://github.com/CplPwnies) +* [extractor/youtube] **Download `post_live` videos from start** by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan) +* [extractor/youtube] Add support for Shorts audio pivot feed by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [extractor/youtube] Detect `lazy-load-for-videos` embeds +* [extractor/youtube] Do not warn on duplicate chapters +* [extractor/youtube] Fix video like count extraction by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube] Support changing extraction language by [coletdjnz](https://github.com/coletdjnz) +* [extractor/youtube:tab] Improve continuation items extraction +* [extractor/youtube:tab] Support `reporthistory` page +* [extractor/amazonstore] Fix JSON extraction by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [extractor/amazonstore] Retry to avoid captcha page by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/animeondemand] Remove extractor by [TokyoBlackHole](https://github.com/TokyoBlackHole) +* [extractor/anvato] Fix extractor and refactor by [bashonly](https://github.com/bashonly) +* [extractor/artetv] Remove duplicate stream urls by [Grub4K](https://github.com/Grub4K) +* [extractor/audioboom] Support direct URLs and refactor by [pukkandan](https://github.com/pukkandan), [tpikonen](https://github.com/tpikonen) +* [extractor/bandcamp] Extract `uploader_url` +* [extractor/bilibili] Add space.bilibili extractors by [lockmatrix](https://github.com/lockmatrix) +* 
[extractor/BilibiliSpace] Fix extractor and better error message by [lockmatrix](https://github.com/lockmatrix) +* [extractor/BiliIntl] Support uppercase lang in `_VALID_URL` by [coletdjnz](https://github.com/coletdjnz) +* [extractor/BiliIntlSeries] Fix `_VALID_URL` +* [extractor/bongacams] Update `_VALID_URL` by [0xGodspeed](https://github.com/0xGodspeed) +* [extractor/crunchyroll:beta] Improve handling of hardsubs by [Grub4K](https://github.com/Grub4K) +* [extractor/detik] Generalize extractors by [HobbyistDev](https://github.com/HobbyistDev), [coletdjnz](https://github.com/coletdjnz) +* [extractor/dplay:italy] Add default authentication by [Timendum](https://github.com/Timendum) +* [extractor/heise] Fix extractor by [coletdjnz](https://github.com/coletdjnz) +* [extractor/holodex] Fix `_VALID_URL` by [LiviaMedeiros](https://github.com/LiviaMedeiros) +* [extractor/hrfensehen] Fix extractor by [snapdgn](https://github.com/snapdgn) +* [extractor/hungama] Add subtitle by [GautamMKGarg](https://github.com/GautamMKGarg), [pukkandan](https://github.com/pukkandan) +* [extractor/instagram] Extract more metadata by [pritam20ps05](https://github.com/pritam20ps05) +* [extractor/JWPlatform] Fix extractor by [coletdjnz](https://github.com/coletdjnz) +* [extractor/malltv] Fix video_id extraction by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/MLBTV] Detect live streams +* [extractor/motorsport] Support native embeds +* [extractor/Mxplayer] Fix extractor by [itachi-19](https://github.com/itachi-19) +* [extractor/nebula] Add nebula.tv by [tannertechnology](https://github.com/tannertechnology) +* [extractor/nfl] Fix extractor by [bashonly](https://github.com/bashonly) +* [extractor/ondemandkorea] Update `jw_config` regex by [julien-hadleyjack](https://github.com/julien-hadleyjack) +* [extractor/paramountplus] Better DRM detection by [bashonly](https://github.com/bashonly) +* [extractor/patreon] Sort formats +* [extractor/rcs] Fix embed extraction by [coletdjnz](https://github.com/coletdjnz) +* [extractor/redgifs] Fix extractor by [jhwgh1968](https://github.com/jhwgh1968) +* [extractor/rutube] Fix `_EMBED_REGEX` by [coletdjnz](https://github.com/coletdjnz) +* [extractor/RUTV] Fix warnings for livestreams by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/soundcloud:search] More metadata in `--flat-playlist` by [SuperSonicHub1](https://github.com/SuperSonicHub1) +* [extractor/telegraaf] Use mobile GraphQL API endpoint by [coletdjnz](https://github.com/coletdjnz) +* [extractor/tennistv] Fix timestamp by [zenerdi0de](https://github.com/zenerdi0de) +* [extractor/tiktok] Fix TikTokIE by [bashonly](https://github.com/bashonly) +* [extractor/triller] Fix auth token by [bashonly](https://github.com/bashonly) +* [extractor/trovo] Fix extractors by [Mehavoid](https://github.com/Mehavoid) +* [extractor/tv2] Support new url format by [tobi1805](https://github.com/tobi1805) +* [extractor/web.archive:youtube] Fix `_YT_INITIAL_PLAYER_RESPONSE_RE` +* [extractor/wistia] Add support for channels by [coletdjnz](https://github.com/coletdjnz) +* [extractor/wistia] Match IDs in embed URLs by [bashonly](https://github.com/bashonly) +* [extractor/wordpress:playlist] Add generic embed extractor by [coletdjnz](https://github.com/coletdjnz) +* [extractor/yandexvideopreview] Update `_VALID_URL` by [Grub4K](https://github.com/Grub4K) +* [extractor/zee5] Fix `_VALID_URL` by [m4tu4g](https://github.com/m4tu4g) +* [extractor/zee5] Generate device ids by [freezboltz](https://github.com/freezboltz) + ### 2022.09.01 
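Two of the 2022.10.04 entries above (open-ended time ranges, and environment variables in `--ffmpeg-location`) are easiest to see from the command line; a minimal sketch, with a placeholder URL:

```
# Open time range: download from 10:15 to the end of the video
yt-dlp --download-sections "*10:15-inf" "https://example.com/watch?v=xxxx"

# The variable is expanded by yt-dlp itself (single quotes keep the shell
# from expanding it first), which is what makes it usable in config files
yt-dlp --ffmpeg-location '${HOME}/bin/ffmpeg' "https://example.com/watch?v=xxxx"
```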
diff --git a/Collaborators.md b/Collaborators.md index 3f24d5c476..71baf5080b 100644 --- a/Collaborators.md +++ b/Collaborators.md @@ -8,6 +8,7 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho ## [pukkandan](https://github.com/pukkandan) [![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/pukkandan) +[![gh-sponsor](https://img.shields.io/badge/_-Github-red.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/pukkandan) * Owner of the fork @@ -25,8 +26,9 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho ## [coletdjnz](https://github.com/coletdjnz) -[![gh-sponsor](https://img.shields.io/badge/_-Sponsor-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/coletdjnz) +[![gh-sponsor](https://img.shields.io/badge/_-Github-red.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/coletdjnz) +* Improved plugin architecture * YouTube improvements including: age-gate bypass, private playlists, multiple-clients (to avoid throttling) and a lot of under-the-hood improvements * Added support for new websites YoutubeWebArchive, MainStreaming, PRX, nzherald, Mediaklikk, StarTV etc * Improved/fixed support for Patreon, panopto, gfycat, itv, pbs, SouthParkDE etc @@ -42,7 +44,7 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho * Improved/fixed support for HiDive, HotStar, Hungama, LBRY, LinkedInLearning, Mxplayer, SonyLiv, TV2, Vimeo, VLive etc -## [Lesmiscore](https://github.com/Lesmiscore) (nao20010128nao) +## [Lesmiscore](https://github.com/Lesmiscore) (nao20010128nao) **Bitcoin**: bc1qfd02r007cutfdjwjmyy9w23rjvtls6ncve7r3s **Monacoin**: mona1q3tf7dzvshrhfe3md379xtvt2n22duhglv5dskr @@ -50,3 +52,20 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho * Download live from start to end for YouTube * Added support for new websites AbemaTV, mildom, PixivSketch, skeb, radiko, voicy, mirrativ, openrec, whowatch, damtomo, 17.live, mixch etc * Improved/fixed support for fc2, YahooJapanNews, tver, iwara etc + + +## [bashonly](https://github.com/bashonly) + +* `--update-to`, automated release, nightly builds +* `--cookies-from-browser` support for Firefox containers +* Added support for new websites Genius, Kick, NBCStations, Triller, VideoKen etc +* Improved/fixed support for Anvato, Brightcove, Instagram, ParamountPlus, Reddit, SlidesLive, TikTok, Twitter, Vimeo etc + + +## [Grub4K](https://github.com/Grub4K) + +[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/Grub4K) [![gh-sponsor](https://img.shields.io/badge/_-Github-red.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/Grub4K) + +* `--update-to`, automated release, nightly builds +* Rework internals like `traverse_obj`, various core refactors and bug fixes +* Helped fix crunchyroll, Twitter, wrestleuniverse, wistia, slideslive etc diff --git a/Makefile b/Makefile index 6cb9e2f57e..d5d47629b9 100644 --- a/Makefile +++ b/Makefile @@ -17,8 +17,8 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \ clean-test: rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \ - *.3gp
*.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.jpeg *.jpg *.m4a *.mpga *.m4v *.mhtml *.mkv *.mov \ - *.mp3 *.mp4 *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp + *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 \ + *.mp4 *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp clean-dist: rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap @@ -74,17 +74,16 @@ offlinetest: codetest $(PYTHON) -m pytest -k "not download" # XXX: This is hard to maintain -CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat \ - yt_dlp/extractor/anvato_token_generator +CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/dependencies yt-dlp: yt_dlp/*.py yt_dlp/*/*.py mkdir -p zip for d in $(CODE_FOLDERS) ; do \ mkdir -p zip/$$d ;\ cp -pPR $$d/*.py zip/$$d/ ;\ done - touch -t 200001010101 zip/yt_dlp/*.py zip/yt_dlp/*/*.py zip/yt_dlp/*/*/*.py + touch -t 200001010101 zip/yt_dlp/*.py zip/yt_dlp/*/*.py mv zip/yt_dlp/__main__.py zip/ - cd zip ; zip -q ../yt-dlp yt_dlp/*.py yt_dlp/*/*.py yt_dlp/*/*/*.py __main__.py + cd zip ; zip -q ../yt-dlp yt_dlp/*.py yt_dlp/*/*.py __main__.py rm -rf zip echo '#!$(PYTHON)' > yt-dlp cat yt-dlp.zip >> yt-dlp diff --git a/README.md b/README.md index 9f331663d5..f28bf8e12d 100644 --- a/README.md +++ b/README.md @@ -3,16 +3,16 @@ [![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme) -[![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Download&style=for-the-badge)](##installation "Installation") +[![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Download&style=for-the-badge)](#installation "Installation") [![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPi") [![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate") [![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix") [![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord") [![Supported Sites](https://img.shields.io/badge/-Supported_Sites-brightgreen.svg?style=for-the-badge)](supportedsites.md "Supported Sites") [![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License") -[![CI Status](https://img.shields.io/github/workflow/status/yt-dlp/yt-dlp/Core%20Tests/master?label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status") +[![CI Status](https://img.shields.io/github/actions/workflow/status/yt-dlp/yt-dlp/core.yml?branch=master&label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status") [![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") 
-[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") +[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge&display_timestamp=committer)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") @@ -48,7 +48,9 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [SponsorBlock Options](#sponsorblock-options) * [Extractor Options](#extractor-options) * [CONFIGURATION](#configuration) + * [Configuration file encoding](#configuration-file-encoding) * [Authentication with .netrc file](#authentication-with-netrc-file) + * [Notes about environment variables](#notes-about-environment-variables) * [OUTPUT TEMPLATE](#output-template) * [Output template examples](#output-template-examples) * [FORMAT SELECTION](#format-selection) @@ -59,6 +61,8 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [Modifying metadata examples](#modifying-metadata-examples) * [EXTRACTOR ARGUMENTS](#extractor-arguments) * [PLUGINS](#plugins) + * [Installing Plugins](#installing-plugins) + * [Developing Plugins](#developing-plugins) * [EMBEDDING YT-DLP](#embedding-yt-dlp) * [Embedding examples](#embedding-examples) * [DEPRECATED OPTIONS](#deprecated-options) @@ -72,13 +76,13 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t # NEW FEATURES -* Merged with **youtube-dl v2021.12.17+ [commit/ed5c44e](https://github.com/ytdl-org/youtube-dl/commit/ed5c44e7b74ac77f87ca5ed6cb5e964a0c6a0678)** and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) +* Merged with **youtube-dl v2021.12.17+ [commit/2dd6c6e](https://github.com/ytdl-org/youtube-dl/commit/2dd6c6e)** ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) -* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API +* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API * **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will now be preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples)) -* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico livestreams are not available.
See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details. +* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details. * **YouTube improvements**: * Supports Clips, Stories (`ytstories:`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, YouTube Music Albums/Channels ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)), and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`) @@ -86,9 +90,9 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * Supports some (but not all) age-gated content without cookies * Download livestreams from the start using `--live-from-start` (*experimental*) * `255kbps` audio is extracted (if available) from YouTube Music when premium cookies are given - * Redirect channel's home URL automatically to `/video` to preserve the old behaviour + * Channel URLs download all uploads of the channel, including shorts and live -* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE]` +* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]` * **Download time range**: Videos can be downloaded partially based on either timestamps or chapters using `--download-sections` @@ -110,13 +114,15 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata` -* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc +* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filter` etc * **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc * **Plugins**: Extractors and PostProcessors can be loaded from an external file. 
See [plugins](#plugins) for details -* **Self-updater**: The releases can be updated using `yt-dlp -U` +* **Self updater**: The releases can be updated using `yt-dlp -U`, and downgraded using `--update-to` if required + +* **Nightly builds**: [Automated nightly builds](#update-channels) can be used with `--update-to nightly` See [changelog](Changelog.md) or [commits](https://github.com/yt-dlp/yt-dlp/commits) for the full list of changes @@ -126,6 +132,7 @@ Features marked with a **\*** have been back-ported to youtube-dl Some of yt-dlp's default options are different from those of youtube-dl and youtube-dlc: +* yt-dlp supports only [Python 3.7+](## "Windows 7"), and *may* remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743) * The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`) no longer work. See [removed options](#Removed) for details * `avconv` is not supported as an alternative to `ffmpeg` * yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations @@ -140,8 +147,8 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior * The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this * Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading -* Youtube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. For all other tabs, if the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections +* YouTube channel URLs download all uploads of the channel. To download only the videos in a specific tab, pass the tab's URL. If the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections -* Unavailable videos are also listed for youtube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this +* Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this * The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date.
* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this * Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead @@ -149,12 +156,15 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` * yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior +* yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [~~aria2c~~](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is For ease of use, a few more compat options are available: * `--compat-options all`: Use all compat options (Do NOT use) * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect` +* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` +* `--compat-options 2022`: Same as `--compat-options no-external-downloader-progress`. Use this to enable all future compat options # INSTALLATION @@ -173,12 +183,25 @@ You can install yt-dlp using [the binaries](#release-files), [PIP](https://pypi. ## UPDATE -You can use `yt-dlp -U` to update if you are [using the release binaries](#release-files) +You can use `yt-dlp -U` to update if you are using the [release binaries](#release-files) If you [installed with PIP](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program -For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation) or refer their documentation +For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation#third-party-package-managers) or refer to their documentation + + +There are currently two release channels for binaries, `stable` and `nightly`. +`stable` is the default channel, and many of its changes have been tested by users of the nightly channel. +The `nightly` channel has releases built after each push to the master branch, and will have the most recent fixes and additions, but also more risk of regressions. They are available in [their own repo](https://github.com/yt-dlp/yt-dlp-nightly-builds/releases). + +When using `--update`/`-U`, a release binary will only update to its current channel. +This release channel can be changed by using the `--update-to` option. `--update-to` can also be used to upgrade or downgrade to specific tags from a channel.
+ +Example usage: +* `yt-dlp --update-to nightly` switch to the `nightly` channel and update to its latest release +* `yt-dlp --update-to stable@2023.02.17` upgrade/downgrade to the `stable` channel release tagged `2023.02.17` +* `yt-dlp --update-to 2023.01.06` upgrade/downgrade to tag `2023.01.06` if it exists on the current channel ## RELEASE FILES @@ -199,6 +222,8 @@ File|Description [yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`
([Not recommended](#standalone-py2exe-builds-windows)) [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary [yt-dlp_linux.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux.zip)|Unpackaged Linux executable (no auto-update) +[yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary +[yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary [yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update) [yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (no auto-update) [yt-dlp_macos_legacy](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos_legacy)|MacOS (10.9+) standalone x64 executable @@ -209,11 +234,20 @@ File|Description :---|:--- [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball [SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums +[SHA2-512SUMS.sig](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS.sig)|GPG signature file for SHA512 sums [SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums +[SHA2-256SUMS.sig](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS.sig)|GPG signature file for SHA256 sums + +The public key that can be used to verify the GPG signatures is [available here](https://github.com/yt-dlp/yt-dlp/blob/master/public.key) Example usage: ``` curl -L https://github.com/yt-dlp/yt-dlp/raw/master/public.key | gpg --import gpg --verify SHA2-256SUMS.sig SHA2-256SUMS gpg --verify SHA2-512SUMS.sig SHA2-512SUMS ``` - -Note: The manpages, shell completion files etc. are available in the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) +**Note**: The manpages, shell completion files etc. are available inside the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) ## DEPENDENCIES Python versions 3.7+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. @@ -229,8 +263,9 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html) - - **Note**: There are some regressions in newer ffmpeg versions that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds + There are bugs in ffmpeg that cause various issues when used alongside yt-dlp.
Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds + + **Important**: What you need is the ffmpeg *binary*, **NOT** [the python package of the same name](https://pypi.org/project/ffmpeg) ### Networking * [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE) @@ -275,7 +310,9 @@ To build the standalone executable, you must have Python and `pyinstaller` (plus On some systems, you may need to use `py` or `python` instead of `python3`. -Note that pyinstaller with versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment. +`pyinst.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which are further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate). + +**Note**: Pyinstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment. **Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly. @@ -298,11 +335,15 @@ If you wish to build it anyway, install Python and py2exe, and then simply run ` ### Related scripts -* **`devscripts/update-version.py [revision]`** - Update the version number based on current date -* **`devscripts/set-variant.py variant [-M update_message]`** - Set the build variant of the executable +* **`devscripts/update-version.py`** - Update the version number based on current date. +* **`devscripts/set-variant.py`** - Set the build variant of the executable. +* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file. * **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading. -You can also fork the project on github and run your fork's [build workflow](.github/workflows/build.yml) to automatically build a full release +Note: See their `--help` for more info. + +### Forking the project +If you fork the project on GitHub, you can run your fork's [build workflow](.github/workflows/build.yml) to automatically build the selected version(s) as artifacts. Alternatively, you can run the [release workflow](.github/workflows/release.yml) or enable the [nightly workflow](.github/workflows/release-nightly.yml) to create full (pre-)releases. # USAGE AND OPTIONS @@ -318,6 +359,11 @@ You can also fork the project on github and run your fork's [build workflow](.gi --version Print program version and exit -U, --update Update this program to the latest version --no-update Do not check for updates (default) + --update-to [CHANNEL]@[TAG] Upgrade/downgrade to a specific version.
+ CHANNEL and TAG default to "stable" and + "latest" respectively if omitted; See + "UPDATE" for details. Supported channels: + stable, nightly -i, --ignore-errors Ignore download and postprocessing errors. The download will be considered successful even if the postprocessing fails @@ -408,6 +454,8 @@ You can also fork the project on github and run your fork's [build workflow](.gi --source-address IP Client-side IP address to bind to -4, --force-ipv4 Make all connections via IPv4 -6, --force-ipv6 Make all connections via IPv6 + --enable-file-urls Enable file:// URLs. This is disabled by + default for security reasons. ## Geo-restriction: --geo-verification-proxy URL Use this proxy to verify the IP address for @@ -426,23 +474,24 @@ You can also fork the project on github and run your fork's [build workflow](.gi explicitly provided IP block in CIDR notation ## Video Selection: - -I, --playlist-items ITEM_SPEC Comma separated playlist_index of the videos + -I, --playlist-items ITEM_SPEC Comma separated playlist_index of the items to download. You can specify a range using "[START]:[STOP][:STEP]". For backward compatibility, START-STOP is also supported. Use negative indices to count from the right and negative STEP to download in reverse order. E.g. "-I 1:3,7,-5::2" used on a - playlist of size 15 will download the videos + playlist of size 15 will download the items at index 1,2,3,7,11,13,15 - --min-filesize SIZE Do not download any videos smaller than + --min-filesize SIZE Abort download if filesize is smaller than + SIZE, e.g. 50k or 44.6M + --max-filesize SIZE Abort download if filesize is larger than SIZE, e.g. 50k or 44.6M - --max-filesize SIZE Do not download any videos larger than SIZE, - e.g. 50k or 44.6M --date DATE Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format [now|today|yesterday][-N[day|week|month|year]]. - E.g. --date today-2weeks + E.g. "--date today-2weeks" downloads only + videos uploaded on the same day two weeks ago --datebefore DATE Download only videos uploaded on or before this date. The date formats accepted are the same as --date @@ -469,7 +518,10 @@ You can also fork the project on github and run your fork's [build workflow](.gi dogs" (caseless).
Use "--match-filter -" to interactively ask whether to download each video - --no-match-filter Do not use generic video filter (default) + --no-match-filter Do not use any --match-filter (default) + --break-match-filters FILTER Same as "--match-filters" but stops the + download process when a video is rejected + --no-break-match-filters Do not use any --break-match-filters (default) --no-playlist Download only the video, if the URL refers to a video and a playlist --yes-playlist Download the playlist, if the URL refers to @@ -483,11 +535,9 @@ You can also fork the project on github and run your fork's [build workflow](.gi --max-downloads NUMBER Abort after downloading NUMBER files --break-on-existing Stop the download process when encountering a file that is in the archive - --break-on-reject Stop the download process when encountering - a file that has been filtered out - --break-per-input --break-on-existing, --break-on-reject, - --max-downloads, and autonumber resets per - input URL + --break-per-input Alters --max-downloads, --break-on-existing, + --break-match-filter, and autonumber to + reset per input URL --no-break-per-input --break-on-existing and similar options terminates the entire download queue --skip-playlist-after-errors N Number of allowed failures until the rest of @@ -519,8 +569,8 @@ You can also fork the project on github and run your fork's [build workflow](.gi linear=1::2 --retry-sleep fragment:exp=1:20 --skip-unavailable-fragments Skip unavailable fragments for DASH, hlsnative and ISM downloads (default) - (Alias: --no-abort-on-unavailable-fragment) - --abort-on-unavailable-fragment + (Alias: --no-abort-on-unavailable-fragments) + --abort-on-unavailable-fragments Abort download if a fragment is unavailable (Alias: --no-skip-unavailable-fragments) --keep-fragments Keep downloaded fragments on disk after @@ -562,7 +612,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi Needs ffmpeg. This option can be used multiple times to download multiple sections, e.g. --download-sections - "*10:15-15:00" --download-sections "intro" + "*10:15-inf" --download-sections "intro" --downloader [PROTO:]NAME Name or path of the external downloader to use (optionally) prefixed by the protocols (http, ftp, m3u8, dash, rstp, rtmp, mms) to @@ -679,8 +729,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi --cache-dir DIR Location in the filesystem where yt-dlp can store some downloaded information (such as client ids and signatures) permanently. By - default $XDG_CACHE_HOME/yt-dlp or - ~/.cache/yt-dlp + default ${XDG_CACHE_HOME}/yt-dlp --no-cache-dir Disable filesystem caching --rm-cache-dir Delete all filesystem cache files @@ -720,7 +769,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi screen, optionally prefixed with when to print it, separated by a ":". Supported values of "WHEN" are the same as that of - --use-postprocessor, and "video" (default). + --use-postprocessor (default: video). Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. 
This option can be used multiple times @@ -773,7 +822,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi --prefer-insecure Use an unencrypted connection to retrieve information about the video (Currently supported only for YouTube) - --add-header FIELD:VALUE Specify a custom HTTP header and its value, + --add-header FIELD:VALUE Specify a custom HTTP header and its value, separated by a colon ":". You can use this option multiple times --bidi-workaround Work around terminals that lack @@ -888,11 +937,11 @@ You can also fork the project on github and run your fork's [build workflow](.gi specific bitrate like 128K (default 5) --remux-video FORMAT Remux the video into another container if necessary (currently supported: avi, flv, - mkv, mov, mp4, webm, aac, aiff, alac, flac, - m4a, mka, mp3, ogg, opus, vorbis, wav). If - target container does not support the - video/audio codec, remuxing will fail. You - can specify multiple rules; e.g. + gif, mkv, mov, mp4, webm, aac, aiff, alac, + flac, m4a, mka, mp3, ogg, opus, vorbis, + wav). If target container does not support + the video/audio codec, remuxing will fail. + You can specify multiple rules; e.g. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv --recode-video FORMAT Re-encode the video into another format if @@ -947,13 +996,18 @@ You can also fork the project on github and run your fork's [build workflow](.gi mkv/mka video files --no-embed-info-json Do not embed the infojson as an attachment to the video file - --parse-metadata FROM:TO Parse additional metadata like title/artist + --parse-metadata [WHEN:]FROM:TO + Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" - for details - --replace-in-metadata FIELDS REGEX REPLACE + for details. Supported values of "WHEN" are + the same as that of --use-postprocessor + (default: pre_process) + --replace-in-metadata [WHEN:]FIELDS REGEX REPLACE Replace text in a metadata field using the given regex. This option can be used - multiple times + multiple times. Supported values of "WHEN" + are the same as that of --use-postprocessor + (default: pre_process) --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards) --concat-playlist POLICY Concatenate videos in a playlist. One of @@ -974,18 +1028,18 @@ You can also fork the project on github and run your fork's [build workflow](.gi --ffmpeg-location PATH Location of the ffmpeg binary; either the path to the binary or its containing directory --exec [WHEN:]CMD Execute a command, optionally prefixed with - when to execute it (after_move if - unspecified), separated by a ":". Supported - values of "WHEN" are the same as that of - --use-postprocessor. Same syntax as the - output template can be used to pass any - field as arguments to the command. After - download, an additional field "filepath" - that contains the final path of the - downloaded file is also available, and if no - fields are passed, %(filepath)q is appended - to the end of the command. This option can - be used multiple times + when to execute it, separated by a ":". + Supported values of "WHEN" are the same as + that of --use-postprocessor (default: + after_move). Same syntax as the output + template can be used to pass any field as + arguments to the command.
After download, an + additional field "filepath" that contains + the final path of the downloaded file is + also available, and if no fields are passed, + %(filepath,_filename|)q is appended to the + end of the command. This option can be used + multiple times --no-exec Remove any previously defined --exec --convert-subs FORMAT Convert the subtitles to another format (currently supported: ass, lrc, srt, vtt) @@ -1023,14 +1077,16 @@ You can also fork the project on github and run your fork's [build workflow](.gi postprocessor is invoked. It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), - "before_dl" (before each video download), - "post_process" (after each video download; - default), "after_move" (after moving video - file to it's final locations), "after_video" - (after downloading and processing all - formats of a video), or "playlist" (at end - of playlist). This option can be used - multiple times to add different postprocessors + "video" (after --format; before + --print/--output), "before_dl" (before each + video download), "post_process" (after each + video download; default), "after_move" + (after moving video file to its final + location), "after_video" (after downloading + and processing all formats of a video), or + "playlist" (at end of playlist). This option + can be used multiple times to add different + postprocessors ## SponsorBlock Options: Make chapter entries for, or remove various segments (sponsor, @@ -1041,10 +1097,10 @@ Make chapter entries for, or remove various segments (sponsor, for, separated by commas. Available categories are sponsor, intro, outro, selfpromo, preview, filler, interaction, - music_offtopic, poi_highlight, all and - default (=all). You can prefix the category - with a "-" to exclude it. See [1] for - description of the categories. E.g. + music_offtopic, poi_highlight, chapter, all + and default (=all). You can prefix the + category with a "-" to exclude it. See [1] + for description of the categories. E.g. --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories --sponsorblock-remove CATS SponsorBlock categories to be removed from @@ -1053,8 +1109,8 @@ Make chapter entries for, or remove various segments (sponsor, remove takes precedence. The syntax and available categories are the same as for --sponsorblock-mark except that "default" - refers to "all,-filler" and poi_highlight is - not available + refers to "all,-filler" and poi_highlight, + chapter are not available --sponsorblock-chapter-title TEMPLATE An output template for the title of the SponsorBlock chapters created by @@ -1079,29 +1135,40 @@ Make chapter entries for, or remove various segments (sponsor, --no-hls-split-discontinuity Do not split HLS playlists to different formats at discontinuities such as ad breaks (default) - --extractor-args KEY:ARGS Pass these arguments to the extractor. See - "EXTRACTOR ARGUMENTS" for details. You can - use this option multiple times to give + --extractor-args IE_KEY:ARGS Pass ARGS arguments to the IE_KEY extractor. + See "EXTRACTOR ARGUMENTS" for details. You + can use this option multiple times to give arguments for different extractors # CONFIGURATION You can configure yt-dlp by placing any supported command line option in a configuration file. The configuration is loaded from the following locations: -1. **Main Configuration**: The file given by `--config-location` -1. **Portable Configuration**: `yt-dlp.conf` in the same directory as the bundled binary.
If you are running from source-code (`/yt_dlp/__main__.py`), the root directory is used instead. -1. **Home Configuration**: `yt-dlp.conf` in the home path given by `-P`, or in the current directory if no such path is given +1. **Main Configuration**: + * The file given by `--config-location` +1. **Portable Configuration**: (Recommended for portable installations) + * If using a binary, `yt-dlp.conf` in the same directory as the binary + * If running from source-code, `yt-dlp.conf` in the parent directory of `yt_dlp` +1. **Home Configuration**: + * `yt-dlp.conf` in the home path given by `-P` + * If `-P` is not given, the current directory is searched 1. **User Configuration**: - * `%XDG_CONFIG_HOME%/yt-dlp/config` (recommended on Linux/macOS) - * `%XDG_CONFIG_HOME%/yt-dlp.conf` - * `%APPDATA%/yt-dlp/config` (recommended on Windows) - * `%APPDATA%/yt-dlp/config.txt` + * `${XDG_CONFIG_HOME}/yt-dlp.conf` + * `${XDG_CONFIG_HOME}/yt-dlp/config` (recommended on Linux/macOS) + * `${XDG_CONFIG_HOME}/yt-dlp/config.txt` + * `${APPDATA}/yt-dlp.conf` + * `${APPDATA}/yt-dlp/config` (recommended on Windows) + * `${APPDATA}/yt-dlp/config.txt` * `~/yt-dlp.conf` * `~/yt-dlp.conf.txt` - - `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined. On windows, `%APPDATA%` generally points to `C:\Users\\AppData\Roaming` and `~` points to `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\`), or `%HOMEDRIVE%%HOMEPATH%` + * `~/.yt-dlp/config` + * `~/.yt-dlp/config.txt` -1. **System Configuration**: `/etc/yt-dlp.conf` + See also: [Notes about environment variables](#notes-about-environment-variables) +1. **System Configuration**: + * `/etc/yt-dlp.conf` + * `/etc/yt-dlp/config` + * `/etc/yt-dlp/config.txt` E.g. with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: ``` @@ -1120,22 +1187,22 @@ E.g. with the following configuration file yt-dlp will always extract the audio, -o ~/YouTube/%(title)s.%(ext)s ``` -Note that options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. +**Note**: Options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary as-if it were a UNIX shell. You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded. -### Config file encoding +### Configuration file encoding -The config files are decoded according to the UTF BOM if present, and in the encoding from system locale otherwise. +The configuration files are decoded according to the UTF BOM if present, and in the encoding from system locale otherwise. If you want your file to be decoded differently, add `# coding: ENCODING` to the beginning of the file (e.g. `# coding: shift-jis`). There must be no characters before that, even spaces or BOM. 
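For illustration, here is a minimal sketch of the decoding order just described (UTF BOM first, then a `# coding:` declaration, then the system locale). This is an approximation for readers, not yt-dlp's actual implementation, and `read_config_text` is a hypothetical helper:

```python
import locale
import re


def read_config_text(path):
    """Decode a config file: UTF BOM first, then `# coding: ENCODING`, else locale."""
    with open(path, 'rb') as f:
        raw = f.read()
    # 1. Honor a UTF byte-order mark if present
    for bom, encoding in ((b'\xef\xbb\xbf', 'utf-8'), (b'\xff\xfe', 'utf-16-le'), (b'\xfe\xff', 'utf-16-be')):
        if raw.startswith(bom):
            return raw[len(bom):].decode(encoding)
    # 2. Honor a "# coding: ENCODING" declaration on the very first line
    match = re.match(rb'#\s*coding\s*:\s*(\S+)', raw.split(b'\n', 1)[0])
    if match:
        return raw.decode(match.group(1).decode('ascii'))
    # 3. Fall back to the encoding from the system locale
    return raw.decode(locale.getpreferredencoding())
```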
### Authentication with `.netrc` file -You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you: +You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you: ``` -touch $HOME/.netrc -chmod a-rwx,u+rw $HOME/.netrc +touch ${HOME}/.netrc +chmod a-rwx,u+rw ${HOME}/.netrc ``` After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase: ``` @@ -1148,7 +1215,14 @@ machine twitch login my_twitch_account_name password my_twitch_password ``` To activate authentication with the `.netrc` file you should pass `--netrc` to yt-dlp or place it in the [configuration file](#configuration). -The default location of the .netrc file is `$HOME` (`~`) in UNIX. On Windows, it is `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\`) or `%HOMEDRIVE%%HOMEPATH%` +The default location of the .netrc file is `~` (see below). + +### Notes about environment variables +* Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows; but is always shown as `${VARIABLE}` in this documentation +* yt-dlp also allow using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-location` +* If unset, `${XDG_CONFIG_HOME}` defaults to `~/.config` and `${XDG_CACHE_HOME}` to `~/.cache` +* On Windows, `~` points to `${HOME}` if present; or, `${USERPROFILE}` or `${HOMEDRIVE}${HOMEPATH}` otherwise +* On Windows, `${USERPROFILE}` generally points to `C:\Users\` and `${APPDATA}` to `${USERPROFILE}\AppData\Roaming` # OUTPUT TEMPLATE @@ -1172,13 +1246,13 @@ The field names themselves (the part inside the parenthesis) can also have some 1. **Alternatives**: Alternate fields can be specified separated with a `,`. E.g. `%(release_date>%Y,upload_date>%Y|Unknown)s` -1. **Replacement**: A replacement value can specified using a `&` separator. If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty. +1. **Replacement**: A replacement value can be specified using a `&` separator. If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty. 1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. 
This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s` -1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted) +1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing, `+` for Unicode), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted) -1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC +1. **Unicode normalization**: The format type `U` can be used for NFC [Unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC To summarize, the general syntax for a field is: ``` @@ -1187,6 +1261,10 @@ To summarize, the general syntax for a field is: Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. E.g. `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. E.g. `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video. +
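The same per-type templates can also be used when embedding yt-dlp. A hedged sketch (the dict form of `outtmpl` mirrors the `-o "TYPE:TEMPLATE"` syntax; the URL is only a placeholder):

```python
from yt_dlp import YoutubeDL

ydl_opts = {
    'writethumbnail': True,
    'outtmpl': {
        'default': '%(title)s.%(ext)s',
        # Put each thumbnail in a folder named after its video, as in the CLI example above
        'thumbnail': '%(title)s/%(title)s.%(ext)s',
    },
}
with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```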
+ +**Note**: Due to post-processing (i.e. merging etc.), the actual output filename might differ. Use `--print after_move:filepath` to get the name after all post-processing is complete. + The available fields are: - `id` (string): Video identifier @@ -1213,6 +1291,7 @@ The available fields are: - `duration` (numeric): Length of the video in seconds - `duration_string` (string): Length of the video (HH:mm:ss) - `view_count` (numeric): How many users have watched the video on the platform + - `concurrent_view_count` (numeric): How many users are currently watching the video on the platform. - `like_count` (numeric): Number of positive ratings of the video - `dislike_count` (numeric): Number of negative ratings of the video - `repost_count` (numeric): Number of reposts of the video @@ -1286,7 +1365,7 @@ Available only when using `--download-sections` and for `chapter:` prefix when u Available only when used in `--print`: - `urls` (string): The URLs of all requested formats, one in each line - - `filename` (string): Name of the video file. Note that the actual filename may be different due to post-processing. Use `--exec echo` to get the name after all postprocessing is complete + - `filename` (string): Name of the video file. Note that the [actual filename may differ](#outtmpl-postprocess-note) - `formats_table` (table): The video format table as printed by `--list-formats` - `thumbnails_table` (table): The thumbnail format table as printed by `--list-thumbnails` - `subtitles_table` (table): The subtitle format table as printed by `--list-subs` @@ -1297,14 +1376,15 @@ Available only in `--sponsorblock-chapter-title`: - `start_time` (numeric): Start time of the chapter in seconds - `end_time` (numeric): End time of the chapter in seconds - - `categories` (list): The SponsorBlock categories the chapter belongs to + - `categories` (list): The [SponsorBlock categories](https://wiki.sponsor.ajay.app/w/Types#Category) the chapter belongs to - `category` (string): The smallest SponsorBlock category the chapter belongs to - `category_names` (list): Friendly names of the categories - `name` (string): Friendly name of the smallest category + - `type` (string): The [SponsorBlock action type](https://wiki.sponsor.ajay.app/w/Types#Action_Type) of the chapter Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory. -Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). +**Note**: Some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). **Tip**: Look at the `-j` output to identify which fields are available for the particular URL @@ -1399,7 +1479,7 @@ For example, to download the worst quality video-only format you can use `-f wor You can select the n'th best format of a type by using `best.`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream. 
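These selectors work unchanged through the embedding API as well; a minimal sketch (see "EMBEDDING YT-DLP" below; the selector and URL are illustrative only):

```python
from yt_dlp import YoutubeDL

# `bv*.3/b` = 3rd best format containing a video stream, else the best combined format
with YoutubeDL({'format': 'bv*.3/b'}) as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
    print(info.get('format_id'))
```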
-If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred; e.g. `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download. +If you want to download multiple videos, and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred; e.g. `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download. If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. @@ -1407,7 +1487,7 @@ You can merge the video and audio of multiple formats into a single file using ` **Deprecation warning**: Since the *below* described behavior is complex and counter-intuitive, this will be removed and multistreams will be enabled by default in the future. A new operator will be instead added to limit formats to single audio/video -Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. E.g. `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download and merge both formats while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`. +Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. E.g. `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download only `best` while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`. 
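A hedged sketch of the multistream behavior through the embedding API; `allow_multiple_video_streams`/`allow_multiple_audio_streams` are assumed here to be the parameter names backing the CLI flags:

```python
from yt_dlp import YoutubeDL

ydl_opts = {
    'format': 'bestvideo+best+bestaudio',
    # Assumed equivalents of --video-multistreams/--audio-multistreams
    'allow_multiple_video_streams': True,
    'allow_multiple_audio_streams': True,
    'merge_output_format': 'mkv',  # a container that can hold the extra streams
}
with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```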
## Filtering Formats @@ -1419,6 +1499,7 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, ` - `filesize_approx`: An estimate for the number of bytes - `width`: Width of the video, if known - `height`: Height of the video, if known + - `aspect_ratio`: Aspect ratio of the video, if known - `tbr`: Average bitrate of audio and video in KBit/s - `abr`: Average audio bitrate in KBit/s - `vbr`: Average video bitrate in KBit/s @@ -1444,7 +1525,7 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`. -Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering. +**Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering. Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats. @@ -1456,17 +1537,17 @@ You can change the criteria for being considered the `best` by using `-S` (`--fo The available fields are: - - `hasvid`: Gives priority to formats that has a video stream - - `hasaud`: Gives priority to formats that has a audio stream + - `hasvid`: Gives priority to formats that have a video stream + - `hasaud`: Gives priority to formats that have an audio stream - `ie_pref`: The format preference - `lang`: The language preference - `quality`: The quality of the format - `source`: The preference of the source - `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native`/`m3u8` > `http_dash_segments`> `websocket_frag` > `mms`/`rtsp` > `f4f`/`f4m`) - `vcodec`: Video Codec (`av01` > `vp9.2` > `vp9` > `h265` > `h264` > `vp8` > `h263` > `theora` > other) - - `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `eac3` > `ac3` > `dts` > other) + - `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `ac4` > `eac3` > `ac3` > `dts` > other) - `codec`: Equivalent to `vcodec,acodec` - - `vext`: Video Extension (`mp4` > `webm` > `flv` > other). If `--prefer-free-formats` is used, `webm` is preferred. + - `vext`: Video Extension (`mp4` > `mov` > `webm` > `flv` > other). If `--prefer-free-formats` is used, `webm` is preferred. - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). 
If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac` - `ext`: Equivalent to `vext,aext` - `filesize`: Exact filesize, if known in advance @@ -1542,7 +1623,7 @@ $ yt-dlp -S "+size,+br" $ yt-dlp -f "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4] / bv*+ba/b" # Download the best video with the best extension -# (For video, mp4 > webm > flv. For audio, m4a > aac > mp3 ...) +# (For video, mp4 > mov > webm > flv. For audio, m4a > aac > mp3 ...) $ yt-dlp -S "ext" @@ -1625,9 +1706,9 @@ The metadata obtained by the extractors can be modified by using `--parse-metada `--replace-in-metadata FIELDS REGEX REPLACE` is used to replace text in any metadata field using [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax). [Backreferences](https://docs.python.org/3/library/re.html?highlight=backreferences#re.sub) can be used in the replace string for advanced use. -The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields. +The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups, a single field name, or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields. -Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--add-metadata`. +Note that these options preserve their relative order, allowing replacements to be made in parsed fields and viceversa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`. This option also has a few special uses: @@ -1673,11 +1754,11 @@ $ yt-dlp --parse-metadata "description:Artist - (?P.+)" $ yt-dlp --parse-metadata "%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s" # Prioritize uploader as the "artist" field in video metadata -$ yt-dlp --parse-metadata "%(uploader|)s:%(meta_artist)s" --add-metadata +$ yt-dlp --parse-metadata "%(uploader|)s:%(meta_artist)s" --embed-metadata # Set "comment" field in video metadata using description instead of webpage_url, # handling multiple lines correctly -$ yt-dlp --parse-metadata "description:(?s)(?P.+)" --add-metadata +$ yt-dlp --parse-metadata "description:(?s)(?P.+)" --embed-metadata # Do not set any "synopsis" in the video metadata $ yt-dlp --parse-metadata ":(?P)" @@ -1694,35 +1775,36 @@ $ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-" Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. 
`--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"` +Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client"` becomes `youtube:player_client"` + The following extractors use this feature: #### youtube +* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. +* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details -* `include_live_dash`: Include live dash formats even without `--live-from-start` (These formats don't download properly) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total +* `include_incomplete_formats`: Extract formats that cannot be downloaded completely (live dash and post-live m3u8) * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_key`: Innertube API key to use for all API requests -* `lang`: Language code to prefer translated metadata of this language (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. 
See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes #### youtubetab (YouTube playlists, channels, feeds, etc.) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) -* `approximate_date`: Extract approximate `upload_date` in flat-playlist. This may cause date-based filters to be slightly off +* `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off + +#### generic +* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments. Does not apply to ffmpeg #### funimation -* `language`: Languages to extract, e.g. `funimation:language=english,japanese` +* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese` * `version`: The video version to extract - `uncut` or `simulcast` -#### crunchyroll -* `language`: Languages to extract, e.g. `crunchyroll:language=jaJp` -* `hardsub`: Which hard-sub versions to extract, e.g. `crunchyroll:hardsub=None,enUS` - -#### crunchyrollbeta +#### crunchyrollbeta (Crunchyroll) * `format`: Which stream type(s) to extract (default: `adaptive_hls`). Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2` -* `hardsub`: Preference order for which hardsub versions to extract (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None` +* `hardsub`: Preference order for which hardsub versions to extract, or `all` (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None` #### vikichannel * `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers` @@ -1742,33 +1824,88 @@ The following extractors use this feature: * `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv` #### tiktok +* `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com` * `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1` * `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221` #### rokfinchannel * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks` +#### twitter +* `force_graphql`: Force usage of the GraphQL API. By default it will only be used if login cookies are provided -NOTE: These options may be changed/removed in the future without concern for backward compatibility +**Note**: These options may be changed/removed in the future without concern for backward compatibility # PLUGINS -Plugins are loaded from `/ytdlp_plugins//__init__.py`; where `` is the directory of the binary (`/yt-dlp`), or the root directory of the module if you are running directly from source-code (`/yt_dlp/__main__.py`). Plugins are currently not supported for the `pip` version +Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. **Use plugins at your own risk and only if you trust the code!** -Plugins can be of ``s `extractor` or `postprocessor`. 
Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. Postprocessor plugins can be invoked using `--use-postprocessor NAME`. +Plugins can be of ``s `extractor` or `postprocessor`. +- Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. +- Extractor plugins take priority over builtin extractors. +- Postprocessor plugins can be invoked using `--use-postprocessor NAME`. -See [ytdlp_plugins](ytdlp_plugins) for example plugins. -Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. Use plugins at your own risk and only if you trust the code +Plugins are loaded from the namespace packages `yt_dlp_plugins.extractor` and `yt_dlp_plugins.postprocessor`. -If you are a plugin author, add [ytdlp-plugins](https://github.com/topics/ytdlp-plugins) as a topic to your repository for discoverability +In other words, the file structure on the disk looks something like: + + yt_dlp_plugins/ + extractor/ + myplugin.py + postprocessor/ + myplugin.py + +yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them. See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins) +## Installing Plugins +Plugins can be installed using various methods and locations. + +1. **Configuration directories**: + Plugin packages (containing a `yt_dlp_plugins` namespace folder) can be dropped into the following standard [configuration locations](#configuration): + * **User Plugins** + * `${XDG_CONFIG_HOME}/yt-dlp/plugins//yt_dlp_plugins/` (recommended on Linux/macOS) + * `${XDG_CONFIG_HOME}/yt-dlp-plugins//yt_dlp_plugins/` + * `${APPDATA}/yt-dlp/plugins//yt_dlp_plugins/` (recommended on Windows) + * `${APPDATA}/yt-dlp-plugins//yt_dlp_plugins/` + * `~/.yt-dlp/plugins//yt_dlp_plugins/` + * `~/yt-dlp-plugins//yt_dlp_plugins/` + * **System Plugins** + * `/etc/yt-dlp/plugins//yt_dlp_plugins/` + * `/etc/yt-dlp-plugins//yt_dlp_plugins/` +2. **Executable location**: Plugin packages can similarly be installed in a `yt-dlp-plugins` directory under the executable location: + * Binary: where `/yt-dlp.exe`, `/yt-dlp-plugins//yt_dlp_plugins/` + * Source: where `/yt_dlp/__main__.py`, `/yt-dlp-plugins//yt_dlp_plugins/` + +3. **pip and other locations in `PYTHONPATH`** + * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. + * Note: plugin files between plugin packages installed with pip must have unique filenames. + * Any path in `PYTHONPATH` is searched in for the `yt_dlp_plugins` namespace folder. + * Note: This does not apply for Pyinstaller/py2exe builds. + + +`.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages. +* e.g. `${XDG_CONFIG_HOME}/yt-dlp/plugins/mypluginpkg.zip` where `mypluginpkg.zip` contains `yt_dlp_plugins//myplugin.py` + +Run yt-dlp with `--verbose` to check if the plugin has been loaded. + +## Developing Plugins + +See the [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) repo for a template plugin package and the [Plugin Development](https://github.com/yt-dlp/yt-dlp/wiki/Plugin-Development) section of the wiki for a plugin development guide. 
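As a starting point, here is a skeleton extractor plugin matching the file structure above (`yt_dlp_plugins/extractor/myplugin.py`). All names and the URL pattern are placeholders; the sample-plugins repo linked above is the authoritative template:

```python
# yt_dlp_plugins/extractor/myplugin.py
from yt_dlp.extractor.common import InfoExtractor


class MyPluginIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?example\.com/watch/(?P<id>[0-9a-zA-Z]+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': self._og_search_video_url(webpage),
        }
```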
+ +All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors respectively. This respects the underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`). + +To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `class MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). Since the extractor replaces the parent, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above. + +If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability. + +See the [Developer Instructions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) on how to write and test an extractor. # EMBEDDING YT-DLP @@ -1786,7 +1923,7 @@ with YoutubeDL() as ydl: ydl.download(URLS) ``` -Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L180). +Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L184). **Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the [example below](#extracting-information) @@ -1996,6 +2133,7 @@ While these options are redundant, they are still expected to be used due to the --reject-title REGEX --match-filter "title !~= (?i)REGEX" --min-views COUNT --match-filter "view_count >=? COUNT" --max-views COUNT --match-filter "view_count <=?
COUNT" + --break-on-reject Use --break-match-filter --user-agent UA --add-header "User-Agent:UA" --referer URL --add-header "Referer:URL" --playlist-start NUMBER -I NUMBER: diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json new file mode 100644 index 0000000000..e5c9d1aa21 --- /dev/null +++ b/devscripts/changelog_override.json @@ -0,0 +1,12 @@ +[ + { + "action": "add", + "when": "776d1c3f0c9b00399896dd2e40e78e9a43218109", + "short": "[priority] **A new release type has been added!**\n * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs).\n * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`).\n * The `--update-to` option has been added allowing the user more control over program upgrades (or downgrades).\n * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags.\n * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG`" + }, + { + "action": "add", + "when": "776d1c3f0c9b00399896dd2e40e78e9a43218109", + "short": "[priority] **YouTube throttling fixes!**" + } +] diff --git a/devscripts/changelog_override.schema.json b/devscripts/changelog_override.schema.json new file mode 100644 index 0000000000..9bd747b701 --- /dev/null +++ b/devscripts/changelog_override.schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft/2020-12/schema", + "type": "array", + "uniqueItems": true, + "items": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "action": { + "enum": [ + "add" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "short" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "remove" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + } + }, + "required": [ + "action", + "hash" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "change" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "hash", + "short", + "authors" + ] + } + ] + } +} diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py index 626b85d620..c8815e01bc 100644 --- a/devscripts/lazy_load_template.py +++ b/devscripts/lazy_load_template.py @@ -10,7 +10,7 @@ from ..utils import ( ) # These bloat the lazy_extractors, so allow them to passthrough silently -ALLOWED_CLASSMETHODS = {'get_testcases', 'extract_from_webpage'} +ALLOWED_CLASSMETHODS = {'extract_from_webpage', 'get_testcases', 'get_webpage_testcases'} _WARNED = False diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py new file mode 100644 index 0000000000..b159bc1b9b --- /dev/null +++ b/devscripts/make_changelog.py @@ -0,0 +1,470 @@ +from __future__ import annotations + +# Allow direct execution +import os +import 
sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import enum +import itertools +import json +import logging +import re +from collections import defaultdict +from dataclasses import dataclass +from functools import lru_cache +from pathlib import Path + +from devscripts.utils import read_file, run_process, write_file + +BASE_URL = 'https://github.com' +LOCATION_PATH = Path(__file__).parent +HASH_LENGTH = 7 + +logger = logging.getLogger(__name__) + + +class CommitGroup(enum.Enum): + UPSTREAM = None + PRIORITY = 'Important' + CORE = 'Core' + EXTRACTOR = 'Extractor' + DOWNLOADER = 'Downloader' + POSTPROCESSOR = 'Postprocessor' + MISC = 'Misc.' + + @classmethod + @lru_cache + def commit_lookup(cls): + return { + name: group + for group, names in { + cls.PRIORITY: {''}, + cls.UPSTREAM: {'upstream'}, + cls.CORE: { + 'aes', + 'cache', + 'compat_utils', + 'compat', + 'cookies', + 'core', + 'dependencies', + 'jsinterp', + 'outtmpl', + 'plugins', + 'update', + 'utils', + }, + cls.MISC: { + 'build', + 'cleanup', + 'devscripts', + 'docs', + 'misc', + 'test', + }, + cls.EXTRACTOR: {'extractor', 'extractors'}, + cls.DOWNLOADER: {'downloader'}, + cls.POSTPROCESSOR: {'postprocessor'}, + }.items() + for name in names + } + + @classmethod + def get(cls, value): + result = cls.commit_lookup().get(value) + if result: + logger.debug(f'Mapped {value!r} => {result.name}') + return result + + +@dataclass +class Commit: + hash: str | None + short: str + authors: list[str] + + def __str__(self): + result = f'{self.short!r}' + + if self.hash: + result += f' ({self.hash[:HASH_LENGTH]})' + + if self.authors: + authors = ', '.join(self.authors) + result += f' by {authors}' + + return result + + +@dataclass +class CommitInfo: + details: str | None + sub_details: tuple[str, ...] 
+ message: str + issues: list[str] + commit: Commit + fixes: list[Commit] + + def key(self): + return ((self.details or '').lower(), self.sub_details, self.message) + + +class Changelog: + MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE) + + def __init__(self, groups, repo): + self._groups = groups + self._repo = repo + + def __str__(self): + return '\n'.join(self._format_groups(self._groups)).replace('\t', ' ') + + def _format_groups(self, groups): + for item in CommitGroup: + group = groups[item] + if group: + yield self.format_module(item.value, group) + + def format_module(self, name, group): + result = f'\n#### {name} changes\n' if name else '\n' + return result + '\n'.join(self._format_group(group)) + + def _format_group(self, group): + sorted_group = sorted(group, key=CommitInfo.key) + detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower()) + for _, items in detail_groups: + items = list(items) + details = items[0].details + if not details: + indent = '' + else: + yield f'- {details}' + indent = '\t' + + if details == 'cleanup': + items, cleanup_misc_items = self._filter_cleanup_misc_items(items) + + sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details))) + for sub_details, entries in sub_detail_groups: + if not sub_details: + for entry in entries: + yield f'{indent}- {self.format_single_change(entry)}' + continue + + entries = list(entries) + prefix = f'{indent}- {", ".join(entries[0].sub_details)}' + if len(entries) == 1: + yield f'{prefix}: {self.format_single_change(entries[0])}' + continue + + yield prefix + for entry in entries: + yield f'{indent}\t- {self.format_single_change(entry)}' + + if details == 'cleanup' and cleanup_misc_items: + yield from self._format_cleanup_misc_sub_group(cleanup_misc_items) + + def _filter_cleanup_misc_items(self, items): + cleanup_misc_items = defaultdict(list) + non_misc_items = [] + for item in items: + if self.MISC_RE.search(item.message): + cleanup_misc_items[tuple(item.commit.authors)].append(item) + else: + non_misc_items.append(item) + + return non_misc_items, cleanup_misc_items + + def _format_cleanup_misc_sub_group(self, group): + prefix = '\t- Miscellaneous' + if len(group) == 1: + yield f'{prefix}: {next(self._format_cleanup_misc_items(group))}' + return + + yield prefix + for message in self._format_cleanup_misc_items(group): + yield f'\t\t- {message}' + + def _format_cleanup_misc_items(self, group): + for authors, infos in group.items(): + message = ', '.join( + self._format_message_link(None, info.commit.hash) + for info in sorted(infos, key=lambda item: item.commit.hash or '')) + yield f'{message} by {self._format_authors(authors)}' + + def format_single_change(self, info): + message = self._format_message_link(info.message, info.commit.hash) + if info.issues: + message = f'{message} ({self._format_issues(info.issues)})' + + if info.commit.authors: + message = f'{message} by {self._format_authors(info.commit.authors)}' + + if info.fixes: + fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes) + + authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold) + if authors != info.commit.authors: + fix_message = f'{fix_message} by {self._format_authors(authors)}' + + message = f'{message} (With fixes in {fix_message})' + + return message + + def _format_message_link(self, message, hash): + assert message or hash, 'Improperly defined 
commit message or override' + message = message if message else hash[:HASH_LENGTH] + return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message + + def _format_issues(self, issues): + return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues) + + @staticmethod + def _format_authors(authors): + return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors) + + @property + def repo_url(self): + return f'{BASE_URL}/{self._repo}' + + +class CommitRange: + COMMAND = 'git' + COMMIT_SEPARATOR = '-----' + + AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE) + MESSAGE_RE = re.compile(r''' + (?:\[ + (?P[^\]\/:,]+) + (?:/(?P
[^\]:,]+))? + (?:[:,](?P[^\]]+))? + \]\ )? + (?:(?P`?[^:`]+`?): )? + (?P.+?) + (?:\ \((?P\#\d+(?:,\ \#\d+)*)\))? + ''', re.VERBOSE | re.DOTALL) + EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) + FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})') + UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') + + def __init__(self, start, end, default_author=None): + self._start, self._end = start, end + self._commits, self._fixes = self._get_commits_and_fixes(default_author) + self._commits_added = [] + + def __iter__(self): + return iter(itertools.chain(self._commits.values(), self._commits_added)) + + def __len__(self): + return len(self._commits) + len(self._commits_added) + + def __contains__(self, commit): + if isinstance(commit, Commit): + if not commit.hash: + return False + commit = commit.hash + + return commit in self._commits + + def _get_commits_and_fixes(self, default_author): + result = run_process( + self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}', + f'{self._start}..{self._end}' if self._start else self._end).stdout + + commits = {} + fixes = defaultdict(list) + lines = iter(result.splitlines(False)) + for i, commit_hash in enumerate(lines): + short = next(lines) + skip = short.startswith('Release ') or short == '[version] update' + + authors = [default_author] if default_author else [] + for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): + match = self.AUTHOR_INDICATOR_RE.match(line) + if match: + authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) + + commit = Commit(commit_hash, short, authors) + if skip and (self._start or not i): + logger.debug(f'Skipped commit: {commit}') + continue + elif skip: + logger.debug(f'Reached Release commit, breaking: {commit}') + break + + fix_match = self.FIXES_RE.search(commit.short) + if fix_match: + commitish = fix_match.group(1) + fixes[commitish].append(commit) + + commits[commit.hash] = commit + + for commitish, fix_commits in fixes.items(): + if commitish in commits: + hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits) + logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}') + for fix_commit in fix_commits: + del commits[fix_commit.hash] + else: + logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}') + + return commits, fixes + + def apply_overrides(self, overrides): + for override in overrides: + when = override.get('when') + if when and when not in self and when != self._start: + logger.debug(f'Ignored {when!r}, not in commits {self._start!r}') + continue + + override_hash = override.get('hash') + if override['action'] == 'add': + commit = Commit(override.get('hash'), override['short'], override.get('authors') or []) + logger.info(f'ADD {commit}') + self._commits_added.append(commit) + + elif override['action'] == 'remove': + if override_hash in self._commits: + logger.info(f'REMOVE {self._commits[override_hash]}') + del self._commits[override_hash] + + elif override['action'] == 'change': + if override_hash not in self._commits: + continue + commit = Commit(override_hash, override['short'], override['authors']) + logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}') + self._commits[commit.hash] = commit + + self._commits = {key: value for key, value in reversed(self._commits.items())} + + def groups(self): + groups = defaultdict(list) + for commit in self: + upstream_re = self.UPSTREAM_MERGE_RE.match(commit.short) + 
if upstream_re: + commit.short = f'[upstream] Merge up to youtube-dl {upstream_re.group(1)}' + + match = self.MESSAGE_RE.fullmatch(commit.short) + if not match: + logger.error(f'Error parsing short commit message: {commit.short!r}') + continue + + prefix, details, sub_details, sub_details_alt, message, issues = match.groups() + group = None + if prefix: + if prefix == 'priority': + prefix, _, details = (details or '').partition('/') + logger.debug(f'Priority: {message!r}') + group = CommitGroup.PRIORITY + + if not details and prefix: + if prefix not in ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream'): + logger.debug(f'Replaced details with {prefix!r}') + details = prefix or None + + if details == 'common': + details = None + + if details: + details = details.strip() + + else: + group = CommitGroup.CORE + + sub_details = f'{sub_details or ""},{sub_details_alt or ""}'.replace(':', ',') + sub_details = tuple(filter(None, map(str.strip, sub_details.split(',')))) + + issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else [] + + if not group: + group = CommitGroup.get(prefix.lower()) + if not group: + if self.EXTRACTOR_INDICATOR_RE.search(commit.short): + group = CommitGroup.EXTRACTOR + else: + group = CommitGroup.POSTPROCESSOR + logger.warning(f'Failed to map {commit.short!r}, selected {group.name}') + + commit_info = CommitInfo( + details, sub_details, message.strip(), + issues, commit, self._fixes[commit.hash]) + logger.debug(f'Resolved {commit.short!r} to {commit_info!r}') + groups[group].append(commit_info) + + return groups + + +def get_new_contributors(contributors_path, commits): + contributors = set() + if contributors_path.exists(): + for line in read_file(contributors_path).splitlines(): + author, _, _ = line.strip().partition(' (') + authors = author.split('/') + contributors.update(map(str.casefold, authors)) + + new_contributors = set() + for commit in commits: + for author in commit.authors: + author_folded = author.casefold() + if author_folded not in contributors: + contributors.add(author_folded) + new_contributors.add(author) + + return sorted(new_contributors, key=str.casefold) + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser( + description='Create a changelog markdown from a git commit range') + parser.add_argument( + 'commitish', default='HEAD', nargs='?', + help='The commitish to create the range from (default: %(default)s)') + parser.add_argument( + '-v', '--verbosity', action='count', default=0, + help='increase verbosity (can be used twice)') + parser.add_argument( + '-c', '--contributors', action='store_true', + help='update CONTRIBUTORS file (default: %(default)s)') + parser.add_argument( + '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS', + help='path to the CONTRIBUTORS file') + parser.add_argument( + '--no-override', action='store_true', + help='skip override json in commit generation (default: %(default)s)') + parser.add_argument( + '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json', + help='path to the changelog_override.json file') + parser.add_argument( + '--default-author', default='pukkandan', + help='the author to use without a author indicator (default: %(default)s)') + parser.add_argument( + '--repo', default='yt-dlp/yt-dlp', + help='the github repository to use for the operations (default: %(default)s)') + args = parser.parse_args() + + logging.basicConfig( + datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | 
{levelname:<8} | {message}', + level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr) + + commits = CommitRange(None, args.commitish, args.default_author) + + if not args.no_override: + if args.override_path.exists(): + overrides = json.loads(read_file(args.override_path)) + commits.apply_overrides(overrides) + else: + logger.warning(f'File {args.override_path.as_posix()} does not exist') + + logger.info(f'Loaded {len(commits)} commits') + + new_contributors = get_new_contributors(args.contributors_path, commits) + if new_contributors: + if args.contributors: + write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a') + logger.info(f'New contributors: {", ".join(new_contributors)}') + + print(Changelog(commits.groups(), args.repo)) diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index fd964c6c65..39b95c8da6 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -24,6 +24,8 @@ VERBOSE_TMPL = ''' options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea @@ -58,7 +60,7 @@ NO_SKIP = ''' label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE description: Fill all fields even if you think it is irrelevant for the issue options: - - label: I understand that I will be **blocked** if I remove or skip any mandatory\\* field + - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\\* field required: true '''.strip() diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 43885331f8..d74ea202f0 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -2,6 +2,7 @@ # Allow direct execution import os +import shutil import sys sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -13,10 +14,17 @@ from devscripts.utils import get_filename_args, read_file, write_file NO_ATTR = object() STATIC_CLASS_PROPERTIES = [ - 'IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_ENABLED', '_NETRC_MACHINE', 'age_limit' + 'IE_NAME', '_ENABLED', '_VALID_URL', # Used for URL matching + '_WORKING', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY', # Used for --extractor-descriptions + 'age_limit', # Used for --age-limit (evaluated) + '_RETURN_TYPE', # Accessed in CLI only with instance (evaluated) ] CLASS_METHODS = [ - 'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id', 'is_suitable' + 'ie_key', 'suitable', '_match_valid_url', # Used for URL matching + 'working', 'get_temp_id', '_match_id', # Accessed just before instance creation + 'description', # Used for --extractor-descriptions + 'is_suitable', # Used for --age-limit + 'supports_login', 'is_single_video', # Accessed in CLI only with instance ] IE_TEMPLATE = ''' class {name}({bases}): @@ -32,8 +40,12 @@ def main(): _ALL_CLASSES = get_all_ies() # Must be before import + import yt_dlp.plugins from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor + # Filter out plugins + _ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')] + DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR}) module_src = '\n'.join(( 
MODULE_TEMPLATE, @@ -50,12 +62,13 @@ def get_all_ies(): PLUGINS_DIRNAME = 'ytdlp_plugins' BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked' if os.path.exists(PLUGINS_DIRNAME): - os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME) + # os.rename cannot be used, e.g. in Docker. See https://github.com/yt-dlp/yt-dlp/pull/4958 + shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME) try: from yt_dlp.extractor.extractors import _ALL_CLASSES finally: if os.path.exists(BLOCKED_DIRNAME): - os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME) + shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME) return _ALL_CLASSES diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py index fad993a199..2270b31d3b 100755 --- a/devscripts/make_readme.py +++ b/devscripts/make_readme.py @@ -45,33 +45,43 @@ switch_col_width = len(re.search(r'(?m)^\s{5,}', options).group()) delim = f'\n{" " * switch_col_width}' PATCHES = ( - ( # Standardize update message + ( # Standardize `--update` message r'(?m)^( -U, --update\s+).+(\n \s.+)*$', r'\1Update this program to the latest version', ), - ( # Headings + ( # Headings r'(?m)^ (\w.+\n)( (?=\w))?', r'## \1' ), - ( # Do not split URLs + ( # Fixup `--date` formatting + rf'(?m)( --date DATE.+({delim}[^\[]+)*)\[.+({delim}.+)*$', + (rf'\1[now|today|yesterday][-N[day|week|month|year]].{delim}' + f'E.g. "--date today-2weeks" downloads only{delim}' + 'videos uploaded on the same day two weeks ago'), + ), + ( # Do not split URLs rf'({delim[:-1]})? (?P<…>…)' […the rest of this regex and the intervening diff up to test/test_utils.py were lost to tag stripping; only fragments of the get_element_text_and_html_by_tag assertions (('nice', 'nice'), tag='a') and the opening of GET_ELEMENT_BY_TAG_TEST_STRING (''' random text lorem ipsum) survive…]
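For context on the os.rename -> shutil.move swap in get_all_ies() above: os.rename only works within a single filesystem and typically fails with OSError (errno EXDEV) when the plugins directory sits on a different mount, as commonly happens with Docker bind mounts, whereas shutil.move falls back to copy-and-delete. A minimal sketch of the failure mode (paths are illustrative, mirroring the devscript):

    import os
    import shutil

    src, dst = 'ytdlp_plugins', 'ytdlp_plugins_blocked'  # illustrative names
    try:
        os.rename(src, dst)  # can raise OSError (EXDEV) across filesystem boundaries
    except OSError:
        shutil.move(src, dst)  # copies the tree and removes the source, so it works across mounts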
@@ -1863,6 +1954,8 @@ Line 1 vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv') self.assertEqual(get_compatible_ext( vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm') + self.assertEqual(get_compatible_ext( + vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['weba']), 'webm') self.assertEqual(get_compatible_ext( vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4') @@ -1874,6 +1967,301 @@ Line 1 self.assertEqual(get_compatible_ext( vcodecs=['av1'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'], preferences=('webm', 'mkv')), 'mkv') + def test_traverse_obj(self): + _TEST_DATA = { + 100: 100, + 1.2: 1.2, + 'str': 'str', + 'None': None, + '...': ..., + 'urls': [ + {'index': 0, 'url': 'https://www.example.com/0'}, + {'index': 1, 'url': 'https://www.example.com/1'}, + ], + 'data': ( + {'index': 2}, + {'index': 3}, + ), + 'dict': {}, + } + + # Test base functionality + self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str', + msg='allow tuple path') + self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str', + msg='allow list path') + self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str', + msg='allow iterable path') + self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str', + msg='single items should be treated as a path') + self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA) + self.assertEqual(traverse_obj(_TEST_DATA, 100), 100) + self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2) + + # Test Ellipsis behavior + self.assertCountEqual(traverse_obj(_TEST_DATA, ...), + (item for item in _TEST_DATA.values() if item not in (None, {})), + msg='`...` should give all non discarded values') + self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, ...)), _TEST_DATA['urls'][0].values(), + msg='`...` selection for dicts should select all values') + self.assertEqual(traverse_obj(_TEST_DATA, (..., ..., 'url')), + ['https://www.example.com/0', 'https://www.example.com/1'], + msg='nested `...` queries should work') + self.assertCountEqual(traverse_obj(_TEST_DATA, (..., ..., 'index')), range(4), + msg='`...` query result should be flattened') + + # Test function as key + self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)), + [_TEST_DATA['urls']], + msg='function as query key should perform a filter based on (key, value)') + self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'}, + msg='exceptions in the query function should be caught') + if __debug__: + with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'): + traverse_obj(_TEST_DATA, lambda a: ...) + with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'): + traverse_obj(_TEST_DATA, lambda a, b, c: ...)
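The assertions above pin down traverse_obj's core contract: a path is a sequence of keys, `...` branches over every value, and a callable key filters (key, value) pairs, with exceptions raised by the callable swallowed. A small usage sketch against an ad-hoc dict (data and results here are illustrative, not part of the test suite):

    from yt_dlp.utils import traverse_obj

    data = {'items': [{'id': 0, 'url': 'https://example.com/0'}, {'id': 1}]}
    traverse_obj(data, ('items', 0, 'url'))    # 'https://example.com/0' -- plain key path
    traverse_obj(data, ('items', ..., 'url'))  # ['https://example.com/0'] -- `...` branches; misses are dropped
    traverse_obj(data, lambda k, v: k == 'items' and isinstance(v, list))  # [data['items']] -- callable as filter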
+ + # Test set as key (transformation/type, like `expected_type`) + self.assertEqual(traverse_obj(_TEST_DATA, (..., {str.upper}, )), ['STR'], + msg='Function in set should be a transformation') + self.assertEqual(traverse_obj(_TEST_DATA, (..., {str})), ['str'], + msg='Type in set should be a type filter') + self.assertEqual(traverse_obj(_TEST_DATA, {dict}), _TEST_DATA, + msg='A single set should be wrapped into a path') + self.assertEqual(traverse_obj(_TEST_DATA, (..., {str.upper})), ['STR'], + msg='Transformation function should not raise') + self.assertEqual(traverse_obj(_TEST_DATA, (..., {str_or_none})), + [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None], + msg='Function in set should be a transformation') + if __debug__: + with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'): + traverse_obj(_TEST_DATA, set()) + with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'): + traverse_obj(_TEST_DATA, {str.upper, str}) + + # Test alternative paths + self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str', + msg='multiple `paths` should be treated as alternative paths') + self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str', + msg='alternatives should exit early') + self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None, + msg='alternatives should return `default` if exhausted') + self.assertEqual(traverse_obj(_TEST_DATA, (..., 'fail'), 100), 100, + msg='alternatives should track their own branching return') + self.assertEqual(traverse_obj(_TEST_DATA, ('dict', ...), ('data', ...)), list(_TEST_DATA['data']), + msg='alternatives on empty objects should search further') + + # Test branch and path nesting + self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'], + msg='tuple as key should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'], + msg='list as key should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'], + msg='double nesting in path should be treated as paths') + self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1], + msg='do not fail early on branching') + self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))), + ['https://www.example.com/0', 'https://www.example.com/1'], + msg='triple nesting in path should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (..., 'url')))), + ['https://www.example.com/0', 'https://www.example.com/1'], + msg='ellipsis as branch path start gets flattened') + + # Test dictionary as key + self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2}, + msg='dict key should result in a dict with the same keys') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}), + {0: 'https://www.example.com/0'}, + msg='dict key should allow paths') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}), + {0: ['https://www.example.com/0']}, + msg='tuple in dict path should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}), + {0: ['https://www.example.com/0']}, + msg='double nesting in dict path should be treated as paths') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}), + {0: ['https://www.example.com/1',
'https://www.example.com/0']}, + msg='triple nesting in dict path should be treated as branches') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {}, + msg='remove `None` values when top level dict key fails') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=...), {0: ...}, + msg='use `default` if key fails and `default`') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {}, + msg='remove empty values when dict key') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=...), {0: ...}, + msg='use `default` when dict key and `default`') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {}, + msg='remove empty values when nested dict key fails') + self.assertEqual(traverse_obj(None, {0: 'fail'}), {}, + msg='default to dict if pruned') + self.assertEqual(traverse_obj(None, {0: 'fail'}, default=...), {0: ...}, + msg='default to dict if pruned and default is given') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=...), {0: {0: ...}}, + msg='use nested `default` when nested dict key fails and `default`') + self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', ...)}), {}, + msg='remove key if branch in dict key not successful') + + # Testing default parameter behavior + _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []} + self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None, + msg='default value should be `None`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=...), ..., + msg='chained fails should result in default') + self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0, + msg='should not short circuit on `None`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1, + msg='invalid dict key should result in `default`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1, + msg='`None` is a deliberate sentinel and should become `default`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None, + msg='`IndexError` should result in `default`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=1), 1, + msg='if branched but not successful return `default` if defined, not `[]`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=None), None, + msg='if branched but not successful return `default` even if `default` is `None`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail')), [], + msg='if branched but not successful return `[]`, not `default`') + self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', ...)), [], + msg='if branched but object is empty return `[]`, not `default`') + self.assertEqual(traverse_obj(None, ...), [], + msg='if branched but object is `None` return `[]`, not `default`') + self.assertEqual(traverse_obj({0: None}, (0, ...)), [], + msg='if branched but state is `None` return `[]`, not `default`') + + branching_paths = [ + ('fail', ...), + (..., 'fail'), + 100 * ('fail',) + (...,), + (...,) + 100 * ('fail',), + ] + for branching_path in branching_paths: + self.assertEqual(traverse_obj({}, branching_path), [], + msg='if branched but state is `None`, return `[]` (not `default`)') + self.assertEqual(traverse_obj({}, 'fail', branching_path), [], + msg='if branching in last alternative and previous did not match, return `[]` (not `default`)') + self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x', + msg='if branching in last alternative and previous did match, return single value') + self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x', + msg='if
branching in first alternative and non-branching path does match, return single value') + self.assertEqual(traverse_obj({}, branching_path, 'fail'), None, + msg='if branching in first alternative and non-branching path does not match, return `default`') + + # Testing expected_type behavior + _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0} + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str), + 'str', msg='accept matching `expected_type` type') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int), + None, msg='reject non matching `expected_type` type') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)), + '0', msg='transform type using type function') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0), + None, msg='wrap expected_type function in try_call') + self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, ..., expected_type=str), + ['str'], msg='eliminate items that expected_type fails on') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int), + {0: 100}, msg='type as expected_type should filter dict values') + self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none), + {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values') + self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int), + 1, msg='expected_type should not filter non final dict values') + self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int), + {0: {0: 100}}, msg='expected_type should transform deep dict values') + self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(...)), + [{0: ...}, {0: ...}], msg='expected_type should transform branched dict values') + self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int), + [4], msg='expected_type regression for type matching in tuple branching') + self.assertEqual(traverse_obj(_TEST_DATA, ['data', ...], expected_type=int), + [], msg='expected_type regression for type matching in dict result') + + # Test get_all behavior + _GET_ALL_DATA = {'key': [0, 1, 2]} + self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', ...), get_all=False), 0, + msg='if not `get_all`, return only first matching value') + self.assertEqual(traverse_obj(_GET_ALL_DATA, ..., get_all=False), [0, 1, 2], + msg='do not overflatten if not `get_all`') + + # Test casesense behavior + _CASESENSE_DATA = { + 'KeY': 'value0', + 0: { + 'KeY': 'value1', + 0: {'KeY': 'value2'}, + }, + } + self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None, + msg='dict keys should be case sensitive unless `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY', + casesense=False), 'value0', + msg='allow non matching key case if `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)), + casesense=False), ['value1'], + msg='allow non matching key case in branch if `casesense`') + self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)), + casesense=False), ['value2'], + msg='allow non matching key case in branch path if `casesense`') + + # Test traverse_string behavior + _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2} + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None, + msg='do not traverse into string if not `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0), + traverse_string=True), 's',
msg='traverse into string if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1), + traverse_string=True), '.', + msg='traverse into converted data if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', ...), + traverse_string=True), 'str', + msg='`...` should result in string (same value) if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)), + traverse_string=True), 'sr', + msg='`slice` should result in string if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"), + traverse_string=True), 'str', + msg='function should result in string if `traverse_string`') + self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)), + traverse_string=True), ['s', 'r'], + msg='branching should result in list if `traverse_string`') + + # Test is_user_input behavior + _IS_USER_INPUT_DATA = {'range8': list(range(8))} + self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'), + is_user_input=True), 3, + msg='allow for string indexing if `is_user_input`') + self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'), + is_user_input=True), tuple(range(8))[3:], + msg='allow for string slice if `is_user_input`') + self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'), + is_user_input=True), tuple(range(8))[:4:2], + msg='allow step in string slice if `is_user_input`') + self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'), + is_user_input=True), range(8), + msg='`:` should be treated as `...` if `is_user_input`') + with self.assertRaises(TypeError, msg='too many params should result in error'): + traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), is_user_input=True) + + # Test re.Match as input obj + mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123') + self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None], + msg='`...` on a `re.Match` should give its `groups()`') + self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'], + msg='function on a `re.Match` should give groupno, value starting at 0') + self.assertEqual(traverse_obj(mobj, 'group'), '3', + msg='str key on a `re.Match` should give group with that name') + self.assertEqual(traverse_obj(mobj, 2), '3', + msg='int key on a `re.Match` should give group with that name') + self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3', + msg='str key on a `re.Match` should respect casesense') + self.assertEqual(traverse_obj(mobj, 'fail'), None, + msg='failing str key on a `re.Match` should return `default`') + self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None, + msg='failing str key on a `re.Match` should return `default`') + self.assertEqual(traverse_obj(mobj, 8), None, + msg='failing int key on a `re.Match` should return `default`') + self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'], + msg='function on a `re.Match` should give group name as well') + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index c2dd0ac308..b3f323e21e 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL, is_download_test from yt_dlp.extractor import YoutubeIE, YoutubeTabIE +from yt_dlp.utils import ExtractorError
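The closing assertions above extend the same traversal interface to re.Match objects: `...` yields the non-None groups, a str key reads a named group, and an int key reads a group number, with failures falling back to `default`. A brief sketch (pattern and group name are illustrative):

    import re
    from yt_dlp.utils import traverse_obj

    mobj = re.search(r'(?P<id>\d+)', 'video 42')
    traverse_obj(mobj, 'id')    # '42' -- named group via str key
    traverse_obj(mobj, 1)       # '42' -- group number via int key
    traverse_obj(mobj, 'nope')  # None -- a missing group falls back to `default`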
@is_download_test @@ -53,6 +54,18 @@ class TestYoutubeLists(unittest.TestCase): self.assertEqual(video['duration'], 10) self.assertEqual(video['uploader'], 'Philipp Hagemeister') + def test_youtube_channel_no_uploads(self): + dl = FakeYDL() + dl.params['extract_flat'] = True + ie = YoutubeTabIE(dl) + # no uploads + with self.assertRaisesRegex(ExtractorError, r'no uploads'): + ie.extract('https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA') + + # no uploads and no UCID given + with self.assertRaisesRegex(ExtractorError, r'no uploads'): + ie.extract('https://www.youtube.com/news') + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index c3dcb4d68f..336e80291f 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -66,6 +66,10 @@ _SIG_TESTS = [ ] _NSIG_TESTS = [ + ( + 'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js', + 'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg', + ), ( 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js', 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w', @@ -130,6 +134,14 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js', 'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ', ), + ( + 'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js', + 'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w', + ), + ( + 'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js', + 'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A', + ), ] diff --git a/test/testdata/ism/ec-3_test.Manifest b/test/testdata/ism/ec-3_test.Manifest new file mode 100644 index 0000000000..45f95de73f --- /dev/null +++ b/test/testdata/ism/ec-3_test.Manifest @@ -0,0 +1 @@ +[…ISM manifest XML lost to tag stripping…] \ No newline at end of file diff --git a/test/testdata/yt_dlp_plugins/extractor/_ignore.py b/test/testdata/yt_dlp_plugins/extractor/_ignore.py new file mode 100644 index 0000000000..57faf75bbc --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/_ignore.py @@ -0,0 +1,5 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class IgnorePluginIE(InfoExtractor): + pass diff --git a/test/testdata/yt_dlp_plugins/extractor/ignore.py b/test/testdata/yt_dlp_plugins/extractor/ignore.py new file mode 100644 index 0000000000..816a16aa20 --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/ignore.py @@ -0,0 +1,12 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class IgnoreNotInAllPluginIE(InfoExtractor): + pass + + +class InAllPluginIE(InfoExtractor): + pass + + +__all__ = ['InAllPluginIE'] diff --git a/test/testdata/yt_dlp_plugins/extractor/normal.py b/test/testdata/yt_dlp_plugins/extractor/normal.py new file mode 100644 index 0000000000..b09009bdc6 --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/normal.py @@ -0,0 +1,9 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class NormalPluginIE(InfoExtractor): + pass + + +class _IgnoreUnderscorePluginIE(InfoExtractor): + pass diff --git a/test/testdata/yt_dlp_plugins/postprocessor/normal.py b/test/testdata/yt_dlp_plugins/postprocessor/normal.py new file mode 100644 index 0000000000..315b85a488 --- /dev/null +++ b/test/testdata/yt_dlp_plugins/postprocessor/normal.py @@ -0,0 +1,5 @@ +from yt_dlp.postprocessor.common import PostProcessor + + +class NormalPluginPP(PostProcessor): + pass diff --git a/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py b/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py new file mode 100644 index 0000000000..01542e0d8d --- /dev/null +++
b/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py @@ -0,0 +1,5 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class ZippedPluginIE(InfoExtractor): + pass diff --git a/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py b/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py new file mode 100644 index 0000000000..223822bd6f --- /dev/null +++ b/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py @@ -0,0 +1,5 @@ +from yt_dlp.postprocessor.common import PostProcessor + + +class ZippedPluginPP(PostProcessor): + pass diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 3b6281066b..f701738c96 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -32,7 +32,8 @@ from .extractor import gen_extractor_classes, get_info_extractor from .extractor.common import UnsupportedURLIE from .extractor.openload import PhantomJSwrapper from .minicurses import format_text -from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors +from .plugins import directories as plugin_directories +from .postprocessor import _PLUGIN_CLASSES as plugin_pps from .postprocessor import ( EmbedThumbnailPP, FFmpegFixupDuplicateMoovPP, @@ -67,6 +68,7 @@ from .utils import ( EntryNotInPlaylist, ExistingVideoReached, ExtractorError, + FormatSorter, GeoRestrictedError, HEADRequest, ISO3166Utils, @@ -148,7 +150,7 @@ from .utils import ( write_json_file, write_string, ) -from .version import RELEASE_GIT_HEAD, VARIANT, __version__ +from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__ if compat_os_name == 'nt': import ctypes @@ -298,8 +300,6 @@ class YoutubeDL: Videos already present in the file are not downloaded again. break_on_existing: Stop the download process after attempting to download a file that is in the archive. - break_on_reject: Stop the download process when encountering a video that - has been filtered out. break_per_url: Whether break_on_reject and break_on_existing should act on each input URL as opposed to for the entire queue cookiefile: File name or text stream from where cookies should be read and dumped to @@ -316,6 +316,7 @@ class YoutubeDL: If not provided and the key is encrypted, yt-dlp will ask interactively prefer_insecure: Use HTTP instead of HTTPS to retrieve information. (Only supported by some extractors) + enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons. http_headers: A dictionary of custom headers to be used for all requests proxy: URL of the proxy server to use geo_verification_proxy: URL of the proxy to use for IP address verification @@ -411,6 +412,8 @@ class YoutubeDL: - If it returns None, the video is downloaded. - If it returns utils.NO_DEFAULT, the user is interactively asked whether to download the video. + - Raise utils.DownloadCancelled(msg) to abort remaining + downloads when a video is rejected. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. geo_bypass: Bypass geographic restriction via faking X-Forwarded-For @@ -480,6 +483,9 @@ class YoutubeDL: The following options are deprecated and may be removed in the future: + break_on_reject: Stop the download process when encountering a video that + has been filtered out. 
+ - `raise DownloadCancelled(msg)` in match_filter instead force_generic_extractor: Force downloader to use the generic extractor - Use allowed_extractors = ['generic', 'default'] playliststart: - Use playlist_items @@ -547,11 +553,11 @@ class YoutubeDL: _format_fields = { # NB: Keep in sync with the docstring of extractor/common.py 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note', - 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels', - 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', + 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels', + 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'preference', 'language', 'language_preference', 'quality', 'source_preference', - 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options', + 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' } _format_selection_exts = { @@ -583,7 +589,6 @@ class YoutubeDL: self._playlist_urls = set() self.cache = Cache(self) - windows_enable_vt_mode() stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout self._out_files = Namespace( out=stdout, @@ -592,6 +597,12 @@ class YoutubeDL: console=None if compat_os_name == 'nt' else next( filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None) ) + + try: + windows_enable_vt_mode() + except Exception as e: + self.write_debug(f'Failed to enable VT mode: {e}') + self._allow_colors = Namespace(**{ type_: not self.params.get('no_color') and supports_terminal_sequences(stream) for type_, stream in self._out_files.items_ if type_ != 'console' @@ -606,7 +617,7 @@ class YoutubeDL: '\n You will no longer receive updates on this version') if current_version < MIN_SUPPORTED: msg = 'Python version %d.%d is no longer supported' - self.deprecation_warning( + self.deprecated_feature( f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED)) if self.params.get('allow_unplayable_formats'): @@ -616,46 +627,6 @@ class YoutubeDL: ' If you experience any issues while using this option, ' f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report') - def check_deprecated(param, option, suggestion): - if self.params.get(param) is not None: - self.report_warning(f'{option} is deprecated. 
Use {suggestion} instead') - return True - return False - - if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): - if self.params.get('geo_verification_proxy') is None: - self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] - - check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') - check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') - check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"') - - for msg in self.params.get('_warnings', []): - self.report_warning(msg) - for msg in self.params.get('_deprecation_warnings', []): - self.deprecated_feature(msg) - - self.params['compat_opts'] = set(self.params.get('compat_opts', ())) - if 'list-formats' in self.params['compat_opts']: - self.params['listformats_table'] = False - - if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None: - # nooverwrites was unnecessarily changed to overwrites - # in 0c3d0f51778b153f65c21906031c2e091fcfb641 - # This ensures compatibility with both keys - self.params['overwrites'] = not self.params['nooverwrites'] - elif self.params.get('overwrites') is None: - self.params.pop('overwrites', None) - else: - self.params['nooverwrites'] = not self.params['overwrites'] - - self.params.setdefault('forceprint', {}) - self.params.setdefault('print_to_file', {}) - - # Compatibility with older syntax - if not isinstance(params['forceprint'], dict): - self.params['forceprint'] = {'video': params['forceprint']} - if self.params.get('bidi_workaround', False): try: import pty @@ -676,9 +647,57 @@ class YoutubeDL: else: raise + self.params['compat_opts'] = set(self.params.get('compat_opts', ())) + if auto_init and auto_init != 'no_verbose_header': + self.print_debug_header() + + def check_deprecated(param, option, suggestion): + if self.params.get(param) is not None: + self.report_warning(f'{option} is deprecated. 
Use {suggestion} instead') + return True + return False + + if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): + if self.params.get('geo_verification_proxy') is None: + self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] + + check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') + check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') + check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"') + + for msg in self.params.get('_warnings', []): + self.report_warning(msg) + for msg in self.params.get('_deprecation_warnings', []): + self.deprecated_feature(msg) + + if 'list-formats' in self.params['compat_opts']: + self.params['listformats_table'] = False + + if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None: + # nooverwrites was unnecessarily changed to overwrites + # in 0c3d0f51778b153f65c21906031c2e091fcfb641 + # This ensures compatibility with both keys + self.params['overwrites'] = not self.params['nooverwrites'] + elif self.params.get('overwrites') is None: + self.params.pop('overwrites', None) + else: + self.params['nooverwrites'] = not self.params['overwrites'] + + if self.params.get('simulate') is None and any(( + self.params.get('list_thumbnails'), + self.params.get('listformats'), + self.params.get('listsubtitles'), + )): + self.params['simulate'] = 'list_only' + + self.params.setdefault('forceprint', {}) + self.params.setdefault('print_to_file', {}) + + # Compatibility with older syntax + if not isinstance(params['forceprint'], dict): + self.params['forceprint'] = {'video': params['forceprint']} + if auto_init: - if auto_init != 'no_verbose_header': - self.print_debug_header() self.add_default_info_extractors() if (sys.platform != 'win32' @@ -846,7 +865,7 @@ class YoutubeDL: 'Use "YoutubeDL.to_screen" instead') self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out) - def to_screen(self, message, skip_eol=False, quiet=None): + def to_screen(self, message, skip_eol=False, quiet=None, only_once=False): """Print message to screen if not in quiet mode""" if self.params.get('logger'): self.params['logger'].debug(message) @@ -855,7 +874,7 @@ class YoutubeDL: return self._write_string( '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')), - self._out_files.screen) + self._out_files.screen, only_once=only_once) def to_stderr(self, message, only_once=False): """Print message to stderr""" @@ -1059,7 +1078,7 @@ class YoutubeDL: # correspondingly that is not what we want since we need to keep # '%%' intact for template dict substitution step. Working around # with boundary-alike separator hack. 
- sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) + sep = ''.join(random.choices(ascii_letters, k=32)) outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$') # outtmpl should be expand_path'ed before template dict substitution @@ -1249,7 +1268,7 @@ class YoutubeDL: elif fmt[-1] == 'j': # json value, fmt = json.dumps( value, default=_dumpjson_default, - indent=4 if '#' in flags else None, ensure_ascii=False), str_fmt + indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt elif fmt[-1] == 'h': # html value, fmt = escapeHTML(str(value)), str_fmt elif fmt[-1] == 'q': # quoted @@ -1349,11 +1368,19 @@ class YoutubeDL: return self.get_output_path(dir_type, filename) def _match_entry(self, info_dict, incomplete=False, silent=False): - """ Returns None if the file should be downloaded """ + """Returns None if the file should be downloaded""" + _type = info_dict.get('_type', 'video') + assert incomplete or _type == 'video', 'Only video result can be considered complete' video_title = info_dict.get('title', info_dict.get('id', 'entry')) def check_filter(): + if _type in ('playlist', 'multi_video'): + return + elif _type in ('url', 'url_transparent') and not try_call( + lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])): + return + if 'title' in info_dict: # This can happen when we're just evaluating the playlist title = info_dict['title'] @@ -1365,6 +1392,7 @@ class YoutubeDL: if rejecttitle: if re.search(rejecttitle, title, re.IGNORECASE): return '"' + title + '" title matched reject pattern "' + rejecttitle + '"' + date = info_dict.get('upload_date') if date is not None: dateRange = self.params.get('daterange', DateRange()) @@ -1382,31 +1410,44 @@ class YoutubeDL: return 'Skipping "%s" because it is age restricted' % video_title match_filter = self.params.get('match_filter') - if match_filter is not None: + if match_filter is None: + return None + + cancelled = None + try: try: ret = match_filter(info_dict, incomplete=incomplete) except TypeError: # For backward compatibility ret = None if incomplete else match_filter(info_dict) - if ret is NO_DEFAULT: - while True: - filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME) - reply = input(self._format_screen( - f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip() - if reply in {'y', ''}: - return None - elif reply == 'n': - return f'Skipping {video_title}' - elif ret is not None: - return ret - return None + except DownloadCancelled as err: + if err.msg is not NO_DEFAULT: + raise + ret, cancelled = err.msg, err + + if ret is NO_DEFAULT: + while True: + filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME) + reply = input(self._format_screen( + f'Download "{filename}"? 
(Y/n): ', self.Styles.EMPHASIS)).lower().strip() + if reply in {'y', ''}: + return None + elif reply == 'n': + if cancelled: + raise type(cancelled)(f'Skipping {video_title}') + return f'Skipping {video_title}' + return ret if self.in_download_archive(info_dict): reason = '%s has already been recorded in the archive' % video_title break_opt, break_err = 'break_on_existing', ExistingVideoReached else: - reason = check_filter() - break_opt, break_err = 'break_on_reject', RejectedVideoReached + try: + reason = check_filter() + except DownloadCancelled as e: + reason, break_opt, break_err = e.msg, 'match_filter', type(e) + else: + break_opt, break_err = 'break_on_reject', RejectedVideoReached if reason is not None: if not silent: self.to_screen('[download] ' + reason) @@ -1608,8 +1649,8 @@ class YoutubeDL: if result_type in ('url', 'url_transparent'): ie_result['url'] = sanitize_url( ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https') - if ie_result.get('original_url'): - extra_info.setdefault('original_url', ie_result['original_url']) + if ie_result.get('original_url') and not extra_info.get('original_url'): + extra_info = {'original_url': ie_result['original_url'], **extra_info} extract_flat = self.params.get('extract_flat', False) if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) @@ -1621,6 +1662,7 @@ class YoutubeDL: self.add_default_extra_info(info_copy, ie, ie_result['url']) self.add_extra_info(info_copy, extra_info) info_copy, _ = self.pre_process(info_copy) + self._fill_common_fields(info_copy, False) self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True) self._raise_pending_errors(info_copy) if self.params.get('force_write_download_archive', False): @@ -1687,8 +1729,8 @@ class YoutubeDL: elif result_type in ('playlist', 'multi_video'): # Protect from infinite recursion due to recursively nested playlists # (see https://github.com/ytdl-org/youtube-dl/issues/27833) - webpage_url = ie_result['webpage_url'] - if webpage_url in self._playlist_urls: + webpage_url = ie_result.get('webpage_url') # Playlists may not have webpage_url + if webpage_url and webpage_url in self._playlist_urls: self.to_screen( '[download] Skipping already downloaded playlist: %s' % ie_result.get('title') or ie_result.get('id')) @@ -1742,14 +1784,17 @@ class YoutubeDL: } if strict: return info + if ie_result.get('webpage_url'): + info.update({ + 'webpage_url': ie_result['webpage_url'], + 'webpage_url_basename': url_basename(ie_result['webpage_url']), + 'webpage_url_domain': get_domain(ie_result['webpage_url']), + }) return { **info, 'playlist_index': 0, - '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)), + '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)), 'extractor': ie_result['extractor'], - 'webpage_url': ie_result['webpage_url'], - 'webpage_url_basename': url_basename(ie_result['webpage_url']), - 'webpage_url_domain': get_domain(ie_result['webpage_url']), 'extractor_key': ie_result['extractor_key'], } @@ -1804,7 +1849,7 @@ class YoutubeDL: elif self.params.get('playlistrandom'): random.shuffle(entries) - self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos' + self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items' f'{format_field(ie_result, "playlist_count", " of %s")}') keep_resolved_entries = self.params.get('extract_flat') != 'discard' @@ -1837,14 +1882,13 @@ class YoutubeDL: resolved_entries[i] =
(playlist_index, NO_DEFAULT) continue - self.to_screen('[download] Downloading video %s of %s' % ( + self.to_screen('[download] Downloading item %s of %s' % ( self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS))) - extra.update({ + entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({ 'playlist_index': playlist_index, 'playlist_autonumber': i + 1, - }) - entry_result = self.__process_iterable_entry(entry, download, extra) + }, extra)) if not entry_result: failures += 1 if failures >= max_failures: @@ -1855,8 +1899,11 @@ class YoutubeDL: resolved_entries[i] = (playlist_index, entry_result) # Update with processed data - ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT] ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT] + ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT] + if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))): + # Do not set for full playlist + ie_result.pop('requested_entries') # Write the updated info to json if _infojson_written is True and self._write_info_json( @@ -2162,6 +2209,7 @@ class YoutubeDL: 'vcodec': the_only_video.get('vcodec'), 'vbr': the_only_video.get('vbr'), 'stretched_ratio': the_only_video.get('stretched_ratio'), + 'aspect_ratio': the_only_video.get('aspect_ratio'), }) if the_only_audio: @@ -2376,15 +2424,10 @@ class YoutubeDL: else: info_dict['thumbnails'] = thumbnails - def _fill_common_fields(self, info_dict, is_video=True): + def _fill_common_fields(self, info_dict, final=True): # TODO: move sanitization here - if is_video: - # playlists are allowed to lack "title" - title = info_dict.get('title', NO_DEFAULT) - if title is NO_DEFAULT: - raise ExtractorError('Missing "title" field in extractor result', - video_id=info_dict['id'], ie=info_dict['extractor']) - info_dict['fulltitle'] = title + if final: + title = info_dict['fulltitle'] = info_dict.get('title') if not title: if title == '': self.write_debug('Extractor gave empty title. Creating a generic title') @@ -2423,11 +2466,13 @@ class YoutubeDL: for key in live_keys: if info_dict.get(key) is None: info_dict[key] = (live_status == key) + if live_status == 'post_live': + info_dict['was_live'] = True # Auto generate title fields corresponding to the *_number fields when missing # in order to always have clean titles. This is very common for TV series. 
for field in ('chapter', 'season', 'episode'): - if info_dict.get('%s_number' % field) is not None and not info_dict.get(field): + if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field): info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) def _raise_pending_errors(self, info): @@ -2435,6 +2480,11 @@ class YoutubeDL: if err: self.report_error(err, tb=False) + def sort_formats(self, info_dict): + formats = self._get_formats(info_dict) + formats.sort(key=FormatSorter( + self, info_dict.get('_format_sort_fields') or []).calculate_preference) + def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' self._num_videos += 1 @@ -2520,11 +2570,12 @@ class YoutubeDL: info_dict['requested_subtitles'] = self.process_subtitles( info_dict['id'], subtitles, automatic_captions) - if info_dict.get('formats') is None: - # There's only one format available - formats = [info_dict] - else: - formats = info_dict['formats'] + formats = self._get_formats(info_dict) + + # Backward compatibility with InfoExtractor._sort_formats + field_preference = (formats or [{}])[0].pop('__sort_fields', None) + if field_preference: + info_dict['_format_sort_fields'] = field_preference # or None ensures --clean-infojson removes it info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None @@ -2563,22 +2614,43 @@ class YoutubeDL: if not formats: self.raise_no_formats(info_dict) - formats_dict = {} - - # We check that all the formats have the format and format_id fields - for i, format in enumerate(formats): + for format in formats: sanitize_string_field(format, 'format_id') sanitize_numeric_fields(format) format['url'] = sanitize_url(format['url']) + if format.get('ext') is None: + format['ext'] = determine_ext(format['url']).lower() + if format.get('protocol') is None: + format['protocol'] = determine_protocol(format) + if format.get('resolution') is None: + format['resolution'] = self.format_resolution(format, default=None) + if format.get('dynamic_range') is None and format.get('vcodec') != 'none': + format['dynamic_range'] = 'SDR' + if format.get('aspect_ratio') is None: + format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2)) + if (info_dict.get('duration') and format.get('tbr') + and not format.get('filesize') and not format.get('filesize_approx')): + format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8)) + format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict)) + + # This is copied to http_headers by the above _calc_headers and can now be removed + if '__x_forwarded_for_ip' in info_dict: + del info_dict['__x_forwarded_for_ip'] + + self.sort_formats({ + 'formats': formats, + '_format_sort_fields': info_dict.get('_format_sort_fields') + }) + + # Sanitize and group by format_id + formats_dict = {} + for i, format in enumerate(formats): if not format.get('format_id'): format['format_id'] = str(i) else: # Sanitize format_id from characters used in format selector expression format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id']) - format_id = format['format_id'] - if format_id not in formats_dict: - formats_dict[format_id] = [] - formats_dict[format_id].append(format) + formats_dict.setdefault(format['format_id'], []).append(format) # Make sure all formats have unique format_id common_exts = set(itertools.chain(*self._format_selection_exts.values())) @@ -2587,38 +2659,17 @@ class YoutubeDL: for i, 
format in enumerate(ambiguous_formats): if ambigious_id: format['format_id'] = '%s-%d' % (format_id, i) - if format.get('ext') is None: - format['ext'] = determine_ext(format['url']).lower() # Ensure there is no conflict between id and ext in format selection # See https://github.com/yt-dlp/yt-dlp/issues/1282 if format['format_id'] != format['ext'] and format['format_id'] in common_exts: format['format_id'] = 'f%s' % format['format_id'] - for i, format in enumerate(formats): - if format.get('format') is None: - format['format'] = '{id} - {res}{note}'.format( - id=format['format_id'], - res=self.format_resolution(format), - note=format_field(format, 'format_note', ' (%s)'), - ) - if format.get('protocol') is None: - format['protocol'] = determine_protocol(format) - if format.get('resolution') is None: - format['resolution'] = self.format_resolution(format, default=None) - if format.get('dynamic_range') is None and format.get('vcodec') != 'none': - format['dynamic_range'] = 'SDR' - if (info_dict.get('duration') and format.get('tbr') - and not format.get('filesize') and not format.get('filesize_approx')): - format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8)) - - # Add HTTP headers, so that external programs can use them from the - # json output - full_format_info = info_dict.copy() - full_format_info.update(format) - format['http_headers'] = self._calc_headers(full_format_info) - # Remove private housekeeping stuff - if '__x_forwarded_for_ip' in info_dict: - del info_dict['__x_forwarded_for_ip'] + if format.get('format') is None: + format['format'] = '{id} - {res}{note}'.format( + id=format['format_id'], + res=self.format_resolution(format), + note=format_field(format, 'format_note', ' (%s)'), + ) if self.params.get('check_formats') is True: formats = LazyList(self._check_formats(formats[::-1]), reverse=True) @@ -2639,10 +2690,9 @@ class YoutubeDL: info_dict, _ = self.pre_process(info_dict, 'after_filter') # The pre-processors may have modified the formats - formats = info_dict.get('formats', [info_dict]) + formats = self._get_formats(info_dict) - list_only = self.params.get('simulate') is None and ( - self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')) + list_only = self.params.get('simulate') == 'list_only' interactive_format_selection = not list_only and self.format_selector == '-' if self.params.get('list_thumbnails'): self.list_thumbnails(info_dict) @@ -2697,31 +2747,30 @@ class YoutubeDL: # Process what we can, even without any available formats. 
formats_to_download = [{}] - requested_ranges = self.params.get('download_ranges') - if requested_ranges: - requested_ranges = tuple(requested_ranges(info_dict, self)) - + requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self)) best_format, downloaded_formats = formats_to_download[-1], [] if download: - if best_format: + if best_format and requested_ranges: def to_screen(*msg): self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}') to_screen(f'Downloading {len(formats_to_download)} format(s):', (f['format_id'] for f in formats_to_download)) - if requested_ranges: + if requested_ranges != ({}, ): to_screen(f'Downloading {len(requested_ranges)} time ranges:', - (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges)) + (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges)) max_downloads_reached = False - for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]): + for fmt, chapter in itertools.product(formats_to_download, requested_ranges): new_info = self._copy_infodict(info_dict) new_info.update(fmt) offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf') + end_time = offset + min(chapter.get('end_time', duration), duration) if chapter or offset: new_info.update({ 'section_start': offset + chapter.get('start_time', 0), - 'section_end': offset + min(chapter.get('end_time', duration), duration), + # duration may not be accurate. So allow deviations <1sec + 'section_end': end_time if end_time <= offset + duration + 1 else None, 'section_title': chapter.get('title'), 'section_number': chapter.get('index'), }) @@ -2777,10 +2826,14 @@ class YoutubeDL: self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True) except re.error as e: raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}') - elif normal_sub_langs: - requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1] else: - requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1] + requested_langs = LazyList(itertools.chain( + ['en'] if 'en' in normal_sub_langs else [], + filter(lambda f: f.startswith('en'), normal_sub_langs), + ['en'] if 'en' in all_sub_langs else [], + filter(lambda f: f.startswith('en'), all_sub_langs), + normal_sub_langs, all_sub_langs, + ))[:1] if requested_langs: self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}') @@ -2935,14 +2988,22 @@ class YoutubeDL: if 'format' not in info_dict and 'ext' in info_dict: info_dict['format'] = info_dict['ext'] - # This is mostly just for backward compatibility of process_info - # As a side-effect, this allows for format-specific filters if self._match_entry(info_dict) is not None: info_dict['__write_download_archive'] = 'ignore' return # Does nothing under normal operation - for backward compatibility of process_info self.post_extract(info_dict) + + def replace_info_dict(new_info): + nonlocal info_dict + if new_info == info_dict: + return + info_dict.clear() + info_dict.update(new_info) + + new_info, _ = self.pre_process(info_dict, 'video') + replace_info_dict(new_info) self._num_downloads += 1 # info_dict['_filename'] needs to be set for backward compatibility @@ -3056,13 +3117,6 @@ class YoutubeDL: for link_type, should_write in write_links.items()): return - def replace_info_dict(new_info): - nonlocal info_dict - if new_info == info_dict: - return - info_dict.clear() - info_dict.update(new_info) - 
new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) replace_info_dict(new_info) @@ -3089,7 +3143,7 @@ class YoutubeDL: fd, success = None, True if info_dict.get('protocol') or info_dict.get('url'): fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-') - if fd is not FFmpegFD and ( + if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and ( info_dict.get('section_start') or info_dict.get('section_end')): msg = ('This format cannot be partially downloaded' if FFmpegFD.available() else 'You have requested downloading the video partially, but ffmpeg is not installed') @@ -3354,6 +3408,7 @@ class YoutubeDL: reject = lambda k, v: v is None or k.startswith('__') or k in { 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber', + '_format_sort_fields', } else: reject = lambda k, v: False @@ -3423,7 +3478,8 @@ class YoutubeDL: return infodict def run_all_pps(self, key, info, *, additional_pps=None): - self._forceprint(key, info) + if key != 'video': + self._forceprint(key, info) for pp in (additional_pps or []) + self._pps[key]: info = self.run_pp(pp, info) return info @@ -3568,11 +3624,17 @@ class YoutubeDL: res += '~' + format_bytes(fdict['filesize_approx']) return res - def render_formats_table(self, info_dict): - if not info_dict.get('formats') and not info_dict.get('url'): - return None + def _get_formats(self, info_dict): + if info_dict.get('formats') is None: + if info_dict.get('url') and info_dict.get('_type', 'video') == 'video': + return [info_dict] + return [] + return info_dict['formats'] - formats = info_dict.get('formats', [info_dict]) + def render_formats_table(self, info_dict): + formats = self._get_formats(info_dict) + if not formats: + return if not self.params.get('listformats_table', True) is not False: table = [ [ @@ -3580,7 +3642,7 @@ class YoutubeDL: format_field(f, 'ext'), self.format_resolution(f), self._format_note(f) - ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] + ] for f in formats if (f.get('preference') or 0) >= -1000] return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) def simplified_codec(f, field): @@ -3619,6 +3681,7 @@ class YoutubeDL: format_field(f, 'asr', '\t%s', func=format_decimal_suffix), join_nonempty( self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, + self._format_out('DRM', 'light red') if f.get('has_drm') else None, format_field(f, 'language', '[%s]'), join_nonempty(format_field(f, 'format_note'), format_field(f, 'container', ignore=(None, f.get('ext'))), @@ -3639,7 +3702,7 @@ class YoutubeDL: return None return render_table( self._list_format_headers('ID', 'Width', 'Height', 'URL'), - [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]) + [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails]) def render_subtitles_table(self, video_id, subtitles): def _row(lang, formats): @@ -3682,9 +3745,14 @@ class YoutubeDL: if not self.params.get('verbose'): return + from . import _IN_CLI # Must be delayed import + # These imports can be slow. 
So import them only as needed from .extractor.extractors import _LAZY_LOADER - from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors + from .extractor.extractors import ( + _PLUGIN_CLASSES as plugin_ies, + _PLUGIN_OVERRIDES as plugin_ie_overrides + ) def get_encoding(stream): ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) @@ -3713,21 +3781,23 @@ class YoutubeDL: source = detect_variant() if VARIANT not in (None, 'pip'): source += '*' + klass = type(self) write_debug(join_nonempty( f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version', - __version__, - f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '', + f'{CHANNEL}@{__version__}', + f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '', '' if source == 'unknown' else f'({source})', + '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}', delim=' ')) + + if not _IN_CLI: + write_debug(f'params: {self.params}') + if not _LAZY_LOADER: if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): write_debug('Lazy loading extractors is forcibly disabled') else: write_debug('Lazy loading extractors is disabled') - if plugin_extractors or plugin_postprocessors: - write_debug('Plugins: %s' % [ - '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}') - for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) if self.params['compat_opts']: write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts'])) @@ -3761,6 +3831,21 @@ class YoutubeDL: proxy_map.update(handler.proxies) write_debug(f'Proxy map: {proxy_map}') + for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): + display_list = ['%s%s' % ( + klass.__name__, '' if klass.__name__ == name else f' as {name}') + for name, klass in plugins.items()] + if plugin_type == 'Extractor': + display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})' + for parent, plugins in plugin_ie_overrides.items()) + if not display_list: + continue + write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}') + + plugin_dirs = plugin_directories() + if plugin_dirs: + write_debug(f'Plugin directories: {plugin_dirs}') + # Not implemented if False and self.params.get('call_home'): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() @@ -3810,9 +3895,12 @@ class YoutubeDL: # https://github.com/ytdl-org/youtube-dl/issues/8227) file_handler = urllib.request.FileHandler() - def file_open(*args, **kwargs): - raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons') - file_handler.file_open = file_open + if not self.params.get('enable_file_urls'): + def file_open(*args, **kwargs): + raise urllib.error.URLError( + 'file:// URLs are explicitly disabled in yt-dlp for security reasons. 
' + 'Use --enable-file-urls to enable at your own risk.') + file_handler.file_open = file_open opener = urllib.request.build_opener( proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler) @@ -3874,7 +3962,7 @@ class YoutubeDL: elif not self.params.get('overwrites', True) and os.path.exists(descfn): self.to_screen(f'[info] {label.title()} description is already present') elif ie_result.get('description') is None: - self.report_warning(f'There\'s no {label} description to write') + self.to_screen(f'[info] There\'s no {label} description to write') return False else: try: @@ -3890,15 +3978,18 @@ class YoutubeDL: ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' ret = [] subtitles = info_dict.get('requested_subtitles') - if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): + if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE return ret - + elif not subtitles: + self.to_screen('[info] There\'s no subtitles for the requested languages') + return ret sub_filename_base = self.prepare_filename(info_dict, 'subtitle') if not sub_filename_base: self.to_screen('[info] Skipping writing video subtitles') return ret + for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) @@ -3945,6 +4036,9 @@ class YoutubeDL: thumbnails, ret = [], [] if write_all or self.params.get('writethumbnail', False): thumbnails = info_dict.get('thumbnails') or [] + if not thumbnails: + self.to_screen(f'[info] There\'s no {label} thumbnails to download') + return ret multiple = write_all and len(thumbnails) > 1 if thumb_filename_base is None: diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 356155fcdd..9ef31601c9 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -16,11 +16,9 @@ import sys from .compat import compat_shlex_quote from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS -from .downloader import FileDownloader from .downloader.external import get_external_downloader from .extractor import list_extractor_classes from .extractor.adobepass import MSO_INFO -from .extractor.common import InfoExtractor from .options import parseOpts from .postprocessor import ( FFmpegExtractAudioPP, @@ -40,6 +38,7 @@ from .utils import ( DateRange, DownloadCancelled, DownloadError, + FormatSorter, GeoUtils, PlaylistEntries, SameFileError, @@ -50,6 +49,7 @@ from .utils import ( format_field, int_or_none, match_filter_func, + parse_bytes, parse_duration, preferredencoding, read_batch_urls, @@ -91,12 +91,11 @@ def get_urls(urls, batchfile, verbose): def print_extractor_information(opts, urls): - # Importing GenericIE is currently slow since it imports other extractors - # TODO: Move this back to module level after generalization of embed detection - from .extractor.generic import GenericIE - out = '' if opts.list_extractors: + # Importing GenericIE is currently slow since it imports YoutubeIE + from .extractor.generic import GenericIE + urls = dict.fromkeys(urls, False) for ie in list_extractor_classes(opts.age_limit): out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n' @@ -152,7 +151,7 @@ def set_compat_opts(opts): else: opts.embed_infojson = False if 'format-sort' in 
opts.compat_opts: - opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default) + opts.format_sort.extend(FormatSorter.ytdl_default) _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) if _video_multistreams_set is False and _audio_multistreams_set is False: @@ -227,7 +226,7 @@ def validate_options(opts): # Format sort for f in opts.format_sort: - validate_regex('format sorting', f, InfoExtractor.FormatSort.regex) + validate_regex('format sorting', f, FormatSorter.regex) # Postprocessor formats validate_regex('merge output format', opts.merge_output_format, @@ -281,19 +280,19 @@ def validate_options(opts): raise ValueError(f'invalid {key} retry sleep expression {expr!r}') # Bytes - def parse_bytes(name, value): + def validate_bytes(name, value): if value is None: return None - numeric_limit = FileDownloader.parse_bytes(value) + numeric_limit = parse_bytes(value) validate(numeric_limit is not None, 'rate limit', value) return numeric_limit - opts.ratelimit = parse_bytes('rate limit', opts.ratelimit) - opts.throttledratelimit = parse_bytes('throttled rate limit', opts.throttledratelimit) - opts.min_filesize = parse_bytes('min filesize', opts.min_filesize) - opts.max_filesize = parse_bytes('max filesize', opts.max_filesize) - opts.buffersize = parse_bytes('buffer size', opts.buffersize) - opts.http_chunk_size = parse_bytes('http chunk size', opts.http_chunk_size) + opts.ratelimit = validate_bytes('rate limit', opts.ratelimit) + opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit) + opts.min_filesize = validate_bytes('min filesize', opts.min_filesize) + opts.max_filesize = validate_bytes('max filesize', opts.max_filesize) + opts.buffersize = validate_bytes('buffer size', opts.buffersize) + opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size) # Output templates def validate_outtmpl(tmpl, msg): @@ -319,21 +318,18 @@ def validate_options(opts): if outtmpl_default == '': opts.skip_download = None del opts.outtmpl['default'] - if outtmpl_default and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio: - raise ValueError( - 'Cannot download a video and extract audio into the same file! ' - f'Use "{outtmpl_default}.%(ext)s" instead of "{outtmpl_default}" as the output template') def parse_chapters(name, value): chapters, ranges = [], [] + parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x) for regex in value or []: if regex.startswith('*'): - for range in regex[1:].split(','): - dur = tuple(map(parse_duration, range.strip().split('-'))) - if len(dur) == 2 and all(t is not None for t in dur): - ranges.append(dur) - else: - raise ValueError(f'invalid {name} time range "{regex}". Must be of the form *start-end') + for range_ in map(str.strip, regex[1:].split(',')): + mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_) + dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf')) + if None in (dur or [None]): + raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "*start-end"') + ranges.append(dur) continue try: chapters.append(re.compile(regex)) @@ -350,7 +346,7 @@ def validate_options(opts): mobj = re.fullmatch(r'''(?x) (?P<name>[^+:]+) (?:\s*\+\s*(?P<keyring>[^:]+))? - (?:\s*:\s*(?P<profile>.+?))? + (?:\s*:\s*(?!:)(?P<profile>.+?))? (?:\s*::\s*(?P<container>.+))?
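# e.g. (illustrative values, not taken from the source): chrome+gnomekeyring:Profile 1::work
# would parse as name=chrome, keyring=gnomekeyring, profile=Profile 1, container=work;
# only the browser name is mandatory, the other three parts may be omitted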
''', opts.cookiesfrombrowser) if mobj is None: @@ -386,10 +382,12 @@ def validate_options(opts): raise ValueError(f'{cmd} is invalid; {err}') yield action - parse_metadata = opts.parse_metadata or [] if opts.metafromtitle is not None: - parse_metadata.append('title:%s' % opts.metafromtitle) - opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata))) + opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle) + opts.parse_metadata = { + k: list(itertools.chain(*map(metadataparser_actions, v))) + for k, v in opts.parse_metadata.items() + } # Other options if opts.playlist_items is not None: @@ -405,11 +403,14 @@ def validate_options(opts): except Exception: raise ValueError('unsupported geo-bypass country or ip-block') - opts.match_filter = match_filter_func(opts.match_filter) + opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter) if opts.download_archive is not None: opts.download_archive = expand_path(opts.download_archive) + if opts.ffmpeg_location is not None: + opts.ffmpeg_location = expand_path(opts.ffmpeg_location) + if opts.user_agent is not None: opts.headers.setdefault('User-Agent', opts.user_agent) if opts.referer is not None: @@ -485,7 +486,7 @@ def validate_options(opts): val1=opts.sponskrub and opts.sponskrub_cut) # Conflicts with --allow-unplayable-formats - report_conflict('--add-metadata', 'addmetadata') + report_conflict('--embed-metadata', 'addmetadata') report_conflict('--embed-chapters', 'addchapters') report_conflict('--embed-info-json', 'embed_infojson') report_conflict('--embed-subs', 'embedsubtitles') @@ -558,11 +559,11 @@ def validate_options(opts): def get_postprocessors(opts): yield from opts.add_postprocessors - if opts.parse_metadata: + for when, actions in opts.parse_metadata.items(): yield { 'key': 'MetadataParser', - 'actions': opts.parse_metadata, - 'when': 'pre_process' + 'actions': actions, + 'when': when } sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: @@ -698,11 +699,12 @@ def parse_options(argv=None): postprocessors = list(get_postprocessors(opts)) - print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[2:]) + print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:]) any_getting = any(getattr(opts, k) for k in ( 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' )) + opts.quiet = opts.quiet or any_getting or opts.print_json or bool(opts.forceprint) playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist'] write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson @@ -738,7 +740,7 @@ def parse_options(argv=None): 'client_certificate': opts.client_certificate, 'client_certificate_key': opts.client_certificate_key, 'client_certificate_password': opts.client_certificate_password, - 'quiet': opts.quiet or any_getting or opts.print_json or bool(opts.forceprint), + 'quiet': opts.quiet, 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, @@ -850,6 +852,7 @@ def parse_options(argv=None): 'legacyserverconnect': opts.legacy_server_connect, 'nocheckcertificate': opts.no_check_certificate, 'prefer_insecure': opts.prefer_insecure, + 'enable_file_urls': opts.enable_file_urls, 'http_headers': opts.headers, 'proxy': opts.proxy, 'socket_timeout': opts.socket_timeout, @@ -928,7 +931,7 @@ def 
_real_main(argv=None): if opts.rm_cachedir: ydl.cache.remove() - updater = Updater(ydl) + updater = Updater(ydl, opts.update_self if isinstance(opts.update_self, str) else None) if opts.update_self and updater.update() and actual_use: if updater.cmd: return updater.restart() @@ -958,6 +961,8 @@ def _real_main(argv=None): def main(argv=None): + global _IN_CLI + _IN_CLI = True try: _exit(*variadic(_real_main(argv))) except DownloadError: diff --git a/yt_dlp/__main__.py b/yt_dlp/__main__.py index 895918c272..78701df8d3 100644 --- a/yt_dlp/__main__.py +++ b/yt_dlp/__main__.py @@ -5,7 +5,7 @@ import sys -if __package__ is None and not hasattr(sys, 'frozen'): +if __package__ is None and not getattr(sys, 'frozen', False): # direct call of __main__.py import os.path path = os.path.realpath(os.path.abspath(__file__)) @@ -14,5 +14,4 @@ if __package__ is None and not hasattr(sys, 'frozen'): import yt_dlp if __name__ == '__main__': - yt_dlp._IN_CLI = True yt_dlp.main() diff --git a/yt_dlp/__pyinstaller/__init__.py b/yt_dlp/__pyinstaller/__init__.py new file mode 100644 index 0000000000..1c52aadf4b --- /dev/null +++ b/yt_dlp/__pyinstaller/__init__.py @@ -0,0 +1,5 @@ +import os + + +def get_hook_dirs(): + return [os.path.dirname(__file__)] diff --git a/yt_dlp/__pyinstaller/hook-yt_dlp.py b/yt_dlp/__pyinstaller/hook-yt_dlp.py new file mode 100644 index 0000000000..63dcdffe02 --- /dev/null +++ b/yt_dlp/__pyinstaller/hook-yt_dlp.py @@ -0,0 +1,31 @@ +import sys + +from PyInstaller.utils.hooks import collect_submodules + + +def pycryptodome_module(): + try: + import Cryptodome # noqa: F401 + except ImportError: + try: + import Crypto # noqa: F401 + print('WARNING: Using Crypto since Cryptodome is not available. ' + 'Install with: pip install pycryptodomex', file=sys.stderr) + return 'Crypto' + except ImportError: + pass + return 'Cryptodome' + + +def get_hidden_imports(): + yield 'yt_dlp.compat._legacy' + yield pycryptodome_module() + yield from collect_submodules('websockets') + # These are auto-detected, but explicitly add them just in case + yield from ('mutagen', 'brotli', 'certifi') + + +hiddenimports = list(get_hidden_imports()) +print(f'Adding imports: {hiddenimports}') + +excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts'] diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index b428c682bf..b3a383cd9c 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -2,17 +2,17 @@ import base64 from math import ceil from .compat import compat_ord -from .dependencies import Cryptodome_AES +from .dependencies import Cryptodome from .utils import bytes_to_intlist, intlist_to_bytes -if Cryptodome_AES: +if Cryptodome.AES: def aes_cbc_decrypt_bytes(data, key, iv): """ Decrypt bytes with AES-CBC using pycryptodome """ - return Cryptodome_AES.new(key, Cryptodome_AES.MODE_CBC, iv).decrypt(data) + return Cryptodome.AES.new(key, Cryptodome.AES.MODE_CBC, iv).decrypt(data) def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): """ Decrypt bytes with AES-GCM using pycryptodome """ - return Cryptodome_AES.new(key, Cryptodome_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) + return Cryptodome.AES.new(key, Cryptodome.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) else: def aes_cbc_decrypt_bytes(data, key, iv): @@ -28,11 +28,23 @@ def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) +BLOCK_SIZE_BYTES = 16 + + def unpad_pkcs7(data): return data[:-compat_ord(data[-1])] -BLOCK_SIZE_BYTES = 16 +def 
pkcs7_padding(data): + """ + PKCS#7 padding + + @param {int[]} data cleartext + @returns {int[]} padding data + """ + + remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES + return data + [remaining_length] * remaining_length def pad_block(block, padding_mode): @@ -64,7 +76,7 @@ def pad_block(block, padding_mode): def aes_ecb_encrypt(data, key, iv=None): """ - Encrypt with aes in ECB mode + Encrypt with aes in ECB mode. Using PKCS#7 padding @param {int[]} data cleartext @param {int[]} key 16/24/32-Byte cipher key @@ -77,8 +89,7 @@ def aes_ecb_encrypt(data, key, iv=None): encrypted_data = [] for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - encrypted_data += aes_encrypt(block, expanded_key) - encrypted_data = encrypted_data[:len(data)] + encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key) return encrypted_data @@ -551,5 +562,6 @@ __all__ = [ 'key_expansion', 'pad_block', + 'pkcs7_padding', 'unpad_pkcs7', ] diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py index 4f9fb78d37..9dd4f2f25b 100644 --- a/yt_dlp/cache.py +++ b/yt_dlp/cache.py @@ -1,10 +1,10 @@ import contextlib -import errno import json import os import re import shutil import traceback +import urllib.parse from .utils import expand_path, traverse_obj, version_tuple, write_json_file from .version import __version__ @@ -22,11 +22,9 @@ class Cache: return expand_path(res) def _get_cache_fn(self, section, key, dtype): - assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ - 'invalid section %r' % section - assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key - return os.path.join( - self._get_root_dir(), section, f'{key}.{dtype}') + assert re.match(r'^[\w.-]+$', section), f'invalid section {section!r}' + key = urllib.parse.quote(key, safe='').replace('%', ',') # encode non-ascii characters + return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}') @property def enabled(self): @@ -40,11 +38,7 @@ class Cache: fn = self._get_cache_fn(section, key, dtype) try: - try: - os.makedirs(os.path.dirname(fn)) - except OSError as ose: - if ose.errno != errno.EEXIST: - raise + os.makedirs(os.path.dirname(fn), exist_ok=True) self._ydl.write_debug(f'Saving {section}.{key} to cache') write_json_file({'yt-dlp_version': __version__, 'data': data}, fn) except Exception: diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py index 6d85a6a1fb..c6c02541c2 100644 --- a/yt_dlp/compat/__init__.py +++ b/yt_dlp/compat/__init__.py @@ -8,13 +8,13 @@ from .compat_utils import passthrough_module # XXX: Implement this the same way as other DeprecationWarnings without circular import passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn( - DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=3)) + DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=5)) # HTMLParseError has been deprecated in Python 3.3 and removed in # Python 3.5. Introducing dummy exception for Python >3.5 for compatible # and uniform cross-version exception handling -class compat_HTMLParseError(Exception): +class compat_HTMLParseError(ValueError): pass @@ -70,9 +70,3 @@ if compat_os_name in ('nt', 'ce'): return userhome + path[i:] else: compat_expanduser = os.path.expanduser - - -# NB: Add modules that are imported dynamically here so that PyInstaller can find them -# See https://github.com/pyinstaller/pyinstaller-hooks-contrib/issues/438 -if False: - from . 
import _legacy # noqa: F401 diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index 09259c9889..83bf869a80 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -1,5 +1,6 @@ """ Do not use! """ +import base64 import collections import ctypes import getpass @@ -29,10 +30,11 @@ from asyncio import run as compat_asyncio_run # noqa: F401 from re import Pattern as compat_Pattern # noqa: F401 from re import match as compat_Match # noqa: F401 +from . import compat_expanduser, compat_HTMLParseError, compat_realpath from .compat_utils import passthrough_module -from ..dependencies import Cryptodome_AES as compat_pycrypto_AES # noqa: F401 from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401 +from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) @@ -47,22 +49,25 @@ def compat_setenv(key, value, env=os.environ): env[key] = value +compat_base64_b64decode = base64.b64decode compat_basestring = str +compat_casefold = str.casefold compat_chr = chr compat_collections_abc = collections.abc -compat_cookiejar = http.cookiejar -compat_cookiejar_Cookie = http.cookiejar.Cookie -compat_cookies = http.cookies -compat_cookies_SimpleCookie = http.cookies.SimpleCookie -compat_etree_Element = etree.Element -compat_etree_register_namespace = etree.register_namespace +compat_cookiejar = compat_http_cookiejar = http.cookiejar +compat_cookiejar_Cookie = compat_http_cookiejar_Cookie = http.cookiejar.Cookie +compat_cookies = compat_http_cookies = http.cookies +compat_cookies_SimpleCookie = compat_http_cookies_SimpleCookie = http.cookies.SimpleCookie +compat_etree_Element = compat_xml_etree_ElementTree_Element = etree.Element +compat_etree_register_namespace = compat_xml_etree_register_namespace = etree.register_namespace compat_filter = filter compat_get_terminal_size = shutil.get_terminal_size compat_getenv = os.getenv -compat_getpass = getpass.getpass +compat_getpass = compat_getpass_getpass = getpass.getpass compat_html_entities = html.entities compat_html_entities_html5 = html.entities.html5 -compat_HTMLParser = html.parser.HTMLParser +compat_html_parser_HTMLParseError = compat_HTMLParseError +compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser compat_http_client = http.client compat_http_server = http.server compat_input = input @@ -71,6 +76,8 @@ compat_itertools_count = itertools.count compat_kwargs = lambda kwargs: kwargs compat_map = map compat_numeric_types = (int, float, complex) +compat_os_path_expanduser = compat_expanduser +compat_os_path_realpath = compat_realpath compat_print = print compat_shlex_split = shlex.split compat_socket_create_connection = socket.create_connection @@ -80,7 +87,9 @@ compat_struct_unpack = struct.unpack compat_subprocess_get_DEVNULL = lambda: DEVNULL compat_tokenize_tokenize = tokenize.tokenize compat_urllib_error = urllib.error +compat_urllib_HTTPError = urllib.error.HTTPError compat_urllib_parse = urllib.parse +compat_urllib_parse_parse_qs = urllib.parse.parse_qs compat_urllib_parse_quote = urllib.parse.quote compat_urllib_parse_quote_plus = urllib.parse.quote_plus compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus @@ -89,8 +98,10 @@ compat_urllib_parse_urlunparse = urllib.parse.urlunparse compat_urllib_request = urllib.request compat_urllib_request_DataHandler = urllib.request.DataHandler compat_urllib_response = 
urllib.response -compat_urlretrieve = urllib.request.urlretrieve -compat_xml_parse_error = etree.ParseError +compat_urlretrieve = compat_urllib_request_urlretrieve = urllib.request.urlretrieve +compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseError compat_xpath = lambda xpath: xpath compat_zip = zip workaround_optparse_bug9161 = lambda: None + +legacy = [] diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py index 82e1762810..3ca46d270c 100644 --- a/yt_dlp/compat/compat_utils.py +++ b/yt_dlp/compat/compat_utils.py @@ -1,5 +1,6 @@ import collections import contextlib +import functools import importlib import sys import types @@ -10,61 +11,73 @@ _Package = collections.namedtuple('Package', ('name', 'version')) def get_package_info(module): - parent = module.__name__.split('.')[0] - parent_module = None - with contextlib.suppress(ImportError): - parent_module = importlib.import_module(parent) - - for attr in ('__version__', 'version_string', 'version'): - version = getattr(parent_module, attr, None) - if version is not None: - break - return _Package(getattr(module, '_yt_dlp__identifier', parent), str(version)) + return _Package( + name=getattr(module, '_yt_dlp__identifier', module.__name__), + version=str(next(filter(None, ( + getattr(module, attr, None) + for attr in ('__version__', 'version_string', 'version') + )), None))) def _is_package(module): - try: - module.__getattribute__('__path__') - except AttributeError: - return False - return True + return '__path__' in vars(module) -def passthrough_module(parent, child, allowed_attributes=None, *, callback=lambda _: None): - parent_module = importlib.import_module(parent) - child_module = None # Import child module only as needed +def _is_dunder(name): + return name.startswith('__') and name.endswith('__') - class PassthroughModule(types.ModuleType): - def __getattr__(self, attr): - if _is_package(parent_module): - with contextlib.suppress(ImportError): - return importlib.import_module(f'.{attr}', parent) - ret = self.__from_child(attr) - if ret is _NO_ATTRIBUTE: - raise AttributeError(f'module {parent} has no attribute {attr}') - callback(attr) - return ret +class EnhancedModule(types.ModuleType): + def __bool__(self): + return vars(self).get('__bool__', lambda: True)() - def __from_child(self, attr): - if allowed_attributes is None: - if attr.startswith('__') and attr.endswith('__'): - return _NO_ATTRIBUTE - elif attr not in allowed_attributes: + def __getattribute__(self, attr): + try: + ret = super().__getattribute__(attr) + except AttributeError: + if _is_dunder(attr): + raise + getter = getattr(self, '__getattr__', None) + if not getter: + raise + ret = getter(attr) + return ret.fget() if isinstance(ret, property) else ret + + +def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=lambda _: None): + """Passthrough parent module into a child module, creating the parent if necessary""" + def __getattr__(attr): + if _is_package(parent): + with contextlib.suppress(ModuleNotFoundError): + return importlib.import_module(f'.{attr}', parent.__name__) + + ret = from_child(attr) + if ret is _NO_ATTRIBUTE: + raise AttributeError(f'module {parent.__name__} has no attribute {attr}') + callback(attr) + return ret + + @functools.lru_cache(maxsize=None) + def from_child(attr): + nonlocal child + if attr not in allowed_attributes: + if ... 
not in allowed_attributes or _is_dunder(attr): return _NO_ATTRIBUTE - nonlocal child_module - child_module = child_module or importlib.import_module(child, parent) + if isinstance(child, str): + child = importlib.import_module(child, parent.__name__) - with contextlib.suppress(AttributeError): - return getattr(child_module, attr) + if _is_package(child): + with contextlib.suppress(ImportError): + return passthrough_module(f'{parent.__name__}.{attr}', + importlib.import_module(f'.{attr}', child.__name__)) - if _is_package(child_module): - with contextlib.suppress(ImportError): - return importlib.import_module(f'.{attr}', child) + with contextlib.suppress(AttributeError): + return getattr(child, attr) - return _NO_ATTRIBUTE + return _NO_ATTRIBUTE - # Python 3.6 does not have module level __getattr__ - # https://peps.python.org/pep-0562/ - sys.modules[parent].__class__ = PassthroughModule + parent = sys.modules.get(parent, types.ModuleType(parent)) + parent.__class__ = EnhancedModule + parent.__getattr__ = __getattr__ + return parent diff --git a/yt_dlp/compat/shutil.py b/yt_dlp/compat/shutil.py new file mode 100644 index 0000000000..23239d5ce8 --- /dev/null +++ b/yt_dlp/compat/shutil.py @@ -0,0 +1,30 @@ +# flake8: noqa: F405 +from shutil import * # noqa: F403 + +from .compat_utils import passthrough_module + +passthrough_module(__name__, 'shutil') +del passthrough_module + + +import sys + +if sys.platform.startswith('freebsd'): + import errno + import os + import shutil + + # Workaround for PermissionError when using restricted ACL mode on FreeBSD + def copy2(src, dst, *args, **kwargs): + if os.path.isdir(dst): + dst = os.path.join(dst, os.path.basename(src)) + shutil.copyfile(src, dst, *args, **kwargs) + try: + shutil.copystat(src, dst, *args, **kwargs) + except PermissionError as e: + if e.errno != getattr(errno, 'EPERM', None): + raise + return dst + + def move(*args, copy_function=copy2, **kwargs): + return shutil.move(*args, copy_function=copy_function, **kwargs) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index c3b14f03bb..8ca7cea2ce 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -1,6 +1,7 @@ import base64 import contextlib import http.cookiejar +import http.cookies import json import os import re @@ -844,12 +845,15 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger): def _get_mac_keyring_password(browser_keyring_name, logger): logger.debug('using find-generic-password to obtain password from OSX keychain') try: - stdout, _, _ = Popen.run( + stdout, _, returncode = Popen.run( ['security', 'find-generic-password', '-w', # write password to stdout '-a', browser_keyring_name, # match 'account' '-s', f'{browser_keyring_name} Safe Storage'], # match 'service' stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) + if returncode: + logger.warning('find-generic-password failed') + return None return stdout.rstrip(b'\n') except Exception as e: logger.warning(f'exception running find-generic-password: {error_to_str(e)}') @@ -987,6 +991,97 @@ def _parse_browser_specification(browser_name, profile=None, keyring=None, conta raise ValueError(f'unsupported browser: "{browser_name}"') if keyring not in (None, *SUPPORTED_KEYRINGS): raise ValueError(f'unsupported keyring: "{keyring}"') - if profile is not None and _is_path(profile): - profile = os.path.expanduser(profile) + if profile is not None and _is_path(expand_path(profile)): + profile = expand_path(profile) return browser_name, profile, keyring, container + + +class 
LenientSimpleCookie(http.cookies.SimpleCookie): + """More lenient version of http.cookies.SimpleCookie""" + # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py + # We use Morsel's legal key chars to avoid errors on setting values + _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~') + _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}') + + _RESERVED = { + "expires", + "path", + "comment", + "domain", + "max-age", + "secure", + "httponly", + "version", + "samesite", + } + + _FLAGS = {"secure", "httponly"} + + # Added 'bad' group to catch the remaining value + _COOKIE_PATTERN = re.compile(r""" + \s* # Optional whitespace at start of cookie + (?P<key> # Start of group 'key' + [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter + ) # End of group 'key' + ( # Optional group: there may not be a value. + \s*=\s* # Equal Sign + ( # Start of potential value + (?P<val> # Start of group 'val' + "(?:[^\\"]|\\.)*" # Any doublequoted string + | # or + \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr + | # or + [""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string + ) # End of group 'val' + | # or + (?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values + ) # End of potential value + )? # End of optional value group + \s* # Any number of spaces. + (\s+|;|$) # Ending either at space, semicolon, or EOS. + """, re.ASCII | re.VERBOSE) + + def load(self, data): + # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776 + if not isinstance(data, str): + return super().load(data) + + morsel = None + for match in self._COOKIE_PATTERN.finditer(data): + if match.group('bad'): + morsel = None + continue + + key, value = match.group('key', 'val') + + is_attribute = False + if key.startswith('$'): + key = key[1:] + is_attribute = True + + lower_key = key.lower() + if lower_key in self._RESERVED: + if morsel is None: + continue + + if value is None: + if lower_key not in self._FLAGS: + morsel = None + continue + value = True + else: + value, _ = self.value_decode(value) + + morsel[key] = value + + elif is_attribute: + morsel = None + + elif value is not None: + morsel = self.get(key, http.cookies.Morsel()) + real_value, coded_value = self.value_decode(value) + morsel.set(key, real_value, coded_value) + self[key] = morsel + + else: + morsel = None diff --git a/yt_dlp/dependencies/Cryptodome.py b/yt_dlp/dependencies/Cryptodome.py new file mode 100644 index 0000000000..74ab6575ce --- /dev/null +++ b/yt_dlp/dependencies/Cryptodome.py @@ -0,0 +1,36 @@ +import types + +try: + import Cryptodome as _parent +except ImportError: + try: + import Crypto as _parent + except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python + _parent = types.ModuleType('no_Cryptodome') + __bool__ = lambda: False + +__version__ = '' +AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None +try: + if _parent.__name__ == 'Cryptodome': + from Cryptodome import __version__ + from Cryptodome.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 + from Cryptodome.Hash import CMAC, SHA1 + from Cryptodome.PublicKey import RSA + elif _parent.__name__ == 'Crypto': + from Crypto import __version__ + from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401 + from Crypto.Hash import CMAC, SHA1 # noqa: F401 + from Crypto.PublicKey import RSA # noqa: F401 +except ImportError: + __version__ = f'broken {__version__}'.strip() + + +_yt_dlp__identifier = _parent.__name__ +if AES and _yt_dlp__identifier == 'Crypto': + try:
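# Probe which package actually provides the 'Crypto' namespace: legacy pycrypto
# accepts a bare AES.new(key) call because mode defaults to ECB, while
# pycryptodome requires an explicit mode argument and raises TypeError instead.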
# In pycrypto, mode defaults to ECB. See: + # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode + AES.new(b'abcdefghijklmnop') + except TypeError: + _yt_dlp__identifier = 'pycrypto' diff --git a/yt_dlp/dependencies.py b/yt_dlp/dependencies/__init__.py similarity index 75% rename from yt_dlp/dependencies.py rename to yt_dlp/dependencies/__init__.py index 5a5363adb1..6e7d29c5ca 100644 --- a/yt_dlp/dependencies.py +++ b/yt_dlp/dependencies/__init__.py @@ -23,24 +23,6 @@ else: certifi = None -try: - from Cryptodome.Cipher import AES as Cryptodome_AES -except ImportError: - try: - from Crypto.Cipher import AES as Cryptodome_AES - except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python - Cryptodome_AES = None - else: - try: - # In pycrypto, mode defaults to ECB. See: - # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode - Cryptodome_AES.new(b'abcdefghijklmnop') - except TypeError: - pass - else: - Cryptodome_AES._yt_dlp__identifier = 'pycrypto' - - try: import mutagen except ImportError: @@ -84,12 +66,16 @@ else: xattr._yt_dlp__identifier = 'pyxattr' +from . import Cryptodome + all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')} - - available_dependencies = {k: v for k, v in all_dependencies.items() if v} +# Deprecated +Cryptodome_AES = Cryptodome.AES + + __all__ = [ 'all_dependencies', 'available_dependencies', diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index ab557a47ac..077b29b41f 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -15,15 +15,17 @@ from ..minicurses import ( from ..utils import ( IDENTITY, NO_DEFAULT, - NUMBER_RE, LockingUnsupportedError, Namespace, RetryManager, classproperty, decodeArgument, + deprecation_warning, encodeFilename, format_bytes, join_nonempty, + parse_bytes, + remove_start, sanitize_open, shell_quote, timeconvert, @@ -120,11 +122,11 @@ class FileDownloader: time = timetuple_from_msec(seconds * 1000) if time.hours > 99: return '--:--:--' - if not time.hours: - return ' %02d:%02d' % time[1:-1] return '%02d:%02d:%02d' % time[:-1] - format_eta = format_seconds + @classmethod + def format_eta(cls, seconds): + return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}' @staticmethod def calc_percent(byte_counter, data_len): @@ -179,12 +181,9 @@ class FileDownloader: @staticmethod def parse_bytes(bytestr): """Parse a string indicating a byte quantity into an integer.""" - matchobj = re.match(rf'(?i)^({NUMBER_RE})([kMGTPEZY]?)$', bytestr) - if matchobj is None: - return None - number = float(matchobj.group(1)) - multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) - return int(round(number * multiplier)) + deprecation_warning('yt_dlp.FileDownloader.parse_bytes is deprecated and ' + 'may be removed in the future. 
Use yt_dlp.utils.parse_bytes instead') + return parse_bytes(bytestr) def slow_down(self, start_time, now, byte_counter): """Sleep if the download speed is over the rate limit.""" @@ -332,6 +331,8 @@ class FileDownloader: return tmpl return default + _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}' + if s['status'] == 'finished': if self.params.get('noprogress'): self.to_screen('[download] Download completed') @@ -339,7 +340,7 @@ class FileDownloader: s.update({ 'speed': speed, '_speed_str': self.format_speed(speed).strip(), - '_total_bytes_str': format_bytes(s.get('total_bytes')), + '_total_bytes_str': _format_bytes('total_bytes'), '_elapsed_str': self.format_seconds(s.get('elapsed')), '_percent_str': self.format_percent(100), }) @@ -354,15 +355,15 @@ class FileDownloader: return s.update({ - '_eta_str': self.format_eta(s.get('eta')), + '_eta_str': self.format_eta(s.get('eta')).strip(), '_speed_str': self.format_speed(s.get('speed')), '_percent_str': self.format_percent(try_call( lambda: 100 * s['downloaded_bytes'] / s['total_bytes'], lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'], lambda: s['downloaded_bytes'] == 0 and 0)), - '_total_bytes_str': format_bytes(s.get('total_bytes')), - '_total_bytes_estimate_str': format_bytes(s.get('total_bytes_estimate')), - '_downloaded_bytes_str': format_bytes(s.get('downloaded_bytes')), + '_total_bytes_str': _format_bytes('total_bytes'), + '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'), + '_downloaded_bytes_str': _format_bytes('downloaded_bytes'), '_elapsed_str': self.format_seconds(s.get('elapsed')), }) diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index a6da26f09d..4328d739c2 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -1,8 +1,9 @@ import time +import urllib.parse from . 
import get_suitable_downloader from .fragment import FragmentFD -from ..utils import urljoin +from ..utils import update_url_query, urljoin class DashSegmentsFD(FragmentFD): @@ -40,7 +41,12 @@ class DashSegmentsFD(FragmentFD): self._prepare_and_start_frag_download(ctx, fmt) ctx['start'] = real_start - fragments_to_download = self._get_fragments(fmt, ctx) + extra_query = None + extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') + if extra_param_to_segment_url: + extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) + + fragments_to_download = self._get_fragments(fmt, ctx, extra_query) if real_downloader: self.to_screen( @@ -51,13 +57,13 @@ class DashSegmentsFD(FragmentFD): args.append([ctx, fragments_to_download, fmt]) - return self.download_and_append_fragments_multiple(*args) + return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0) def _resolve_fragments(self, fragments, ctx): fragments = fragments(ctx) if callable(fragments) else fragments return [next(iter(fragments))] if self.params.get('test') else fragments - def _get_fragments(self, fmt, ctx): + def _get_fragments(self, fmt, ctx, extra_query): fragment_base_url = fmt.get('fragment_base_url') fragments = self._resolve_fragments(fmt['fragments'], ctx) @@ -70,6 +76,8 @@ class DashSegmentsFD(FragmentFD): if not fragment_url: assert fragment_base_url fragment_url = urljoin(fragment_base_url, fragment['path']) + if extra_query: + fragment_url = update_url_query(fragment_url, extra_query) yield { 'frag_index': frag_index, diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index d117c06e0a..5f54017a81 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -1,9 +1,11 @@ import enum +import json import os.path import re import subprocess import sys import time +import uuid from .fragment import FragmentFD from ..compat import functools @@ -20,8 +22,10 @@ from ..utils import ( determine_ext, encodeArgument, encodeFilename, + find_available_port, handle_youtubedl_headers, remove_end, + sanitized_Request, traverse_obj, ) @@ -60,7 +64,6 @@ class ExternalFD(FragmentFD): } if filename != '-': fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) status.update({ 'downloaded_bytes': fsize, @@ -101,6 +104,7 @@ class ExternalFD(FragmentFD): return all(( not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, + not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'), all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), )) @@ -129,8 +133,7 @@ class ExternalFD(FragmentFD): self._debug_cmd(cmd) if 'fragments' not in info_dict: - _, stderr, returncode = Popen.run( - cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None) + _, stderr, returncode = self._call_process(cmd, info_dict) if returncode and stderr: self.to_stderr(stderr) return returncode @@ -140,7 +143,7 @@ class ExternalFD(FragmentFD): retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=None, fatal=not skip_unavailable_fragments) for retry in retry_manager: - _, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE) + _, stderr, returncode = self._call_process(cmd, info_dict) if not returncode: break # TODO: Decide whether to retry 
based on error code @@ -172,6 +175,9 @@ class ExternalFD(FragmentFD): self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) return 0 + def _call_process(self, cmd, info_dict): + return Popen.run(cmd, text=True, stderr=subprocess.PIPE) + class CurlFD(ExternalFD): AVAILABLE_OPT = '-V' @@ -252,6 +258,19 @@ class Aria2cFD(ExternalFD): check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) return all(check_results) + @staticmethod + def _aria2c_filename(fn): + return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}' + + def _call_downloader(self, tmpfilename, info_dict): + # FIXME: Disabled due to https://github.com/yt-dlp/yt-dlp/issues/5931 + if False and 'no-external-downloader-progress' not in self.params.get('compat_opts', []): + info_dict['__rpc'] = { + 'port': find_available_port() or 19190, + 'secret': str(uuid.uuid4()), + } + return super()._call_downloader(tmpfilename, info_dict) + def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-c', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', @@ -272,6 +291,12 @@ class Aria2cFD(ExternalFD): cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=') cmd += self._configuration_args() + if '__rpc' in info_dict: + cmd += [ + '--enable-rpc', + f'--rpc-listen-port={info_dict["__rpc"]["port"]}', + f'--rpc-secret={info_dict["__rpc"]["secret"]}'] + # aria2c strips out spaces from the beginning/end of filenames and paths. # We work around this issue by adding a "./" to the beginning of the # filename and relative path, and adding a "/" at the end of the path. @@ -280,11 +305,9 @@ class Aria2cFD(ExternalFD): # https://github.com/aria2/aria2/issues/1373 dn = os.path.dirname(tmpfilename) if dn: - if not os.path.isabs(dn): - dn = f'.{os.path.sep}{dn}' - cmd += ['--dir', dn + os.path.sep] + cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep] if 'fragments' not in info_dict: - cmd += ['--out', f'.{os.path.sep}{os.path.basename(tmpfilename)}'] + cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))] cmd += ['--auto-file-renaming=false'] if 'fragments' in info_dict: @@ -293,15 +316,97 @@ class Aria2cFD(ExternalFD): url_list = [] for frag_index, fragment in enumerate(info_dict['fragments']): fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) - url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename)) + url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename))) stream, _ = self.sanitize_open(url_list_file, 'wb') stream.write('\n'.join(url_list).encode()) stream.close() - cmd += ['-i', url_list_file] + cmd += ['-i', self._aria2c_filename(url_list_file)] else: cmd += ['--', info_dict['url']] return cmd + def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()): + # Does not actually need to be UUID, just unique + sanitycheck = str(uuid.uuid4()) + d = json.dumps({ + 'jsonrpc': '2.0', + 'id': sanitycheck, + 'method': method, + 'params': [f'token:{rpc_secret}', *params], + }).encode('utf-8') + request = sanitized_Request( + f'http://localhost:{rpc_port}/jsonrpc', + data=d, headers={ + 'Content-Type': 'application/json', + 'Content-Length': f'{len(d)}', + 'Ytdl-request-proxy': '__noproxy__', + }) + with self.ydl.urlopen(request) as r: + resp = json.load(r) + assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server' + return resp['result'] + + def _call_process(self, cmd, info_dict): + if '__rpc' not in info_dict: + return 
super()._call_process(cmd, info_dict) + + send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret']) + started = time.time() + + fragmented = 'fragments' in info_dict + frag_count = len(info_dict['fragments']) if fragmented else 1 + status = { + 'filename': info_dict.get('_filename'), + 'status': 'downloading', + 'elapsed': 0, + 'downloaded_bytes': 0, + 'fragment_count': frag_count if fragmented else None, + 'fragment_index': 0 if fragmented else None, + } + self._hook_progress(status, info_dict) + + def get_stat(key, *obj, average=False): + val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0] + return sum(val) / (len(val) if average else 1) + + with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p: + # Add a small sleep so that RPC client can receive response, + # or the connection stalls infinitely + time.sleep(0.2) + retval = p.poll() + while retval is None: + # We don't use tellStatus as we won't know the GID without reading stdout + # Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive + active = send_rpc('aria2.tellActive') + completed = send_rpc('aria2.tellStopped', [0, frag_count]) + + downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active) + speed = get_stat('downloadSpeed', active) + total = frag_count * get_stat('totalLength', active, completed, average=True) + if total < downloaded: + total = None + + status.update({ + 'downloaded_bytes': int(downloaded), + 'speed': speed, + 'total_bytes': None if fragmented else total, + 'total_bytes_estimate': total, + 'eta': (total - downloaded) / (speed or 1), + 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, + 'elapsed': time.time() - started + }) + self._hook_progress(status, info_dict) + + if not active and len(completed) >= frag_count: + send_rpc('aria2.shutdown') + retval = p.wait() + break + + time.sleep(0.1) + retval = p.poll() + + return '', p.stderr.read(), retval + class HttpieFD(ExternalFD): AVAILABLE_OPT = '--version' @@ -340,7 +445,6 @@ class FFmpegFD(ExternalFD): and cls.can_download(info_dict)) def _call_downloader(self, tmpfilename, info_dict): - urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']] ffpp = FFmpegPostProcessor(downloader=self) if not ffpp.available: self.report_error('m3u8 download detected but ffmpeg could not be found. Please install') @@ -370,16 +474,6 @@ class FFmpegFD(ExternalFD): # http://trac.ffmpeg.org/ticket/6125#comment:10 args += ['-seekable', '1' if seekable else '0'] - http_headers = None - if info_dict.get('http_headers'): - youtubedl_headers = handle_youtubedl_headers(info_dict['http_headers']) - http_headers = [ - # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: - # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. 
- '-headers', - ''.join(f'{key}: {val}\r\n' for key, val in youtubedl_headers.items()) - ] - env = None proxy = self.params.get('proxy') if proxy: @@ -432,21 +526,26 @@ class FFmpegFD(ExternalFD): start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end') - for i, url in enumerate(urls): - if http_headers is not None and re.match(r'^https?://', url): - args += http_headers + selected_formats = info_dict.get('requested_formats') or [info_dict] + for i, fmt in enumerate(selected_formats): + if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']): + headers_dict = handle_youtubedl_headers(fmt['http_headers']) + # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: + # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. + args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in headers_dict.items())]) + if start_time: args += ['-ss', str(start_time)] if end_time: args += ['-t', str(end_time - start_time)] - args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url] + args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']] if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): args += ['-c', 'copy'] if info_dict.get('requested_formats') or protocol == 'http_dash_segments': - for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]): + for i, fmt in enumerate(selected_formats): stream_number = fmt.get('manifest_stream_number', 0) args.extend(['-map', f'{i}:{stream_number}']) @@ -486,8 +585,9 @@ class FFmpegFD(ExternalFD): args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) self._debug_cmd(args) + piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats) with Popen(args, stdin=subprocess.PIPE, env=env) as proc: - if url in ('-', 'pipe:'): + if piped: self.on_process_started(proc, proc.stdin) try: retval = proc.wait() @@ -497,7 +597,7 @@ class FFmpegFD(ExternalFD): # produces a file that is playable (this is mostly useful for live # streams). Note that Windows is not affected and produces playable # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). 
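# 'q' is ffmpeg's interactive quit key: writing it to stdin (via
# communicate_or_kill below) asks ffmpeg to finalize and close the output
# on its own terms rather than being killed mid-write, which is what keeps
# the partial recording playable.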
- if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'): + if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and not piped: proc.communicate_or_kill(b'q') else: proc.kill(timeout=None) diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py index a19ab43f15..306f92192f 100644 --- a/yt_dlp/downloader/f4m.py +++ b/yt_dlp/downloader/f4m.py @@ -424,6 +424,4 @@ class F4mFD(FragmentFD): msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) self.report_warning(msg) - self._finish_frag_download(ctx, info_dict) - - return True + return self._finish_frag_download(ctx, info_dict) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index a5d70d0d49..3dc638f523 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -295,16 +295,23 @@ class FragmentFD(FileDownloader): self.try_remove(ytdl_filename) elapsed = time.time() - ctx['started'] - if ctx['tmpfilename'] == '-': - downloaded_bytes = ctx['complete_frags_downloaded_bytes'] + to_file = ctx['tmpfilename'] != '-' + if to_file: + downloaded_bytes = os.path.getsize(encodeFilename(ctx['tmpfilename'])) else: + downloaded_bytes = ctx['complete_frags_downloaded_bytes'] + + if not downloaded_bytes: + if to_file: + self.try_remove(ctx['tmpfilename']) + self.report_error('The downloaded file is empty') + return False + elif to_file: self.try_rename(ctx['tmpfilename'], ctx['filename']) - if self.params.get('updatetime', True): - filetime = ctx.get('fragment_filetime') - if filetime: - with contextlib.suppress(Exception): - os.utime(ctx['filename'], (time.time(), filetime)) - downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) + filetime = ctx.get('fragment_filetime') + if self.params.get('updatetime', True) and filetime: + with contextlib.suppress(Exception): + os.utime(ctx['filename'], (time.time(), filetime)) self._hook_progress({ 'downloaded_bytes': downloaded_bytes, @@ -316,6 +323,7 @@ class FragmentFD(FileDownloader): 'max_progress': ctx.get('max_progress'), 'progress_idx': ctx.get('progress_idx'), }, info_dict) + return True def _prepare_external_frag_download(self, ctx): if 'live' not in ctx: @@ -352,7 +360,8 @@ class FragmentFD(FileDownloader): if not decrypt_info or decrypt_info['METHOD'] != 'AES-128': return frag_content iv = decrypt_info.get('IV') or struct.pack('>8xq', fragment['media_sequence']) - decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI']) + decrypt_info['KEY'] = (decrypt_info.get('KEY') + or _get_key(traverse_obj(info_dict, ('hls_aes', 'uri')) or decrypt_info['URI'])) # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded, # not what it decrypts to. @@ -362,7 +371,7 @@ class FragmentFD(FileDownloader): return decrypt_fragment - def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None): + def download_and_append_fragments_multiple(self, *args, **kwargs): ''' @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... 
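remaining keyword arguments (pack_func, finish_func, etc.) are passed through
unchanged to download_and_append_fragments() for every slot;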
all args must be either tuple or list @@ -370,18 +379,17 @@ class FragmentFD(FileDownloader): interrupt_trigger = [True] max_progress = len(args) if max_progress == 1: - return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func) + return self.download_and_append_fragments(*args[0], **kwargs) max_workers = self.params.get('concurrent_fragment_downloads', 1) if max_progress > 1: self._prepare_multiline_status(max_progress) - is_live = any(traverse_obj(args, (..., 2, 'is_live'), default=[])) + is_live = any(traverse_obj(args, (..., 2, 'is_live'))) def thread_func(idx, ctx, fragments, info_dict, tpe): ctx['max_progress'] = max_progress ctx['progress_idx'] = idx return self.download_and_append_fragments( - ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, - tpe=tpe, interrupt_trigger=interrupt_trigger) + ctx, fragments, info_dict, **kwargs, tpe=tpe, interrupt_trigger=interrupt_trigger) class FTPE(concurrent.futures.ThreadPoolExecutor): # has to stop this or it's going to wait on the worker thread itself @@ -428,17 +436,12 @@ class FragmentFD(FileDownloader): return result def download_and_append_fragments( - self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, - tpe=None, interrupt_trigger=None): - if not interrupt_trigger: - interrupt_trigger = (True, ) + self, ctx, fragments, info_dict, *, is_fatal=(lambda idx: False), + pack_func=(lambda content, idx: content), finish_func=None, + tpe=None, interrupt_trigger=(True, )): - is_fatal = ( - ((lambda _: False) if info_dict.get('is_live') else (lambda idx: idx == 0)) - if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)) - - if not pack_func: - pack_func = lambda frag_content, _: frag_content + if not self.params.get('skip_unavailable_fragments', True): + is_fatal = lambda _: True def download_fragment(fragment, ctx): if not interrupt_trigger[0]: @@ -463,7 +466,8 @@ class FragmentFD(FileDownloader): for retry in RetryManager(self.params.get('fragment_retries'), error_callback): try: ctx['fragment_count'] = fragment.get('fragment_count') - if not self._download_fragment(ctx, fragment['url'], info_dict, headers): + if not self._download_fragment( + ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')): return except (urllib.error.HTTPError, http.client.IncompleteRead) as err: retry.error = err @@ -493,7 +497,7 @@ class FragmentFD(FileDownloader): download_fragment(fragment, ctx_copy) return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') - self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') + self.report_warning('The download speed shown is only of one thread. This is a known issue') with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: try: for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): @@ -527,5 +531,4 @@ class FragmentFD(FileDownloader): if finish_func is not None: ctx['dest_stream'].write(finish_func()) ctx['dest_stream'].flush() - self._finish_frag_download(ctx, info_dict) - return True + return self._finish_frag_download(ctx, info_dict) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 2010f3dc9e..f2868dc52b 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -7,8 +7,15 @@ from . import get_suitable_downloader from .external import FFmpegFD from .fragment import FragmentFD from .. 
import webvtt -from ..dependencies import Cryptodome_AES -from ..utils import bug_reports_message, parse_m3u8_attributes, update_url_query +from ..dependencies import Cryptodome +from ..utils import ( + bug_reports_message, + parse_m3u8_attributes, + remove_start, + traverse_obj, + update_url_query, + urljoin, +) class HlsFD(FragmentFD): @@ -63,7 +70,7 @@ class HlsFD(FragmentFD): can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None if can_download: has_ffmpeg = FFmpegFD.available() - no_crypto = not Cryptodome_AES and '#EXT-X-KEY:METHOD=AES-128' in s + no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s if no_crypto and has_ffmpeg: can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' elif no_crypto: @@ -150,6 +157,13 @@ class HlsFD(FragmentFD): i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} + external_aes_key = traverse_obj(info_dict, ('hls_aes', 'key')) + if external_aes_key: + external_aes_key = binascii.unhexlify(remove_start(external_aes_key, '0x')) + assert len(external_aes_key) in (16, 24, 32), 'Invalid length for HLS AES-128 key' + external_aes_iv = traverse_obj(info_dict, ('hls_aes', 'iv')) + if external_aes_iv: + external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32)) byte_range = {} discontinuity_count = 0 frag_index = 0 @@ -165,10 +179,7 @@ class HlsFD(FragmentFD): frag_index += 1 if frag_index <= ctx['fragment_index']: continue - frag_url = ( - line - if re.match(r'^https?://', line) - else urllib.parse.urljoin(man_url, line)) + frag_url = urljoin(man_url, line) if extra_query: frag_url = update_url_query(frag_url, extra_query) @@ -190,10 +201,7 @@ class HlsFD(FragmentFD): return False frag_index += 1 map_info = parse_m3u8_attributes(line[11:]) - frag_url = ( - map_info.get('URI') - if re.match(r'^https?://', map_info.get('URI')) - else urllib.parse.urljoin(man_url, map_info.get('URI'))) + frag_url = urljoin(man_url, map_info.get('URI')) if extra_query: frag_url = update_url_query(frag_url, extra_query) @@ -218,15 +226,18 @@ class HlsFD(FragmentFD): decrypt_url = decrypt_info.get('URI') decrypt_info = parse_m3u8_attributes(line[11:]) if decrypt_info['METHOD'] == 'AES-128': - if 'IV' in decrypt_info: + if external_aes_iv: + decrypt_info['IV'] = external_aes_iv + elif 'IV' in decrypt_info: decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32)) - if not re.match(r'^https?://', decrypt_info['URI']): - decrypt_info['URI'] = urllib.parse.urljoin( - man_url, decrypt_info['URI']) - if extra_query: - decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) - if decrypt_url != decrypt_info['URI']: - decrypt_info['KEY'] = None + if external_aes_key: + decrypt_info['KEY'] = external_aes_key + else: + decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI']) + if extra_query: + decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) + if decrypt_url != decrypt_info['URI']: + decrypt_info['KEY'] = None elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 95c870ee8b..fa72d5722a 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -211,7 +211,12 @@ class HttpFD(FileDownloader): ctx.stream = None def download(): - data_len = ctx.data.info().get('Content-length', None) + data_len = ctx.data.info().get('Content-length') + + if ctx.data.info().get('Content-encoding'): + 
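# (for example with Content-Encoding: gzip the header counts compressed bytes
# while decompressed bytes get written out, so any advertised length would
# mismatch the number of bytes actually received)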
# Content-encoding is present, Content-length is not reliable anymore as we are + # doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176) + data_len = None # Range HTTP header may be ignored/unsupported by a webserver # (e.g. extractor/scivee.py, extractor/bambuser.py). diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index 801b5af813..a157a8ad93 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -138,6 +138,8 @@ def write_piff_header(stream, params): if fourcc == 'AACL': sample_entry_box = box(b'mp4a', sample_entry_payload) + if fourcc == 'EC-3': + sample_entry_box = box(b'ec-3', sample_entry_payload) elif stream_type == 'video': sample_entry_payload += u16.pack(0) # pre defined sample_entry_payload += u16.pack(0) # reserved @@ -278,5 +280,4 @@ class IsmFD(FragmentFD): return False self.report_skip_fragment(frag_index) - self._finish_frag_download(ctx, info_dict) - return True + return self._finish_frag_download(ctx, info_dict) diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index ed076e09ed..d977dcec31 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -186,5 +186,4 @@ body > figure > img { ctx['dest_stream'].write( b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii')) - self._finish_frag_download(ctx, info_dict) - return True + return self._finish_frag_download(ctx, info_dict) diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index 1bc3209dc4..5928fecf0b 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -191,8 +191,7 @@ class YoutubeLiveChatFD(FragmentFD): if test: break - self._finish_frag_download(ctx, info_dict) - return True + return self._finish_frag_download(ctx, info_dict) @staticmethod def parse_live_timestamp(action): diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index a1e54a4a53..42a0ad007d 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -21,6 +21,8 @@ from .youtube import ( # Youtube is moved to the top to improve performance YoutubeYtBeIE, YoutubeYtUserIE, YoutubeWatchLaterIE, + YoutubeShortsAudioPivotIE, + YoutubeConsentRedirectIE, ) from .abc import ( @@ -64,12 +66,21 @@ from .aenetworks import ( HistoryPlayerIE, BiographyIE, ) +from .aeonco import AeonCoIE from .afreecatv import ( AfreecaTVIE, AfreecaTVLiveIE, AfreecaTVUserIE, ) +from .agora import ( + TokFMAuditionIE, + TokFMPodcastIE, + WyborczaPodcastIE, + WyborczaVideoIE, +) from .airmozilla import AirMozillaIE +from .airtv import AirTVIE +from .aitube import AitubeKZVideoIE from .aljazeera import AlJazeeraIE from .alphaporno import AlphaPornoIE from .amara import AmaraIE @@ -78,11 +89,20 @@ from .alura import ( AluraCourseIE ) from .amcnetworks import AMCNetworksIE -from .amazon import AmazonStoreIE +from .amazon import ( + AmazonStoreIE, + AmazonReviewsIE, +) +from .amazonminitv import ( + AmazonMiniTVIE, + AmazonMiniTVSeasonIE, + AmazonMiniTVSeriesIE, +) from .americastestkitchen import ( AmericasTestKitchenIE, AmericasTestKitchenSeasonIE, ) +from .anchorfm import AnchorFMEpisodeIE from .angel import AngelIE from .anvato import AnvatoIE from .aol import AolIE @@ -103,6 +123,7 @@ from .applepodcasts import ApplePodcastsIE from .archiveorg import ( ArchiveOrgIE, YoutubeWebArchiveIE, + VLiveWebArchiveIE, ) from .arcpublishing import ArcPublishingIE from .arkena import ArkenaIE @@ -170,7 +191,12 @@ from .bbc import ( from .beeg import BeegIE from 
.behindkink import BehindKinkIE from .bellmedia import BellMediaIE +from .beatbump import ( + BeatBumpVideoIE, + BeatBumpPlaylistIE, +) from .beatport import BeatportIE +from .berufetv import BerufeTVIE from .bet import BetIE from .bfi import BFIPlayerIE from .bfmtv import ( @@ -184,13 +210,16 @@ from .bigo import BigoIE from .bild import BildIE from .bilibili import ( BiliBiliIE, + BiliBiliBangumiIE, + BiliBiliBangumiMediaIE, BiliBiliSearchIE, BilibiliCategoryIE, - BiliBiliBangumiIE, BilibiliAudioIE, BilibiliAudioAlbumIE, BiliBiliPlayerIE, - BilibiliChannelIE, + BilibiliSpaceVideoIE, + BilibiliSpaceAudioIE, + BilibiliSpacePlaylistIE, BiliIntlIE, BiliIntlSeriesIE, BiliLiveIE, @@ -210,12 +239,15 @@ from .bleacherreport import ( BleacherReportIE, BleacherReportCMSIE, ) +from .blerp import BlerpIE from .blogger import BloggerIE from .bloomberg import BloombergIE from .bokecc import BokeCCIE from .bongacams import BongaCamsIE from .bostonglobe import BostonGlobeIE from .box import BoxIE +from .boxcast import BoxCastVideoIE +from .booyah import BooyahClipsIE from .bpb import BpbIE from .br import ( BRIE, @@ -229,6 +261,7 @@ from .brightcove import ( BrightcoveNewIE, ) from .businessinsider import BusinessInsiderIE +from .bundesliga import BundesligaIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE @@ -241,6 +274,7 @@ from .camdemy import ( CamdemyFolderIE ) from .cammodels import CamModelsIE +from .camsoda import CamsodaIE from .camtasia import CamtasiaEmbedIE from .camwithher import CamWithHerIE from .canalalpha import CanalAlphaIE @@ -304,6 +338,7 @@ from .chirbit import ( ) from .cinchcast import CinchcastIE from .cinemax import CinemaxIE +from .cinetecamilano import CinetecaMilanoIE from .ciscolive import ( CiscoLiveSessionIE, CiscoLiveSearchIE, @@ -328,6 +363,7 @@ from .cnn import ( CNNIE, CNNBlogsIE, CNNArticleIE, + CNNIndonesiaIE, ) from .coub import CoubIE from .comedycentral import ( @@ -357,8 +393,6 @@ from .crowdbunker import ( CrowdBunkerChannelIE, ) from .crunchyroll import ( - CrunchyrollIE, - CrunchyrollShowPlaylistIE, CrunchyrollBetaIE, CrunchyrollBetaShowIE, ) @@ -410,7 +444,7 @@ from .deezer import ( DeezerShowIE, ) from .democracynow import DemocracynowIE -from .detik import Detik20IE +from .detik import DetikEmbedIE from .dfb import DFBIE from .dhm import DHMIE from .digg import DiggIE @@ -437,6 +471,7 @@ from .dplay import ( AnimalPlanetIE, TLCIE, MotorTrendIE, + MotorTrendOnDemandIE, DiscoveryPlusIndiaIE, DiscoveryNetworksDeIE, DiscoveryPlusItalyIE, @@ -449,6 +484,8 @@ from .drtuber import DrTuberIE from .drtv import ( DRTVIE, DRTVLiveIE, + DRTVSeasonIE, + DRTVSeriesIE, ) from .dtube import DTubeIE from .dvtv import DVTVIE @@ -458,11 +495,14 @@ from .duboku import ( ) from .dumpert import DumpertIE from .defense import DefenseGouvFrIE +from .deuxm import ( + DeuxMIE, + DeuxMNewsIE +) from .digitalconcerthall import DigitalConcertHallIE from .discovery import DiscoveryIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE -from .doodstream import DoodStreamIE from .dropbox import DropboxIE from .dropout import ( DropoutSeasonIE, @@ -474,6 +514,7 @@ from .dw import ( ) from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE from .ebaumsworld import EbaumsWorldIE +from .ebay import EbayIE from .echomsk import EchoMskIE from .egghead import ( EggheadCourseIE, @@ -516,7 +557,7 @@ from .espn import ( ESPNCricInfoIE, ) from .esri import EsriVideoIE -from .europa import EuropaIE +from .europa import EuropaIE, 
EuroParlWebstreamIE from .europeantour import EuropeanTourIE from .eurosport import EurosportIE from .euscreen import EUScreenIE @@ -574,6 +615,7 @@ from .foxgay import FoxgayIE from .foxnews import ( FoxNewsIE, FoxNewsArticleIE, + FoxNewsVideoIE, ) from .foxsports import FoxSportsIE from .fptplay import FptplayIE @@ -624,6 +666,10 @@ from .gazeta import GazetaIE from .gdcvault import GDCVaultIE from .gedidigital import GediDigitalIE from .generic import GenericIE +from .genius import ( + GeniusIE, + GeniusLyricsIE, +) from .gettr import ( GettrIE, GettrStreamingIE, @@ -651,6 +697,7 @@ from .googlepodcasts import ( ) from .googlesearch import GoogleSearchIE from .gopro import GoProIE +from .goplay import GoPlayIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE @@ -679,6 +726,7 @@ from .hotstar import ( HotStarIE, HotStarPrefixIE, HotStarPlaylistIE, + HotStarSeasonIE, HotStarSeriesIE, ) from .howcast import HowcastIE @@ -692,7 +740,10 @@ from .hse import ( HSEShowIE, HSEProductIE, ) -from .genericembeds import HTML5MediaEmbedIE +from .genericembeds import ( + HTML5MediaEmbedIE, + QuotedHTMLIE, +) from .huajiao import HuajiaoIE from .huya import HuyaLiveIE from .huffpost import HuffPostIE @@ -702,6 +753,7 @@ from .hungama import ( HungamaAlbumPlaylistIE, ) from .hypem import HypemIE +from .hypergryph import MonsterSirenHypergryphMusicIE from .hytale import HytaleIE from .icareus import IcareusIE from .ichinanalive import ( @@ -717,6 +769,7 @@ from .iheart import ( IHeartRadioIE, IHeartRadioPodcastIE, ) +from .iltalehti import IltalehtiIE from .imdb import ( ImdbIE, ImdbListIE @@ -752,6 +805,7 @@ from .islamchannel import ( IslamChannelIE, IslamChannelSeriesIE, ) +from .israelnationalnews import IsraelNationalNewsIE from .itprotv import ( ITProTVIE, ITProTVCourseIE @@ -780,12 +834,21 @@ from .jamendo import ( JamendoIE, JamendoAlbumIE, ) +from .japandiet import ( + ShugiinItvLiveIE, + ShugiinItvLiveRoomIE, + ShugiinItvVodIE, + SangiinInstructionIE, + SangiinIE, +) from .jeuxvideo import JeuxVideoIE from .jove import JoveIE from .joj import JojIE from .jwplatform import JWPlatformIE from .kakao import KakaoIE from .kaltura import KalturaIE +from .kanal2 import Kanal2IE +from .kankanews import KankaNewsIE from .karaoketv import KaraoketvIE from .karrierevideos import KarriereVideosIE from .keezmovies import KeezMoviesIE @@ -795,10 +858,15 @@ from .khanacademy import ( KhanAcademyIE, KhanAcademyUnitIE, ) +from .kick import ( + KickIE, + KickVODIE, +) from .kicker import KickerIE from .kickstarter import KickStarterIE from .kinja import KinjaEmbedIE from .kinopoisk import KinoPoiskIE +from .kommunetv import KommunetvIE from .kompas import KompasVideoIE from .konserthusetplay import KonserthusetPlayIE from .koo import KooIE @@ -850,6 +918,10 @@ from .leeco import ( LePlaylistIE, LetvCloudIE, ) +from .lefigaro import ( + LeFigaroVideoEmbedIE, + LeFigaroVideoSectionIE, +) from .lego import LEGOIE from .lemonde import LemondeIE from .lenta import LentaIE @@ -879,6 +951,7 @@ from .linkedin import ( ) from .linuxacademy import LinuxAcademyIE from .liputan6 import Liputan6IE +from .listennotes import ListenNotesIE from .litv import LiTVIE from .livejournal import LiveJournalIE from .livestream import ( @@ -897,6 +970,9 @@ from .lrt import ( LRTVODIE, LRTStreamIE ) +from .lumni import ( + LumniIE +) from .lynda import ( LyndaIE, LyndaCourseIE @@ -941,6 +1017,10 @@ from .mediasite import ( MediasiteCatalogIE, MediasiteNamedCatalogIE, ) +from 
.mediastream import ( + MediaStreamIE, + WinSportsVideoIE, +) from .mediaworksnz import MediaWorksNZVODIE from .medici import MediciIE from .megaphone import MegaphoneIE @@ -957,6 +1037,7 @@ from .microsoftvirtualacademy import ( MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, ) +from .microsoftembed import MicrosoftEmbedIE from .mildom import ( MildomIE, MildomVodIE, @@ -991,6 +1072,7 @@ from .mlb import ( MLBIE, MLBVideoIE, MLBTVIE, + MLBArticleIE, ) from .mlssoccer import MLSSoccerIE from .mnet import MnetIE @@ -1078,6 +1160,7 @@ from .nbc import ( NBCSportsIE, NBCSportsStreamIE, NBCSportsVPlayerIE, + NBCStationsIE, ) from .ndr import ( NDRIE, @@ -1106,6 +1189,7 @@ from .neteasemusic import ( from .netverse import ( NetverseIE, NetversePlaylistIE, + NetverseSearchIE, ) from .newgrounds import ( NewgroundsIE, @@ -1130,6 +1214,8 @@ from .nfhsnetwork import NFHSNetworkIE from .nfl import ( NFLIE, NFLArticleIE, + NFLPlusEpisodeIE, + NFLPlusReplayIE, ) from .nhk import ( NhkVodIE, @@ -1167,11 +1253,13 @@ from .nintendo import NintendoIE from .nitter import NitterIE from .njpwworld import NJPWWorldIE from .nobelprize import NobelPrizeIE +from .noice import NoicePodcastIE from .nonktube import NonkTubeIE from .noodlemagazine import NoodleMagazineIE from .noovo import NoovoIE from .normalboots import NormalbootsIE from .nosvideo import NosVideoIE +from .nosnl import NOSNLArticleIE from .nova import ( NovaEmbedIE, NovaIE, @@ -1211,6 +1299,7 @@ from .nrl import NRLTVIE from .ntvcojp import NTVCoJpCUIE from .ntvde import NTVDeIE from .ntvru import NTVRuIE +from .nubilesporn import NubilesPornIE from .nytimes import ( NYTimesIE, NYTimesArticleIE, @@ -1218,14 +1307,22 @@ from .nytimes import ( ) from .nuvid import NuvidIE from .nzherald import NZHeraldIE +from .nzonscreen import NZOnScreenIE from .nzz import NZZIE from .odatv import OdaTVIE +from .odkmedia import OnDemandChinaEpisodeIE from .odnoklassniki import OdnoklassnikiIE +from .oftv import ( + OfTVIE, + OfTVPlaylistIE +) from .oktoberfesttv import OktoberfestTVIE from .olympics import OlympicsReplayIE from .on24 import On24IE from .ondemandkorea import OnDemandKoreaIE from .onefootball import OneFootballIE +from .onenewsnz import OneNewsNZIE +from .oneplace import OnePlacePodcastIE from .onet import ( OnetIE, OnetChannelIE, @@ -1334,6 +1431,7 @@ from .pluralsight import ( PluralsightIE, PluralsightCourseIE, ) +from .podbayfm import PodbayFMIE, PodbayFMChannelIE from .podchaser import PodchaserIE from .podomatic import PodomaticIE from .pokemon import ( @@ -1347,6 +1445,8 @@ from .pokergo import ( from .polsatgo import PolsatGoIE from .polskieradio import ( PolskieRadioIE, + PolskieRadioLegacyIE, + PolskieRadioAuditionIE, PolskieRadioCategoryIE, PolskieRadioPlayerIE, PolskieRadioPodcastIE, @@ -1374,6 +1474,8 @@ from .puhutv import ( PuhuTVIE, PuhuTVSerieIE, ) +from .pr0gramm import Pr0grammStaticIE, Pr0grammIE +from .prankcast import PrankCastIE from .premiershiprugby import PremiershipRugbyIE from .presstv import PressTVIE from .projectveritas import ProjectVeritasIE @@ -1387,6 +1489,7 @@ from .prx import ( ) from .puls4 import Puls4IE from .pyvideo import PyvideoIE +from .qingting import QingTingIE from .qqmusic import ( QQMusicIE, QQMusicSingerIE, @@ -1433,6 +1536,10 @@ from .raywenderlich import ( RayWenderlichCourseIE, ) from .rbmaradio import RBMARadioIE +from .rbgtum import ( + RbgTumIE, + RbgTumCourseIE, +) from .rcs import ( RCSIE, RCSEmbedsIE, @@ -1477,7 +1584,10 @@ from .rokfin import ( ) from .roosterteeth import 
RoosterTeethIE, RoosterTeethSeriesIE from .rottentomatoes import RottenTomatoesIE -from .rozhlas import RozhlasIE +from .rozhlas import ( + RozhlasIE, + RozhlasVltavaIE, +) from .rte import RteIE, RteRadioIE from .rtlnl import ( RtlNlIE, @@ -1514,6 +1624,7 @@ from .ruhd import RUHDIE from .rule34video import Rule34VideoIE from .rumble import ( RumbleEmbedIE, + RumbleIE, RumbleChannelIE, ) from .rutube import ( @@ -1554,7 +1665,9 @@ from .samplefocus import SampleFocusIE from .sapo import SapoIE from .savefrom import SaveFromIE from .sbs import SBSIE +from .screen9 import Screen9IE from .screencast import ScreencastIE +from .screencastify import ScreencastifyIE from .screencastomatic import ScreencastOMaticIE from .scrippsnetworks import ( ScrippsNetworksWatchIE, @@ -1584,6 +1697,7 @@ from .shared import ( VivoIE, ) from .sharevideos import ShareVideosEmbedIE +from .sibnet import SibnetEmbedIE from .shemaroome import ShemarooMeIE from .showroomlive import ShowRoomLiveIE from .simplecast import ( @@ -1599,7 +1713,6 @@ from .skyit import ( SkyItVideoIE, SkyItVideoLiveIE, SkyItIE, - SkyItAcademyIE, SkyItArteIE, CieloTVItIE, TV8ItIE, @@ -1619,6 +1732,7 @@ from .sky import ( from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE +from .smotrim import SmotrimIE from .snotr import SnotrIE from .sohu import SohuIE from .sonyliv import ( @@ -1631,6 +1745,7 @@ from .soundcloud import ( SoundcloudSetIE, SoundcloudRelatedIE, SoundcloudUserIE, + SoundcloudUserPermalinkIE, SoundcloudTrackStationIE, SoundcloudPlaylistIE, SoundcloudSearchIE, @@ -1718,6 +1833,7 @@ from .svt import ( SVTPlayIE, SVTSeriesIE, ) +from .swearnet import SwearnetEpisodeIE from .swrmediathek import SWRMediathekIE from .syvdk import SYVDKIE from .syfy import SyfyIE @@ -1735,7 +1851,10 @@ from .teachertube import ( TeacherTubeUserIE, ) from .teachingchannel import TeachingChannelIE -from .teamcoco import TeamcocoIE +from .teamcoco import ( + TeamcocoIE, + ConanClassicIE, +) from .teamtreehouse import TeamTreeHouseIE from .techtalks import TechTalksIE from .ted import ( @@ -1747,6 +1866,7 @@ from .ted import ( from .tele5 import Tele5IE from .tele13 import Tele13IE from .telebruxelles import TeleBruxellesIE +from .telecaribe import TelecaribePlayIE from .telecinco import TelecincoIE from .telegraaf import TelegraafIE from .telegram import TelegramEmbedIE @@ -1761,8 +1881,10 @@ from .telequebec import ( ) from .teletask import TeleTaskIE from .telewebion import TelewebionIE -from .tempo import TempoIE +from .tempo import TempoIE, IVXPlayerIE from .tencent import ( + IflixEpisodeIE, + IflixSeriesIE, VQQSeriesIE, VQQVideoIE, WeTvEpisodeIE, @@ -1789,6 +1911,11 @@ from .theweatherchannel import TheWeatherChannelIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE +from .thisvid import ( + ThisVidIE, + ThisVidMemberIE, + ThisVidPlaylistIE, +) from .threespeak import ( ThreeSpeakIE, ThreeSpeakUserIE, @@ -1801,6 +1928,7 @@ from .tiktok import ( TikTokEffectIE, TikTokTagIE, TikTokVMIE, + TikTokLiveIE, DouyinIE, ) from .tinypic import TinyPicIE @@ -1838,6 +1966,7 @@ from .trovo import ( TrovoChannelVodIE, TrovoChannelClipIE, ) +from .trtcocuk import TrtCocukVideoIE from .trueid import TrueIDIE from .trunews import TruNewsIE from .truth import TruthIE @@ -1850,10 +1979,9 @@ from .tubitv import ( ) from .tumblr import TumblrIE from .tunein import ( - TuneInClipIE, TuneInStationIE, - TuneInProgramIE, - TuneInTopicIE, + 
TuneInPodcastIE, + TuneInPodcastEpisodeIE, TuneInShortenerIE, ) from .tunepk import TunePkIE @@ -1864,6 +1992,9 @@ from .tv2 import ( KatsomoIE, MTVUutisetArticleIE, ) +from .tv24ua import ( + TV24UAVideoIE, +) from .tv2dk import ( TV2DKIE, TV2DKBornholmPlayIE, @@ -1913,7 +2044,8 @@ from .tvp import ( TVPEmbedIE, TVPIE, TVPStreamIE, - TVPWebsiteIE, + TVPVODSeriesIE, + TVPVODVideoIE, ) from .tvplay import ( TVPlayIE, @@ -1944,8 +2076,13 @@ from .twitter import ( TwitterIE, TwitterAmplifyIE, TwitterBroadcastIE, + TwitterSpacesIE, TwitterShortenerIE, ) +from .txxx import ( + TxxxIE, + PornTopIE, +) from .udemy import ( UdemyIE, UdemyCourseIE @@ -1966,6 +2103,8 @@ from .drooble import DroobleIE from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE +from .unscripted import UnscriptedNewsVideoIE +from .unsupported import KnownDRMIE, KnownPiracyIE from .uol import UOLIE from .uplynk import ( UplynkIE, @@ -1985,7 +2124,10 @@ from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veehd import VeeHDIE from .veo import VeoIE -from .veoh import VeohIE +from .veoh import ( + VeohIE, + VeohUserIE +) from .vesti import VestiIE from .vevo import ( VevoIE, @@ -2011,6 +2153,13 @@ from .videocampus_sachsen import ( ) from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE +from .videoken import ( + VideoKenIE, + VideoKenPlayerIE, + VideoKenPlaylistIE, + VideoKenCategoryIE, + VideoKenTopicIE, +) from .videomore import ( VideomoreIE, VideomoreVideoIE, @@ -2023,7 +2172,6 @@ from .vidio import ( VidioLiveIE ) from .vidlii import VidLiiIE -from .vier import VierIE, VierVideosIE from .viewlift import ( ViewLiftIE, ViewLiftEmbedIE, @@ -2036,6 +2184,7 @@ from .vimeo import ( VimeoGroupsIE, VimeoLikesIE, VimeoOndemandIE, + VimeoProIE, VimeoReviewIE, VimeoUserIE, VimeoWatchLaterIE, @@ -2059,17 +2208,14 @@ from .viu import ( ViuIE, ViuPlaylistIE, ViuOTTIE, + ViuOTTIndonesiaIE, ) from .vk import ( VKIE, VKUserVideosIE, VKWallPostIE, ) -from .vlive import ( - VLiveIE, - VLivePostIE, - VLiveChannelIE, -) +from .vocaroo import VocarooIE from .vodlocker import VodlockerIE from .vodpl import VODPlIE from .vodplatform import VODPlatformIE @@ -2078,6 +2224,7 @@ from .voicy import ( VoicyIE, VoicyChannelIE, ) +from .volejtv import VolejTVIE from .voot import ( VootIE, VootSeriesIE, @@ -2123,6 +2270,7 @@ from .wdr import ( WDRElefantIE, WDRMobileIE, ) +from .webcamerapl import WebcameraplIE from .webcaster import ( WebcasterIE, WebcasterFeedIE, @@ -2143,17 +2291,27 @@ from .whowatch import WhoWatchIE from .wistia import ( WistiaIE, WistiaPlaylistIE, + WistiaChannelIE, +) +from .wordpress import ( + WordpressPlaylistEmbedIE, + WordpressMiniAudioPlayerEmbedIE, ) from .worldstarhiphop import WorldStarHipHopIE from .wppilot import ( WPPilotIE, WPPilotChannelsIE, ) +from .wrestleuniverse import ( + WrestleUniverseVODIE, + WrestleUniversePPVIE, +) from .wsj import ( WSJIE, WSJArticleIE, ) from .wwe import WWEIE +from .xanimu import XanimuIE from .xbef import XBefIE from .xboxclips import XboxClipsIE from .xfileshare import XFileShareIE @@ -2162,12 +2320,6 @@ from .xhamster import ( XHamsterEmbedIE, XHamsterUserIE, ) -from .xiami import ( - XiamiSongIE, - XiamiAlbumIE, - XiamiArtistIE, - XiamiCollectionIE -) from .ximalaya import ( XimalayaIE, XimalayaAlbumIE @@ -2178,7 +2330,10 @@ from .xnxx import XNXXIE from .xstream import XstreamIE from .xtube import XTubeUserIE, XTubeIE from .xuite import XuiteIE -from .xvideos import XVideosIE +from .xvideos import ( + XVideosIE, + 
XVideosQuickiesIE +) from .xxxymovies import XXXYMoviesIE from .yahoo import ( YahooIE, @@ -2202,8 +2357,10 @@ from .yandexvideo import ( ZenYandexChannelIE, ) from .yapfiles import YapFilesIE +from .yappy import YappyIE from .yesjapan import YesJapanIE from .yinyuetai import YinYueTaiIE +from .yle_areena import YleAreenaIE from .ynet import YnetIE from .youjizz import YouJizzIE from .youku import ( @@ -2266,6 +2423,7 @@ from .zee5 import ( Zee5IE, Zee5SeriesIE, ) +from .zeenews import ZeeNewsIE from .zhihu import ZhihuIE from .zingmp3 import ( ZingMp3IE, diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index 03f10ab23d..0ca76b85a8 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -155,8 +155,6 @@ class ABCIE(InfoExtractor): 'format_id': format_id }) - self._sort_formats(formats) - return { 'id': video_id, 'title': self._og_search_title(webpage), @@ -221,7 +219,6 @@ class ABCIViewIE(InfoExtractor): entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) if formats: break - self._sort_formats(formats) subtitles = {} src_vtt = stream.get('captions', {}).get('src-vtt') diff --git a/yt_dlp/extractor/abcotvs.py b/yt_dlp/extractor/abcotvs.py index 44a9f8ca5e..6dca19de41 100644 --- a/yt_dlp/extractor/abcotvs.py +++ b/yt_dlp/extractor/abcotvs.py @@ -78,7 +78,6 @@ class ABCOTVSIE(InfoExtractor): 'url': mp4_url, 'width': 640, }) - self._sort_formats(formats) image = video.get('image') or {} @@ -119,7 +118,6 @@ class ABCOTVSClipsIE(InfoExtractor): title = video_data['title'] formats = self._extract_m3u8_formats( video_data['videoURL'].split('?')[0], video_id, 'mp4') - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index 9955fb289f..f611c1f2c2 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -156,7 +156,7 @@ class AbemaTVBaseIE(InfoExtractor): def _generate_aks(cls, deviceid): deviceid = deviceid.encode('utf-8') # add 1 hour and then drop minute and secs - ts_1hour = int((time_seconds(hours=9) // 3600 + 1) * 3600) + ts_1hour = int((time_seconds() // 3600 + 1) * 3600) time_struct = time.gmtime(ts_1hour) ts_1hour_str = str(ts_1hour).encode('utf-8') @@ -190,6 +190,16 @@ class AbemaTVBaseIE(InfoExtractor): if self._USERTOKEN: return self._USERTOKEN + username, _ = self._get_login_info() + AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username) + if AbemaTVBaseIE._USERTOKEN: + # try authentication with locally stored token + try: + self._get_media_token(True) + return + except ExtractorError as e: + self.report_warning(f'Failed to login with cached user token; obtaining a fresh one ({e})') + AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4()) aks = self._generate_aks(self._DEVICE_ID) user_data = self._download_json( @@ -300,6 +310,11 @@ class AbemaTVIE(AbemaTVBaseIE): _TIMETABLE = None def _perform_login(self, username, password): + self._get_device_token() + if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token(): + self.write_debug('Skipping logging in') + return + if '@' in username: # don't strictly check if it's email address or not ep, method = 'user/email', 'email' else: @@ -319,6 +334,7 @@ class AbemaTVIE(AbemaTVBaseIE): AbemaTVBaseIE._USERTOKEN = login_response['token'] self._get_media_token(True) + self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN) def _real_extract(self, url): # starting download using infojson from this extractor is undefined behavior, @@ -416,7 +432,7 @@ class 
AbemaTVIE(AbemaTVBaseIE): f'https://api.abema.io/v1/video/programs/{video_id}', video_id, note='Checking playability', headers=headers) - ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'), default=[]) + ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType')) if 3 not in ondemand_types: # cannot acquire decryption key for these streams self.report_warning('This is a premium-only stream') @@ -489,7 +505,7 @@ class AbemaTVTitleIE(AbemaTVBaseIE): }) yield from ( self.url_result(f'https://abema.tv/video/episode/{x}') - for x in traverse_obj(programs, ('programs', ..., 'id'), default=[])) + for x in traverse_obj(programs, ('programs', ..., 'id'))) def _entries(self, playlist_id, series_version): return OnDemandPagedList( diff --git a/yt_dlp/extractor/acfun.py b/yt_dlp/extractor/acfun.py index 615efd9bb0..dc57929445 100644 --- a/yt_dlp/extractor/acfun.py +++ b/yt_dlp/extractor/acfun.py @@ -27,7 +27,6 @@ class AcFunVideoBaseIE(InfoExtractor): **parse_codecs(video.get('codecs', '')) }) - self._sort_formats(formats) return { 'id': video_id, 'formats': formats, @@ -84,7 +83,7 @@ class AcFunVideoIE(AcFunVideoBaseIE): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - json_all = self._search_json(r'window.videoInfo\s*=\s*', webpage, 'videoInfo', video_id) + json_all = self._search_json(r'window.videoInfo\s*=', webpage, 'videoInfo', video_id) title = json_all.get('title') video_list = json_all.get('videoList') or [] @@ -161,10 +160,10 @@ class AcFunBangumiIE(AcFunVideoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) ac_idx = parse_qs(url).get('ac', [None])[-1] - video_id = f'{video_id}{format_field(ac_idx, template="__%s")}' + video_id = f'{video_id}{format_field(ac_idx, None, "__%s")}' webpage = self._download_webpage(url, video_id) - json_bangumi_data = self._search_json(r'window.bangumiData\s*=\s*', webpage, 'bangumiData', video_id) + json_bangumi_data = self._search_json(r'window.bangumiData\s*=', webpage, 'bangumiData', video_id) if ac_idx: video_info = json_bangumi_data['hlVideoInfo'] @@ -181,7 +180,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE): if v.get('id') == season_id), 1) json_bangumi_list = self._search_json( - r'window\.bangumiList\s*=\s*', webpage, 'bangumiList', video_id, fatal=False) + r'window\.bangumiList\s*=', webpage, 'bangumiList', video_id, fatal=False) video_internal_id = int_or_none(traverse_obj(json_bangumi_data, ('currentVideoInfo', 'id'))) episode_number = video_internal_id and next(( idx for idx, v in enumerate(json_bangumi_list.get('items') or [], 1) diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index 18ddc5729c..f1f55e87fc 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -28,30 +28,34 @@ from ..utils import ( class ADNIE(InfoExtractor): - IE_DESC = 'Anime Digital Network' - _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P\d+)' - _TEST = { - 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', - 'md5': '0319c99885ff5547565cacb4f3f9348d', + IE_DESC = 'Animation Digital Network' + _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P\d+)' + _TESTS = [{ + 'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir', + 'md5': '1c9ef066ceb302c86f80c2b371615261', 'info_dict': { - 'id': '7778', + 'id': '9841', 'ext': 'mp4', - 'title': 'Blue Exorcist - Kyôto Saga - Episode 1', - 'description': 
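The AbemaTV login changes above cache the user token per username and only re-register a device when the cached token no longer validates. A toy model of that flow (all names here are illustrative stand-ins, not the yt-dlp cache API):

```python
cache = {}  # stands in for self.cache.load / self.cache.store

def media_token_works(token):
    return token == 'good-token'  # stands in for _get_media_token(True)

def login(username):
    token = cache.get(username)
    if token and media_token_works(token):
        return token              # "Skipping logging in"
    token = 'good-token'          # stands in for the user/email login request
    cache[username] = token
    return token

assert login('user@example.com') == 'good-token'  # second call hits the cache
```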
'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', - 'series': 'Blue Exorcist - Kyôto Saga', - 'duration': 1467, - 'release_date': '20170106', + 'title': 'Fruits Basket - Episode 1', + 'description': 'md5:14be2f72c3c96809b0ca424b0097d336', + 'series': 'Fruits Basket', + 'duration': 1437, + 'release_date': '20190405', 'comment_count': int, 'average_rating': float, - 'season_number': 2, - 'episode': 'Début des hostilités', + 'season_number': 1, + 'episode': 'À ce soir !', 'episode_number': 1, - } - } + }, + 'skip': 'Only available in region (FR, ...)', + }, { + 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', + 'only_matching': True, + }] - _NETRC_MACHINE = 'animedigitalnetwork' - _BASE_URL = 'http://animedigitalnetwork.fr' - _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/' + _NETRC_MACHINE = 'animationdigitalnetwork' + _BASE = 'animationdigitalnetwork.fr' + _API_BASE_URL = 'https://gw.api.' + _BASE + '/' _PLAYER_BASE_URL = _API_BASE_URL + 'player/' _HEADERS = {} _LOGIN_ERR_MESSAGE = 'Unable to log in' @@ -75,11 +79,11 @@ class ADNIE(InfoExtractor): if subtitle_location: enc_subtitles = self._download_webpage( subtitle_location, video_id, 'Downloading subtitles data', - fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'}) + fatal=False, headers={'Origin': 'https://' + self._BASE}) if not enc_subtitles: return None - # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js + # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes( compat_b64decode(enc_subtitles[24:]), binascii.unhexlify(self._K + '7fac1178830cfe0c'), @@ -164,7 +168,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' }, data=b'')['token'] links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') - self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)]) + self._K = ''.join(random.choices('0123456789abcdef', k=16)) message = bytes_to_intlist(json.dumps({ 'k': self._K, 't': token, @@ -231,7 +235,6 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' for f in m3u8_formats: f['language'] = 'fr' formats.extend(m3u8_formats) - self._sort_formats(formats) video = (self._download_json( self._API_BASE_URL + 'video/%s' % video_id, video_id, diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index ec1be008a3..e5944f7146 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -1352,7 +1352,7 @@ MSO_INFO = { } -class AdobePassIE(InfoExtractor): +class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' _MVPD_CACHE = 'ap-mvpd' diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py index d8e07b3a17..d1525a1af2 100644 --- a/yt_dlp/extractor/adobetv.py +++ b/yt_dlp/extractor/adobetv.py @@ -70,7 +70,6 @@ class AdobeTVBaseIE(InfoExtractor): }) s3_extracted = True formats.append(f) - self._sort_formats(formats) return { 'id': video_id, @@ -269,7 +268,6 @@ class AdobeTVVideoIE(AdobeTVBaseIE): 'width': int_or_none(source.get('width') or None), 'url': source_src, }) - self._sort_formats(formats) # For both metadata and downloaded files the duration varies among # formats. 
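The ADN subtitle handling visible above decrypts an AES-CBC blob whose key is the 16 random hex digits sent with the request plus a fixed hex suffix. A sketch using pycryptodome (the IV argument is elided in the hunk; treating the first 24 base64 characters as the IV is an assumption):

```python
import binascii
from base64 import b64decode

from Crypto.Cipher import AES  # pycryptodome; yt-dlp uses its bundled AES code

def decrypt_adn_subtitles(enc_subtitles, k):
    key = binascii.unhexlify(k + '7fac1178830cfe0c')  # 16 random + fixed hex digits
    iv = b64decode(enc_subtitles[:24])                # assumption: IV prefix
    data = b64decode(enc_subtitles[24:])
    plain = AES.new(key, AES.MODE_CBC, iv).decrypt(data)
    return plain[:-plain[-1]]                         # strip PKCS#7 padding
```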
I just pick the max one diff --git a/yt_dlp/extractor/adultswim.py b/yt_dlp/extractor/adultswim.py index 1368954bc6..bd29eb43e5 100644 --- a/yt_dlp/extractor/adultswim.py +++ b/yt_dlp/extractor/adultswim.py @@ -180,7 +180,6 @@ class AdultSwimIE(TurnerBaseIE): info['subtitles'].setdefault('en', []).append({ 'url': asset_url, }) - self._sort_formats(info['formats']) return info else: diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py index 516cb6302c..d7c401016c 100644 --- a/yt_dlp/extractor/aenetworks.py +++ b/yt_dlp/extractor/aenetworks.py @@ -8,7 +8,7 @@ from ..utils import ( ) -class AENetworksBaseIE(ThePlatformIE): +class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE _BASE_URL_REGEX = r'''(?x)https?:// (?:(?:www|play|watch)\.)? (?P @@ -62,7 +62,6 @@ class AENetworksBaseIE(ThePlatformIE): subtitles = self._merge_subtitles(subtitles, tp_subtitles) if last_e and not formats: raise last_e - self._sort_formats(formats) return { 'id': video_id, 'formats': formats, @@ -304,7 +303,6 @@ class HistoryTopicIE(AENetworksBaseIE): class HistoryPlayerIE(AENetworksBaseIE): IE_NAME = 'history:player' _VALID_URL = r'https?://(?:www\.)?(?P(?:history|biography)\.com)/player/(?P\d+)' - _TESTS = [] def _real_extract(self, url): domain, video_id = self._match_valid_url(url).groups() diff --git a/yt_dlp/extractor/aeonco.py b/yt_dlp/extractor/aeonco.py new file mode 100644 index 0000000000..4655862e3f --- /dev/null +++ b/yt_dlp/extractor/aeonco.py @@ -0,0 +1,40 @@ +from .common import InfoExtractor +from .vimeo import VimeoIE + + +class AeonCoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?aeon\.co/videos/(?P[^/?]+)' + _TESTS = [{ + 'url': 'https://aeon.co/videos/raw-solar-storm-footage-is-the-punk-rock-antidote-to-sleek-james-webb-imagery', + 'md5': 'e5884d80552c9b6ea8d268a258753362', + 'info_dict': { + 'id': '1284717', + 'ext': 'mp4', + 'title': 'Brilliant Noise', + 'thumbnail': 'https://i.vimeocdn.com/video/21006315-1a1e49da8b07fd908384a982b4ba9ff0268c509a474576ebdf7b1392f4acae3b-d_960', + 'uploader': 'Semiconductor', + 'uploader_id': 'semiconductor', + 'uploader_url': 'https://vimeo.com/semiconductor', + 'duration': 348 + } + }, { + 'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it', + 'md5': '4e5f3dad9dbda0dbfa2da41a851e631e', + 'info_dict': { + 'id': '728595228', + 'ext': 'mp4', + 'title': 'Wrought', + 'thumbnail': 'https://i.vimeocdn.com/video/1484618528-c91452611f9a4e4497735a533da60d45b2fe472deb0c880f0afaab0cd2efb22a-d_1280', + 'uploader': 'Biofilm Productions', + 'uploader_id': 'user140352216', + 'uploader_url': 'https://vimeo.com/user140352216', + 'duration': 1344 + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + vimeo_id = self._search_regex(r'hosterId":\s*"(?P[0-9]+)', webpage, 'vimeo id') + vimeo_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', 'https://aeon.co') + return self.url_result(vimeo_url, VimeoIE) diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index b0fd158f61..9276fe7997 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -338,7 +338,6 @@ class AfreecaTVIE(InfoExtractor): }] if not formats and not self.get_param('ignore_no_formats'): continue - self._sort_formats(formats) file_info = common_entry.copy() file_info.update({ 'id': format_id, @@ -380,7 +379,7 @@ class AfreecaTVIE(InfoExtractor): return info -class 
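The new AeonCo extractor defers to Vimeo, but player.vimeo.com embeds only play when the embedding page is sent as the Referer; `_smuggle_referrer` tucks that information into the URL so it survives the `url_result` hand-off. Roughly equivalent, using the generic smuggle helpers (the exact payload key is an assumption):

```python
from yt_dlp.utils import smuggle_url, unsmuggle_url

url = smuggle_url('https://player.vimeo.com/video/1284717',
                  {'http_headers': {'Referer': 'https://aeon.co'}})
clean_url, data = unsmuggle_url(url, default={})
assert data['http_headers']['Referer'] == 'https://aeon.co'
```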
AfreecaTVLiveIE(AfreecaTVIE):
+class AfreecaTVLiveIE(AfreecaTVIE):  # XXX: Do not subclass from concrete IE
     IE_NAME = 'afreecatv:live'
     _VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
 
@@ -464,8 +463,6 @@ class AfreecaTVLiveIE(AfreecaTVIE):
                 'quality': quality_key(quality_str),
             })
 
-        self._sort_formats(formats)
-
         station_info = self._download_json(
             'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
             query={'szBjId': broadcaster_id}, fatal=False,
diff --git a/yt_dlp/extractor/agora.py b/yt_dlp/extractor/agora.py
new file mode 100644
index 0000000000..abb2d3ff27
--- /dev/null
+++ b/yt_dlp/extractor/agora.py
@@ -0,0 +1,251 @@
+import functools
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    OnDemandPagedList,
+    int_or_none,
+    month_by_name,
+    parse_duration,
+    try_call,
+)
+
+
+class WyborczaVideoIE(InfoExtractor):
+    # This ID is not an article ID; it has to be extracted from the article
+    _VALID_URL = r'(?:wyborcza:video:|https?://wyborcza\.pl/(?:api-)?video/)(?P<id>\d+)'
+    IE_NAME = 'wyborcza:video'
+    _TESTS = [{
+        'url': 'wyborcza:video:26207634',
+        'info_dict': {
+            'id': '26207634',
+            'ext': 'mp4',
+            'title': '- Polska w 2020 r. jest innym państwem niż w 2015 r. Nie zmieniła się konstytucja, ale jest to już inny ustrój - mówi Adam Bodnar',
+            'description': ' ',
+            'uploader': 'Dorota Roman',
+            'duration': 2474,
+            'thumbnail': r're:https://.+\.jpg',
+        },
+    }, {
+        'url': 'https://wyborcza.pl/video/26207634',
+        'only_matching': True,
+    }, {
+        'url': 'https://wyborcza.pl/api-video/26207634',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        meta = self._download_json(f'https://wyborcza.pl/api-video/{video_id}', video_id)
+
+        formats = []
+        base_url = meta['redirector'].replace('http://', 'https://') + meta['basePath']
+        for quality in ('standard', 'high'):
+            if not meta['files'].get(quality):
+                continue
+            formats.append({
+                'url': base_url + meta['files'][quality],
+                'height': int_or_none(
+                    self._search_regex(
+                        r'p(\d+)[a-z]+\.mp4$', meta['files'][quality],
+                        'mp4 video height', default=None)),
+                'format_id': quality,
+            })
+        if meta['files'].get('dash'):
+            formats.extend(self._extract_mpd_formats(base_url + meta['files']['dash'], video_id))
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': meta.get('title'),
+            'description': meta.get('lead'),
+            'uploader': meta.get('signature'),
+            'thumbnail': meta.get('imageUrl'),
+            'duration': meta.get('duration'),
+        }
+
+
+class WyborczaPodcastIE(InfoExtractor):
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?(?:
+            wyborcza\.pl/podcast(?:/0,172673\.html)?|
+            wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html
+        )(?:\?(?:[^&#]+?&)*podcast=(?P<id>\d+))?
+    '''
+    _TESTS = [{
+        'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast',
+        'info_dict': {
+            'id': '100720',
+            'ext': 'mp3',
+            'title': 'Cyfrodziewczyny.
Kim były pionierki polskiej informatyki ', + 'uploader': 'Michał Nogaś ', + 'upload_date': '20210117', + 'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d', + 'duration': 3684.0, + 'thumbnail': r're:https://.+\.jpg', + }, + }, { + 'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673', + 'info_dict': { + 'id': '100673', + 'ext': 'mp3', + 'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?', + 'uploader': 'Agnieszka Urazińska ', + 'upload_date': '20210115', + 'description': 'md5:c161dc035f8dbb60077011fc41274899', + 'duration': 1803.0, + 'thumbnail': r're:https://.+\.jpg', + }, + }, { + 'url': 'https://wyborcza.pl/podcast', + 'info_dict': { + 'id': '334', + 'title': 'Gościnnie: Wyborcza, 8:10', + 'series': 'Gościnnie: Wyborcza, 8:10', + }, + 'playlist_mincount': 370, + }, { + 'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html', + 'info_dict': { + 'id': '395', + 'title': 'Gościnnie: Wysokie Obcasy', + 'series': 'Gościnnie: Wysokie Obcasy', + }, + 'playlist_mincount': 12, + }] + + def _real_extract(self, url): + podcast_id = self._match_id(url) + + if not podcast_id: # playlist + podcast_id = '395' if 'wysokieobcasy.pl/' in url else '334' + return self.url_result(TokFMAuditionIE._create_url(podcast_id), TokFMAuditionIE, podcast_id) + + meta = self._download_json('https://wyborcza.pl/api/podcast', podcast_id, + query={'guid': podcast_id, 'type': 'wo' if 'wysokieobcasy.pl/' in url else None}) + + day, month, year = self._search_regex(r'^(\d\d?) (\w+) (\d{4})$', meta.get('publishedDate'), + 'upload date', group=(1, 2, 3), default=(None, None, None)) + return { + 'id': podcast_id, + 'url': meta['url'], + 'title': meta.get('title'), + 'description': meta.get('description'), + 'thumbnail': meta.get('imageUrl'), + 'duration': parse_duration(meta.get('duration')), + 'uploader': meta.get('author'), + 'upload_date': try_call(lambda: f'{year}{month_by_name(month, lang="pl"):0>2}{day:0>2}'), + } + + +class TokFMPodcastIE(InfoExtractor): + _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/podcast/|tokfm:podcast:)(?P\d+),?' 
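The `upload_date` assembly in the Wyborcza podcast extractor above splits a Polish-language date string and feeds the month through `month_by_name(..., lang='pl')`. In isolation:

```python
from yt_dlp.utils import month_by_name

day, month, year = '17', 'stycznia', '2021'  # "17 January 2021" in Polish
# month_by_name resolves the localized month name to a 1-based index and
# :0>2 left-pads it to two digits, expected to yield '20210117' here.
upload_date = f'{year}{month_by_name(month, lang="pl"):0>2}{day:0>2}'
```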
+    IE_NAME = 'tokfm:podcast'
+    _TESTS = [{
+        'url': 'https://audycje.tokfm.pl/podcast/91275,-Systemowy-rasizm-Czy-zamieszki-w-USA-po-morderstwie-w-Minneapolis-doprowadza-do-zmian-w-sluzbach-panstwowych',
+        'info_dict': {
+            'id': '91275',
+            'ext': 'aac',
+            'title': 'md5:a9b15488009065556900169fb8061cce',
+            'episode': 'md5:a9b15488009065556900169fb8061cce',
+            'series': 'Analizy',
+        },
+    }]
+
+    def _real_extract(self, url):
+        media_id = self._match_id(url)
+
+        # If this endpoint breaks, the getPodcasts API below also works, but it returns a lot of useless data:
+        # https://api.podcast.radioagora.pl/api4/getPodcasts?podcast_id=100091&with_guests=true&with_leaders_for_mobile=true
+        metadata = self._download_json(
+            f'https://audycje.tokfm.pl/getp/3{media_id}', media_id, 'Downloading podcast metadata')
+        if not metadata:
+            raise ExtractorError('No such podcast', expected=True)
+        metadata = metadata[0]
+
+        formats = []
+        for ext in ('aac', 'mp3'):
+            url_data = self._download_json(
+                f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
+                media_id, 'Downloading podcast %s URL' % ext)
+            # prevents inserting the mp3 (default) multiple times
+            if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
+                formats.append({
+                    'url': url_data['link_ssl'],
+                    'ext': ext,
+                    'vcodec': 'none',
+                    'acodec': ext,
+                })
+
+        return {
+            'id': media_id,
+            'formats': formats,
+            'title': metadata.get('podcast_name'),
+            'series': metadata.get('series_name'),
+            'episode': metadata.get('podcast_name'),
+        }
+
+
+class TokFMAuditionIE(InfoExtractor):
+    _VALID_URL = r'(?:https?://audycje\.tokfm\.pl/audycja/|tokfm:audition:)(?P<id>\d+),?'
+    IE_NAME = 'tokfm:audition'
+    _TESTS = [{
+        'url': 'https://audycje.tokfm.pl/audycja/218,Analizy',
+        'info_dict': {
+            'id': '218',
+            'title': 'Analizy',
+            'series': 'Analizy',
+        },
+        'playlist_count': 1635,
+    }]
+
+    _PAGE_SIZE = 30
+    _HEADERS = {
+        'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36',
+    }
+
+    @staticmethod
+    def _create_url(id):
+        return f'https://audycje.tokfm.pl/audycja/{id}'
+
+    def _real_extract(self, url):
+        audition_id = self._match_id(url)
+
+        data = self._download_json(
+            f'https://api.podcast.radioagora.pl/api4/getSeries?series_id={audition_id}',
+            audition_id, 'Downloading audition metadata', headers=self._HEADERS)
+        if not data:
+            raise ExtractorError('No such audition', expected=True)
+        data = data[0]
+
+        entries = OnDemandPagedList(functools.partial(
+            self._fetch_page, audition_id, data), self._PAGE_SIZE)
+
+        return {
+            '_type': 'playlist',
+            'id': audition_id,
+            'title': data.get('series_name'),
+            'series': data.get('series_name'),
+            'entries': entries,
+        }
+
+    def _fetch_page(self, audition_id, data, page):
+        for retry in self.RetryManager():
+            podcast_page = self._download_json(
+                f'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id={audition_id}&limit=30&offset={page}&with_guests=true&with_leaders_for_mobile=true',
+                audition_id, f'Downloading podcast list page {page + 1}', headers=self._HEADERS)
+            if not podcast_page:
+                retry.error = ExtractorError('Agora returned empty page', expected=True)
+
+        for podcast in podcast_page:
+            yield {
+                '_type': 'url_transparent',
+                'url': podcast['podcast_sharing_url'],
+                'ie_key': TokFMPodcastIE.ie_key(),
+                'title': podcast.get('podcast_name'),
+                'episode': podcast.get('podcast_name'),
+                'description': podcast.get('podcast_description'),
+                'timestamp':
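A quirk worth spelling out from the format loop above: per its comment, `getSongUrl` answers an `.aac` request with the default `.mp3` file when no AAC rendition exists, so matching the requested extension against the returned URL keeps one file from being listed under two codecs. A stand-alone rendering of that check:

```python
def collect_formats(responses):
    # responses: requested ext -> JSON dict as returned by getSongUrl
    formats = []
    for ext in ('aac', 'mp3'):
        link = responses.get(ext, {}).get('link_ssl', '')
        if f'.{ext}' in link:
            formats.append({'url': link, 'ext': ext, 'vcodec': 'none', 'acodec': ext})
    return formats

# Both requests answered with the same mp3, so only one format survives:
fmts = collect_formats({'aac': {'link_ssl': 'https://dl.example/pod.mp3'},
                        'mp3': {'link_ssl': 'https://dl.example/pod.mp3'}})
assert [f['ext'] for f in fmts] == ['mp3']
```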
int_or_none(podcast.get('podcast_timestamp')), + 'series': data.get('series_name'), + } diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py new file mode 100644 index 0000000000..0b73a966ed --- /dev/null +++ b/yt_dlp/extractor/airtv.py @@ -0,0 +1,96 @@ +from .common import InfoExtractor +from .youtube import YoutubeIE +from ..utils import ( + determine_ext, + int_or_none, + mimetype2ext, + parse_iso8601, + traverse_obj +) + + +class AirTVIE(InfoExtractor): + _VALID_URL = r'https?://www\.air\.tv/watch\?v=(?P\w+)' + _TESTS = [{ + # without youtube_id + 'url': 'https://www.air.tv/watch?v=W87jcWleSn2hXZN47zJZsQ', + 'info_dict': { + 'id': 'W87jcWleSn2hXZN47zJZsQ', + 'ext': 'mp4', + 'release_date': '20221003', + 'release_timestamp': 1664792603, + 'channel_id': 'vgfManQlRQKgoFQ8i8peFQ', + 'title': 'md5:c12d49ed367c3dadaa67659aff43494c', + 'upload_date': '20221003', + 'duration': 151, + 'view_count': int, + 'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg', + 'timestamp': 1664792603, + } + }, { + # with youtube_id + 'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q', + 'info_dict': { + 'id': '2ZTqmpee-bQ', + 'ext': 'mp4', + 'comment_count': int, + 'tags': 'count:11', + 'channel_follower_count': int, + 'like_count': int, + 'uploader': 'Newsflare', + 'thumbnail': 'https://i.ytimg.com/vi_webp/2ZTqmpee-bQ/maxresdefault.webp', + 'availability': 'public', + 'title': 'Geese Chase Alligator Across Golf Course', + 'uploader_id': 'NewsflareBreaking', + 'channel_url': 'https://www.youtube.com/channel/UCzSSoloGEz10HALUAbYhngQ', + 'description': 'md5:99b21d9cea59330149efbd9706e208f5', + 'age_limit': 0, + 'channel_id': 'UCzSSoloGEz10HALUAbYhngQ', + 'uploader_url': 'http://www.youtube.com/user/NewsflareBreaking', + 'view_count': int, + 'categories': ['News & Politics'], + 'live_status': 'not_live', + 'playable_in_embed': True, + 'channel': 'Newsflare', + 'duration': 37, + 'upload_date': '20180511', + } + }] + + def _get_formats_and_subtitle(self, json_data, video_id): + formats, subtitles = [], {} + for source in traverse_obj(json_data, 'sources', 'sources_desktop', ...): + ext = determine_ext(source.get('src'), mimetype2ext(source.get('type'))) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles(source.get('src'), video_id) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + formats.append({'url': source.get('src'), 'ext': ext}) + return formats, subtitles + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['initialState']['videos'][display_id] + if nextjs_json.get('youtube_id'): + return self.url_result( + f'https://www.youtube.com/watch?v={nextjs_json.get("youtube_id")}', YoutubeIE) + + formats, subtitles = self._get_formats_and_subtitle(nextjs_json, display_id) + return { + 'id': display_id, + 'title': nextjs_json.get('title') or self._html_search_meta('og:title', webpage), + 'formats': formats, + 'subtitles': subtitles, + 'description': nextjs_json.get('description') or None, + 'duration': int_or_none(nextjs_json.get('duration')), + 'thumbnails': [ + {'url': thumbnail} + for thumbnail in traverse_obj(nextjs_json, ('default_thumbnails', ...))], + 'channel_id': traverse_obj(nextjs_json, 'channel', 'channel_slug'), + 'timestamp': parse_iso8601(nextjs_json.get('created')), + 'release_timestamp': 
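`OnDemandPagedList`, as used by `TokFMAuditionIE` above, fetches a page only when its entries are actually consumed (so `--playlist-items` can skip pages entirely); that is why `_fetch_page` is handed over lazily via `functools.partial`. Minimal usage with a toy page source:

```python
import functools

from yt_dlp.utils import OnDemandPagedList

def fetch_page(page_size, page):
    # Stand-in for a network call; `page` is the zero-based page number.
    return (f'episode-{page * page_size + i}' for i in range(page_size))

entries = OnDemandPagedList(functools.partial(fetch_page, 30), 30)
print(entries.getslice(0, 3))  # only page 0 is ever "downloaded"
```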
parse_iso8601(nextjs_json.get('published')),
+            'view_count': int_or_none(nextjs_json.get('views')),
+        }
diff --git a/yt_dlp/extractor/aitube.py b/yt_dlp/extractor/aitube.py
new file mode 100644
index 0000000000..89a64503fb
--- /dev/null
+++ b/yt_dlp/extractor/aitube.py
@@ -0,0 +1,60 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, merge_dicts
+
+
+class AitubeKZVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://aitube\.kz/(?:video|embed/)\?(?:[^\?]+)?id=(?P<id>[\w-]+)'
+    _TESTS = [{
+        # id parameter as the first parameter
+        'url': 'https://aitube.kz/video?id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7&season=1',
+        'info_dict': {
+            'id': '9291d29b-c038-49a1-ad42-3da2051d353c',
+            'ext': 'mp4',
+            'duration': 2174.0,
+            'channel_id': '94962f73-013b-432c-8853-1bd78ca860fe',
+            'like_count': int,
+            'channel': 'ASTANA TV',
+            'comment_count': int,
+            'view_count': int,
+            'description': 'Смотреть любимые сериалы и видео, поделиться видео и сериалами с друзьями и близкими',
+            'thumbnail': 'https://cdn.static02.aitube.kz/kz.aitudala.aitube.staticaccess/files/ddf2a2ff-bee3-409b-b5f2-2a8202bba75b',
+            'upload_date': '20221102',
+            'timestamp': 1667370519,
+            'title': 'Ангел хранитель 1 серия',
+            'channel_follower_count': int,
+        }
+    }, {
+        # embed url
+        'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',
+        'only_matching': True,
+    }, {
+        # id parameter is not the first parameter
+        'url': 'https://aitube.kz/video?season=1&id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        nextjs_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['videoInfo']
+        json_ld_data = self._search_json_ld(webpage, video_id)
+
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            f'https://api-http.aitube.kz/kz.aitudala.aitube.staticaccess/video/{video_id}/video', video_id)
+
+        return merge_dicts({
+            'id': video_id,
+            'title': nextjs_data.get('title') or self._html_search_meta(['name', 'og:title'], webpage),
+            'description': nextjs_data.get('description'),
+            'formats': formats,
+            'subtitles': subtitles,
+            'view_count': (nextjs_data.get('viewCount')
+                           or int_or_none(self._html_search_meta('ya:ovs:views_total', webpage))),
+            'like_count': nextjs_data.get('likeCount'),
+            'channel': nextjs_data.get('channelTitle'),
+            'channel_id': nextjs_data.get('channelId'),
+            'thumbnail': nextjs_data.get('coverUrl'),
+            'comment_count': nextjs_data.get('commentCount'),
+            'channel_follower_count': int_or_none(nextjs_data.get('channelSubscriberCount')),
+        }, json_ld_data)
diff --git a/yt_dlp/extractor/allocine.py b/yt_dlp/extractor/allocine.py
index 1f881e2a09..2d342cf039 100644
--- a/yt_dlp/extractor/allocine.py
+++ b/yt_dlp/extractor/allocine.py
@@ -112,8 +112,6 @@ class AllocineIE(InfoExtractor):
             })
         duration, view_count, timestamp = [None] * 3
 
-        self._sort_formats(formats)
-
         return {
             'id': video_id,
             'display_id': display_id,
diff --git a/yt_dlp/extractor/alsace20tv.py b/yt_dlp/extractor/alsace20tv.py
index d16ab496e9..ea3332e3d5 100644
--- a/yt_dlp/extractor/alsace20tv.py
+++ b/yt_dlp/extractor/alsace20tv.py
@@ -22,7 +22,6 @@ class Alsace20TVBaseIE(InfoExtractor):
                 self._extract_smil_formats(fmt_url, video_id, fatal=False) if '/smil:_' in fmt_url
                 else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
 
-        self._sort_formats(formats)
 
         webpage =
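Both new extractors above pull their metadata from the `__NEXT_DATA__` JSON blob that Next.js pages embed. A hand-rolled approximation of what `_search_nextjs_data` does (the real helper adds error reporting and reuses the common JSON machinery):

```python
import json
import re

def search_nextjs_data(webpage):
    m = re.search(
        r'<script[^>]+id="__NEXT_DATA__"[^>]*>\s*(\{.+?\})\s*</script>',
        webpage, re.DOTALL)
    return json.loads(m.group(1)) if m else None

page = '<script id="__NEXT_DATA__" type="application/json">{"props": {"pageProps": {}}}</script>'
assert search_nextjs_data(page)['props'] == {'pageProps': {}}
```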
(url and self._download_webpage(url, video_id, fatal=False)) or '' thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage)) diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index b76ccb2a19..bfe066bc68 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -63,8 +63,6 @@ class AluraIE(InfoExtractor): f['height'] = int('720' if m.group('res') == 'hd' else '480') formats.extend(video_format) - self._sort_formats(formats) - return { 'id': video_id, 'title': video_title, @@ -113,7 +111,7 @@ class AluraIE(InfoExtractor): raise ExtractorError('Unable to log in') -class AluraCourseIE(AluraIE): +class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE _VALID_URL = r'https?://(?:cursos\.)?alura\.com\.br/course/(?P[^/]+)' _LOGIN_URL = 'https://cursos.alura.com.br/loginForm?urlAfterLogin=/loginForm' diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index 56a8d844ac..a03f983e0e 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -1,5 +1,17 @@ +import re + from .common import InfoExtractor -from ..utils import ExtractorError, int_or_none +from ..utils import ( + ExtractorError, + clean_html, + float_or_none, + get_element_by_attribute, + get_element_by_class, + int_or_none, + js_to_json, + traverse_obj, + url_or_none, +) class AmazonStoreIE(InfoExtractor): @@ -9,7 +21,7 @@ class AmazonStoreIE(InfoExtractor): 'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/', 'info_dict': { 'id': 'B098XNCHLD', - 'title': 'md5:5f3194dbf75a8dcfc83079bd63a2abed', + 'title': str, }, 'playlist_mincount': 1, 'playlist': [{ @@ -18,31 +30,45 @@ class AmazonStoreIE(InfoExtractor): 'ext': 'mp4', 'title': 'mcdodo usb c cable 100W 5a', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 34, }, - }] + }], + 'expected_warnings': ['Unable to extract data'], }, { 'url': 'https://www.amazon.in/Sony-WH-1000XM4-Cancelling-Headphones-Bluetooth/dp/B0863TXGM3', 'info_dict': { 'id': 'B0863TXGM3', - 'title': 'md5:b0bde4881d3cfd40d63af19f7898b8ff', + 'title': str, }, 'playlist_mincount': 4, + 'expected_warnings': ['Unable to extract data'], }, { 'url': 'https://www.amazon.com/dp/B0845NXCXF/', 'info_dict': { 'id': 'B0845NXCXF', - 'title': 'md5:2145cd4e3c7782f1ee73649a3cff1171', + 'title': str, }, 'playlist-mincount': 1, + 'expected_warnings': ['Unable to extract data'], + }, { + 'url': 'https://www.amazon.es/Samsung-Smartphone-s-AMOLED-Quad-c%C3%A1mara-espa%C3%B1ola/dp/B08WX337PQ', + 'info_dict': { + 'id': 'B08WX337PQ', + 'title': str, + }, + 'playlist_mincount': 1, + 'expected_warnings': ['Unable to extract data'], }] def _real_extract(self, url): id = self._match_id(url) - for retry in self.RetryManager(fatal=True): + for retry in self.RetryManager(): webpage = self._download_webpage(url, id) try: - data_json = self._parse_json(self._html_search_regex(r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'(.*)\'\)', webpage, 'data'), id) + data_json = self._search_json( + r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id, + transform_source=js_to_json) except ExtractorError as e: retry.error = e @@ -55,4 +81,90 @@ class AmazonStoreIE(InfoExtractor): 'height': int_or_none(video.get('videoHeight')), 'width': int_or_none(video.get('videoWidth')), } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')] - return self.playlist_result(entries, playlist_id=id, playlist_title=data_json['title']) + return self.playlist_result(entries, playlist_id=id, 
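The `RetryManager` loop in the Amazon store extractor above replaces a hard failure: each pass re-downloads the page, and assigning `retry.error` marks the attempt failed so the manager can retry or re-raise once attempts are exhausted. Distilled to plain Python (stand-in names throughout):

```python
import json

def fetch_until_parsed(download, parse, tries=3):
    last_err = None
    for _ in range(tries):
        try:
            return parse(download())  # stand-ins for _download_webpage
        except ValueError as e:       # and _search_json / ExtractorError
            last_err = e
    raise last_err

assert fetch_until_parsed(lambda: '{"videos": []}', json.loads) == {'videos': []}
```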
playlist_title=data_json.get('title')) + + +class AmazonReviewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/gp/customer-reviews/(?P[^/&#$?]+)' + _TESTS = [{ + 'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl', + 'info_dict': { + 'id': 'R10VE9VUSY19L3', + 'ext': 'mp4', + 'title': 'Get squad #Suspicious', + 'description': 'md5:7012695052f440a1e064e402d87e0afb', + 'uploader': 'Kimberly Cronkright', + 'average_rating': 1.0, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['Review body was not found in webpage'], + }, { + 'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl?language=es_US', + 'info_dict': { + 'id': 'R10VE9VUSY19L3', + 'ext': 'mp4', + 'title': 'Get squad #Suspicious', + 'description': 'md5:7012695052f440a1e064e402d87e0afb', + 'uploader': 'Kimberly Cronkright', + 'average_rating': 1.0, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['Review body was not found in webpage'], + }, { + 'url': 'https://www.amazon.in/gp/customer-reviews/RV1CO8JN5VGXV/', + 'info_dict': { + 'id': 'RV1CO8JN5VGXV', + 'ext': 'mp4', + 'title': 'Not sure about its durability', + 'description': 'md5:1a252c106357f0a3109ebf37d2e87494', + 'uploader': 'Shoaib Gulzar', + 'average_rating': 2.0, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['Review body was not found in webpage'], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + for retry in self.RetryManager(): + webpage = self._download_webpage(url, video_id) + review_body = get_element_by_attribute('data-hook', 'review-body', webpage) + if not review_body: + retry.error = ExtractorError('Review body was not found in webpage', expected=True) + + formats, subtitles = [], {} + + manifest_url = self._search_regex( + r'data-video-url="([^"]+)"', review_body, 'm3u8 url', default=None) + if url_or_none(manifest_url): + fmts, subtitles = self._extract_m3u8_formats_and_subtitles( + manifest_url, video_id, 'mp4', fatal=False) + formats.extend(fmts) + + video_url = self._search_regex( + r']+\bvalue="([^"]+)"[^>]+\bclass="video-url"', review_body, 'mp4 url', default=None) + if url_or_none(video_url): + formats.append({ + 'url': video_url, + 'ext': 'mp4', + 'format_id': 'http-mp4', + }) + + if not formats: + self.raise_no_formats('No video found for this customer review', expected=True) + + return { + 'id': video_id, + 'title': (clean_html(get_element_by_attribute('data-hook', 'review-title', webpage)) + or self._html_extract_title(webpage)), + 'description': clean_html(traverse_obj(re.findall( + r'(.+?)', review_body), -1)), + 'uploader': clean_html(get_element_by_class('a-profile-name', webpage)), + 'average_rating': float_or_none(clean_html(get_element_by_attribute( + 'data-hook', 'review-star-rating', webpage) or '').partition(' ')[0]), + 'thumbnail': self._search_regex( + r'data-thumbnail-url="([^"]+)"', review_body, 'thumbnail', default=None), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py new file mode 100644 index 0000000000..b57d985d10 --- /dev/null +++ b/yt_dlp/extractor/amazonminitv.py @@ -0,0 +1,291 @@ +import json + +from .common import InfoExtractor +from ..utils import ExtractorError, int_or_none, traverse_obj, try_get + + +class AmazonMiniTVBaseIE(InfoExtractor): + def _real_initialize(self): + self._download_webpage( + 'https://www.amazon.in/minitv', None, + 
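Condensing the format logic of `AmazonReviewsIE` above: the HLS manifest is preferred when present, the progressive MP4 is kept as an independent format, and a hard error is raised only when both are absent (`extract_hls` is a stand-in for `_extract_m3u8_formats_and_subtitles`):

```python
from yt_dlp.utils import url_or_none

def review_formats(manifest_url, video_url, extract_hls=lambda url: []):
    formats = []
    if url_or_none(manifest_url):
        formats += extract_hls(manifest_url)
    if url_or_none(video_url):
        formats.append({'url': video_url, 'ext': 'mp4', 'format_id': 'http-mp4'})
    if not formats:
        raise ValueError('No video found for this customer review')
    return formats

assert review_formats(None, 'https://m.media-amazon.com/clip.mp4')[0]['format_id'] == 'http-mp4'
```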
note='Fetching guest session cookies') + AmazonMiniTVBaseIE.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value + + def _call_api(self, asin, data=None, note=None): + device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'} + if data: + data['variables'].update({ + 'contentType': 'VOD', + 'sessionIdToken': self.session_id, + **device, + }) + + resp = self._download_json( + f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}', + asin, note=note, headers={'Content-Type': 'application/json'}, + data=json.dumps(data).encode() if data else None, + query=None if data else { + 'deviceType': 'A1WMMUXPCUJL4N', + 'contentId': asin, + **device, + }) + + if resp.get('errors'): + raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}') + elif not data: + return resp + return resp['data'][data['operationName']] + + +class AmazonMiniTVIE(AmazonMiniTVBaseIE): + _VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P[a-f0-9-]+)' + _TESTS = [{ + 'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv', + 'info_dict': { + 'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840', + 'ext': 'mp4', + 'title': 'May I Kiss You?', + 'language': 'Hindi', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'md5:a549bfc747973e04feb707833474e59d', + 'release_timestamp': 1644710400, + 'release_date': '20220213', + 'duration': 846, + 'chapters': 'count:2', + 'series': 'Couple Goals', + 'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', + 'season': 'Season 3', + 'season_number': 3, + 'season_id': 'amzn1.dv.gti.20331016-d9b9-4968-b991-c89fa4927a36', + 'episode': 'May I Kiss You?', + 'episode_number': 2, + 'episode_id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840', + }, + }, { + 'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv', + 'info_dict': { + 'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab', + 'ext': 'mp4', + 'title': 'Jahaan', + 'language': 'Hindi', + 'thumbnail': r're:^https?://.*\.jpg', + 'description': 'md5:05eb765a77bf703f322f120ec6867339', + 'release_timestamp': 1647475200, + 'release_date': '20220317', + 'duration': 783, + 'chapters': [], + }, + }, { + 'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab', + 'only_matching': True, + }, { + 'url': 'amazonminitv:amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab', + 'only_matching': True, + }, { + 'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab', + 'only_matching': True, + }] + + _GRAPHQL_QUERY_CONTENT = ''' +query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) { + content( + applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId} + contentId: $contentId + contentType: $contentType + ) { + contentId + name + ... on Episode { + contentId + vodType + name + images + description { + synopsis + contentLengthInSeconds + } + publicReleaseDateUTC + audioTracks + seasonId + seriesId + seriesName + seasonNumber + episodeNumber + timecode { + endCreditsTime + } + } + ... 
+
+
+class AmazonMiniTVIE(AmazonMiniTVBaseIE):
+    _VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)'
+    _TESTS = [{
+        'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
+        'info_dict': {
+            'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
+            'ext': 'mp4',
+            'title': 'May I Kiss You?',
+            'language': 'Hindi',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'md5:a549bfc747973e04feb707833474e59d',
+            'release_timestamp': 1644710400,
+            'release_date': '20220213',
+            'duration': 846,
+            'chapters': 'count:2',
+            'series': 'Couple Goals',
+            'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
+            'season': 'Season 3',
+            'season_number': 3,
+            'season_id': 'amzn1.dv.gti.20331016-d9b9-4968-b991-c89fa4927a36',
+            'episode': 'May I Kiss You?',
+            'episode_number': 2,
+            'episode_id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
+        },
+    }, {
+        'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
+        'info_dict': {
+            'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
+            'ext': 'mp4',
+            'title': 'Jahaan',
+            'language': 'Hindi',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'description': 'md5:05eb765a77bf703f322f120ec6867339',
+            'release_timestamp': 1647475200,
+            'release_date': '20220317',
+            'duration': 783,
+            'chapters': [],
+        },
+    }, {
+        'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab',
+        'only_matching': True,
+    }, {
+        'url': 'amazonminitv:amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
+        'only_matching': True,
+    }, {
+        'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab',
+        'only_matching': True,
+    }]
+
+    _GRAPHQL_QUERY_CONTENT = '''
+query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) {
+  content(
+    applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
+    contentId: $contentId
+    contentType: $contentType
+  ) {
+    contentId
+    name
+    ... on Episode {
+      contentId
+      vodType
+      name
+      images
+      description {
+        synopsis
+        contentLengthInSeconds
+      }
+      publicReleaseDateUTC
+      audioTracks
+      seasonId
+      seriesId
+      seriesName
+      seasonNumber
+      episodeNumber
+      timecode {
+        endCreditsTime
+      }
+    }
+    ... on MovieContent {
+      contentId
+      vodType
+      name
+      description {
+        synopsis
+        contentLengthInSeconds
+      }
+      images
+      publicReleaseDateUTC
+      audioTracks
+    }
+  }
+}'''
+
+    def _real_extract(self, url):
+        asin = f'amzn1.dv.gti.{self._match_id(url)}'
+        prs = self._call_api(asin, note='Downloading playback info')
+
+        formats, subtitles = [], {}
+        for type_, asset in prs['playbackAssets'].items():
+            if not traverse_obj(asset, 'manifestUrl'):
+                continue
+            if type_ == 'hls':
+                m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+                    asset['manifestUrl'], asin, ext='mp4', entry_protocol='m3u8_native',
+                    m3u8_id=type_, fatal=False)
+                formats.extend(m3u8_fmts)
+                subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+            elif type_ == 'dash':
+                mpd_fmts, mpd_subs = self._extract_mpd_formats_and_subtitles(
+                    asset['manifestUrl'], asin, mpd_id=type_, fatal=False)
+                formats.extend(mpd_fmts)
+                subtitles = self._merge_subtitles(subtitles, mpd_subs)
+            else:
+                self.report_warning(f'Unknown asset type: {type_}')
+
+        title_info = self._call_api(
+            asin, note='Downloading title info', data={
+                'operationName': 'content',
+                'variables': {'contentId': asin},
+                'query': self._GRAPHQL_QUERY_CONTENT,
+            })
+        credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000)
+        is_episode = title_info.get('vodType') == 'EPISODE'
+
+        return {
+            'id': asin,
+            'title': title_info.get('name'),
+            'formats': formats,
+            'subtitles': subtitles,
+            'language': traverse_obj(title_info, ('audioTracks', 0)),
+            'thumbnails': [{
+                'id': type_,
+                'url': url,
+            } for type_, url in (title_info.get('images') or {}).items()],
+            'description': traverse_obj(title_info, ('description', 'synopsis')),
+            'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)),
+            'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')),
+            'chapters': [{
+                'start_time': credits_time,
+                'title': 'End Credits',
+            }] if credits_time else [],
+            'series': title_info.get('seriesName'),
+            'series_id': title_info.get('seriesId'),
+            'season_number': title_info.get('seasonNumber'),
+            'season_id': title_info.get('seasonId'),
+            'episode': title_info.get('name') if is_episode else None,
+            'episode_number': title_info.get('episodeNumber'),
+            'episode_id': asin if is_episode else None,
+        }
+
+
+class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
+    IE_NAME = 'amazonminitv:season'
+    _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
+    IE_DESC = 'Amazon MiniTV Season, "minitv:season:" prefix'
+    _TESTS = [{
+        'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
+        'playlist_mincount': 6,
+        'info_dict': {
+            'id': 'amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
+        },
+    }, {
+        'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0',
+        'only_matching': True,
+    }]
+
+    _GRAPHQL_QUERY = '''
+query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) {
+  getEpisodes(
+    applicationContextInput: {sessionIdToken: $sessionIdToken, deviceLocale: $deviceLocale, clientId: $clientId}
+    episodeOrSeasonId: $episodeOrSeasonId
+  ) {
+    episodes {
+      ...
on Episode { + contentId + name + images + seriesName + seasonId + seriesId + seasonNumber + episodeNumber + description { + synopsis + contentLengthInSeconds + } + publicReleaseDateUTC + } + } + } +} +''' + + def _entries(self, asin): + season_info = self._call_api( + asin, note='Downloading season info', data={ + 'operationName': 'getEpisodes', + 'variables': {'episodeOrSeasonId': asin}, + 'query': self._GRAPHQL_QUERY, + }) + + for episode in season_info['episodes']: + yield self.url_result( + f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId']) + + def _real_extract(self, url): + asin = f'amzn1.dv.gti.{self._match_id(url)}' + return self.playlist_result(self._entries(asin), asin) + + +class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE): + IE_NAME = 'amazonminitv:series' + _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P[a-f0-9-]+)' + IE_DESC = 'Amazon MiniTV Series, "minitv:series:" prefix' + _TESTS = [{ + 'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', + 'playlist_mincount': 3, + 'info_dict': { + 'id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', + }, + }, { + 'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0', + 'only_matching': True, + }] + + _GRAPHQL_QUERY = ''' +query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) { + getSeasons( + applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId} + episodeOrSeasonOrSeriesId: $episodeOrSeasonOrSeriesId + ) { + seasons { + seasonId + } + } +} +''' + + def _entries(self, asin): + season_info = self._call_api( + asin, note='Downloading series info', data={ + 'operationName': 'getSeasons', + 'variables': {'episodeOrSeasonOrSeriesId': asin}, + 'query': self._GRAPHQL_QUERY, + }) + + for season in season_info['seasons']: + yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId']) + + def _real_extract(self, url): + asin = f'amzn1.dv.gti.{self._match_id(url)}' + return self.playlist_result(self._entries(asin), asin) diff --git a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py index e04ecf65f0..c58bc7bfbf 100644 --- a/yt_dlp/extractor/amcnetworks.py +++ b/yt_dlp/extractor/amcnetworks.py @@ -9,7 +9,7 @@ from ..utils import ( ) -class AMCNetworksIE(ThePlatformIE): +class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE _VALID_URL = r'https?://(?:www\.)?(?Pamc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)' _TESTS = [{ 'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631', @@ -106,7 +106,6 @@ class AMCNetworksIE(ThePlatformIE): media_url = update_url_query(media_url, query) formats, subtitles = self._extract_theplatform_smil( media_url, video_id) - self._sort_formats(formats) thumbnails = [] thumbnail_urls = [properties.get('imageDesktop')] diff --git a/yt_dlp/extractor/americastestkitchen.py b/yt_dlp/extractor/americastestkitchen.py index f5747cf1e8..e889458a28 100644 --- a/yt_dlp/extractor/americastestkitchen.py +++ b/yt_dlp/extractor/americastestkitchen.py @@ -11,7 +11,7 @@ from ..utils import ( class AmericasTestKitchenIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?Pepisode|videos)/(?P\d+)' + _VALID_URL = 
r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?:cooks(?:country|illustrated)/)?(?Pepisode|videos)/(?P\d+)' _TESTS = [{ 'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers', 'md5': 'b861c3e365ac38ad319cfd509c30577f', @@ -19,15 +19,20 @@ class AmericasTestKitchenIE(InfoExtractor): 'id': '5b400b9ee338f922cb06450c', 'title': 'Japanese Suppers', 'ext': 'mp4', + 'display_id': 'weeknight-japanese-suppers', 'description': 'md5:64e606bfee910627efc4b5f050de92b3', - 'thumbnail': r're:^https?://', - 'timestamp': 1523318400, - 'upload_date': '20180410', - 'release_date': '20180410', - 'series': "America's Test Kitchen", - 'season_number': 18, + 'timestamp': 1523304000, + 'upload_date': '20180409', + 'release_date': '20180409', + 'series': 'America\'s Test Kitchen', + 'season': 'Season 18', 'episode': 'Japanese Suppers', + 'season_number': 18, 'episode_number': 15, + 'duration': 1376, + 'thumbnail': r're:^https?://', + 'average_rating': 0, + 'view_count': int, }, 'params': { 'skip_download': True, @@ -40,15 +45,20 @@ class AmericasTestKitchenIE(InfoExtractor): 'id': '5fbe8c61bda2010001c6763b', 'title': 'Simple Chicken Dinner', 'ext': 'mp4', + 'display_id': 'atktv_2103_simple-chicken-dinner_full-episode_web-mp4', 'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7', - 'thumbnail': r're:^https?://', - 'timestamp': 1610755200, - 'upload_date': '20210116', - 'release_date': '20210116', - 'series': "America's Test Kitchen", - 'season_number': 21, + 'timestamp': 1610737200, + 'upload_date': '20210115', + 'release_date': '20210115', + 'series': 'America\'s Test Kitchen', + 'season': 'Season 21', 'episode': 'Simple Chicken Dinner', + 'season_number': 21, 'episode_number': 3, + 'duration': 1397, + 'thumbnail': r're:^https?://', + 'view_count': int, + 'average_rating': 0, }, 'params': { 'skip_download': True, @@ -56,6 +66,12 @@ class AmericasTestKitchenIE(InfoExtractor): }, { 'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon', 'only_matching': True, + }, { + 'url': 'https://www.americastestkitchen.com/cookscountry/episode/564-when-only-chocolate-will-do', + 'only_matching': True, + }, { + 'url': 'https://www.americastestkitchen.com/cooksillustrated/videos/4478-beef-wellington', + 'only_matching': True, }, { 'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do', 'only_matching': True, @@ -90,7 +106,7 @@ class AmericasTestKitchenIE(InfoExtractor): class AmericasTestKitchenSeasonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?Pamericastestkitchen|cookscountry)\.com/episodes/browse/season_(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?(?Pamericastestkitchen|(?Pcooks(?:country|illustrated)))\.com(?:(?:/(?Pcooks(?:country|illustrated)))?(?:/?$|(?\d+)))' _TESTS = [{ # ATK Season 'url': 'https://www.americastestkitchen.com/episodes/browse/season_1', @@ -101,47 +117,91 @@ class AmericasTestKitchenSeasonIE(InfoExtractor): 'playlist_count': 13, }, { # Cooks Country Season - 'url': 'https://www.cookscountry.com/episodes/browse/season_12', + 'url': 'https://www.americastestkitchen.com/cookscountry/episodes/browse/season_12', 'info_dict': { 'id': 'season_12', 'title': 'Season 12', }, 'playlist_count': 13, + }, { + # America's Test Kitchen Series + 'url': 'https://www.americastestkitchen.com/', + 'info_dict': { + 'id': 'americastestkitchen', + 'title': 'America\'s Test Kitchen', + }, + 'playlist_count': 558, + }, { + # Cooks Country Series + 'url': 'https://www.americastestkitchen.com/cookscountry', + 
'info_dict': { + 'id': 'cookscountry', + 'title': 'Cook\'s Country', + }, + 'playlist_count': 199, + }, { + 'url': 'https://www.americastestkitchen.com/cookscountry/', + 'only_matching': True, + }, { + 'url': 'https://www.cookscountry.com/episodes/browse/season_12', + 'only_matching': True, + }, { + 'url': 'https://www.cookscountry.com', + 'only_matching': True, + }, { + 'url': 'https://www.americastestkitchen.com/cooksillustrated/', + 'only_matching': True, + }, { + 'url': 'https://www.cooksillustrated.com', + 'only_matching': True, }] def _real_extract(self, url): - show_name, season_number = self._match_valid_url(url).groups() - season_number = int(season_number) + season_number, show1, show = self._match_valid_url(url).group('season', 'show', 'show2') + show_path = ('/' + show) if show else '' + show = show or show1 + season_number = int_or_none(season_number) - slug = 'atk' if show_name == 'americastestkitchen' else 'cco' + slug, title = { + 'americastestkitchen': ('atk', 'America\'s Test Kitchen'), + 'cookscountry': ('cco', 'Cook\'s Country'), + 'cooksillustrated': ('cio', 'Cook\'s Illustrated'), + }[show] - season = 'Season %d' % season_number + facet_filters = [ + 'search_document_klass:episode', + 'search_show_slug:' + slug, + ] + + if season_number: + playlist_id = 'season_%d' % season_number + playlist_title = 'Season %d' % season_number + facet_filters.append('search_season_list:' + playlist_title) + else: + playlist_id = show + playlist_title = title season_search = self._download_json( 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug, - season, headers={ - 'Origin': 'https://www.%s.com' % show_name, + playlist_id, headers={ + 'Origin': 'https://www.americastestkitchen.com', 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805', 'X-Algolia-Application-Id': 'Y1FNZXUI30', }, query={ - 'facetFilters': json.dumps([ - 'search_season_list:' + season, - 'search_document_klass:episode', - 'search_show_slug:' + slug, - ]), - 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug, + 'facetFilters': json.dumps(facet_filters), + 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug, 'attributesToHighlight': '', 'hitsPerPage': 1000, }) def entries(): for episode in (season_search.get('hits') or []): - search_url = episode.get('search_url') + search_url = episode.get('search_url') # always formatted like '/episode/123-title-of-episode' if not search_url: continue yield { '_type': 'url', - 'url': 'https://www.%s.com%s' % (show_name, search_url), + 'url': f'https://www.americastestkitchen.com{show_path or ""}{search_url}', 'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]), 'title': episode.get('title'), 'description': episode.get('description'), @@ -152,4 +212,4 @@ class AmericasTestKitchenSeasonIE(InfoExtractor): } return self.playlist_result( - entries(), 'season_%d' % season_number, season) + entries(), playlist_id, playlist_title) diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py index 73b72b0859..b0cbd775c0 100644 --- a/yt_dlp/extractor/amp.py +++ b/yt_dlp/extractor/amp.py @@ -10,7 +10,7 @@ from ..utils import ( ) -class AMPIE(InfoExtractor): +class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor # parse Akamai Adaptive Media Player feed def _extract_feed_info(self, url): feed = self._download_json( @@ -84,8 +84,6 @@ class 
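# --- reviewer note (not part of the patch) ----------------------------------
# Sketch of the Algolia request that AmericasTestKitchenSeasonIE._real_extract()
# above now builds for both season and whole-series playlists. The application
# ID and search-only API key are the public values already present in the hunk;
# omitting the season facet (the new series mode) returns every episode of the
# show. Illustration only, no error handling.
import json
import urllib.parse
import urllib.request

def search_episodes(slug='cco', season=12):
    facets = ['search_document_klass:episode', f'search_show_slug:{slug}']
    if season:
        facets.append(f'search_season_list:Season {season}')
    url = (f'https://y1fnzxui30-dsn.algolia.net/1/indexes/'
           f'everest_search_{slug}_season_desc_production?'
           + urllib.parse.urlencode({
               'facetFilters': json.dumps(facets),
               'hitsPerPage': 1000,
           }))
    req = urllib.request.Request(url, headers={
        'Origin': 'https://www.americastestkitchen.com',
        'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
        'X-Algolia-Application-Id': 'Y1FNZXUI30',
    })
    return json.load(urllib.request.urlopen(req)).get('hits') or []
# --- end note ----------------------------------------------------------------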
AMPIE(InfoExtractor): 'ext': ext, }) - self._sort_formats(formats) - timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date')) return { diff --git a/yt_dlp/extractor/anchorfm.py b/yt_dlp/extractor/anchorfm.py new file mode 100644 index 0000000000..52f2ad057f --- /dev/null +++ b/yt_dlp/extractor/anchorfm.py @@ -0,0 +1,98 @@ +from .common import InfoExtractor +from ..utils import ( + clean_html, + float_or_none, + int_or_none, + str_or_none, + traverse_obj, + unified_timestamp +) + + +class AnchorFMEpisodeIE(InfoExtractor): + _VALID_URL = r'https?://anchor\.fm/(?P\w+)/(?:embed/)?episodes/[\w-]+-(?P\w+)' + _EMBED_REGEX = [rf']+\bsrc=[\'"](?P{_VALID_URL})'] + _TESTS = [{ + 'url': 'https://anchor.fm/lovelyti/episodes/Chrisean-Rock-takes-to-twitter-to-announce-shes-pregnant--Blueface-denies-he-is-the-father-e1tpt3d', + 'info_dict': { + 'id': 'e1tpt3d', + 'ext': 'mp3', + 'title': ' Chrisean Rock takes to twitter to announce she\'s pregnant, Blueface denies he is the father!', + 'description': 'md5:207d167de3e28ceb4ddc1ebf5a30044c', + 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_nologo/1034827/1034827-1658438968460-5f3bfdf3601e8.jpg', + 'duration': 624.718, + 'uploader': 'Lovelyti ', + 'uploader_id': '991541', + 'channel': 'lovelyti', + 'modified_date': '20230121', + 'modified_timestamp': 1674285178, + 'release_date': '20230121', + 'release_timestamp': 1674285179, + 'episode_id': 'e1tpt3d', + } + }, { + # embed url + 'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd', + 'info_dict': { + 'id': 'e1shjqd', + 'ext': 'mp3', + 'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong', + 'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41', + 'duration': 1042.008, + 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg', + 'release_date': '20221221', + 'release_timestamp': 1671595916, + 'modified_date': '20221221', + 'modified_timestamp': 1671590834, + 'channel': 'apakatatempo', + 'uploader': 'Podcast Tempo', + 'uploader_id': '2585461', + 'season': 'Season 2', + 'season_number': 2, + 'episode_id': 'e1shjqd', + } + }] + + _WEBPAGE_TESTS = [{ + 'url': 'https://podcast.tempo.co/podcast/192/perang-bintang-di-balik-kasus-ferdy-sambo-dan-ismail-bolong', + 'info_dict': { + 'id': 'e1shjqd', + 'ext': 'mp3', + 'release_date': '20221221', + 'duration': 1042.008, + 'season': 'Season 2', + 'modified_timestamp': 1671590834, + 'uploader_id': '2585461', + 'modified_date': '20221221', + 'description': 'md5:9e95ad9293bf00178bf8d33e9cb92c41', + 'season_number': 2, + 'title': 'S2E75 Perang Bintang di Balik Kasus Ferdy Sambo dan Ismail Bolong', + 'release_timestamp': 1671595916, + 'episode_id': 'e1shjqd', + 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg', + 'uploader': 'Podcast Tempo', + 'channel': 'apakatatempo', + } + }] + + def _real_extract(self, url): + channel_name, episode_id = self._match_valid_url(url).group('channel_name', 'episode_id') + api_data = self._download_json(f'https://anchor.fm/api/v3/episodes/{episode_id}', episode_id) + + return { + 'id': episode_id, + 'title': traverse_obj(api_data, ('episode', 'title')), + 'url': traverse_obj(api_data, ('episode', 'episodeEnclosureUrl'), 
('episodeAudios', 0, 'url')), + 'ext': 'mp3', + 'vcodec': 'none', + 'thumbnail': traverse_obj(api_data, ('episode', 'episodeImage')), + 'description': clean_html(traverse_obj(api_data, ('episode', ('description', 'descriptionPreview')), get_all=False)), + 'duration': float_or_none(traverse_obj(api_data, ('episode', 'duration')), 1000), + 'modified_timestamp': unified_timestamp(traverse_obj(api_data, ('episode', 'modified'))), + 'release_timestamp': int_or_none(traverse_obj(api_data, ('episode', 'publishOnUnixTimestamp'))), + 'episode_id': episode_id, + 'uploader': traverse_obj(api_data, ('creator', 'name')), + 'uploader_id': str_or_none(traverse_obj(api_data, ('creator', 'userId'))), + 'season_number': int_or_none(traverse_obj(api_data, ('episode', 'podcastSeasonNumber'))), + 'channel': channel_name or traverse_obj(api_data, ('creator', 'vanitySlug')), + } diff --git a/yt_dlp/extractor/ant1newsgr.py b/yt_dlp/extractor/ant1newsgr.py index fac476e21a..7b384b22d0 100644 --- a/yt_dlp/extractor/ant1newsgr.py +++ b/yt_dlp/extractor/ant1newsgr.py @@ -19,7 +19,6 @@ class Ant1NewsGrBaseIE(InfoExtractor): raise ExtractorError('no source found for %s' % video_id) formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4') if determine_ext(source) == 'm3u8' else ([{'url': source}], {})) - self._sort_formats(formats) thumbnails = scale_thumbnails_to_max_format_width( formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') return { diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py index cb94835693..79bfe412b2 100644 --- a/yt_dlp/extractor/anvato.py +++ b/yt_dlp/extractor/anvato.py @@ -5,31 +5,70 @@ import random import re import time -from .anvato_token_generator import NFLTokenGenerator from .common import InfoExtractor from ..aes import aes_encrypt -from ..compat import compat_str from ..utils import ( bytes_to_intlist, determine_ext, - intlist_to_bytes, int_or_none, + intlist_to_bytes, join_nonempty, + smuggle_url, strip_jsonp, + traverse_obj, unescapeHTML, unsmuggle_url, ) def md5_text(s): - if not isinstance(s, compat_str): - s = compat_str(s) - return hashlib.md5(s.encode('utf-8')).hexdigest() + return hashlib.md5(str(s).encode()).hexdigest() class AnvatoIE(InfoExtractor): _VALID_URL = r'anvato:(?P[^:]+):(?P\d+)' + _API_BASE_URL = 'https://tkx.mp.lura.live/rest/v2' + _ANVP_RE = r']+\bdata-anvp\s*=\s*(["\'])(?P(?:(?!\1).)+)\1' + _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js + + _TESTS = [{ + # from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14 + 'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441', + 'md5': '921919dab3cd0b849ff3d624831ae3e2', + 'info_dict': { + 'id': '899441', + 'ext': 'mp4', + 'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14', + 'description': 'md5:85e05a3cc163f8c344340f220521136d', + 'upload_date': '20201215', + 'timestamp': 1608009755, + 'thumbnail': r're:^https?://.*\.jpg', + 'uploader': 'NFL', + 'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights', + 'Player Highlights', 'Cleveland Browns', 'league'], + 'duration': 157, + 'categories': ['Entertainment', 'Game', 'Highlights'], + }, + }, { + # from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/ + 'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455', + 'md5': '837718bcfb3a7778d022f857f7a9b19e', + 'info_dict': { + 'id': '8032455', + 'ext': 'mp4', + 'title': '99-year-old woman learns to fly plane in 
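# --- reviewer note (not part of the patch) ----------------------------------
# The AnchorFMEpisodeIE added above is a thin wrapper over a single JSON
# endpoint. Hedged standalone sketch (field names per the hunk; no error
# handling, and the site may require browser-like headers):
import json
import urllib.request

def anchor_episode(episode_id):
    data = json.load(urllib.request.urlopen(
        f'https://anchor.fm/api/v3/episodes/{episode_id}'))
    episode = data.get('episode') or {}
    return {
        'title': episode.get('title'),
        'audio_url': episode.get('episodeEnclosureUrl'),
        'uploader': (data.get('creator') or {}).get('name'),
        'duration_sec': (episode.get('duration') or 0) / 1000,  # API reports ms
    }
# --- end note ----------------------------------------------------------------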
Torrance, checks off bucket list dream', + 'description': 'md5:0a12bab8159445e78f52a297a35c6609', + 'upload_date': '20220928', + 'timestamp': 1664408881, + 'thumbnail': r're:^https?://.*\.jpg', + 'uploader': 'LIN', + 'tags': ['video', 'news', '5live'], + 'duration': 155, + 'categories': ['News'], + }, + }] + # Copied from anvplayer.min.js _ANVACK_TABLE = { 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ', @@ -202,86 +241,74 @@ class AnvatoIE(InfoExtractor): 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582' } + def _generate_nfl_token(self, anvack, mcp_id): + reroute = self._download_json( + 'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials', + headers={'X-Domain-Id': 100}, note='Fetching token info') + token_type = reroute.get('token_type') or 'Bearer' + auth_token = f'{token_type} {reroute["access_token"]}' + response = self._download_json( + 'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({ + 'query': '''{ + viewer { + mediaToken(anvack: "%s", id: %s) { + token + } + } +}''' % (anvack, mcp_id), + }).encode(), headers={ + 'Authorization': auth_token, + 'Content-Type': 'application/json', + }, note='Fetching NFL API token') + return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token')) + _TOKEN_GENERATORS = { - 'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': NFLTokenGenerator, + 'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token, } - _API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA' - - _ANVP_RE = r']+\bdata-anvp\s*=\s*(["\'])(?P(?:(?!\1).)+)\1' - _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' - - _TESTS = [{ - # from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874 - 'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496', - 'info_dict': { - 'id': '4465496', - 'ext': 'mp4', - 'title': 'VIDEO: Humpback whale breaches right next to NH boat', - 'description': 'VIDEO: Humpback whale breaches right next to NH boat. 
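# --- reviewer note (not part of the patch) ----------------------------------
# The deleted anvato_token_generator package is folded into the extractor as
# _generate_nfl_token() above. Equivalent standalone flow, illustration only
# (endpoints and the X-Domain-Id header are as in the hunk):
import json
import urllib.request

def nfl_media_token(anvack, mcp_id):
    req = urllib.request.Request(
        'https://api.nfl.com/v1/reroute',
        data=b'grant_type=client_credentials', headers={'X-Domain-Id': '100'})
    reroute = json.load(urllib.request.urlopen(req))
    auth = f"{reroute.get('token_type') or 'Bearer'} {reroute['access_token']}"
    query = '{ viewer { mediaToken(anvack: "%s", id: %s) { token } } }' % (anvack, mcp_id)
    req = urllib.request.Request(
        'https://api.nfl.com/v3/shield/',
        data=json.dumps({'query': query}).encode(),
        headers={'Authorization': auth, 'Content-Type': 'application/json'})
    resp = json.load(urllib.request.urlopen(req))
    return resp['data']['viewer']['mediaToken']['token']
# --- end note ----------------------------------------------------------------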
Footage courtesy: Zach Fahey.', - 'duration': 22, - 'timestamp': 1534855680, - 'upload_date': '20180821', - 'uploader': 'ANV', - }, - 'params': { - 'skip_download': True, - }, - }, { - # from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/ - 'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601', - 'only_matching': True, - }] - - def __init__(self, *args, **kwargs): - super(AnvatoIE, self).__init__(*args, **kwargs) - self.__server_time = None - def _server_time(self, access_key, video_id): - if self.__server_time is not None: - return self.__server_time + return int_or_none(traverse_obj(self._download_json( + f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key}, + note='Fetching server time', fatal=False), 'server_time')) or int(time.time()) - self.__server_time = int(self._download_json( - self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id, - note='Fetching server time')['server_time']) - - return self.__server_time - - def _api_prefix(self, access_key): - return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage') - - def _get_video_json(self, access_key, video_id): + def _get_video_json(self, access_key, video_id, extracted_token): # See et() in anvplayer.min.js, which is an alias of getVideoJSON() - video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key) + video_data_url = f'{self._API_BASE_URL}/mcp/video/{video_id}?anvack={access_key}' server_time = self._server_time(access_key, video_id) - input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time)) + input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}' auth_secret = intlist_to_bytes(aes_encrypt( bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY))) - - video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii') + query = { + 'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'), + 'rtyp': 'fp', + } anvrid = md5_text(time.time() * 1000 * random.random())[:30] api = { 'anvrid': anvrid, 'anvts': server_time, } - if self._TOKEN_GENERATORS.get(access_key) is not None: - api['anvstk2'] = self._TOKEN_GENERATORS[access_key].generate(self, access_key, video_id) + if extracted_token is not None: + api['anvstk2'] = extracted_token + elif self._TOKEN_GENERATORS.get(access_key) is not None: + api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id) + elif self._ANVACK_TABLE.get(access_key) is not None: + api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}') else: - api['anvstk'] = md5_text('%s|%s|%d|%s' % ( - access_key, anvrid, server_time, - self._ANVACK_TABLE.get(access_key, self._API_KEY))) + api['anvstk2'] = 'default' return self._download_json( - video_data_url, video_id, transform_source=strip_jsonp, - data=json.dumps({'api': api}).encode('utf-8')) + video_data_url, video_id, transform_source=strip_jsonp, query=query, + data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8')) - def _get_anvato_videos(self, access_key, video_id): - video_data = self._get_video_json(access_key, video_id) + def _get_anvato_videos(self, access_key, video_id, token): + video_data = self._get_video_json(access_key, video_id, token) formats = [] for published_url in video_data['published_urls']: - video_url = published_url['embed_url'] + video_url = 
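# --- reviewer note (not part of the patch) ----------------------------------
# Sketch of the request signing done by _get_video_json() above, reusing
# yt-dlp's own AES helpers so the semantics match the patch exactly. The
# resulting value is sent as the X-Anvato-Adst-Auth query parameter; the
# anvstk fallback hash shown in the hunk applies when no token generator or
# smuggled token is available.
import base64
import hashlib
from yt_dlp.aes import aes_encrypt
from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes

AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'  # from anvplayer.min.js

def md5_text(s):
    return hashlib.md5(str(s).encode()).hexdigest()

def anvato_auth_param(video_data_url, server_time):
    input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
    secret = intlist_to_bytes(aes_encrypt(
        bytes_to_intlist(input_data[:64]), bytes_to_intlist(AUTH_KEY)))
    return base64.b64encode(secret).decode('ascii')
# --- end note ----------------------------------------------------------------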
published_url.get('embed_url') + if not video_url: + continue media_format = published_url.get('format') ext = determine_ext(video_url) @@ -296,15 +323,27 @@ class AnvatoIE(InfoExtractor): 'tbr': tbr or None, } - if media_format == 'm3u8' and tbr is not None: + vtt_subs, hls_subs = {}, {} + if media_format == 'vtt': + _, vtt_subs = self._extract_m3u8_formats_and_subtitles( + video_url, video_id, m3u8_id='vtt', fatal=False) + continue + elif media_format == 'm3u8' and tbr is not None: a_format.update({ 'format_id': join_nonempty('hls', tbr), 'ext': 'mp4', }) elif media_format == 'm3u8-variant' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - video_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) + # For some videos the initial m3u8 URL returns JSON instead + manifest_json = self._download_json( + video_url, video_id, note='Downloading manifest JSON', errnote=False) + if manifest_json: + video_url = manifest_json.get('master_m3u8') + if not video_url: + continue + hls_fmts, hls_subs = self._extract_m3u8_formats_and_subtitles( + video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False) + formats.extend(hls_fmts) continue elif ext == 'mp3' or media_format == 'mp3': a_format['vcodec'] = 'none' @@ -315,8 +354,6 @@ class AnvatoIE(InfoExtractor): }) formats.append(a_format) - self._sort_formats(formats) - subtitles = {} for caption in video_data.get('captions', []): a_caption = { @@ -324,6 +361,7 @@ class AnvatoIE(InfoExtractor): 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None } subtitles.setdefault(caption['language'], []).append(a_caption) + subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs) return { 'id': video_id, @@ -349,7 +387,10 @@ class AnvatoIE(InfoExtractor): access_key = cls._MCP_TO_ACCESS_KEY_TABLE.get((anvplayer_data.get('mcp') or '').lower()) if not (video_id or '').isdigit() or not access_key: continue - yield cls.url_result(f'anvato:{access_key}:{video_id}', AnvatoIE, video_id) + url = f'anvato:{access_key}:{video_id}' + if anvplayer_data.get('token'): + url = smuggle_url(url, {'token': anvplayer_data['token']}) + yield cls.url_result(url, AnvatoIE, video_id) def _extract_anvato_videos(self, webpage, video_id): anvplayer_data = self._parse_json( @@ -357,7 +398,7 @@ class AnvatoIE(InfoExtractor): self._ANVP_RE, webpage, 'Anvato player data', group='anvp'), video_id) return self._get_anvato_videos( - anvplayer_data['accessKey'], anvplayer_data['video']) + anvplayer_data['accessKey'], anvplayer_data['video'], 'default') # cbslocal token = 'default' def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -365,9 +406,7 @@ class AnvatoIE(InfoExtractor): 'countries': smuggled_data.get('geo_countries'), }) - mobj = self._match_valid_url(url) - access_key, video_id = mobj.group('access_key_or_mcp', 'id') + access_key, video_id = self._match_valid_url(url).group('access_key_or_mcp', 'id') if access_key not in self._ANVACK_TABLE: - access_key = self._MCP_TO_ACCESS_KEY_TABLE.get( - access_key) or access_key - return self._get_anvato_videos(access_key, video_id) + access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(access_key) or access_key + return self._get_anvato_videos(access_key, video_id, smuggled_data.get('token')) diff --git a/yt_dlp/extractor/anvato_token_generator/__init__.py b/yt_dlp/extractor/anvato_token_generator/__init__.py deleted file mode 100644 index 6530caf530..0000000000 --- a/yt_dlp/extractor/anvato_token_generator/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .nfl import 
NFLTokenGenerator - -__all__ = [ - 'NFLTokenGenerator', -] diff --git a/yt_dlp/extractor/anvato_token_generator/common.py b/yt_dlp/extractor/anvato_token_generator/common.py deleted file mode 100644 index 3800b5808e..0000000000 --- a/yt_dlp/extractor/anvato_token_generator/common.py +++ /dev/null @@ -1,3 +0,0 @@ -class TokenGenerator: - def generate(self, anvack, mcp_id): - raise NotImplementedError('This method must be implemented by subclasses') diff --git a/yt_dlp/extractor/anvato_token_generator/nfl.py b/yt_dlp/extractor/anvato_token_generator/nfl.py deleted file mode 100644 index 9ee4aa002e..0000000000 --- a/yt_dlp/extractor/anvato_token_generator/nfl.py +++ /dev/null @@ -1,28 +0,0 @@ -import json - -from .common import TokenGenerator - - -class NFLTokenGenerator(TokenGenerator): - _AUTHORIZATION = None - - def generate(ie, anvack, mcp_id): - if not NFLTokenGenerator._AUTHORIZATION: - reroute = ie._download_json( - 'https://api.nfl.com/v1/reroute', mcp_id, - data=b'grant_type=client_credentials', - headers={'X-Domain-Id': 100}) - NFLTokenGenerator._AUTHORIZATION = '%s %s' % (reroute.get('token_type') or 'Bearer', reroute['access_token']) - return ie._download_json( - 'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({ - 'query': '''{ - viewer { - mediaToken(anvack: "%s", id: %s) { - token - } - } -}''' % (anvack, mcp_id), - }).encode(), headers={ - 'Authorization': NFLTokenGenerator._AUTHORIZATION, - 'Content-Type': 'application/json', - })['data']['viewer']['mediaToken']['token'] diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py index b67db2adca..6949ca9740 100644 --- a/yt_dlp/extractor/aol.py +++ b/yt_dlp/extractor/aol.py @@ -9,7 +9,7 @@ from ..utils import ( ) -class AolIE(YahooIE): +class AolIE(YahooIE): # XXX: Do not subclass from concrete IE IE_NAME = 'aol.com' _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})' @@ -119,7 +119,6 @@ class AolIE(YahooIE): 'height': int_or_none(qs.get('h', [None])[0]), }) formats.append(f) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/apa.py b/yt_dlp/extractor/apa.py index c9147e855a..1ea0b1de45 100644 --- a/yt_dlp/extractor/apa.py +++ b/yt_dlp/extractor/apa.py @@ -72,7 +72,6 @@ class APAIE(InfoExtractor): 'format_id': format_id, 'height': height, }) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/aparat.py b/yt_dlp/extractor/aparat.py index 90464556db..4a989d837b 100644 --- a/yt_dlp/extractor/aparat.py +++ b/yt_dlp/extractor/aparat.py @@ -73,7 +73,6 @@ class AparatIE(InfoExtractor): r'(\d+)[pP]', label or '', 'height', default=None)), }) - self._sort_formats(formats) info = self._search_json_ld(webpage, video_id, default={}) diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py index 6b63f070d6..2e0b0a8c93 100644 --- a/yt_dlp/extractor/appletrailers.py +++ b/yt_dlp/extractor/appletrailers.py @@ -120,7 +120,6 @@ class AppleTrailersIE(InfoExtractor): 'height': int_or_none(size_data.get('height')), 'language': version[:2], }) - self._sort_formats(formats) entries.append({ 'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(), @@ -185,8 +184,6 @@ class AppleTrailersIE(InfoExtractor): 'height': int_or_none(format['height']), }) - self._sort_formats(formats) - playlist.append({ '_type': 'video', 'id': video_id, diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 
0f40774ce1..4ccd398257 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -1,8 +1,10 @@ import json import re +import urllib.error import urllib.parse from .common import InfoExtractor +from .naver import NaverBaseIE from .youtube import YoutubeBaseInfoExtractor, YoutubeIE from ..compat import compat_HTTPError, compat_urllib_parse_unquote from ..utils import ( @@ -16,6 +18,7 @@ from ..utils import ( get_element_by_id, int_or_none, join_nonempty, + js_to_json, merge_dicts, mimetype2ext, orderedSet, @@ -311,7 +314,7 @@ class ArchiveOrgIE(InfoExtractor): }) for entry in entries.values(): - self._sort_formats(entry['formats'], ('source', )) + entry['_format_sort_fields'] = ('source', ) if len(entries) == 1: # If there's only one item, use it as the main info dict @@ -367,7 +370,9 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_id': 'UCukCyHaD-bK3in_pKpfH9Eg', 'duration': 32, 'uploader_id': 'Zeurel', - 'uploader_url': 'http://www.youtube.com/user/Zeurel' + 'uploader_url': 'https://www.youtube.com/user/Zeurel', + 'thumbnail': r're:https?://.*\.(jpg|webp)', + 'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg', } }, { # Internal link @@ -382,7 +387,9 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_id': 'UCHnyfMqiRRG1u-2MsSQLbXA', 'duration': 771, 'uploader_id': '1veritasium', - 'uploader_url': 'http://www.youtube.com/user/1veritasium' + 'uploader_url': 'https://www.youtube.com/user/1veritasium', + 'thumbnail': r're:https?://.*\.(jpg|webp)', + 'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA', } }, { # Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description. @@ -396,7 +403,9 @@ class YoutubeWebArchiveIE(InfoExtractor): 'duration': 398, 'description': 'md5:ff4de6a7980cb65d951c2f6966a4f2f3', 'uploader_id': 'machinima', - 'uploader_url': 'http://www.youtube.com/user/machinima' + 'uploader_url': 'https://www.youtube.com/user/machinima', + 'thumbnail': r're:https?://.*\.(jpg|webp)', + 'uploader': 'machinima' } }, { # FLV video. Video file URL does not provide itag information @@ -410,7 +419,10 @@ class YoutubeWebArchiveIE(InfoExtractor): 'duration': 19, 'description': 'md5:10436b12e07ac43ff8df65287a56efb4', 'uploader_id': 'jawed', - 'uploader_url': 'http://www.youtube.com/user/jawed' + 'uploader_url': 'https://www.youtube.com/user/jawed', + 'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A', + 'thumbnail': r're:https?://.*\.(jpg|webp)', + 'uploader': 'jawed', } }, { 'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA', @@ -424,7 +436,9 @@ class YoutubeWebArchiveIE(InfoExtractor): 'duration': 204, 'description': 'md5:f7535343b6eda34a314eff8b85444680', 'uploader_id': 'itsmadeon', - 'uploader_url': 'http://www.youtube.com/user/itsmadeon' + 'uploader_url': 'https://www.youtube.com/user/itsmadeon', + 'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w', + 'thumbnail': r're:https?://.*\.(jpg|webp)', } }, { # First capture is of dead video, second is the oldest from CDX response. @@ -435,10 +449,13 @@ class YoutubeWebArchiveIE(InfoExtractor): 'title': 'Fake Teen Doctor Strikes AGAIN! 
- Weekly Weird News', 'upload_date': '20160218', 'channel_id': 'UCdIaNUarhzLSXGoItz7BHVA', - 'duration': 1236, + 'duration': 1235, 'description': 'md5:21032bae736421e89c2edf36d1936947', 'uploader_id': 'MachinimaETC', - 'uploader_url': 'http://www.youtube.com/user/MachinimaETC' + 'uploader_url': 'https://www.youtube.com/user/MachinimaETC', + 'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA', + 'thumbnail': r're:https?://.*\.(jpg|webp)', + 'uploader': 'ETC News', } }, { # First capture of dead video, capture date in link links to dead capture. @@ -449,10 +466,13 @@ class YoutubeWebArchiveIE(InfoExtractor): 'title': 'WTF: Video Games Still Launch BROKEN?! - T.U.G.S.', 'upload_date': '20160219', 'channel_id': 'UCdIaNUarhzLSXGoItz7BHVA', - 'duration': 798, + 'duration': 797, 'description': 'md5:a1dbf12d9a3bd7cb4c5e33b27d77ffe7', 'uploader_id': 'MachinimaETC', - 'uploader_url': 'http://www.youtube.com/user/MachinimaETC' + 'uploader_url': 'https://www.youtube.com/user/MachinimaETC', + 'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA', + 'thumbnail': r're:https?://.*\.(jpg|webp)', + 'uploader': 'ETC News', }, 'expected_warnings': [ r'unable to download capture webpage \(it may not be archived\)' @@ -472,12 +492,11 @@ class YoutubeWebArchiveIE(InfoExtractor): 'title': 'It\'s Bootleg AirPods Time.', 'upload_date': '20211021', 'channel_id': 'UC7Jwj9fkrf1adN4fMmTkpug', - 'channel_url': 'http://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug', + 'channel_url': 'https://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug', 'duration': 810, 'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc', + 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader': 'DankPods', - 'uploader_id': 'UC7Jwj9fkrf1adN4fMmTkpug', - 'uploader_url': 'http://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug' } }, { # player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093 @@ -488,12 +507,135 @@ class YoutubeWebArchiveIE(InfoExtractor): 'title': 'bitch lasagna', 'upload_date': '20181005', 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw', - 'channel_url': 'http://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw', + 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw', 'duration': 135, 'description': 'md5:2dbe4051feeff2dab5f41f82bb6d11d0', 'uploader': 'PewDiePie', 'uploader_id': 'PewDiePie', - 'uploader_url': 'http://www.youtube.com/user/PewDiePie' + 'uploader_url': 'https://www.youtube.com/user/PewDiePie', + 'thumbnail': r're:https?://.*\.(jpg|webp)', + } + }, { + # ~June 2010 Capture. 
swfconfig + 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y', + 'info_dict': { + 'id': '8XeW5ilk-9Y', + 'ext': 'flv', + 'title': 'Story of Stuff, The Critique Part 4 of 4', + 'duration': 541, + 'description': 'md5:28157da06f2c5e94c97f7f3072509972', + 'uploader': 'HowTheWorldWorks', + 'uploader_id': 'HowTheWorldWorks', + 'thumbnail': r're:https?://.*\.(jpg|webp)', + 'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks', + 'upload_date': '20090520', + } + }, { + # Jan 2011: watch-video-date/eow-date surrounded by whitespace + 'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc', + 'info_dict': { + 'id': 'Q_yjX80U7Yc', + 'ext': 'flv', + 'title': 'Spray Paint Art by Clay Butler: Purple Fantasy Forest', + 'uploader_id': 'claybutlermusic', + 'description': 'md5:4595264559e3d0a0ceb3f011f6334543', + 'upload_date': '20090803', + 'uploader': 'claybutlermusic', + 'thumbnail': r're:https?://.*\.(jpg|webp)', + 'duration': 132, + 'uploader_url': 'https://www.youtube.com/user/claybutlermusic', + } + }, { + # ~May 2009 swfArgs. ytcfg is spread out over various vars + 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY', + 'info_dict': { + 'id': 'c5uJgG05xUY', + 'ext': 'webm', + 'title': 'Story of Stuff, The Critique Part 1 of 4', + 'uploader_id': 'HowTheWorldWorks', + 'uploader': 'HowTheWorldWorks', + 'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks', + 'upload_date': '20090513', + 'description': 'md5:4ca77d79538064e41e4cc464e93f44f0', + 'thumbnail': r're:https?://.*\.(jpg|webp)', + 'duration': 754, + } + }, { + # ~June 2012. Upload date is in another lang so cannot extract. + 'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA', + 'info_dict': { + 'id': 'xWTLLl-dQaA', + 'ext': 'mp4', + 'title': 'Black Nerd eHarmony Video Bio Parody (SPOOF)', + 'uploader_url': 'https://www.youtube.com/user/BlackNerdComedy', + 'description': 'md5:e25f0133aaf9e6793fb81c18021d193e', + 'uploader_id': 'BlackNerdComedy', + 'uploader': 'BlackNerdComedy', + 'duration': 182, + 'thumbnail': r're:https?://.*\.(jpg|webp)', + } + }, { + # ~July 2013 + 'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM', + 'info_dict': { + 'id': '9eO1aasHyTM', + 'ext': 'mp4', + 'title': 'Polar-oid', + 'description': 'Cameras and bears are dangerous!', + 'uploader_url': 'https://www.youtube.com/user/punkybird', + 'uploader_id': 'punkybird', + 'duration': 202, + 'channel_id': 'UC62R2cBezNBOqxSerfb1nMQ', + 'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ', + 'upload_date': '20060428', + 'uploader': 'punkybird', + } + }, { + # April 2020: Player response in player config + 'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en', + 'info_dict': { + 'id': 'Cf7vS8jc7dY', + 'ext': 'mp4', + 'title': 'A Dramatic Pool Story (by Jamie Spicer-Lewis) - Game Grumps Animated', + 'duration': 64, + 'upload_date': '20200408', + 'uploader_id': 'GameGrumps', + 'uploader': 'GameGrumps', + 'channel_url': 'https://www.youtube.com/channel/UC9CuvdOVfMPvKCiwdGKL3cQ', + 'channel_id': 'UC9CuvdOVfMPvKCiwdGKL3cQ', + 'thumbnail': r're:https?://.*\.(jpg|webp)', + 'description': 'md5:c625bb3c02c4f5fb4205971e468fa341', + 'uploader_url': 'https://www.youtube.com/user/GameGrumps', + } + }, { + # watch7-user-header with yt-user-info + 'url': 'ytarchive:kbh4T_b4Ixw:20160307085057', + 'info_dict': { + 'id': 
'kbh4T_b4Ixw', + 'ext': 'mp4', + 'title': 'Shovel Knight OST - Strike the Earth! Plains of Passage 16 bit SNES style remake / remix', + 'channel_url': 'https://www.youtube.com/channel/UCnTaGvsHmMy792DWeT6HbGA', + 'uploader': 'Nelward music', + 'duration': 213, + 'description': 'md5:804b4a9ce37b050a5fefdbb23aeba54d', + 'thumbnail': r're:https?://.*\.(jpg|webp)', + 'upload_date': '20150503', + 'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA', + } + }, { + # April 2012 + 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU', + 'info_dict': { + 'id': 'SOm7mPoPskU', + 'ext': 'mp4', + 'title': 'Boyfriend - Justin Bieber Parody', + 'uploader_url': 'https://www.youtube.com/user/thecomputernerd01', + 'uploader': 'thecomputernerd01', + 'thumbnail': r're:https?://.*\.(jpg|webp)', + 'description': 'md5:dd7fa635519c2a5b4d566beaecad7491', + 'duration': 200, + 'upload_date': '20120407', + 'uploader_id': 'thecomputernerd01', } }, { 'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw', @@ -526,9 +668,10 @@ class YoutubeWebArchiveIE(InfoExtractor): }, ] _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE - _YT_INITIAL_PLAYER_RESPONSE_RE = fr'''(?x) + _YT_INITIAL_PLAYER_RESPONSE_RE = fr'''(?x: (?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*| - {YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE}''' + {YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE} + )''' _YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers _YT_ALL_THUMB_SERVERS = orderedSet( @@ -573,6 +716,27 @@ class YoutubeWebArchiveIE(InfoExtractor): initial_data = self._search_json( self._YT_INITIAL_DATA_RE, webpage, 'initial data', video_id, default={}) + ytcfg = {} + for j in re.findall(r'yt\.setConfig\(\s*(?P{\s*(?s:.+?)\s*})\s*\);', webpage): # ~June 2010 + ytcfg.update(self._parse_json(j, video_id, fatal=False, ignore_extra=True, transform_source=js_to_json, errnote='') or {}) + + # XXX: this also may contain a 'ptchn' key + player_config = ( + self._search_json( + r'(?:yt\.playerConfig|ytplayer\.config|swfConfig)\s*=', + webpage, 'player config', video_id, default=None) + or ytcfg.get('PLAYER_CONFIG') or {}) + + # XXX: this may also contain a 'creator' key. + swf_args = self._search_json(r'swfArgs\s*=', webpage, 'swf config', video_id, default={}) + if swf_args and not traverse_obj(player_config, ('args',)): + player_config['args'] = swf_args + + if not player_response: + # April 2020 + player_response = self._parse_json( + traverse_obj(player_config, ('args', 'player_response')) or '{}', video_id, fatal=False) + initial_data_video = traverse_obj( initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'videoPrimaryInfoRenderer'), expected_type=dict, get_all=False, default={}) @@ -587,21 +751,64 @@ class YoutubeWebArchiveIE(InfoExtractor): video_details.get('title') or YoutubeBaseInfoExtractor._get_text(microformats, 'title') or YoutubeBaseInfoExtractor._get_text(initial_data_video, 'title') + or traverse_obj(player_config, ('args', 'title')) or self._extract_webpage_title(webpage) or search_meta(['og:title', 'twitter:title', 'title'])) + def id_from_url(url, type_): + return self._search_regex( + rf'(?:{type_})/([^/#&?]+)', url or '', f'{type_} id', default=None) + + # XXX: would the get_elements_by_... functions be better suited here? 
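# --- reviewer note (not part of the patch) ----------------------------------
# The new legacy-page handling above stitches a ytcfg together from several
# generations of archived YouTube markup: yt.setConfig(...) blobs (~2010),
# swfArgs (~2009) and ytplayer.config/player_response (~2020). Simplified
# best-effort sketch of the first step, using yt-dlp's js_to_json as the
# patch does; the real regex in the hunk is stricter than this one.
import json
import re
from yt_dlp.utils import js_to_json

def legacy_ytcfg(webpage):
    cfg = {}
    for blob in re.findall(r'yt\.setConfig\(\s*({.+?})\s*\);', webpage, re.DOTALL):
        try:
            cfg.update(json.loads(js_to_json(blob)))
        except ValueError:
            pass  # old pages mix several, sometimes malformed, config blobs
    return cfg
# --- end note ----------------------------------------------------------------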
+ _CHANNEL_URL_HREF_RE = r'href="[^"]*(?Phttps?://www\.youtube\.com/(?:user|channel)/[^"]+)"' + uploader_or_channel_url = self._search_regex( + [fr'<(?:link\s*itemprop=\"url\"|a\s*id=\"watch-username\").*?\b{_CHANNEL_URL_HREF_RE}>', # @fd05024 + fr']*>\s*]*\b{_CHANNEL_URL_HREF_RE}'], # ~ May 2009, ~June 2012 + webpage, 'uploader or channel url', default=None) + + owner_profile_url = url_or_none(microformats.get('ownerProfileUrl')) # @a6211d2 + + # Uploader refers to the /user/ id ONLY + uploader_id = ( + id_from_url(owner_profile_url, 'user') + or id_from_url(uploader_or_channel_url, 'user') + or ytcfg.get('VIDEO_USERNAME')) + uploader_url = f'https://www.youtube.com/user/{uploader_id}' if uploader_id else None + + # XXX: do we want to differentiate uploader and channel? + uploader = ( + self._search_regex( + [r']*>\s*([^<]+)', # June 2010 + r'var\s*watchUsername\s*=\s*\'(.+?)\';', # ~May 2009 + r']*>\s*]*>\s*(.+?)\s*]*title="\s*(.+?)\s*"'], # ~June 2012 + webpage, 'uploader', default=None) + or self._html_search_regex( + [r'(?s)]*[^>]*>\s*(.*?)\s*]*yt-user-name[^>]*>\s*(.*?)\s*(?:(?!\1).)+)\1', # @b45a9e6 - webpage, 'channel id', default=None, group='id')) - channel_url = f'http://www.youtube.com/channel/{channel_id}' if channel_id else None + webpage, 'channel id', default=None, group='id') + or id_from_url(owner_profile_url, 'channel') + or id_from_url(uploader_or_channel_url, 'channel') + or traverse_obj(player_config, ('args', 'ucid'))) + channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None duration = int_or_none( video_details.get('lengthSeconds') or microformats.get('lengthSeconds') + or traverse_obj(player_config, ('args', ('length_seconds', 'l')), get_all=False) or parse_duration(search_meta('duration'))) description = ( video_details.get('shortDescription') @@ -609,26 +816,13 @@ class YoutubeWebArchiveIE(InfoExtractor): or clean_html(get_element_by_id('eow-description', webpage)) # @9e6dd23 or search_meta(['description', 'og:description', 'twitter:description'])) - uploader = video_details.get('author') - - # Uploader ID and URL - uploader_mobj = re.search( - r'', # @fd05024 - webpage) - if uploader_mobj is not None: - uploader_id, uploader_url = uploader_mobj.group('uploader_id'), uploader_mobj.group('uploader_url') - else: - # @a6211d2 - uploader_url = url_or_none(microformats.get('ownerProfileUrl')) - uploader_id = self._search_regex( - r'(?:user|channel)/([^/]+)', uploader_url or '', 'uploader id', default=None) - upload_date = unified_strdate( dict_get(microformats, ('uploadDate', 'publishDate')) or search_meta(['uploadDate', 'datePublished']) or self._search_regex( - [r'(?s)id="eow-date.*?>(.*?)', - r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'], # @7998520 + [r'(?s)id="eow-date.*?>\s*(.*?)\s*', + r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']', # @7998520 + r'class\s*=\s*"(?:watch-video-date|watch-video-added post-date)"[^>]*>\s*([^<]+?)\s*<'], # ~June 2010, ~Jan 2009 (respectively) webpage, 'upload date', default=None)) return { @@ -697,18 +891,22 @@ class YoutubeWebArchiveIE(InfoExtractor): url_date = url_date or url_date_2 urlh = None - try: - urlh = self._request_webpage( - HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id), - video_id, note='Fetching archived video file url', expected_status=True) - except ExtractorError 
as e: - # HTTP Error 404 is expected if the video is not saved. - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - self.raise_no_formats( - 'The requested video is not archived, indexed, or there is an issue with web.archive.org', - expected=True) - else: - raise + retry_manager = self.RetryManager(fatal=False) + for retry in retry_manager: + try: + urlh = self._request_webpage( + HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id), + video_id, note='Fetching archived video file url', expected_status=True) + except ExtractorError as e: + # HTTP Error 404 is expected if the video is not saved. + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + self.raise_no_formats( + 'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True) + else: + retry.error = e + + if retry_manager.error: + self.raise_no_formats(retry_manager.error, expected=True, video_id=video_id) capture_dates = self._get_capture_dates(video_id, int_or_none(url_date)) self.write_debug('Captures to try: ' + join_nonempty(*capture_dates, delim=', ')) @@ -749,3 +947,237 @@ class YoutubeWebArchiveIE(InfoExtractor): if not info.get('title'): info['title'] = video_id return info + + +class VLiveWebArchiveIE(InfoExtractor): + IE_NAME = 'web.archive:vlive' + IE_DESC = 'web.archive.org saved vlive videos' + _VALID_URL = r'''(?x) + (?:https?://)?web\.archive\.org/ + (?:web/)?(?:(?P[0-9]{14})?[0-9A-Za-z_*]*/)? # /web and the version index is optional + (?:https?(?::|%3[Aa])//)?(?: + (?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P[0-9]+) # VLive URL + ) + ''' + _TESTS = [{ + 'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326', + 'md5': 'cc7314812855ce56de70a06a27314983', + 'info_dict': { + 'id': '1326', + 'ext': 'mp4', + 'title': "Girl's Day's Broadcast", + 'creator': "Girl's Day", + 'view_count': int, + 'uploader_id': 'muploader_a', + 'uploader_url': None, + 'uploader': None, + 'upload_date': '20150817', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + 'timestamp': 1439816449, + 'like_count': int, + 'channel': 'Girl\'s Day', + 'channel_id': 'FDF27', + 'comment_count': int, + 'release_timestamp': 1439818140, + 'release_date': '20150817', + 'duration': 1014, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937', + 'info_dict': { + 'id': '16937', + 'ext': 'mp4', + 'title': '첸백시 걍방', + 'creator': 'EXO', + 'view_count': int, + 'subtitles': 'mincount:12', + 'uploader_id': 'muploader_j', + 'uploader_url': 'http://vlive.tv', + 'uploader': None, + 'upload_date': '20161112', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + 'timestamp': 1478923074, + 'like_count': int, + 'channel': 'EXO', + 'channel_id': 'F94BD', + 'comment_count': int, + 'release_timestamp': 1478924280, + 'release_date': '20161112', + 'duration': 906, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870', + 'info_dict': { + 'id': '101870', + 'ext': 'mp4', + 'title': '[ⓓ xV] “레벨이들 매력에 반해? 
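# --- reviewer note (not part of the patch) ----------------------------------
# The retry wrapper above probes web.archive.org's special redirect service:
# a HEAD request to wayback-fakeurl.archive.org resolves a YouTube video id
# to the archived media file, and a 404 means it was never captured. Sketch:
import urllib.error
import urllib.request

def archived_video_url(video_id):
    req = urllib.request.Request(
        f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}',
        method='HEAD')
    try:
        with urllib.request.urlopen(req) as resp:
            return resp.geturl()  # final URL after the wayback redirect
    except urllib.error.HTTPError as e:
        if e.code == 404:
            return None  # not archived
        raise
# --- end note ----------------------------------------------------------------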
안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)', + 'creator': 'Dispatch', + 'view_count': int, + 'subtitles': 'mincount:6', + 'uploader_id': 'V__FRA08071', + 'uploader_url': 'http://vlive.tv', + 'uploader': None, + 'upload_date': '20181130', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + 'timestamp': 1543601327, + 'like_count': int, + 'channel': 'Dispatch', + 'channel_id': 'C796F3', + 'comment_count': int, + 'release_timestamp': 1543601040, + 'release_date': '20181130', + 'duration': 279, + }, + 'params': { + 'skip_download': True, + }, + }] + + # The wayback machine has special timestamp and "mode" values: + # timestamp: + # 1 = the first capture + # 2 = the last capture + # mode: + # id_ = Identity - perform no alterations of the original resource, return it as it was archived. + _WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/' + + def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs): + for retry in self.RetryManager(): + try: + return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs) + except ExtractorError as e: + if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404: + raise ExtractorError('Page was not archived', expected=True) + retry.error = e + continue + + def _download_archived_json(self, url, video_id, **kwargs): + page = self._download_archived_page(url, video_id, **kwargs) + if not page: + raise ExtractorError('Page was not archived', expected=True) + else: + return self._parse_json(page, video_id) + + def _extract_formats_from_m3u8(self, m3u8_url, params, video_id): + m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False) + if not m3u8_doc: + return + + # M3U8 document should be changed to archive domain + m3u8_doc = m3u8_doc.splitlines() + url_base = m3u8_url.rsplit('/', 1)[0] + first_segment = None + for i, line in enumerate(m3u8_doc): + if not line.startswith('#'): + m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{urllib.parse.urlencode(params)}' + first_segment = first_segment or m3u8_doc[i] + + # Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870 + urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False, + fatal=False, note='Check first segment availablity') + if urlh: + formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id) + if subtitles: + self._report_ignoring_subs('m3u8') + return formats + + # Closely follows the logic of the ArchiveTeam grab script + # See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua + def _real_extract(self, url): + video_id, url_date = self._match_valid_url(url).group('id', 'date') + + webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date) + + player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id) + user_country = traverse_obj(player_info, ('common', 'userCountry')) + + main_script_url = self._search_regex(r' 1: + self.report_warning('Multiple streams found. 
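# --- reviewer note (not part of the patch) ----------------------------------
# Core trick of _extract_formats_from_m3u8() above: the archived manifest
# still points at the long-dead vlive CDN, so every non-comment line is
# rewritten to go back through the wayback machine ("2id_" = latest capture,
# unaltered bytes). Hedged sketch of that rewrite in isolation:
from urllib.parse import urlencode

WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'

def rewrite_archived_m3u8(m3u8_doc, m3u8_url, params):
    url_base = m3u8_url.rsplit('/', 1)[0]
    out = []
    for line in m3u8_doc.splitlines():
        if line and not line.startswith('#'):  # segment/playlist URI line
            line = f'{WAYBACK_BASE_URL}{url_base}/{line}?{urlencode(params)}'
        out.append(line)
    return '\n'.join(out)
# --- end note ----------------------------------------------------------------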
Only the first stream will be downloaded.') + stream = streams[0] + + max_stream = max( + stream.get('videos') or [], + key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None) + if max_stream is not None: + params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'} + formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or [] + + # For parts of the project MP4 files were archived + max_video = max( + traverse_obj(vod_data, ('videos', 'list', ...)), + key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None) + if max_video is not None: + video_url = self._WAYBACK_BASE_URL + max_video.get('source') + urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False, + fatal=False, note='Check video availablity') + if urlh: + formats.append({'url': video_url}) + + return { + 'id': video_id, + 'formats': formats, + **traverse_obj(player_info, ('postDetail', 'post', { + 'title': ('officialVideo', 'title', {str}), + 'creator': ('author', 'nickname', {str}), + 'channel': ('channel', 'channelName', {str}), + 'channel_id': ('channel', 'channelCode', {str}), + 'duration': ('officialVideo', 'playTime', {int_or_none}), + 'view_count': ('officialVideo', 'playCount', {int_or_none}), + 'like_count': ('officialVideo', 'likeCount', {int_or_none}), + 'comment_count': ('officialVideo', 'commentCount', {int_or_none}), + 'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}), + 'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}), + })), + **traverse_obj(vod_data, ('meta', { + 'uploader_id': ('user', 'id', {str}), + 'uploader': ('user', 'name', {str}), + 'uploader_url': ('user', 'url', {url_or_none}), + 'thumbnail': ('cover', 'source', {url_or_none}), + }), expected_type=lambda x: x or None), + **NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]), + } diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py index de9ccc5383..febd3d28a5 100644 --- a/yt_dlp/extractor/arcpublishing.py +++ b/yt_dlp/extractor/arcpublishing.py @@ -144,7 +144,6 @@ class ArcPublishingIE(InfoExtractor): 'url': s_url, 'quality': -10, }) - self._sort_formats(formats) subtitles = {} for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []): diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index f294679eff..8660741ce4 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -40,14 +40,15 @@ class ARDMediathekBaseIE(InfoExtractor): 'This video is not available due to geoblocking', countries=self._GEO_COUNTRIES, metadata_available=True) - self._sort_formats(formats) - subtitles = {} subtitle_url = media_info.get('_subtitleUrl') if subtitle_url: subtitles['de'] = [{ 'ext': 'ttml', 'url': subtitle_url, + }, { + 'ext': 'vtt', + 'url': subtitle_url.replace('/ebutt/', '/webvtt/') + '.vtt', }] return { @@ -262,7 +263,6 @@ class ARDMediathekIE(ARDMediathekBaseIE): 'format_id': fid, 'url': furl, }) - self._sort_formats(formats) info = { 'formats': formats, } @@ -289,16 +289,16 @@ class ARDMediathekIE(ARDMediathekBaseIE): class ARDIE(InfoExtractor): _VALID_URL = r'(?Phttps?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P[^/?#&]+))\.html' _TESTS = [{ - # available till 7.01.2022 - 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html', - 'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1', + # available till 
7.12.2023 + 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html', + 'md5': 'a438f671e87a7eba04000336a119ccc4', 'info_dict': { - 'id': 'maischberger-die-woche-video100', - 'display_id': 'maischberger-die-woche-video100', + 'id': 'maischberger-video-424', + 'display_id': 'maischberger-video-424', 'ext': 'mp4', - 'duration': 3687.0, - 'title': 'maischberger. die woche vom 7. Januar 2021', - 'upload_date': '20210107', + 'duration': 4452.0, + 'title': 'maischberger am 07.12.2022', + 'upload_date': '20221207', 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -371,7 +371,6 @@ class ARDIE(InfoExtractor): continue f['url'] = format_url formats.append(f) - self._sort_formats(formats) _SUB_FORMATS = ( ('./dataTimedText', 'ttml'), diff --git a/yt_dlp/extractor/arkena.py b/yt_dlp/extractor/arkena.py index 9a0273e2c3..de36ec8868 100644 --- a/yt_dlp/extractor/arkena.py +++ b/yt_dlp/extractor/arkena.py @@ -136,7 +136,6 @@ class ArkenaIE(InfoExtractor): elif mime_type == 'application/vnd.ms-sstr+xml': formats.extend(self._extract_ism_formats( href, video_id, ism_id='mss', fatal=False)) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index c80ce22337..a493714d1f 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py @@ -73,7 +73,6 @@ class ArnesIE(InfoExtractor): 'width': int_or_none(media.get('width')), 'height': int_or_none(media.get('height')), }) - self._sort_formats(formats) channel = video.get('channel') or {} channel_id = channel.get('url') diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 25ecb42301..e3cc5afb05 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -65,6 +65,21 @@ class ArteTVIE(ArteTVBaseIE): }, { 'url': 'https://api.arte.tv/api/player/v2/config/de/LIVE', 'only_matching': True, + }, { + 'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/', + 'info_dict': { + 'id': '110203-006-A', + 'chapters': 'count:16', + 'description': 'md5:cf592f1df52fe52007e3f8eac813c084', + 'alt_title': 'Zaz', + 'title': 'Baloise Session 2022', + 'timestamp': 1668445200, + 'duration': 4054, + 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/ubQjmVCGyRx3hmBuZEK9QZ/940x530', + 'upload_date': '20221114', + 'ext': 'mp4', + }, + 'expected_warnings': ['geo restricted'] }] _GEO_BYPASS = True @@ -135,6 +150,7 @@ class ArteTVIE(ArteTVBaseIE): 'Video is not available in this language edition of Arte or broadcast rights expired', expected=True) formats, subtitles = [], {} + secondary_formats = [] for stream in config['data']['attributes']['streams']: # official player contains code like `e.get("versions")[0].eStat.ml5` stream_version = stream['versions'][0] @@ -152,22 +168,26 @@ class ArteTVIE(ArteTVBaseIE): not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles ))) + short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?') if stream['protocol'].startswith('HLS'): fmts, subs = self._extract_m3u8_formats_and_subtitles( stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False) for fmt in fmts: fmt.update({ - 'format_note': f'{stream_version.get("label", "unknown")} [{stream_version.get("shortLabel", "?")}]', + 'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]', 'language_preference': lang_pref, }) - formats.extend(fmts) + if any(map(short_label.startswith, ('cc', 'OGsub'))): + secondary_formats.extend(fmts) + else: + 
formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) elif stream['protocol'] in ('HTTPS', 'RTMP'): formats.append({ 'format_id': f'{stream["protocol"]}-{stream_version_code}', 'url': stream['url'], - 'format_note': f'{stream_version.get("label", "unknown")} [{stream_version.get("shortLabel", "?")}]', + 'format_note': f'{stream_version.get("label", "unknown")} [{short_label}]', 'language_preference': lang_pref, # 'ext': 'mp4', # XXX: may or may not be necessary, at least for HTTPS }) @@ -175,11 +195,8 @@ class ArteTVIE(ArteTVBaseIE): else: self.report_warning(f'Skipping stream with unknown protocol {stream["protocol"]}') - # TODO: chapters from stream['segments']? - # The JS also looks for chapters in config['data']['attributes']['chapters'], - # but I am yet to find a video having those - - self._sort_formats(formats) + formats.extend(secondary_formats) + self._remove_duplicate_formats(formats) metadata = config['data']['attributes']['metadata'] @@ -199,6 +216,11 @@ class ArteTVIE(ArteTVBaseIE): {'url': image['url'], 'id': image.get('caption')} for image in metadata.get('images') or [] if url_or_none(image.get('url')) ], + # TODO: chapters may also be in stream['segments']? + 'chapters': traverse_obj(config, ('data', 'attributes', 'chapters', 'elements', ..., { + 'start_time': 'startTime', + 'title': 'title', + })) or None, } @@ -296,9 +318,7 @@ class ArteTVCategoryIE(ArteTVBaseIE): if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )): items.append(video) - title = (self._og_search_title(webpage, default=None) - or self._html_search_regex(r']*>([^<]+)', default=None)) - title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url) + title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title, description=self._og_search_description(webpage, default=None)) diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py index 39d1f1cc56..a20e7f9889 100644 --- a/yt_dlp/extractor/atresplayer.py +++ b/yt_dlp/extractor/atresplayer.py @@ -84,7 +84,6 @@ class AtresPlayerIE(InfoExtractor): elif src_type == 'application/dash+xml': formats, subtitles = self._extract_mpd_formats( src, video_id, mpd_id='dash', fatal=False) - self._sort_formats(formats) heartbeat = episode.get('heartbeat') or {} omniture = episode.get('omniture') or {} diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py index 2311837e91..d6ed9e4958 100644 --- a/yt_dlp/extractor/atvat.py +++ b/yt_dlp/extractor/atvat.py @@ -49,7 +49,6 @@ class ATVAtIE(InfoExtractor): 'url': source_url, 'format_id': protocol, }) - self._sort_formats(formats) return { 'id': clip_id, diff --git a/yt_dlp/extractor/audimedia.py b/yt_dlp/extractor/audimedia.py index c1c4f67d0a..35114e5455 100644 --- a/yt_dlp/extractor/audimedia.py +++ b/yt_dlp/extractor/audimedia.py @@ -76,7 +76,6 @@ class AudiMediaIE(InfoExtractor): 'format_id': 'http-%s' % bitrate, }) formats.append(f) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/audioboom.py b/yt_dlp/extractor/audioboom.py index dc19a3874b..a23fcd2999 100644 --- a/yt_dlp/extractor/audioboom.py +++ b/yt_dlp/extractor/audioboom.py @@ -1,24 +1,33 @@ from .common import InfoExtractor -from ..utils import ( - clean_html, - float_or_none, -) +from ..utils import clean_html, float_or_none, traverse_obj, unescapeHTML class AudioBoomIE(InfoExtractor): _VALID_URL = 
r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P[0-9]+)' _TESTS = [{ 'url': 'https://audioboom.com/posts/7398103-asim-chaudhry', - 'md5': '7b00192e593ff227e6a315486979a42d', + 'md5': '4d68be11c9f9daf3dab0778ad1e010c3', 'info_dict': { 'id': '7398103', 'ext': 'mp3', 'title': 'Asim Chaudhry', - 'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc', + 'description': 'md5:0ed714ae0e81e5d9119cac2f618ad679', 'duration': 4000.99, 'uploader': 'Sue Perkins: An hour or so with...', 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins', } + }, { # Direct mp3-file link + 'url': 'https://audioboom.com/posts/8128496.mp3', + 'md5': 'e329edf304d450def95c7f86a9165ee1', + 'info_dict': { + 'id': '8128496', + 'ext': 'mp3', + 'title': 'TCRNo8 / DAILY 03 - In Control', + 'description': 'md5:44665f142db74858dfa21c5b34787948', + 'duration': 1689.7, + 'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race', + 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904', + } }, { 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', 'only_matching': True, @@ -26,45 +35,23 @@ class AudioBoomIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(f'https://audioboom.com/posts/{video_id}', video_id) - webpage = self._download_webpage(url, video_id) - - clip = None - - clip_store = self._parse_json( - self._html_search_regex( - r'data-new-clip-store=(["\'])(?P{.+?})\1', - webpage, 'clip store', default='{}', group='json'), - video_id, fatal=False) - if clip_store: - clips = clip_store.get('clips') - if clips and isinstance(clips, list) and isinstance(clips[0], dict): - clip = clips[0] - - def from_clip(field): - if clip: - return clip.get(field) - - audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property( - 'audio', webpage, 'audio url') - title = from_clip('title') or self._html_search_meta( - ['og:title', 'og:audio:title', 'audio_title'], webpage) - description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage) - - duration = float_or_none(from_clip('duration') or self._html_search_meta( - 'weibo:audio:duration', webpage)) - - uploader = from_clip('author') or self._html_search_meta( - ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader') - uploader_url = from_clip('author_url') or self._html_search_meta( - 'audioboo:channel', webpage, 'uploader url') + clip_store = self._search_json( + r'data-react-class="V5DetailPagePlayer"\s*data-react-props=["\']', + webpage, 'clip store', video_id, fatal=False, transform_source=unescapeHTML) + clip = traverse_obj(clip_store, ('clips', 0), expected_type=dict) or {} return { 'id': video_id, - 'url': audio_url, - 'title': title, - 'description': description, - 'duration': duration, - 'uploader': uploader, - 'uploader_url': uploader_url, + 'url': clip.get('clipURLPriorToLoading') or self._og_search_property('audio', webpage, 'audio url'), + 'title': clip.get('title') or self._html_search_meta(['og:title', 'og:audio:title', 'audio_title'], webpage), + 'description': (clip.get('description') or clean_html(clip.get('formattedDescription')) + or self._og_search_description(webpage)), + 'duration': float_or_none(clip.get('duration') or self._html_search_meta('weibo:audio:duration', webpage)), + 'uploader': clip.get('author') or self._html_search_meta( + ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader'), 
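The AudioBoom rewrite above replaces the old hand-rolled `data-new-clip-store` parsing with a single `_search_json` over the player's `data-react-props` attribute, HTML-unescaped via `transform_source=unescapeHTML`. A stdlib-only sketch of that attribute-embedded-JSON pattern (the sample markup and helper name are illustrative):

```python
import html
import json
import re

# Escaped JSON embedded in an HTML attribute, as the audioboom player emits it
# (sample markup is illustrative)
MARKUP = ('<div data-react-class="V5DetailPagePlayer" '
          'data-react-props="{&quot;clips&quot;:[{&quot;title&quot;:&quot;Asim Chaudhry&quot;,'
          '&quot;duration&quot;:4000.99}]}"></div>')


def search_react_props(markup):
    # Grab the attribute value, undo HTML entity escaping, then parse as JSON
    m = re.search(r'data-react-props="([^"]*)"', markup)
    return json.loads(html.unescape(m.group(1))) if m else {}


clip = (search_react_props(MARKUP).get('clips') or [{}])[0]
print(clip['title'], clip['duration'])  # Asim Chaudhry 4000.99
```

Parsing the page's own state object once is what lets the new return dict fall back field-by-field (`clip.get(...) or self._html_search_meta(...)`) instead of re-scraping the markup for each value.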
+ 'uploader_url': clip.get('author_url') or self._html_search_regex( + r'
\s*\w+)''' IE_NAME = 'audius:track' IE_DESC = 'Audius track ID or API link. Prepend with "audius:"' @@ -243,7 +243,7 @@ class AudiusPlaylistIE(AudiusBaseIE): playlist_data.get('description')) -class AudiusProfileIE(AudiusPlaylistIE): +class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete IE IE_NAME = 'audius:artist' IE_DESC = 'Audius.co profile/artist pages' _VALID_URL = r'https?://(?:www)?audius\.co/(?P[^\/]+)/?(?:[?#]|$)' diff --git a/yt_dlp/extractor/aws.py b/yt_dlp/extractor/aws.py index c2b22922be..eb831a1530 100644 --- a/yt_dlp/extractor/aws.py +++ b/yt_dlp/extractor/aws.py @@ -6,7 +6,7 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse_urlencode -class AWSIE(InfoExtractor): +class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor _AWS_ALGORITHM = 'AWS4-HMAC-SHA256' _AWS_REGION = 'us-east-1' diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index 92f567c5db..c873425656 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -80,8 +80,6 @@ class BanByeIE(BanByeBaseIE): 'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4', } for quality in data['quality']] - self._sort_formats(formats) - return { 'id': video_id, 'title': data.get('title'), diff --git a/yt_dlp/extractor/bandaichannel.py b/yt_dlp/extractor/bandaichannel.py index 2e32333761..d7fcf44bd9 100644 --- a/yt_dlp/extractor/bandaichannel.py +++ b/yt_dlp/extractor/bandaichannel.py @@ -1,8 +1,8 @@ -from .brightcove import BrightcoveNewIE +from .brightcove import BrightcoveNewBaseIE from ..utils import extract_attributes -class BandaiChannelIE(BrightcoveNewIE): +class BandaiChannelIE(BrightcoveNewBaseIE): IE_NAME = 'bandaichannel' _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P\d+/\d+)' _TESTS = [{ diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 2dae49e770..e89b3a69b3 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -5,16 +5,16 @@ import time from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + KNOWN_EXTENSIONS, ExtractorError, float_or_none, int_or_none, - KNOWN_EXTENSIONS, parse_filesize, str_or_none, try_get, - update_url_query, unified_strdate, unified_timestamp, + update_url_query, url_or_none, urljoin, ) @@ -29,11 +29,18 @@ class BandcampIE(InfoExtractor): 'info_dict': { 'id': '1812978515', 'ext': 'mp3', - 'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭", + 'title': 'youtube-dl "\'/\\ä↭ - youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭', 'duration': 9.8485, - 'uploader': 'youtube-dl "\'/\\ä↭', + 'uploader': 'youtube-dl "\'/\\ä↭', 'upload_date': '20121129', 'timestamp': 1354224127, + 'track': 'youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭', + 'album_artist': 'youtube-dl "\'/\\ä↭', + 'track_id': '1812978515', + 'artist': 'youtube-dl "\'/\\ä↭', + 'uploader_url': 'https://youtube-dl.bandcamp.com', + 'uploader_id': 'youtube-dl', + 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', }, '_skip': 'There is a limit of 200 free downloads / month for the test song' }, { @@ -41,7 +48,8 @@ class BandcampIE(InfoExtractor): 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', 'info_dict': { 'id': '2650410135', - 'ext': 'aiff', + 'ext': 'm4a', + 'acodec': r're:[fa]lac', 'title': 'Ben Prunty - Lanius (Battle)', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Ben Prunty', @@ -54,7 +62,10 @@ class BandcampIE(InfoExtractor): 'track_number': 
1, 'track_id': '2650410135', 'artist': 'Ben Prunty', + 'album_artist': 'Ben Prunty', 'album': 'FTL: Advanced Edition Soundtrack', + 'uploader_url': 'https://benprunty.bandcamp.com', + 'uploader_id': 'benprunty', }, }, { # no free download, mp3 128 @@ -75,7 +86,34 @@ class BandcampIE(InfoExtractor): 'track_number': 5, 'track_id': '2584466013', 'artist': 'Mastodon', + 'album_artist': 'Mastodon', 'album': 'Call of the Mastodon', + 'uploader_url': 'https://relapsealumni.bandcamp.com', + 'uploader_id': 'relapsealumni', + }, + }, { + # track from compilation album (artist/album_artist difference) + 'url': 'https://diskotopia.bandcamp.com/track/safehouse', + 'md5': '19c5337bca1428afa54129f86a2f6a69', + 'info_dict': { + 'id': '1978174799', + 'ext': 'mp3', + 'title': 'submerse - submerse - Safehouse', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'submerse', + 'timestamp': 1480779297, + 'upload_date': '20161203', + 'release_timestamp': 1481068800, + 'release_date': '20161207', + 'duration': 154.066, + 'track': 'submerse - Safehouse', + 'track_number': 3, + 'track_id': '1978174799', + 'artist': 'submerse', + 'album_artist': 'Diskotopia', + 'album': 'DSK F/W 2016-2017 Free Compilation', + 'uploader_url': 'https://diskotopia.bandcamp.com', + 'uploader_id': 'diskotopia', }, }] @@ -121,6 +159,9 @@ class BandcampIE(InfoExtractor): embed = self._extract_data_attr(webpage, title, 'embed', False) current = tralbum.get('current') or {} artist = embed.get('artist') or current.get('artist') or tralbum.get('artist') + album_artist = self._html_search_regex( + r'
<h3 class="albumTitle">
[\S\s]*?by\s*\s*\s*([^>]+?)\s*', + webpage, 'album artist', fatal=False) timestamp = unified_timestamp( current.get('publish_date') or tralbum.get('album_publish_date')) @@ -184,8 +225,6 @@ class BandcampIE(InfoExtractor): 'acodec': format_id.split('-')[0], }) - self._sort_formats(formats) - title = '%s - %s' % (artist, track) if artist else track if not duration: @@ -207,11 +246,12 @@ class BandcampIE(InfoExtractor): 'track_id': track_id, 'artist': artist, 'album': embed.get('album_title'), + 'album_artist': album_artist, 'formats': formats, } -class BandcampAlbumIE(BandcampIE): +class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE IE_NAME = 'Bandcamp:album' _VALID_URL = r'https?://(?:(?P[^.]+)\.)?bandcamp\.com/album/(?P[^/?#&]+)' @@ -314,7 +354,7 @@ class BandcampAlbumIE(BandcampIE): } -class BandcampWeeklyIE(BandcampIE): +class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE IE_NAME = 'Bandcamp:weekly' _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P\d+)' _TESTS = [{ @@ -363,7 +403,6 @@ class BandcampWeeklyIE(BandcampIE): 'ext': ext, 'vcodec': 'none', }) - self._sort_formats(formats) title = show.get('audio_title') or 'Bandcamp Weekly' subtitle = show.get('subtitle') diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index ec9bdd8cad..51e7220578 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -135,7 +135,6 @@ query GetCommentReplies($id: String!) { formats.extend(self._extract_m3u8_formats( video_info.get('streamUrl'), video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', live=True)) - self._sort_formats(formats) return { 'id': video_id, diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 9a0a4414e7..9d28e70a3a 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -575,8 +575,6 @@ class BBCCoUkIE(InfoExtractor): else: programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id) - self._sort_formats(formats) - return { 'id': programme_id, 'title': title, @@ -588,10 +586,15 @@ class BBCCoUkIE(InfoExtractor): } -class BBCIE(BBCCoUkIE): +class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE IE_NAME = 'bbc' IE_DESC = 'BBC' - _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P[^/#?]+)' + _VALID_URL = r'''(?x) + https?://(?:www\.)?(?: + bbc\.(?:com|co\.uk)| + bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd\.onion| + bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad\.onion + )/(?:[^/]+/)+(?P[^/#?]+)''' _MEDIA_SETS = [ 'pc', @@ -841,6 +844,12 @@ class BBCIE(BBCCoUkIE): 'upload_date': '20190604', 'categories': ['Psychology'], }, + }, { # onion routes + 'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576', + 'only_matching': True, + }, { + 'url': 'https://www.bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad.onion/sport/av/football/63195681', + 'only_matching': True, }] @classmethod @@ -879,7 +888,6 @@ class BBCIE(BBCCoUkIE): def _extract_from_playlist_sxml(self, url, playlist_id, timestamp): programme_id, title, description, duration, formats, subtitles = \ self._process_legacy_playlist_url(url, playlist_id) - self._sort_formats(formats) return { 'id': programme_id, 'title': title, @@ -898,12 +906,8 @@ class BBCIE(BBCCoUkIE): json_ld_info = self._search_json_ld(webpage, playlist_id, default={}) timestamp = json_ld_info.get('timestamp') - playlist_title = json_ld_info.get('title') - 
if not playlist_title: - playlist_title = (self._og_search_title(webpage, default=None) - or self._html_extract_title(webpage, 'playlist title', default=None)) - if playlist_title: - playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip() + playlist_title = json_ld_info.get('title') or re.sub( + r'(.+)\s*-\s*BBC.*?$', r'\1', self._generic_title('', webpage, default='')).strip() or None playlist_description = json_ld_info.get( 'description') or self._og_search_description(webpage, default=None) @@ -947,7 +951,6 @@ class BBCIE(BBCCoUkIE): duration = int_or_none(items[0].get('duration')) programme_id = items[0].get('vpid') formats, subtitles = self._download_media_selector(programme_id) - self._sort_formats(formats) entries.append({ 'id': programme_id, 'title': title, @@ -984,7 +987,6 @@ class BBCIE(BBCCoUkIE): continue raise if entry: - self._sort_formats(entry['formats']) entries.append(entry) if entries: @@ -1008,7 +1010,6 @@ class BBCIE(BBCCoUkIE): if programme_id: formats, subtitles = self._download_media_selector(programme_id) - self._sort_formats(formats) # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star) digital_data = self._parse_json( self._search_regex( @@ -1040,7 +1041,6 @@ class BBCIE(BBCCoUkIE): if version_id: title = smp_data['title'] formats, subtitles = self._download_media_selector(version_id) - self._sort_formats(formats) image_url = smp_data.get('holdingImageURL') display_date = init_data.get('displayDate') topic_title = init_data.get('topicTitle') @@ -1082,7 +1082,6 @@ class BBCIE(BBCCoUkIE): continue title = lead_media.get('title') or self._og_search_title(webpage) formats, subtitles = self._download_media_selector(programme_id) - self._sort_formats(formats) description = lead_media.get('summary') uploader = lead_media.get('masterBrand') uploader_id = lead_media.get('mid') @@ -1111,7 +1110,6 @@ class BBCIE(BBCCoUkIE): if current_programme and programme_id and current_programme.get('type') == 'playable_item': title = current_programme.get('titles', {}).get('tertiary') or playlist_title formats, subtitles = self._download_media_selector(programme_id) - self._sort_formats(formats) synopses = current_programme.get('synopses') or {} network = current_programme.get('network') or {} duration = int_or_none( @@ -1144,7 +1142,6 @@ class BBCIE(BBCCoUkIE): clip_title = clip.get('title') if clip_vpid and clip_title: formats, subtitles = self._download_media_selector(clip_vpid) - self._sort_formats(formats) return { 'id': clip_vpid, 'title': clip_title, @@ -1166,7 +1163,6 @@ class BBCIE(BBCCoUkIE): if not programme_id: continue formats, subtitles = self._download_media_selector(programme_id) - self._sort_formats(formats) entries.append({ 'id': programme_id, 'title': playlist_title, @@ -1198,7 +1194,6 @@ class BBCIE(BBCCoUkIE): if not (item_id and item_title): continue formats, subtitles = self._download_media_selector(item_id) - self._sort_formats(formats) item_desc = None blocks = try_get(media, lambda x: x['summary']['blocks'], list) if blocks: @@ -1299,7 +1294,6 @@ class BBCIE(BBCCoUkIE): formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id) if not formats and not self.get_param('ignore_no_formats'): continue - self._sort_formats(formats) video_id = media_meta.get('externalId') if not video_id: diff --git a/yt_dlp/extractor/beatbump.py b/yt_dlp/extractor/beatbump.py new file mode 100644 index 0000000000..0f40ebe7ac --- /dev/null +++ b/yt_dlp/extractor/beatbump.py @@ -0,0 +1,101 @@ +from .common 
import InfoExtractor +from .youtube import YoutubeIE, YoutubeTabIE + + +class BeatBumpVideoIE(InfoExtractor): + _VALID_URL = r'https://beatbump\.ml/listen\?id=(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://beatbump.ml/listen?id=MgNrAu2pzNs', + 'md5': '5ff3fff41d3935b9810a9731e485fe66', + 'info_dict': { + 'id': 'MgNrAu2pzNs', + 'ext': 'mp4', + 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA', + 'artist': 'Stephen', + 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp', + 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA', + 'upload_date': '20190312', + 'categories': ['Music'], + 'playable_in_embed': True, + 'duration': 169, + 'like_count': int, + 'alt_title': 'Voyeur Girl', + 'view_count': int, + 'track': 'Voyeur Girl', + 'uploader': 'Stephen - Topic', + 'title': 'Voyeur Girl', + 'channel_follower_count': int, + 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA', + 'age_limit': 0, + 'availability': 'public', + 'live_status': 'not_live', + 'album': 'it\'s too much love to know my dear', + 'channel': 'Stephen', + 'comment_count': int, + 'description': 'md5:7ae382a65843d6df2685993e90a8628f', + 'tags': 'count:11', + 'creator': 'Stephen', + 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA', + } + }] + + def _real_extract(self, url): + id_ = self._match_id(url) + return self.url_result(f'https://music.youtube.com/watch?v={id_}', YoutubeIE, id_) + + +class BeatBumpPlaylistIE(InfoExtractor): + _VALID_URL = r'https://beatbump\.ml/(?:release\?id=|artist/|playlist/)(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://beatbump.ml/release?id=MPREb_gTAcphH99wE', + 'playlist_count': 50, + 'info_dict': { + 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0', + 'availability': 'unlisted', + 'view_count': int, + 'title': 'Album - Royalty Free Music Library V2 (50 Songs)', + 'description': '', + 'tags': [], + 'modified_date': '20221223', + } + }, { + 'url': 'https://beatbump.ml/artist/UC_aEa8K-EOJ3D6gOs7HcyNg', + 'playlist_mincount': 1, + 'params': {'flatplaylist': True}, + 'info_dict': { + 'id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'uploader_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg', + 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg', + 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'channel_follower_count': int, + 'title': 'NoCopyrightSounds - Videos', + 'uploader': 'NoCopyrightSounds', + 'description': 'md5:cd4fd53d81d363d05eee6c1b478b491a', + 'channel': 'NoCopyrightSounds', + 'tags': 'count:12', + 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + }, + }, { + 'url': 'https://beatbump.ml/playlist/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 'playlist_mincount': 1, + 'params': {'flatplaylist': True}, + 'info_dict': { + 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds', + 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!', + 'view_count': int, + 'channel_url': 'https://www.youtube.com/@NoCopyrightSounds', + 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'title': 'NCS : All Releases 💿', + 'uploader': 'NoCopyrightSounds', + 'availability': 'public', + 'channel': 'NoCopyrightSounds', + 'tags': [], + 'modified_date': '20221225', + 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + } + }] + + def _real_extract(self, url): + id_ = self._match_id(url) + return self.url_result(f'https://music.youtube.com/browse/{id_}', YoutubeTabIE, id_) diff --git a/yt_dlp/extractor/beatport.py b/yt_dlp/extractor/beatport.py index f71f1f3080..0aecbd089d 100644 
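The new `BeatBumpVideoIE`/`BeatBumpPlaylistIE` pair above never scrapes beatbump.ml itself: each extractor rewrites the frontend URL onto the backing `music.youtube.com` resource and delegates via `url_result` to the existing `YoutubeIE`/`YoutubeTabIE`. The mapping itself is trivial; a sketch of just that URL translation (the helper name is illustrative):

```python
import re


def beatbump_to_ytmusic(url):
    # listen?id=<video id> maps to watch?v=...; releases/artists/playlists browse by ID
    video = re.match(r'https://beatbump\.ml/listen\?id=(?P<id>[\w-]+)', url)
    if video:
        return f'https://music.youtube.com/watch?v={video.group("id")}'
    browse = re.match(r'https://beatbump\.ml/(?:release\?id=|artist/|playlist/)(?P<id>[\w-]+)', url)
    if browse:
        return f'https://music.youtube.com/browse/{browse.group("id")}'
    return None


assert beatbump_to_ytmusic('https://beatbump.ml/listen?id=MgNrAu2pzNs') == \
    'https://music.youtube.com/watch?v=MgNrAu2pzNs'
```

Thin wrapper extractors like this inherit all of the YouTube extractors' format and metadata handling for free, which is why the test cases can assert rich fields despite the extractor being a dozen lines long.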
--- a/yt_dlp/extractor/beatport.py +++ b/yt_dlp/extractor/beatport.py @@ -74,7 +74,6 @@ class BeatportIE(InfoExtractor): fmt['abr'] = 96 fmt['asr'] = 44100 formats.append(fmt) - self._sort_formats(formats) images = [] for name, info in track['images'].items(): diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index 5957e370ab..52ee68eca7 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -76,8 +76,6 @@ class BeegIE(InfoExtractor): f['height'] = height formats.extend(current_formats) - self._sort_formats(formats) - return { 'id': video_id, 'display_id': first_fact.get('id'), diff --git a/yt_dlp/extractor/berufetv.py b/yt_dlp/extractor/berufetv.py new file mode 100644 index 0000000000..8160cbd9a7 --- /dev/null +++ b/yt_dlp/extractor/berufetv.py @@ -0,0 +1,70 @@ +from .common import InfoExtractor +from ..utils import float_or_none, mimetype2ext, traverse_obj + + +class BerufeTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?web\.arbeitsagentur\.de/berufetv/[^?#]+/film;filmId=(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://web.arbeitsagentur.de/berufetv/studienberufe/wirtschaftswissenschaften/wirtschaftswissenschaften-volkswirtschaft/film;filmId=DvKC3DUpMKvUZ_6fEnfg3u', + 'md5': '041b6432ec8e6838f84a5c30f31cc795', + 'info_dict': { + 'id': 'DvKC3DUpMKvUZ_6fEnfg3u', + 'ext': 'mp4', + 'title': 'Volkswirtschaftslehre', + 'description': 'md5:6bd87d0c63163480a6489a37526ee1c1', + 'categories': ['Studien­beruf'], + 'tags': ['Studienfilm'], + 'duration': 602.440, + 'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + movie_metadata = self._download_json( + 'https://rest.arbeitsagentur.de/infosysbub/berufetv/pc/v1/film-metadata', + video_id, 'Downloading JSON metadata', + headers={'X-API-Key': '79089773-4892-4386-86e6-e8503669f426'}, fatal=False) + + meta = traverse_obj( + movie_metadata, ('metadaten', lambda _, i: video_id == i['miId']), + get_all=False, default={}) + + video = self._download_json( + f'https://d.video-cdn.net/play/player/8YRzUk6pTzmBdrsLe9Y88W/video/{video_id}', + video_id, 'Downloading video JSON') + + formats, subtitles = [], {} + for key, source in video['videoSources']['html'].items(): + if key == 'auto': + fmts, subs = self._extract_m3u8_formats_and_subtitles(source[0]['source'], video_id) + formats += fmts + subtitles = subs + else: + formats.append({ + 'url': source[0]['source'], + 'ext': mimetype2ext(source[0]['mimeType']), + 'format_id': key, + }) + + for track in video.get('videoTracks') or []: + if track.get('type') != 'SUBTITLES': + continue + subtitles.setdefault(track['language'], []).append({ + 'url': track['source'], + 'name': track.get('label'), + 'ext': 'vtt' + }) + + return { + 'id': video_id, + 'title': meta.get('titel') or traverse_obj(video, ('videoMetaData', 'title')), + 'description': meta.get('beschreibung'), + 'thumbnail': meta.get('thumbnail') or f'https://asset-out-cdn.video-cdn.net/private/videos/{video_id}/thumbnails/active', + 'duration': float_or_none(video.get('duration'), scale=1000), + 'categories': [meta['kategorie']] if meta.get('kategorie') else None, + 'tags': meta.get('themengebiete'), + 'subtitles': subtitles, + 'formats': formats, + } diff --git a/yt_dlp/extractor/bfmtv.py b/yt_dlp/extractor/bfmtv.py index 48526e38b6..a7be0e67de 100644 --- a/yt_dlp/extractor/bfmtv.py +++ b/yt_dlp/extractor/bfmtv.py @@ -5,7 +5,7 @@ from ..utils 
import extract_attributes class BFMTVBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://(?:www\.)?bfmtv\.com/' + _VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/' _VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P\d{12})\.html' _VIDEO_BLOCK_REGEX = r'(]+class="video_block"[^>]*>)' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' @@ -31,6 +31,9 @@ class BFMTVIE(BFMTVBaseIE): 'uploader_id': '876450610001', 'upload_date': '20201002', 'timestamp': 1601629620, + 'duration': 44.757, + 'tags': ['bfmactu', 'politique'], + 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876450610001/5041f4c1-bc48-4af8-a256-1b8300ad8ef0/cf2f9114-e8e2-4494-82b4-ab794ea4bc7d/1920x1080/match/image.jpg', }, }] @@ -42,7 +45,7 @@ class BFMTVIE(BFMTVBaseIE): return self._brightcove_url_result(video_block['videoid'], video_block) -class BFMTVLiveIE(BFMTVIE): +class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE IE_NAME = 'bfmtv:live' _VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P(?:[^/]+/)?en-direct)' _TESTS = [{ @@ -81,6 +84,20 @@ class BFMTVArticleIE(BFMTVBaseIE): }, { 'url': 'https://www.bfmtv.com/sante/covid-19-oui-le-vaccin-de-pfizer-distribue-en-france-a-bien-ete-teste-sur-des-personnes-agees_AN-202101060275.html', 'only_matching': True, + }, { + 'url': 'https://rmc.bfmtv.com/actualites/societe/transports/ce-n-est-plus-tout-rentable-le-bioethanol-e85-depasse-1eu-le-litre-des-automobilistes-regrettent_AV-202301100268.html', + 'info_dict': { + 'id': '6318445464112', + 'ext': 'mp4', + 'title': 'Le plein de bioéthanol fait de plus en plus mal à la pompe', + 'description': None, + 'uploader_id': '876630703001', + 'upload_date': '20230110', + 'timestamp': 1673341692, + 'duration': 109.269, + 'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'], + 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg' + } }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/bigflix.py b/yt_dlp/extractor/bigflix.py index 6b2797ca00..02d1ba0e3f 100644 --- a/yt_dlp/extractor/bigflix.py +++ b/yt_dlp/extractor/bigflix.py @@ -63,8 +63,6 @@ class BigflixIE(InfoExtractor): 'url': decode_url(file_url), }) - self._sort_formats(formats) - description = self._html_search_meta('description', webpage) return { diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 2c29bf3ce4..c344397792 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1,424 +1,118 @@ import base64 -import hashlib -import itertools import functools -import re +import itertools import math +import urllib.error +import urllib.parse from .common import InfoExtractor, SearchInfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urlparse, - compat_urllib_parse_urlparse -) +from ..dependencies import Cryptodome from ..utils import ( ExtractorError, + GeoRestrictedError, + InAdvancePagedList, + OnDemandPagedList, filter_dict, - int_or_none, float_or_none, + format_field, + int_or_none, + make_archive_id, + merge_dicts, mimetype2ext, - parse_iso8601, - qualities, - traverse_obj, parse_count, + parse_qs, + qualities, smuggle_url, srt_subtitles_timecode, str_or_none, - strip_jsonp, - unified_timestamp, + traverse_obj, unsmuggle_url, - urlencode_postdata, url_or_none, - OnDemandPagedList + urlencode_postdata, ) -class BiliBiliIE(InfoExtractor): - _VALID_URL 
= r'''(?x) - https?:// - (?:(?:www|bangumi)\.)? - bilibili\.(?:tv|com)/ - (?: - (?: - video/[aA][vV]| - anime/(?P\d+)/play\# - )(?P\d+)| - (s/)?video/[bB][vV](?P[^/?#&]+) - ) - (?:/?\?p=(?P\d+))? - ''' - - _TESTS = [{ - 'url': 'http://www.bilibili.com/video/av1074402/', - 'md5': '7ac275ec84a99a6552c5d229659a0fe1', - 'info_dict': { - 'id': '1074402_part1', - 'ext': 'mp4', - 'title': '【金坷垃】金泡沫', - 'uploader_id': '156160', - 'uploader': '菊子桑', - 'upload_date': '20140420', - 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', - 'timestamp': 1398012678, - 'tags': ['顶上去报复社会', '该来的总会来的', '金克拉是检验歌曲的唯一标准', '坷垃教主', '金坷垃', '邓紫棋', '治愈系坷垃'], - 'bv_id': 'BV11x411K7CN', - 'cid': '1554319', - 'thumbnail': 'http://i2.hdslb.com/bfs/archive/c79a8cf0347cd7a897c53a2f756e96aead128e8c.jpg', - 'duration': 308.36, - }, - }, { - # Tested in BiliBiliBangumiIE - 'url': 'http://bangumi.bilibili.com/anime/1869/play#40062', - 'only_matching': True, - }, { - # bilibili.tv - 'url': 'http://www.bilibili.tv/video/av1074402/', - 'only_matching': True, - }, { - 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643', - 'md5': '3f721ad1e75030cc06faf73587cfec57', - 'info_dict': { - 'id': '100643_part1', - 'ext': 'mp4', - 'title': 'CHAOS;CHILD', - 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...', - }, - 'skip': 'Geo-restricted to China', - }, { - 'url': 'http://www.bilibili.com/video/av8903802/', - 'info_dict': { - 'id': '8903802_part1', - 'ext': 'mp4', - 'title': '阿滴英文|英文歌分享#6 "Closer', - 'upload_date': '20170301', - 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文', - 'timestamp': 1488382634, - 'uploader_id': '65880958', - 'uploader': '阿滴英文', - 'thumbnail': 'http://i2.hdslb.com/bfs/archive/49267ce20bc246be6304bf369a3ded0256854c23.jpg', - 'cid': '14694589', - 'duration': 554.117, - 'bv_id': 'BV13x41117TL', - 'tags': ['人文', '英语', '文化', '公开课', '阿滴英文'], - }, - 'params': { - 'skip_download': True, - }, - }, { - # new BV video id format - 'url': 'https://www.bilibili.com/video/BV1JE411F741', - 'only_matching': True, - }, { - # Anthology - 'url': 'https://www.bilibili.com/video/BV1bK411W797', - 'info_dict': { - 'id': 'BV1bK411W797', - 'title': '物语中的人物是如何吐槽自己的OP的' - }, - 'playlist_count': 17, - }, { - # Correct matching of single and double quotes in title - 'url': 'https://www.bilibili.com/video/BV1NY411E7Rx/', - 'info_dict': { - 'id': '255513412_part1', - 'ext': 'mp4', - 'title': 'Vid"eo" Te\'st', - 'cid': '570602418', - 'thumbnail': 'http://i2.hdslb.com/bfs/archive/0c0de5a90b6d5b991b8dcc6cde0afbf71d564791.jpg', - 'upload_date': '20220408', - 'timestamp': 1649436552, - 'description': 'Vid"eo" Te\'st', - 'uploader_id': '1630758804', - 'bv_id': 'BV1NY411E7Rx', - 'duration': 60.394, - 'uploader': 'bili_31244483705', - 'tags': ['VLOG'], - }, - 'params': { - 'skip_download': True, - }, - }] - - _APP_KEY = 'iVGUTjsxvpLeuDCf' - _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt' - - def _report_error(self, result): - if 'message' in result: - raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True) - elif 'code' in result: - raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True) - else: - raise ExtractorError('Can\'t extract Bangumi episode ID') - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - - mobj = self._match_valid_url(url) - video_id = mobj.group('id_bv') or mobj.group('id') - - av_id, bv_id = self._get_video_id_set(video_id, 
mobj.group('id_bv') is not None) - video_id = av_id - - info = {} - anime_id = mobj.group('anime_id') - page_id = mobj.group('page') - webpage = self._download_webpage(url, video_id) - - # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself. - # If the video has no page argument, check to see if it's an anthology - if page_id is None: - if not self.get_param('noplaylist'): - r = self._extract_anthology_entries(bv_id, video_id, webpage) - if r is not None: - self.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id) - return r - else: - self.to_screen('Downloading just video %s because of --no-playlist' % video_id) - - if 'anime/' not in url: - cid = self._search_regex( - r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid', - default=None - ) or self._search_regex( - r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid', - default=None - ) or compat_parse_qs(self._search_regex( - [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', - r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)', - r']+src="https://secure\.bilibili\.com/secure,([^"]+)"'], - webpage, 'player parameters'))['cid'][0] - else: - if 'no_bangumi_tip' not in smuggled_data: - self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run yt-dlp with %s' % ( - video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id))) - headers = { - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'Referer': url - } - headers.update(self.geo_verification_headers()) - - js = self._download_json( - 'http://bangumi.bilibili.com/web_api/get_source', video_id, - data=urlencode_postdata({'episode_id': video_id}), - headers=headers) - if 'result' not in js: - self._report_error(js) - cid = js['result']['cid'] - - headers = { - 'Accept': 'application/json', - 'Referer': url +class BilibiliBaseIE(InfoExtractor): + def extract_formats(self, play_info): + format_names = { + r['quality']: traverse_obj(r, 'new_description', 'display_desc') + for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality'])) } - headers.update(self.geo_verification_headers()) - video_info = self._parse_json( - self._search_regex(r'window.__playinfo__\s*=\s*({.+?})', webpage, 'video info', default=None) or '{}', - video_id, fatal=False) - video_info = video_info.get('data') or {} - - durl = traverse_obj(video_info, ('dash', 'video')) - audios = traverse_obj(video_info, ('dash', 'audio')) or [] - flac_audio = traverse_obj(video_info, ('dash', 'flac', 'audio')) + audios = traverse_obj(play_info, ('dash', 'audio', ...)) + flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio')) if flac_audio: audios.append(flac_audio) - entries = [] + formats = [{ + 'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'), + 'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')), + 'acodec': audio.get('codecs'), + 'vcodec': 'none', + 'tbr': float_or_none(audio.get('bandwidth'), scale=1000), + 'filesize': int_or_none(audio.get('size')) + } for audio in audios] - RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4') - for num, rendition in enumerate(RENDITIONS, start=1): - payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition) - sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest() - if not video_info: - video_info = self._download_json( - 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign), - video_id, note='Downloading video 
info page', - headers=headers, fatal=num == len(RENDITIONS)) - if not video_info: - continue + formats.extend({ + 'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'), + 'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')), + 'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')), + 'width': int_or_none(video.get('width')), + 'height': int_or_none(video.get('height')), + 'vcodec': video.get('codecs'), + 'acodec': 'none' if audios else None, + 'tbr': float_or_none(video.get('bandwidth'), scale=1000), + 'filesize': int_or_none(video.get('size')), + 'quality': int_or_none(video.get('id')), + 'format': format_names.get(video.get('id')), + } for video in traverse_obj(play_info, ('dash', 'video', ...))) - if not durl and 'durl' not in video_info: - if num < len(RENDITIONS): - continue - self._report_error(video_info) + missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality'))) + if missing_formats: + self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; ' + f'you have to login or become premium member to download them. {self._login_hint()}') - formats = [] - for idx, durl in enumerate(durl or video_info['durl']): - formats.append({ - 'url': durl.get('baseUrl') or durl.get('base_url') or durl.get('url'), - 'ext': mimetype2ext(durl.get('mimeType') or durl.get('mime_type')), - 'fps': int_or_none(durl.get('frameRate') or durl.get('frame_rate')), - 'width': int_or_none(durl.get('width')), - 'height': int_or_none(durl.get('height')), - 'vcodec': durl.get('codecs'), - 'acodec': 'none' if audios else None, - 'tbr': float_or_none(durl.get('bandwidth'), scale=1000), - 'filesize': int_or_none(durl.get('size')), - }) - for backup_url in traverse_obj(durl, 'backup_url', expected_type=list) or []: - formats.append({ - 'url': backup_url, - 'quality': -2 if 'hd.mp4' in backup_url else -3, - }) + return formats - for audio in audios: - formats.append({ - 'url': audio.get('baseUrl') or audio.get('base_url') or audio.get('url'), - 'ext': mimetype2ext(audio.get('mimeType') or audio.get('mime_type')), - 'fps': int_or_none(audio.get('frameRate') or audio.get('frame_rate')), - 'width': int_or_none(audio.get('width')), - 'height': int_or_none(audio.get('height')), - 'acodec': audio.get('codecs'), - 'vcodec': 'none', - 'tbr': float_or_none(audio.get('bandwidth'), scale=1000), - 'filesize': int_or_none(audio.get('size')) - }) - for backup_url in traverse_obj(audio, 'backup_url', expected_type=list) or []: - formats.append({ - 'url': backup_url, - # backup URLs have lower priorities - 'quality': -3, - }) + def json2srt(self, json_data): + srt_data = '' + for idx, line in enumerate(json_data.get('body') or []): + srt_data += (f'{idx + 1}\n' + f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n' + f'{line["content"]}\n\n') + return srt_data - info.update({ - 'id': video_id, - 'duration': float_or_none(durl.get('length'), 1000), - 'formats': formats, - 'http_headers': { - 'Referer': url, - }, - }) - break - - self._sort_formats(formats) - - title = self._html_search_regex(( - r']+title=(["])(?P[^"]+)', - r']+title=([\'])(?P[^\']+)', - r'(?s)]*>(?P.+?)
</h1>
', - self._meta_regex('title') - ), webpage, 'title', group='content', fatal=False) - - # Get part title for anthologies - if page_id is not None: - # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video. - part_info = traverse_obj(self._download_json( - f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp', - video_id, note='Extracting videos in anthology'), 'data', expected_type=list) - title = title if len(part_info) == 1 else traverse_obj(part_info, (int(page_id) - 1, 'part')) or title - - description = self._html_search_meta('description', webpage) - timestamp = unified_timestamp(self._html_search_regex( - r']+datetime="([^"]+)"', webpage, 'upload time', - default=None) or self._html_search_meta( - 'uploadDate', webpage, 'timestamp', default=None)) - thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage) - - # TODO 'view_count' requires deobfuscating Javascript - info.update({ - 'id': f'{video_id}_part{page_id or 1}', - 'cid': cid, - 'title': title, - 'description': description, - 'timestamp': timestamp, - 'thumbnail': thumbnail, - 'duration': float_or_none(video_info.get('timelength'), scale=1000), - }) - - uploader_mobj = re.search( - r']+href="(?:https?:)?//space\.bilibili\.com/(?P\d+)"[^>]*>\s*(?P[^<]+?)\s*<', - webpage) - if uploader_mobj: - info.update({ - 'uploader': uploader_mobj.group('name').strip(), - 'uploader_id': uploader_mobj.group('id'), - }) - - if not info.get('uploader'): - info['uploader'] = self._html_search_meta( - 'author', webpage, 'uploader', default=None) - - top_level_info = { - 'tags': traverse_obj(self._download_json( - f'https://api.bilibili.com/x/tag/archive/tags?aid={video_id}', - video_id, fatal=False, note='Downloading tags'), ('data', ..., 'tag_name')), - } - - info['subtitles'] = { + def _get_subtitles(self, video_id, aid, cid): + subtitles = { 'danmaku': [{ 'ext': 'xml', 'url': f'https://comment.bilibili.com/{cid}.xml', }] } - r''' - # Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3 - # See https://github.com/animelover1984/youtube-dl + video_info_json = self._download_json(f'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id) + for s in traverse_obj(video_info_json, ('data', 'subtitle', 'subtitles', ...)): + subtitles.setdefault(s['lan'], []).append({ + 'ext': 'srt', + 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)) + }) + return subtitles - raw_danmaku = self._download_webpage( - f'https://comment.bilibili.com/{cid}.xml', video_id, fatal=False, note='Downloading danmaku comments') - danmaku = NiconicoIE.CreateDanmaku(raw_danmaku, commentType='Bilibili', x=1024, y=576) - entries[0]['subtitles'] = { - 'danmaku': [{ - 'ext': 'ass', - 'data': danmaku - }] - } - ''' + def _get_chapters(self, aid, cid): + chapters = aid and cid and self._download_json( + 'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid}, + note='Extracting chapters', fatal=False) + return traverse_obj(chapters, ('data', 'view_points', ..., { + 'title': 'content', + 'start_time': 'from', + 'end_time': 'to', + })) or None - top_level_info['__post_extractor'] = self.extract_comments(video_id) - - for entry in entries: - entry.update(info) - - if len(entries) == 1: - entries[0].update(top_level_info) - return entries[0] - - for idx, entry in enumerate(entries): - entry['id'] = '%s_part%d' % (video_id, (idx + 1)) - - return { - 'id': str(video_id), - 'bv_id': bv_id, - 'title': title, - 'description': description, - 
**info, **top_level_info - } - - def _extract_anthology_entries(self, bv_id, video_id, webpage): - title = self._html_search_regex( - (r']+\btitle=(["\'])(?P(?:(?!\1).)+)\1', - r'(?s)<h1[^>]*>(?P<title>.+?)</h1>', - r'<title>(?P<title>.+?)'), webpage, 'title', - group='title') - json_data = self._download_json( - f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp', - video_id, note='Extracting videos in anthology') - - if json_data['data']: - return self.playlist_from_matches( - json_data['data'], bv_id, title, ie=BiliBiliIE.ie_key(), - getter=lambda entry: 'https://www.bilibili.com/video/%s?p=%d' % (bv_id, entry['page'])) - - def _get_video_id_set(self, id, is_bv): - query = {'bvid': id} if is_bv else {'aid': id} - response = self._download_json( - "http://api.bilibili.cn/x/web-interface/view", - id, query=query, - note='Grabbing original ID via API') - - if response['code'] == -400: - raise ExtractorError('Video ID does not exist', expected=True, video_id=id) - elif response['code'] != 0: - raise ExtractorError(f'Unknown error occurred during API check (code {response["code"]})', - expected=True, video_id=id) - return response['data']['aid'], response['data']['bvid'] - - def _get_comments(self, video_id, commentPageNumber=0): + def _get_comments(self, aid): for idx in itertools.count(1): replies = traverse_obj( self._download_json( - f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={video_id}&type=1&jsonp=jsonp&sort=2&_=1567227301685', - video_id, note=f'Extracting comments from page {idx}', fatal=False), + f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685', + aid, note=f'Extracting comments from page {idx}', fatal=False), ('data', 'replies')) if not replies: return @@ -434,110 +128,440 @@ class BiliBiliIE(InfoExtractor): 'timestamp': reply.get('ctime'), 'parent': reply.get('parent') or 'root', } - for children in map(self._get_all_children, reply.get('replies') or []): + for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))): yield from children -class BiliBiliBangumiIE(InfoExtractor): - _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P\d+)' - - IE_NAME = 'bangumi.bilibili.com' - IE_DESC = 'BiliBili番剧' +class BiliBiliIE(BilibiliBaseIE): + _VALID_URL = r'https?://www\.bilibili\.com/video/[aAbB][vV](?P[^/?#&]+)' _TESTS = [{ - 'url': 'http://bangumi.bilibili.com/anime/1869', + 'url': 'https://www.bilibili.com/video/BV13x41117TL', 'info_dict': { - 'id': '1869', - 'title': '混沌武士', - 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', + 'id': 'BV13x41117TL', + 'title': '阿滴英文|英文歌分享#6 "Closer', + 'ext': 'mp4', + 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 
微博@阿滴英文', + 'uploader_id': '65880958', + 'uploader': '阿滴英文', + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + 'duration': 554.117, + 'tags': list, + 'comment_count': int, + 'upload_date': '20170301', + 'timestamp': 1488353834, + 'like_count': int, + 'view_count': int, }, - 'playlist_count': 26, }, { - 'url': 'http://bangumi.bilibili.com/anime/1869', + # old av URL version + 'url': 'http://www.bilibili.com/video/av1074402/', 'info_dict': { - 'id': '1869', - 'title': '混沌武士', - 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', + 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$', + 'ext': 'mp4', + 'uploader': '菊子桑', + 'uploader_id': '156160', + 'id': 'BV11x411K7CN', + 'title': '【金坷垃】金泡沫', + 'duration': 308.36, + 'upload_date': '20140420', + 'timestamp': 1397983878, + 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', + 'like_count': int, + 'comment_count': int, + 'view_count': int, + 'tags': list, }, + 'params': {'skip_download': True}, + }, { + 'note': 'Anthology', + 'url': 'https://www.bilibili.com/video/BV1bK411W797', + 'info_dict': { + 'id': 'BV1bK411W797', + 'title': '物语中的人物是如何吐槽自己的OP的' + }, + 'playlist_count': 18, 'playlist': [{ - 'md5': '91da8621454dd58316851c27c68b0c13', 'info_dict': { - 'id': '40062', + 'id': 'BV1bK411W797_p1', 'ext': 'mp4', - 'title': '混沌武士', - 'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...', - 'timestamp': 1414538739, - 'upload_date': '20141028', - 'episode': '疾风怒涛 Tempestuous Temperaments', - 'episode_number': 1, - }, - }], - 'params': { - 'playlist_items': '1', + 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川', + 'tags': 'count:11', + 'timestamp': 1589601697, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + 'uploader': '打牌还是打桩', + 'uploader_id': '150259984', + 'like_count': int, + 'comment_count': int, + 'upload_date': '20200516', + 'view_count': int, + 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', + 'duration': 90.314, + } + }] + }, { + 'note': 'Specific page of Anthology', + 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1', + 'info_dict': { + 'id': 'BV1bK411W797_p1', + 'ext': 'mp4', + 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川', + 'tags': 'count:11', + 'timestamp': 1589601697, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + 'uploader': '打牌还是打桩', + 'uploader_id': '150259984', + 'like_count': int, + 'comment_count': int, + 'upload_date': '20200516', + 'view_count': int, + 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', + 'duration': 90.314, + } + }, { + 'note': 'video has subtitles', + 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh', + 'info_dict': { + 'id': 'BV12N4y1M7rh', + 'ext': 'mp4', + 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1', + 'tags': list, + 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4', + 'duration': 313.557, + 'upload_date': '20220709', + 'uploader': '小夫Tech', + 'timestamp': 1657347907, + 'uploader_id': '1326814124', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + 'subtitles': 'count:2' }, + 'params': {'listsubtitles': True}, + }, { + 'url': 'https://www.bilibili.com/video/av8903802/', + 'info_dict': { + 'id': 'BV13x41117TL', + 'ext': 'mp4', + 'title': '阿滴英文|英文歌分享#6 "Closer', + 'upload_date': '20170301', + 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a', + 'timestamp': 1488353834, + 'uploader_id': '65880958', + 'uploader': '阿滴英文', + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + 'duration': 554.117, + 'tags': list, + 
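For reference, the `BilibiliBaseIE.json2srt` helper introduced above turns Bilibili's JSON subtitle payload into SRT by numbering each cue and rendering its `from`/`to` seconds as SRT timecodes. A self-contained sketch with a local stand-in for `yt_dlp.utils.srt_subtitles_timecode` (the stand-in and sample cue data are illustrative):

```python
def srt_timecode(seconds):
    # SRT timecodes use HH:MM:SS,mmm
    msec = int(round(seconds * 1000))
    h, rem = divmod(msec, 3600000)
    m, rem = divmod(rem, 60000)
    s, ms = divmod(rem, 1000)
    return f'{h:02d}:{m:02d}:{s:02d},{ms:03d}'


def json2srt(body):
    # Cues are 1-indexed; each is "index, timecode range, text, blank line"
    return ''.join(
        f'{i}\n{srt_timecode(line["from"])} --> {srt_timecode(line["to"])}\n{line["content"]}\n\n'
        for i, line in enumerate(body, 1))


print(json2srt([{'from': 0.0, 'to': 2.5, 'content': '第一行'}]))
# 1
# 00:00:00,000 --> 00:00:02,500
# 第一行
```

Emitting `'ext': 'srt'` with pre-rendered `data`, as the extractor does, avoids a second request per subtitle track at download time.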
'comment_count': int, + 'view_count': int, + 'like_count': int, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'note': 'video has chapter', + 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/', + 'info_dict': { + 'id': 'BV1vL411G7N7', + 'ext': 'mp4', + 'title': '如何为你的B站视频添加进度条分段', + 'timestamp': 1634554558, + 'upload_date': '20211018', + 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d', + 'tags': list, + 'uploader': '爱喝咖啡的当麻', + 'duration': 669.482, + 'uploader_id': '1680903', + 'chapters': 'count:6', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + }, + 'params': {'skip_download': True}, }] - @classmethod - def suitable(cls, url): - return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url) + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) + play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data'] + + video_data = initial_state['videoData'] + video_id, title = video_data['bvid'], video_data.get('title') + + # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself. + page_list_json = traverse_obj( + self._download_json( + 'https://api.bilibili.com/x/player/pagelist', video_id, + fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'}, + note='Extracting videos in anthology'), + 'data', expected_type=list) or [] + is_anthology = len(page_list_json) > 1 + + part_id = int_or_none(parse_qs(url).get('p', [None])[-1]) + if is_anthology and not part_id and self._yes_playlist(video_id, video_id): + return self.playlist_from_matches( + page_list_json, video_id, title, ie=BiliBiliIE, + getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}') + + if is_anthology: + part_id = part_id or 1 + title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}' + + aid = video_data.get('aid') + old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') + + cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid') + + return { + 'id': f'{video_id}{format_field(part_id, None, "_p%d")}', + 'formats': self.extract_formats(play_info), + '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None, + 'title': title, + 'description': traverse_obj(initial_state, ('videoData', 'desc')), + 'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')), + 'uploader': traverse_obj(initial_state, ('upData', 'name')), + 'uploader_id': traverse_obj(initial_state, ('upData', 'mid')), + 'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')), + 'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')), + 'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')), + 'thumbnail': traverse_obj(initial_state, ('videoData', 'pic')), + 'timestamp': traverse_obj(initial_state, ('videoData', 'pubdate')), + 'duration': float_or_none(play_info.get('timelength'), scale=1000), + 'chapters': self._get_chapters(aid, cid), + 'subtitles': self.extract_subtitles(video_id, aid, cid), + '__post_extractor': self.extract_comments(aid), + 'http_headers': {'Referer': url}, + } + + +class BiliBiliBangumiIE(BilibiliBaseIE): + _VALID_URL = 
+        if '您所在的地区无法观看本片' in webpage:
+            raise GeoRestrictedError('This video is restricted')
+        elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage
+                or '正在观看预览,大会员免费看全片' in webpage):
+            self.raise_login_required('This video is for premium members only')

-        entries = [{
-            '_type': 'url_transparent',
-            'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
-            'ie_key': BiliBiliIE.ie_key(),
-            'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
-            'episode': episode.get('index_title'),
-            'episode_number': int_or_none(episode.get('index')),
-        } for episode in season_info['episodes']]
+        play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
+        formats = self.extract_formats(play_info)
+        if (not formats and '成为大会员抢先看' in webpage
+                and play_info.get('durl') and not play_info.get('dash')):
+            self.raise_login_required('This video is for premium members only')

-        entries = sorted(entries, key=lambda entry: entry.get('episode_number'))
+        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

-        return self.playlist_result(
-            entries, bangumi_id,
-            season_info.get('bangumi_title'), season_info.get('evaluate'))
+        season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id'))
+        season_number = season_id and next((
+            idx + 1 for idx, e in enumerate(
+                traverse_obj(initial_state, ('mediaInfo', 'seasons', ...)))
+            if e.get('season_id') == season_id
+        ), None)
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': traverse_obj(initial_state, 'h1Title'),
+            'episode': traverse_obj(initial_state, ('epInfo', 'long_title')),
+            'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))),
+            'series': traverse_obj(initial_state, ('mediaInfo', 'series')),
+            'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')),
+            'season_id': season_id,
+            'season_number': season_number,
+            'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')),
+            'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
+            'duration': float_or_none(play_info.get('timelength'), scale=1000),
+            'subtitles': self.extract_subtitles(
+                video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))),
+            '__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))),
+            'http_headers': {'Referer': url, **self.geo_verification_headers()},
+        }


-class BilibiliChannelIE(InfoExtractor):
-    _VALID_URL = r'https?://space.bilibili\.com/(?P<id>\d+)'
-    _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp"
+class BiliBiliBangumiMediaIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
+        'info_dict': {
+            'id': '24097891',
+        },
+        'playlist_mincount': 25,
+    }]
+
+    def _real_extract(self, url):
+        media_id = self._match_id(url)
+        webpage = self._download_webpage(url, media_id)
+
+        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
+        episode_list = self._download_json(
+            'https://api.bilibili.com/pgc/web/season/section', media_id,
+            query={'season_id': initial_state['mediaInfo']['season_id']},
+            note='Downloading season info')['result']['main_section']['episodes']
+
+        return self.playlist_result((
+            self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid'])
+            for entry in episode_list), media_id)
+
+
+class BilibiliSpaceBaseIE(InfoExtractor):
+    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
+        first_page = fetch_page(0)
+        metadata = get_metadata(first_page)
+
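+        # InAdvancePagedList is used because the first page already reports the total
+        # page count, so the remaining pages can be fetched lazily; page 0 is reused
+        # from above rather than fetched a second time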
+        paged_list = InAdvancePagedList(
+            lambda idx: get_entries(fetch_page(idx) if idx else first_page),
+            metadata['page_count'], metadata['page_size'])
+
+        return metadata, paged_list
+
+
+class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
+    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P